Merge branch 'hotplug' into devel

Conflicts:
	arch/arm/kernel/head-common.S
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 58a3e63..bbecaac 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -15,55 +15,6 @@
 #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
 #define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2)
 
-	.align	2
-	.type	__switch_data, %object
-__switch_data:
-	.long	__mmap_switched
-	.long	__data_loc			@ r4
-	.long	_sdata				@ r5
-	.long	__bss_start			@ r6
-	.long	_end				@ r7
-	.long	processor_id			@ r4
-	.long	__machine_arch_type		@ r5
-	.long	__atags_pointer			@ r6
-	.long	cr_alignment			@ r7
-	.long	init_thread_union + THREAD_START_SP @ sp
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode,
- * and uses absolute addresses; this is not position independent.
- *
- *  r0  = cp#15 control register
- *  r1  = machine ID
- *  r2  = atags pointer
- *  r9  = processor ID
- */
-__mmap_switched:
-	adr	r3, __switch_data + 4
-
-	ldmia	r3!, {r4, r5, r6, r7}
-	cmp	r4, r5				@ Copy data segment if needed
-1:	cmpne	r5, r6
-	ldrne	fp, [r4], #4
-	strne	fp, [r5], #4
-	bne	1b
-
-	mov	fp, #0				@ Clear BSS (and zero fp)
-1:	cmp	r6, r7
-	strcc	fp, [r6],#4
-	bcc	1b
-
- ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
- THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
- THUMB(	ldr	sp, [r3, #16]		)
-	str	r9, [r4]			@ Save processor ID
-	str	r1, [r5]			@ Save machine type
-	str	r2, [r6]			@ Save atags pointer
-	bic	r4, r0, #CR_A			@ Clear 'A' bit
-	stmia	r7, {r0, r4}			@ Save control register values
-	b	start_kernel
-ENDPROC(__mmap_switched)
-
 /*
  * Exception handling.  Something went wrong and we can't proceed.  We
  * ought to tell the user, but since we don't have any guarantee that
@@ -73,21 +24,7 @@
  * and hope for the best (useful if bootloader fails to pass a proper
  * machine ID for example).
  */
-__error_p:
-#ifdef CONFIG_DEBUG_LL
-	adr	r0, str_p1
-	bl	printascii
-	mov	r0, r9
-	bl	printhex8
-	adr	r0, str_p2
-	bl	printascii
-	b	__error
-str_p1:	.asciz	"\nError: unrecognized/unsupported processor variant (0x"
-str_p2:	.asciz	").\n"
-	.align
-#endif
-ENDPROC(__error_p)
-
+	__HEAD
 __error_a:
 #ifdef CONFIG_DEBUG_LL
 	mov	r4, r1				@ preserve machine ID
@@ -97,7 +34,7 @@
 	bl	printhex8
 	adr	r0, str_a2
 	bl	printascii
-	adr	r3, 4f
+	adr	r3, __lookup_machine_type_data
 	ldmia	r3, {r4, r5, r6}		@ get machine desc list
 	sub	r4, r3, r4			@ get offset between virt&phys
 	add	r5, r5, r4			@ convert virt addresses to
@@ -125,78 +62,6 @@
 	.align
 #endif
 
-__error:
-#ifdef CONFIG_ARCH_RPC
-/*
- * Turn the screen red on a error - RiscPC only.
- */
-	mov	r0, #0x02000000
-	mov	r3, #0x11
-	orr	r3, r3, r3, lsl #8
-	orr	r3, r3, r3, lsl #16
-	str	r3, [r0], #4
-	str	r3, [r0], #4
-	str	r3, [r0], #4
-	str	r3, [r0], #4
-#endif
-1:	mov	r0, r0
-	b	1b
-ENDPROC(__error)
-
-
-/*
- * Read processor ID register (CP#15, CR0), and look up in the linker-built
- * supported processor list.  Note that we can't use the absolute addresses
- * for the __proc_info lists since we aren't running with the MMU on
- * (and therefore, we are not in the correct address space).  We have to
- * calculate the offset.
- *
- *	r9 = cpuid
- * Returns:
- *	r3, r4, r6 corrupted
- *	r5 = proc_info pointer in physical address space
- *	r9 = cpuid (preserved)
- */
-__lookup_processor_type:
-	adr	r3, 3f
-	ldmia	r3, {r5 - r7}
-	add	r3, r3, #8
-	sub	r3, r3, r7			@ get offset between virt&phys
-	add	r5, r5, r3			@ convert virt addresses to
-	add	r6, r6, r3			@ physical address space
-1:	ldmia	r5, {r3, r4}			@ value, mask
-	and	r4, r4, r9			@ mask wanted bits
-	teq	r3, r4
-	beq	2f
-	add	r5, r5, #PROC_INFO_SZ		@ sizeof(proc_info_list)
-	cmp	r5, r6
-	blo	1b
-	mov	r5, #0				@ unknown processor
-2:	mov	pc, lr
-ENDPROC(__lookup_processor_type)
-
-/*
- * This provides a C-API version of the above function.
- */
-ENTRY(lookup_processor_type)
-	stmfd	sp!, {r4 - r7, r9, lr}
-	mov	r9, r0
-	bl	__lookup_processor_type
-	mov	r0, r5
-	ldmfd	sp!, {r4 - r7, r9, pc}
-ENDPROC(lookup_processor_type)
-
-/*
- * Look in <asm/procinfo.h> and arch/arm/kernel/arch.[ch] for
- * more information about the __proc_info and __arch_info structures.
- */
-	.align	2
-3:	.long	__proc_info_begin
-	.long	__proc_info_end
-4:	.long	.
-	.long	__arch_info_begin
-	.long	__arch_info_end
-
 /*
  * Lookup machine architecture in the linker-build list of architectures.
  * Note that we can't use the absolute addresses for the __arch_info
@@ -209,7 +74,7 @@
  *  r5 = mach_info pointer in physical address space
  */
 __lookup_machine_type:
-	adr	r3, 4b
+	adr	r3, __lookup_machine_type_data
 	ldmia	r3, {r4, r5, r6}
 	sub	r3, r3, r4			@ get offset between virt&phys
 	add	r5, r5, r3			@ convert virt addresses to
@@ -225,15 +90,16 @@
 ENDPROC(__lookup_machine_type)
 
 /*
- * This provides a C-API version of the above function.
+ * Look in arch/arm/kernel/arch.[ch] for information about the
+ * __arch_info structures.
  */
-ENTRY(lookup_machine_type)
-	stmfd	sp!, {r4 - r6, lr}
-	mov	r1, r0
-	bl	__lookup_machine_type
-	mov	r0, r5
-	ldmfd	sp!, {r4 - r6, pc}
-ENDPROC(lookup_machine_type)
+	.align	2
+	.type	__lookup_machine_type_data, %object
+__lookup_machine_type_data:
+	.long	.
+	.long	__arch_info_begin
+	.long	__arch_info_end
+	.size	__lookup_machine_type_data, . - __lookup_machine_type_data
 
 /* Determine validity of the r2 atags pointer.  The heuristic requires
  * that the pointer be aligned, in the first 16k of physical RAM and
@@ -265,3 +131,150 @@
 1:	mov	r2, #0
 	mov	pc, lr
 ENDPROC(__vet_atags)
+
+/*
+ * The following fragment of code is executed with the MMU on in MMU mode,
+ * and uses absolute addresses; this is not position independent.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags pointer
+ *  r9  = processor ID
+ */
+	__INIT
+__mmap_switched:
+	adr	r3, __mmap_switched_data
+
+	ldmia	r3!, {r4, r5, r6, r7}
+	cmp	r4, r5				@ Copy data segment if needed
+1:	cmpne	r5, r6
+	ldrne	fp, [r4], #4
+	strne	fp, [r5], #4
+	bne	1b
+
+	mov	fp, #0				@ Clear BSS (and zero fp)
+1:	cmp	r6, r7
+	strcc	fp, [r6],#4
+	bcc	1b
+
+ ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
+ THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
+ THUMB(	ldr	sp, [r3, #16]		)
+	str	r9, [r4]			@ Save processor ID
+	str	r1, [r5]			@ Save machine type
+	str	r2, [r6]			@ Save atags pointer
+	bic	r4, r0, #CR_A			@ Clear 'A' bit
+	stmia	r7, {r0, r4}			@ Save control register values
+	b	start_kernel
+ENDPROC(__mmap_switched)
+
+	.align	2
+	.type	__mmap_switched_data, %object
+__mmap_switched_data:
+	.long	__data_loc			@ r4
+	.long	_sdata				@ r5
+	.long	__bss_start			@ r6
+	.long	_end				@ r7
+	.long	processor_id			@ r4
+	.long	__machine_arch_type		@ r5
+	.long	__atags_pointer			@ r6
+	.long	cr_alignment			@ r7
+	.long	init_thread_union + THREAD_START_SP @ sp
+	.size	__mmap_switched_data, . - __mmap_switched_data
+
+/*
+ * This provides a C-API version of __lookup_machine_type
+ */
+ENTRY(lookup_machine_type)
+	stmfd	sp!, {r4 - r6, lr}
+	mov	r1, r0
+	bl	__lookup_machine_type
+	mov	r0, r5
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(lookup_machine_type)
+
+/*
+ * This provides a C-API version of __lookup_processor_type
+ */
+ENTRY(lookup_processor_type)
+	stmfd	sp!, {r4 - r6, r9, lr}
+	mov	r9, r0
+	bl	__lookup_processor_type
+	mov	r0, r5
+	ldmfd	sp!, {r4 - r6, r9, pc}
+ENDPROC(lookup_processor_type)
+
+/*
+ * Read processor ID register (CP#15, CR0), and look up in the linker-built
+ * supported processor list.  Note that we can't use the absolute addresses
+ * for the __proc_info lists since we aren't running with the MMU on
+ * (and therefore, we are not in the correct address space).  We have to
+ * calculate the offset.
+ *
+ *	r9 = cpuid
+ * Returns:
+ *	r3, r4, r6 corrupted
+ *	r5 = proc_info pointer in physical address space
+ *	r9 = cpuid (preserved)
+ */
+	__CPUINIT
+__lookup_processor_type:
+	adr	r3, __lookup_processor_type_data
+	ldmia	r3, {r4 - r6}
+	sub	r3, r3, r4			@ get offset between virt&phys
+	add	r5, r5, r3			@ convert virt addresses to
+	add	r6, r6, r3			@ physical address space
+1:	ldmia	r5, {r3, r4}			@ value, mask
+	and	r4, r4, r9			@ mask wanted bits
+	teq	r3, r4
+	beq	2f
+	add	r5, r5, #PROC_INFO_SZ		@ sizeof(proc_info_list)
+	cmp	r5, r6
+	blo	1b
+	mov	r5, #0				@ unknown processor
+2:	mov	pc, lr
+ENDPROC(__lookup_processor_type)
+
+/*
+ * Look in <asm/procinfo.h> for information about the __proc_info structure.
+ */
+	.align	2
+	.type	__lookup_processor_type_data, %object
+__lookup_processor_type_data:
+	.long	.
+	.long	__proc_info_begin
+	.long	__proc_info_end
+	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
+
+__error_p:
+#ifdef CONFIG_DEBUG_LL
+	adr	r0, str_p1
+	bl	printascii
+	mov	r0, r9
+	bl	printhex8
+	adr	r0, str_p2
+	bl	printascii
+	b	__error
+str_p1:	.asciz	"\nError: unrecognized/unsupported processor variant (0x"
+str_p2:	.asciz	").\n"
+	.align
+#endif
+ENDPROC(__error_p)
+
+__error:
+#ifdef CONFIG_ARCH_RPC
+/*
+ * Turn the screen red on a error - RiscPC only.
+ */
+	mov	r0, #0x02000000
+	mov	r3, #0x11
+	orr	r3, r3, r3, lsl #8
+	orr	r3, r3, r3, lsl #16
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+	str	r3, [r0], #4
+#endif
+1:	mov	r0, r0
+	b	1b
+ENDPROC(__error)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 573b803..814ce1a 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -48,8 +48,6 @@
 	movs	r8, r5				@ invalid machine (r5=0)?
 	beq	__error_a			@ yes, error 'a'
 
-	ldr	r13, __switch_data		@ address to jump to after
-						@ the initialization is done
 	adr	lr, BSYM(__after_proc_init)	@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
@@ -87,8 +85,7 @@
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
 
-	mov	r3, r13
-	mov	pc, r3				@ clear the BSS and jump
+	b	__mmap_switched			@ clear the BSS and jump
 						@ to start_kernel
 ENDPROC(__after_proc_init)
 	.ltorg
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index b44d21e..7673904 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -98,113 +98,15 @@
 	 * above.  On return, the CPU will be ready for the MMU to be
 	 * turned on, and r0 will hold the CPU control register value.
 	 */
-	ldr	r13, __switch_data		@ address to jump to after
+	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ mmu has been enabled
-	adr	lr, BSYM(__enable_mmu)		@ return (PIC) address
+	adr	lr, BSYM(1f)			@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
  THUMB(	mov	pc, r12				)
+1:	b	__enable_mmu
 ENDPROC(stext)
-
-#if defined(CONFIG_SMP)
-ENTRY(secondary_startup)
-	/*
-	 * Common entry point for secondary CPUs.
-	 *
-	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
-	 * the processor type - there is no need to check the machine type
-	 * as it has already been validated by the primary processor.
-	 */
-	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
-	mrc	p15, 0, r9, c0, c0		@ get processor id
-	bl	__lookup_processor_type
-	movs	r10, r5				@ invalid processor?
-	moveq	r0, #'p'			@ yes, error 'p'
-	beq	__error
-
-	/*
-	 * Use the page tables supplied from  __cpu_up.
-	 */
-	adr	r4, __secondary_data
-	ldmia	r4, {r5, r7, r12}		@ address to jump to after
-	sub	r4, r4, r5			@ mmu has been enabled
-	ldr	r4, [r7, r4]			@ get secondary_data.pgdir
-	adr	lr, BSYM(__enable_mmu)		@ return address
-	mov	r13, r12			@ __secondary_switched address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
-						  @ (return control reg)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	mov	pc, r12				)
-ENDPROC(secondary_startup)
-
-	/*
-	 * r6  = &secondary_data
-	 */
-ENTRY(__secondary_switched)
-	ldr	sp, [r7, #4]			@ get secondary_data.stack
-	mov	fp, #0
-	b	secondary_start_kernel
-ENDPROC(__secondary_switched)
-
-	.type	__secondary_data, %object
-__secondary_data:
-	.long	.
-	.long	secondary_data
-	.long	__secondary_switched
-#endif /* defined(CONFIG_SMP) */
-
-
-
-/*
- * Setup common bits before finally enabling the MMU.  Essentially
- * this is just loading the page table pointer and domain access
- * registers.
- */
-__enable_mmu:
-#ifdef CONFIG_ALIGNMENT_TRAP
-	orr	r0, r0, #CR_A
-#else
-	bic	r0, r0, #CR_A
-#endif
-#ifdef CONFIG_CPU_DCACHE_DISABLE
-	bic	r0, r0, #CR_C
-#endif
-#ifdef CONFIG_CPU_BPREDICT_DISABLE
-	bic	r0, r0, #CR_Z
-#endif
-#ifdef CONFIG_CPU_ICACHE_DISABLE
-	bic	r0, r0, #CR_I
-#endif
-	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
-	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
-	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
-	b	__turn_mmu_on
-ENDPROC(__enable_mmu)
-
-/*
- * Enable the MMU.  This completely changes the structure of the visible
- * memory space.  You will not be able to trace execution through this.
- * If you have an enquiry about this, *please* check the linux-arm-kernel
- * mailing list archives BEFORE sending another post to the list.
- *
- *  r0  = cp#15 control register
- *  r13 = *virtual* address to jump to upon completion
- *
- * other registers depend on the function called upon completion
- */
-	.align	5
-__turn_mmu_on:
-	mov	r0, r0
-	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
-	mrc	p15, 0, r3, c0, c0, 0		@ read id reg
-	mov	r3, r3
-	mov	r3, r13
-	mov	pc, r3
-ENDPROC(__turn_mmu_on)
-
+	.ltorg
 
 /*
  * Setup the initial page tables.  We only setup the barest
@@ -216,7 +118,7 @@
  * r10 = procinfo
  *
  * Returns:
- *  r0, r3, r6, r7 corrupted
+ *  r0, r3, r5-r7 corrupted
  *  r4 = physical page table address
  */
 __create_page_tables:
@@ -238,20 +140,30 @@
 	ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
 
 	/*
-	 * Create identity mapping for first MB of kernel to
-	 * cater for the MMU enable.  This identity mapping
-	 * will be removed by paging_init().  We use our current program
-	 * counter to determine corresponding section base address.
+	 * Create identity mapping to cater for __enable_mmu.
+	 * This identity mapping will be removed by paging_init().
 	 */
-	mov	r6, pc
-	mov	r6, r6, lsr #20			@ start of kernel section
-	orr	r3, r7, r6, lsl #20		@ flags + kernel base
-	str	r3, [r4, r6, lsl #2]		@ identity mapping
+	adr	r0, __enable_mmu_loc
+	ldmia	r0, {r3, r5, r6}
+	sub	r0, r0, r3			@ virt->phys offset
+	add	r5, r5, r0			@ phys __enable_mmu
+	add	r6, r6, r0			@ phys __enable_mmu_end
+	mov	r5, r5, lsr #20
+	mov	r6, r6, lsr #20
+
+1:	orr	r3, r7, r5, lsl #20		@ flags + kernel base
+	str	r3, [r4, r5, lsl #2]		@ identity mapping
+	teq	r5, r6
+	addne	r5, r5, #1			@ next section
+	bne	1b
 
 	/*
 	 * Now setup the pagetables for our kernel direct
 	 * mapped region.
 	 */
+	mov	r3, pc
+	mov	r3, r3, lsr #20
+	orr	r3, r7, r3, lsl #20
 	add	r0, r4,  #(KERNEL_START & 0xff000000) >> 18
 	str	r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
 	ldr	r6, =(KERNEL_END - 1)
@@ -335,6 +247,122 @@
 	mov	pc, lr
 ENDPROC(__create_page_tables)
 	.ltorg
+__enable_mmu_loc:
+	.long	.
+	.long	__enable_mmu
+	.long	__enable_mmu_end
+
+#if defined(CONFIG_SMP)
+	__CPUINIT
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 *
+	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
+	 * the processor type - there is no need to check the machine type
+	 * as it has already been validated by the primary processor.
+	 */
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+	bl	__lookup_processor_type
+	movs	r10, r5				@ invalid processor?
+	moveq	r0, #'p'			@ yes, error 'p'
+	beq	__error_p
+
+	/*
+	 * Use the page tables supplied from  __cpu_up.
+	 */
+	adr	r4, __secondary_data
+	ldmia	r4, {r5, r7, r12}		@ address to jump to after
+	sub	r4, r4, r5			@ mmu has been enabled
+	ldr	r4, [r7, r4]			@ get secondary_data.pgdir
+	adr	lr, BSYM(__enable_mmu)		@ return address
+	mov	r13, r12			@ __secondary_switched address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
+						  @ (return control reg)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
+ENDPROC(secondary_startup)
+
+	/*
+	 * r6  = &secondary_data
+	 */
+ENTRY(__secondary_switched)
+	ldr	sp, [r7, #4]			@ get secondary_data.stack
+	mov	fp, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+	.type	__secondary_data, %object
+__secondary_data:
+	.long	.
+	.long	secondary_data
+	.long	__secondary_switched
+#endif /* defined(CONFIG_SMP) */
+
+
+
+/*
+ * Setup common bits before finally enabling the MMU.  Essentially
+ * this is just loading the page table pointer and domain access
+ * registers.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags pointer
+ *  r4  = page table pointer
+ *  r9  = processor ID
+ *  r13 = *virtual* address to jump to upon completion
+ */
+__enable_mmu:
+#ifdef CONFIG_ALIGNMENT_TRAP
+	orr	r0, r0, #CR_A
+#else
+	bic	r0, r0, #CR_A
+#endif
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CR_C
+#endif
+#ifdef CONFIG_CPU_BPREDICT_DISABLE
+	bic	r0, r0, #CR_Z
+#endif
+#ifdef CONFIG_CPU_ICACHE_DISABLE
+	bic	r0, r0, #CR_I
+#endif
+	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
+		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
+	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+	b	__turn_mmu_on
+ENDPROC(__enable_mmu)
+
+/*
+ * Enable the MMU.  This completely changes the structure of the visible
+ * memory space.  You will not be able to trace execution through this.
+ * If you have an enquiry about this, *please* check the linux-arm-kernel
+ * mailing list archives BEFORE sending another post to the list.
+ *
+ *  r0  = cp#15 control register
+ *  r1  = machine ID
+ *  r2  = atags pointer
+ *  r9  = processor ID
+ *  r13 = *virtual* address to jump to upon completion
+ *
+ * other registers depend on the function called upon completion
+ */
+	.align	5
+__turn_mmu_on:
+	mov	r0, r0
+	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
+	mrc	p15, 0, r3, c0, c0, 0		@ read id reg
+	mov	r3, r3
+	mov	r3, r13
+	mov	pc, r3
+__enable_mmu_end:
+ENDPROC(__turn_mmu_on)
+
 
 #ifdef CONFIG_SMP_ON_UP
 __fixup_smp:
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 32e16da..8c19595 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
 #include <asm/localtimer.h>
@@ -67,12 +68,47 @@
 	IPI_CPU_STOP,
 };
 
+static inline void identity_mapping_add(pgd_t *pgd, unsigned long start,
+	unsigned long end)
+{
+	unsigned long addr, prot;
+	pmd_t *pmd;
+
+	prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE;
+	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
+		prot |= PMD_BIT4;
+
+	for (addr = start & PGDIR_MASK; addr < end;) {
+		pmd = pmd_offset(pgd + pgd_index(addr), addr);
+		pmd[0] = __pmd(addr | prot);
+		addr += SECTION_SIZE;
+		pmd[1] = __pmd(addr | prot);
+		addr += SECTION_SIZE;
+		flush_pmd_entry(pmd);
+		outer_clean_range(__pa(pmd), __pa(pmd + 1));
+	}
+}
+
+static inline void identity_mapping_del(pgd_t *pgd, unsigned long start,
+	unsigned long end)
+{
+	unsigned long addr;
+	pmd_t *pmd;
+
+	for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) {
+		pmd = pmd_offset(pgd + pgd_index(addr), addr);
+		pmd[0] = __pmd(0);
+		pmd[1] = __pmd(0);
+		clean_pmd_entry(pmd);
+		outer_clean_range(__pa(pmd), __pa(pmd + 1));
+	}
+}
+
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu);
 	struct task_struct *idle = ci->idle;
 	pgd_t *pgd;
-	pmd_t *pmd;
 	int ret;
 
 	/*
@@ -101,11 +137,16 @@
 	 * a 1:1 mapping for the physical address of the kernel.
 	 */
 	pgd = pgd_alloc(&init_mm);
-	pmd = pmd_offset(pgd + pgd_index(PHYS_OFFSET), PHYS_OFFSET);
-	*pmd = __pmd((PHYS_OFFSET & PGDIR_MASK) |
-		     PMD_TYPE_SECT | PMD_SECT_AP_WRITE);
-	flush_pmd_entry(pmd);
-	outer_clean_range(__pa(pmd), __pa(pmd + 1));
+	if (!pgd)
+		return -ENOMEM;
+
+	if (PHYS_OFFSET != PAGE_OFFSET) {
+#ifndef CONFIG_HOTPLUG_CPU
+		identity_mapping_add(pgd, __pa(__init_begin), __pa(__init_end));
+#endif
+		identity_mapping_add(pgd, __pa(_stext), __pa(_etext));
+		identity_mapping_add(pgd, __pa(_sdata), __pa(_edata));
+	}
 
 	/*
 	 * We need to tell the secondary core where to find
@@ -143,8 +184,14 @@
 	secondary_data.stack = NULL;
 	secondary_data.pgdir = 0;
 
-	*pmd = __pmd(0);
-	clean_pmd_entry(pmd);
+	if (PHYS_OFFSET != PAGE_OFFSET) {
+#ifndef CONFIG_HOTPLUG_CPU
+		identity_mapping_del(pgd, __pa(__init_begin), __pa(__init_end));
+#endif
+		identity_mapping_del(pgd, __pa(_stext), __pa(_etext));
+		identity_mapping_del(pgd, __pa(_sdata), __pa(_edata));
+	}
+
 	pgd_free(&init_mm, pgd);
 
 	if (ret) {
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 065d35d..1953e3d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -8,6 +8,19 @@
 #include <asm/memory.h>
 #include <asm/page.h>
 	
+#define PROC_INFO							\
+	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
+	*(.proc.info.init)						\
+	VMLINUX_SYMBOL(__proc_info_end) = .;
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define ARM_CPU_DISCARD(x)
+#define ARM_CPU_KEEP(x)		x
+#else
+#define ARM_CPU_DISCARD(x)	x
+#define ARM_CPU_KEEP(x)
+#endif
+
 OUTPUT_ARCH(arm)
 ENTRY(stext)
 
@@ -31,9 +44,7 @@
 			HEAD_TEXT
 			INIT_TEXT
 		_einittext = .;
-		__proc_info_begin = .;
-			*(.proc.info.init)
-		__proc_info_end = .;
+		ARM_CPU_DISCARD(PROC_INFO)
 		__arch_info_begin = .;
 			*(.arch.info.init)
 		__arch_info_end = .;
@@ -73,10 +84,8 @@
 	/DISCARD/ : {
 		*(.ARM.exidx.exit.text)
 		*(.ARM.extab.exit.text)
-#ifndef CONFIG_HOTPLUG_CPU
-		*(.ARM.exidx.cpuexit.text)
-		*(.ARM.extab.cpuexit.text)
-#endif
+		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
+		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
 #ifndef CONFIG_HOTPLUG
 		*(.ARM.exidx.devexit.text)
 		*(.ARM.extab.devexit.text)
@@ -105,6 +114,7 @@
 			*(.glue_7)
 			*(.glue_7t)
 		*(.got)			/* Global offset table		*/
+			ARM_CPU_KEEP(PROC_INFO)
 	}
 
 	RO_DATA(PAGE_SIZE)
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 203a4e9..a6f5f84 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -430,7 +430,7 @@
 #endif /* CONFIG_MMU */
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm1020_setup, #function
 __arm1020_setup:
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 1a511e7..afc06b9 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -412,7 +412,7 @@
 #endif /* CONFIG_MMU */
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm1020e_setup, #function
 __arm1020e_setup:
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 1ffa4eb..8915e0b 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -394,7 +394,7 @@
 #endif /* CONFIG_MMU */
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm1022_setup, #function
 __arm1022_setup:
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 5697c34..ff446c5 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -384,7 +384,7 @@
 	mov	pc, lr
 
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm1026_setup, #function
 __arm1026_setup:
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index 64e0b32..6a7be18 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -238,7 +238,7 @@
 		mcr	p15, 0, r1, c1, c0, 0		@ turn off MMU etc
 		mov	pc, r0
 
-		__INIT
+		__CPUINIT
 
 		.type	__arm6_setup, #function
 __arm6_setup:	mov	r0, #0
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 9d96824..c285395 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -113,7 +113,7 @@
 		mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
 		mov	pc, r0
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm710_setup, #function
 __arm710_setup:
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 6c1a9ab..38b27dc 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -55,7 +55,7 @@
 	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
 	mov	pc, r0
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm740_setup, #function
 __arm740_setup:
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 6a850db..0c9786d 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -46,7 +46,7 @@
 ENTRY(cpu_arm7tdmi_reset)
 		mov	pc, r0
 
-		__INIT
+		__CPUINIT
 
 		.type	__arm7tdmi_setup, #function
 __arm7tdmi_setup:
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 86f80aa..fecf570 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -375,7 +375,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm920_setup, #function
 __arm920_setup:
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index f76ce9b..e3cbf87 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -379,7 +379,7 @@
 #endif /* CONFIG_MMU */
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm922_setup, #function
 __arm922_setup:
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 657bd3f..572424c 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -428,7 +428,7 @@
 #endif /* CONFIG_MMU */
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm925_setup, #function
 __arm925_setup:
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 73f1f3c..63d168b 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -389,7 +389,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm926_setup, #function
 __arm926_setup:
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index fffb061..f6a6282 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -264,7 +264,7 @@
 	.long	arm940_dma_unmap_area
 	.long	arm940_dma_flush_range
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm940_setup, #function
 __arm940_setup:
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 249a605..ea2e7f2 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -317,7 +317,7 @@
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__arm946_setup, #function
 __arm946_setup:
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index db47566..db67e31 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -46,7 +46,7 @@
 ENTRY(cpu_arm9tdmi_reset)
 		mov	pc, r0
 
-		__INIT
+		__CPUINIT
 
 		.type	__arm9tdmi_setup, #function
 __arm9tdmi_setup:
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 7803fdf..7c9ad62 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -134,7 +134,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__fa526_setup, #function
 __fa526_setup:
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index b304d01..578da69 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -494,7 +494,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__feroceon_setup, #function
 __feroceon_setup:
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 5f6892f..4458ee6 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -338,7 +338,7 @@
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__mohawk_setup, #function
 __mohawk_setup:
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index a201eb0..5aa8d59 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -156,7 +156,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__sa110_setup, #function
 __sa110_setup:
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 7ddc480..2ac4e6f 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -169,7 +169,7 @@
 #endif
 	mov	pc, lr
 
-	__INIT
+	__CPUINIT
 
 	.type	__sa1100_setup, #function
 __sa1100_setup:
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index b95662d..59a7e1f 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -135,7 +135,7 @@
 
 	.align
 
-	__INIT
+	__CPUINIT
 
 /*
  *	__v6_setup
@@ -193,6 +193,8 @@
 v6_crval:
 	crval	clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c
 
+	__INITDATA
+
 	.type	v6_processor_functions, #object
 ENTRY(v6_processor_functions)
 	.word	v6_early_abort
@@ -206,6 +208,8 @@
 	.word	cpu_v6_set_pte_ext
 	.size	v6_processor_functions, . - v6_processor_functions
 
+	.section ".rodata"
+
 	.type	cpu_arch_name, #object
 cpu_arch_name:
 	.asciz	"armv6"
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index df422fe..055e9d5 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -168,7 +168,7 @@
 	.ascii	"ARMv7 Processor"
 	.align
 
-	__INIT
+	__CPUINIT
 
 /*
  *	__v7_setup
@@ -325,6 +325,8 @@
 __v7_setup_stack:
 	.space	4 * 11				@ 11 registers
 
+	__INITDATA
+
 	.type	v7_processor_functions, #object
 ENTRY(v7_processor_functions)
 	.word	v7_early_abort
@@ -338,6 +340,8 @@
 	.word	cpu_v7_set_pte_ext
 	.size	v7_processor_functions, . - v7_processor_functions
 
+	.section ".rodata"
+
 	.type	cpu_arch_name, #object
 cpu_arch_name:
 	.asciz	"armv7"
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 361a51e..cad07e4 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -404,7 +404,7 @@
 
 	.align
 
-	__INIT
+	__CPUINIT
 
 	.type	__xsc3_setup, #function
 __xsc3_setup:
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 1407597..cb245ed 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -506,7 +506,7 @@
 
 	.align
 
-	__INIT
+	__CPUINIT
 
 	.type	__xscale_setup, #function
 __xscale_setup: