powerpc: Support for relocatable kdump kernel

This adds relocatable kernel support for kdump. With this, the same
regular kernel can also be used to capture the kdump. A signature
(0xfeed1234) is passed in r6 from the panic code to the next kernel
through kexec_sequence and the purgatory code. The signature is used to
differentiate between kdump and non-kdump kernels.

The purgatory code compares the signature and sets __kdump_flag in
head_64.S.  During boot, the kernel code checks __kdump_flag and, if it
is set, the kernel behaves as a relocatable kdump kernel. This kernel
boots at the address where it was loaded by kexec-tools, i.e. at the
address reserved through the crashkernel boot parameter.

CONFIG_CRASH_DUMP depends on the CONFIG_RELOCATABLE option so that the
kdump kernel is built as relocatable. The same kernel can therefore be
used as both the production kernel and the kdump kernel.

This patch incorporates the changes suggested by Paul Mackerras to avoid
GOT use and to avoid two copies of the code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 97e0563..19671ac 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -30,6 +30,7 @@
 /* Stores the physical address of elf header of crash image. */
 unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 
+#ifndef CONFIG_RELOCATABLE
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -68,6 +69,7 @@
 
 	DBG(" <- setup_kdump_trampoline()\n");
 }
+#endif /* CONFIG_RELOCATABLE */
 
 /*
  * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 84856be..69489bd 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -97,6 +97,12 @@
 __secondary_hold_acknowledge:
 	.llong	0x0
 
+	/* This flag is set by purgatory if we should be a kdump kernel. */
+	/* Do not move this variable as purgatory knows about it. */
+	.globl	__kdump_flag
+__kdump_flag:
+	.llong	0x0
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -1384,7 +1390,13 @@
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
-	mr	r3,r25
+#ifdef CONFIG_CRASH_DUMP
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1	/* kdump kernel ? - stay where we are */
+	bne	1f
+	add	r25,r25,r26
+#endif
+1:	mr	r3,r25
 	bl	.relocate
 #endif
 
@@ -1398,11 +1410,26 @@
 	li	r3,0			/* target addr */
 	mr.	r4,r26			/* In some cases the loader may  */
 	beq	9f			/* have already put us at zero */
-	lis	r5,(copy_to_here - _stext)@ha
-	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 	li	r6,0x100		/* Start offset, the first 0x100 */
 					/* bytes were copied earlier.	 */
 
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1
+	bne	3f
+
+	li	r5,__end_interrupts - _stext	/* just copy interrupts */
+	b	5f
+3:
+#endif
+	lis	r5,(copy_to_here - _stext)@ha
+	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+
 	bl	.copy_and_flush		/* copy the first n bytes	 */
 					/* this includes the code being	 */
 					/* executed here.		 */
@@ -1411,15 +1438,15 @@
 	mtctr	r8
 	bctr
 
+p_end:	.llong	_end - _stext
+
 4:	/* Now copy the rest of the kernel up to _end */
 	addis	r5,r26,(p_end - _stext)@ha
 	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
-	bl	.copy_and_flush		/* copy the rest */
+5:	bl	.copy_and_flush		/* copy the rest */
 
 9:	b	.start_here_multiplatform
 
-p_end:	.llong	_end - _stext
-
 /*
  * Copy routine used to copy the kernel to start at physical address 0
  * and flush and invalidate the caches as needed.
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ea1ba89..3857d7e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -458,6 +458,42 @@
 	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
+static void iommu_table_clear(struct iommu_table *tbl)
+{
+	if (!__kdump_flag) {
+		/* Not a kdump boot: clear what firmware may have left mapped */
+		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+		return;
+	}
+	/* kdump boot: the table is not cleared; live entries are kept below */
+#ifdef CONFIG_CRASH_DUMP
+	if (ppc_md.tce_get) {
+		unsigned long index, tceval, tcecount = 0;
+
+		/* Reserve the existing mappings left by the first kernel. */
+		for (index = 0; index < tbl->it_size; index++) {
+			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+			/*
+			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
+			 */
+			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+				__set_bit(index, tbl->it_map);
+				tcecount++;
+			}
+		}
+		/* Too few free entries: release the last ones in the bitmap */
+		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+			printk(KERN_WARNING "TCE table is full; freeing "
+				"%d entries for the kdump boot\n",
+				KDUMP_MIN_TCE_ENTRIES);
+			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+				index < tbl->it_size; index++)
+				__clear_bit(index, tbl->it_map);
+		}
+	}
+#endif
+}
+
 /*
  * Build a iommu_table structure.  This contains a bit map which
  * is used to manage allocation of the tce space.
@@ -484,38 +520,7 @@
 	tbl->it_largehint = tbl->it_halfpoint;
 	spin_lock_init(&tbl->it_lock);
 
-#ifdef CONFIG_CRASH_DUMP
-	if (ppc_md.tce_get) {
-		unsigned long index;
-		unsigned long tceval;
-		unsigned long tcecount = 0;
-
-		/*
-		 * Reserve the existing mappings left by the first kernel.
-		 */
-		for (index = 0; index < tbl->it_size; index++) {
-			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
-			/*
-			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
-			 */
-			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
-				__set_bit(index, tbl->it_map);
-				tcecount++;
-			}
-		}
-		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
-			printk(KERN_WARNING "TCE table is full; ");
-			printk(KERN_WARNING "freeing %d entries for the kdump boot\n",
-				KDUMP_MIN_TCE_ENTRIES);
-			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
-				index < tbl->it_size; index++)
-				__clear_bit(index, tbl->it_map);
-		}
-	}
-#else
-	/* Clear the hardware table in case firmware left allocations in it */
-	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
-#endif
+	iommu_table_clear(tbl);
 
 	if (!welcomed) {
 		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index aab7688..ac2a21f 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -88,11 +88,13 @@
 
 	crash_size = crashk_res.end - crashk_res.start + 1;
 
+#ifndef CONFIG_RELOCATABLE
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
+#endif
 	crash_size = PAGE_ALIGN(crash_size);
 	crashk_res.end = crashk_res.start + crash_size - 1;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 4bd8b4f..e6efec7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -255,11 +255,14 @@
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+					void (*clear_all)(void),
+					unsigned long kdump_flag) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
 {
+	unsigned long kdump_flag = 0;
+
 	/* prepare control code if any */
 
 	/*
@@ -270,8 +273,10 @@
         * using debugger IPI.
         */
 
-       if (crashing_cpu == -1)
-               kexec_prepare_cpus();
+	if (crashing_cpu == -1)
+		kexec_prepare_cpus();
+	else
+		kdump_flag = KDUMP_SIGNATURE;
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -284,7 +289,7 @@
 	 */
 	kexec_sequence(&kexec_stack, image->start, image,
 			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all);
+			ppc_md.hpte_clear_all, kdump_flag);
 	/* NOTREACHED */
 }
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3053fe5..a243fd0 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -611,10 +611,12 @@
 
 
 /*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
  *
  * does the grungy work with stack switching and real mode switches
  * also does simple calls to other code
+ *
+ * kdump_flag says whether the next kernel should be a kdump kernel.
  */
 
 _GLOBAL(kexec_sequence)
@@ -647,7 +649,7 @@
 	mr	r29,r5			/* image (virt) */
 	mr	r28,r6			/* control, unused */
 	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* spare */
+	mr	r26,r8			/* kdump flag */
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
@@ -709,5 +711,6 @@
 	mr	r4,r30	# start, aka phys mem offset
 	mtlr	4
 	li	r5,0
-	blr	/* image->start(physid, image->start, 0); */
+	mr	r6,r26			/* kdump_flag */
+	blr	/* image->start(physid, image->start, 0, kdump_flag); */
 #endif /* CONFIG_KEXEC */