[PARISC] Make sure use of RFI conforms to PA 2.0 and 1.1 arch docs

2.6.12-rc4-pa3: first pass at making sure the use of RFI conforms to the
PA 2.0 Arch (pages F-4 and F-5) and the PA 1.1 Arch (pages 3-19 and 3-20).

The discussion revolves around all the rules for clearing the PSW Q-bit.
The hard part is meeting all the rules for a "relied upon translation".

The .align directive is used to guarantee that the critical sequence ends
more than 8 instructions (32 bytes) before the end of the page.

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>

Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index be0f07f..65a82c2 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -30,9 +30,9 @@
  *  - save registers to kernel stack and handle in assembly or C */
 
 
+#include <asm/psw.h>
 #include <asm/assembly.h>	/* for LDREG/STREG defines */
 #include <asm/pgtable.h>
-#include <asm/psw.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
 #include <asm/thread_info.h>
@@ -67,19 +67,22 @@
 
 	/* Switch to virtual mapping, trashing only %r1 */
 	.macro  virt_map
-	rsm     PSW_SM_Q,%r0
-	tovirt_r1 %r29
-	mfsp	%sr7, %r1
-	or,=    %r0,%r1,%r0 /* Only save sr7 in sr3 if sr7 != 0 */
-	mtsp	%r1, %sr3
+	/* pcxt_ssm_bug */
+	rsm	PSW_SM_I, %r0	/* barrier for "Relied upon Translation" */
 	mtsp	%r0, %sr4
 	mtsp	%r0, %sr5
+	mfsp	%sr7, %r1
+	or,=    %r0,%r1,%r0	/* Only save sr7 in sr3 if sr7 != 0 */
+	mtsp	%r1, %sr3
+	tovirt_r1 %r29
+	load32	KERNEL_PSW, %r1
+
+	rsm     PSW_SM_QUIET,%r0	/* second "heavy weight" ctl op */
 	mtsp	%r0, %sr6
 	mtsp	%r0, %sr7
-	load32	KERNEL_PSW, %r1
-	mtctl	%r1, %cr22
 	mtctl	%r0, %cr17	/* Clear IIASQ tail */
 	mtctl	%r0, %cr17	/* Clear IIASQ head */
+	mtctl	%r1, %ipsw
 	load32	4f, %r1
 	mtctl	%r1, %cr18	/* Set IIAOQ tail */
 	ldo	4(%r1), %r1
@@ -888,9 +891,6 @@
 	 * this way, then we will need to copy %sr3 in to PT_SR[3..7], and
 	 * adjust IASQ[0..1].
 	 *
-	 * Note that the following code uses a "relied upon translation".
-	 * See the parisc ACD for details. The ssm is necessary due to a
-	 * PCXT bug.
 	 */
 
 	.align 4096
@@ -985,24 +985,19 @@
 	rest_fp         %r1
 	rest_general    %r29
 
-	/* Create a "relied upon translation" PA 2.0 Arch. F-5 */
-	ssm		0,%r0
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
+	/* inverse of virt_map */
+	pcxt_ssm_bug
+	rsm             PSW_SM_QUIET,%r0	/* prepare for rfi */
 	tophys_r1       %r29
-	rsm             (PSW_SM_Q|PSW_SM_P|PSW_SM_D|PSW_SM_I),%r0
 
 	/* Restore space id's and special cr's from PT_REGS
-	 * structure pointed to by r29 */
+	 * structure pointed to by r29
+	 */
 	rest_specials	%r29
 
-	/* Important: Note that rest_stack restores r29
-	 * last (we are using it)! It also restores r1 and r30. */
+	/* IMPORTANT: rest_stack restores r29 last (we are using it)!
+	 * It also restores r1 and r30.
+	 */
 	rest_stack
 
 	rfi
@@ -1153,15 +1148,17 @@
 
 	CMPIB=,n        6,%r26,skip_save_ior
 
-	/* save_specials left ipsw value in r8 for us to test */
 
 	mfctl           %cr20, %r16 /* isr */
+	nop		/* serialize mfctl on PA 2.0 to avoid 4 cycle penalty */
 	mfctl           %cr21, %r17 /* ior */
 
+
 #ifdef __LP64__
 	/*
 	 * If the interrupted code was running with W bit off (32 bit),
 	 * clear the b bits (bits 0 & 1) in the ior.
+	 * save_specials left ipsw value in r8 for us to test.
 	 */
 	extrd,u,*<>     %r8,PSW_W_BIT,1,%r0
 	depdi           0,1,2,%r17
@@ -1487,10 +1484,10 @@
 	add,l           %r1,%r24,%r1           /* doesn't affect c/b bits */
 
 nadtlb_nullify:
-	mfctl           %cr22,%r8              /* Get ipsw */
+	mfctl           %ipsw,%r8
 	ldil            L%PSW_N,%r9
 	or              %r8,%r9,%r8            /* Set PSW_N */
-	mtctl           %r8,%cr22
+	mtctl           %r8,%ipsw
 
 	rfir
 	nop
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index 28405ed..2b87385 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -224,8 +224,6 @@
 	mtctl	%r0,%cr12
 	mtctl	%r0,%cr13
 
-	/* Prepare to RFI! Man all the cannons! */
-
 	/* Initialize the global data pointer */
 	loadgp
 
@@ -254,46 +252,16 @@
 $install_iva:
 	mtctl		%r10,%cr14
 
-#ifdef __LP64__
-	b		aligned_rfi
+	b		aligned_rfi  /* Prepare to RFI! Man all the cannons! */
 	nop
 
-	.align          256
+	.align 128
 aligned_rfi:
-	ssm             0,0
-	nop             /* 1 */
-	nop             /* 2 */
-	nop             /* 3 */
-	nop             /* 4 */
-	nop             /* 5 */
-	nop             /* 6 */
-	nop             /* 7 */
-	nop             /* 8 */
-#endif
+	pcxt_ssm_bug
 
-#ifdef __LP64__ /* move to psw.h? */
-#define		PSW_BITS	PSW_Q+PSW_I+PSW_D+PSW_P+PSW_R
-#else
-#define		PSW_BITS	PSW_SM_Q
-#endif
+	rsm		PSW_SM_QUIET,%r0	/* off troublesome PSW bits */
+	/* Don't need NOPs, have 8 compliant insn before rfi */
 
-$rfi:	
-	/* turn off troublesome PSW bits */
-	rsm		PSW_BITS,%r0
-
-	/* kernel PSW:
-	 *  - no interruptions except HPMC and TOC (which are handled by PDC)
-	 *  - Q bit set (IODC / PDC interruptions)
-	 *  - big-endian
-	 *  - virtually mapped
-	 */
-	load32		KERNEL_PSW,%r10
-	mtctl		%r10,%ipsw
-
-	/* Set the space pointers for the post-RFI world
-	** Clear the two-level IIA Space Queue, effectively setting
-	** Kernel space.
-	*/
 	mtctl		%r0,%cr17	/* Clear IIASQ tail */
 	mtctl		%r0,%cr17	/* Clear IIASQ head */
 
@@ -301,8 +269,11 @@
 	mtctl		%r11,%cr18	/* IIAOQ head */
 	ldo		4(%r11),%r11
 	mtctl		%r11,%cr18	/* IIAOQ tail */
+
+	load32		KERNEL_PSW,%r10
+	mtctl		%r10,%ipsw
 	
-	/* Jump to hyperspace */
+	/* Jump through hyperspace to Virt Mode */
 	rfi
 	nop
 
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 77e03bc..71ade44 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -40,8 +40,8 @@
 	.level	2.0
 #endif
 
-#include <asm/assembly.h>
 #include <asm/psw.h>
+#include <asm/assembly.h>
 #include <asm/pgtable.h>
 #include <asm/cache.h>
 
@@ -62,32 +62,23 @@
 	 * to happen in real mode with all interruptions disabled.
 	 */
 
-	/*
-	 * Once again, we do the rfi dance ... some day we need examine
-	 * all of our uses of this type of code and see what can be
-	 * consolidated.
-	 */
+	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
+	rsm	PSW_SM_I, %r19		/* save I-bit state */
+	load32		PA(1f), %r1
+	nop
+	nop
+	nop
+	nop
+	nop
 
-	rsm		PSW_SM_I, %r19		/* relied upon translation! PA 2.0 Arch. F-5 */
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	
-	rsm		PSW_SM_Q, %r0		/* Turn off Q bit to load iia queue */
-	ldil		L%REAL_MODE_PSW, %r1
-	ldo		R%REAL_MODE_PSW(%r1), %r1
-	mtctl		%r1, %cr22
+	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
 	mtctl		%r0, %cr17		/* Clear IIASQ tail */
 	mtctl		%r0, %cr17		/* Clear IIASQ head */
-	ldil		L%PA(1f), %r1
-	ldo		R%PA(1f)(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ tail */
+	load32		REAL_MODE_PSW, %r1
+	mtctl           %r1, %ipsw
 	rfi
 	nop
 
@@ -178,29 +169,36 @@
 	ADDIB>		-1, %r22, fdtoneloop	/* Outer loop count decr */
 	add		%r21, %r20, %r20	/* increment space */
 
+
 fdtdone:
+	/*
+	 * Switch back to virtual mode
+	 */
+	/* pcxt_ssm_bug */
+	rsm		PSW_SM_I, %r0
+	load32		2f, %r1
+	nop
+	nop
+	nop
+	nop
+	nop
 
-	/* Switch back to virtual mode */
-
-	rsm		PSW_SM_Q, %r0		/* clear Q bit to load iia queue */
-	ldil		L%KERNEL_PSW, %r1
-	ldo		R%KERNEL_PSW(%r1), %r1
-	or		%r1, %r19, %r1		/* Set I bit if set on entry */
-	mtctl		%r1, %cr22
+	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
 	mtctl		%r0, %cr17		/* Clear IIASQ tail */
 	mtctl		%r0, %cr17		/* Clear IIASQ head */
-	ldil		L%(2f), %r1
-	ldo		R%(2f)(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ tail */
+	load32		KERNEL_PSW, %r1
+	or		%r1, %r19, %r1	/* I-bit to state on entry */
+	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
 	rfi
 	nop
 
 2:      bv		%r0(%r2)
 	nop
-	.exit
 
+	.exit
 	.procend
 
 	.export flush_instruction_cache_local,code
@@ -238,7 +236,7 @@
 
 fisync:
 	sync
-	mtsm		%r22
+	mtsm		%r22			/* restore I-bit */
 	bv		%r0(%r2)
 	nop
 	.exit
@@ -281,7 +279,7 @@
 fdsync:
 	syncdma
 	sync
-	mtsm		%r22
+	mtsm		%r22			/* restore I-bit */
 	bv		%r0(%r2)
 	nop
 	.exit
@@ -988,11 +986,12 @@
 	bv		%r0(%r2)
 	nop
 	.exit
-
 	.procend
 
-	.align	128
-
+	/* align should cover use of rfi in disable_sr_hashing_asm and
+	 * srdis_done.
+	 */
+	.align	256
 	.export disable_sr_hashing_asm,code
 
 disable_sr_hashing_asm:
@@ -1000,28 +999,26 @@
 	.callinfo NO_CALLS
 	.entry
 
-	/* Switch to real mode */
+	/*
+	 * Switch to real mode
+	 */
+	/* pcxt_ssm_bug */
+	rsm		PSW_SM_I, %r0
+	load32		PA(1f), %r1
+	nop
+	nop
+	nop
+	nop
+	nop
 
-	ssm		0, %r0			/* relied upon translation! */
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	
-	rsm		(PSW_SM_Q|PSW_SM_I), %r0 /* disable Q&I to load the iia queue */
-	ldil		L%REAL_MODE_PSW, %r1
-	ldo		R%REAL_MODE_PSW(%r1), %r1
-	mtctl		%r1, %cr22
+	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
 	mtctl		%r0, %cr17		/* Clear IIASQ tail */
 	mtctl		%r0, %cr17		/* Clear IIASQ head */
-	ldil		L%PA(1f), %r1
-	ldo		R%PA(1f)(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ tail */
+	load32		REAL_MODE_PSW, %r1
+	mtctl		%r1, %ipsw
 	rfi
 	nop
 
@@ -1053,27 +1050,31 @@
 
 srdis_pa20:
 
-	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+ */
+	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
 
 	.word		0x144008bc		/* mfdiag %dr2, %r28 */
 	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
 	.word		0x145c1840		/* mtdiag %r28, %dr2 */
 
+
 srdis_done:
-
 	/* Switch back to virtual mode */
+	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
+	load32 	   	2f, %r1
+	nop
+	nop
+	nop
+	nop
+	nop
 
-	rsm		PSW_SM_Q, %r0		/* clear Q bit to load iia queue */
-	ldil		L%KERNEL_PSW, %r1
-	ldo		R%KERNEL_PSW(%r1), %r1
-	mtctl		%r1, %cr22
+	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
 	mtctl		%r0, %cr17		/* Clear IIASQ tail */
 	mtctl		%r0, %cr17		/* Clear IIASQ head */
-	ldil 	   	L%(2f), %r1
-	ldo     	R%(2f)(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18		/* IIAOQ tail */
+	load32		KERNEL_PSW, %r1
+	mtctl		%r1, %ipsw
 	rfi
 	nop
 
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 8dd5def..2310fc1 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -7,8 +7,8 @@
  * Copyright (C) 2000 Hewlett Packard (Paul Bame bame@puffin.external.hp.com)
  *
  */
-#include <asm/assembly.h>
 #include <asm/psw.h>
+#include <asm/assembly.h>
 
 	.section	.bss
 	.export real_stack
@@ -147,20 +147,17 @@
 	.text
 rfi_virt2real:
 	/* switch to real mode... */
-	ssm		0,0		/* See "relied upon translation" */
-	nop				/* PA 2.0 Arch. F-5 */
-	nop
-	nop
+	rsm		PSW_SM_I,%r0
+	load32		PA(rfi_v2r_1), %r1
 	nop
 	nop
 	nop
 	nop
 	nop
 	
-	rsm             (PSW_SM_Q|PSW_SM_I),%r0  /* disable Q & I bits to load iia queue */
+	rsm             PSW_SM_Q,%r0  /* disable Q bit to load iia queue */
 	mtctl		%r0, %cr17	/* Clear IIASQ tail */
 	mtctl		%r0, %cr17	/* Clear IIASQ head */
-	load32		PA(rfi_v2r_1), %r1
 	mtctl		%r1, %cr18	/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18	/* IIAOQ tail */
@@ -184,10 +181,8 @@
 	.text
 	.align 128
 rfi_real2virt:
-	ssm		0,0		/* See "relied upon translation" */
-	nop				/* PA 2.0 Arch. F-5 */
-	nop
-	nop
+	rsm		PSW_SM_I,%r0
+	load32		(rfi_r2v_1), %r1
 	nop
 	nop
 	nop
@@ -197,7 +192,6 @@
 	rsm             PSW_SM_Q,%r0    /* disable Q bit to load iia queue */
 	mtctl		%r0, %cr17	/* Clear IIASQ tail */
 	mtctl		%r0, %cr17	/* Clear IIASQ head */
-	load32		(rfi_r2v_1), %r1
 	mtctl		%r1, %cr18	/* IIAOQ head */
 	ldo		4(%r1), %r1
 	mtctl		%r1, %cr18	/* IIAOQ tail */
diff --git a/include/asm-parisc/assembly.h b/include/asm-parisc/assembly.h
index 30b0234..b24a99e 100644
--- a/include/asm-parisc/assembly.h
+++ b/include/asm-parisc/assembly.h
@@ -450,5 +450,30 @@
 	REST_CR	(%cr22, PT_PSW	(\regs))
 	.endm
 
+
+	/* First step to create a "relied upon translation"
+	 * See PA 2.0 Arch. page F-4 and F-5.
+	 *
+	 * The ssm was originally necessary due to a "PCxT bug".
+	 * But someone decided it needed to be added to the architecture
+	 * and this "feature" went into rev3 of PA-RISC 1.1 Arch Manual.
+	 * It's been carried forward into PA 2.0 Arch as well. :^(
+	 *
+	 * "ssm 0,%r0" is a NOP with side effects (prefetch barrier); the
+	 * macro below issues "rsm PSW_SM_I,%r0", which also clears the I-bit.
+	 * rsm/ssm prevent the ifetch unit from speculatively fetching insns
+	 * past this point. PA 2.0 single-steps all insns in the same QUAD (4).
+	 */
+	.macro	pcxt_ssm_bug	/* emits 8 insns: one rsm followed by 7 nops */
+	rsm	PSW_SM_I,%r0	/* clear I-bit; previous PSW bits discarded in %r0 */
+	nop	/* 1 */
+	nop	/* 2 */
+	nop	/* 3 */
+	nop	/* 4 */
+	nop	/* 5 */
+	nop	/* 6 */
+	nop	/* 7 */
+	.endm
+
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/include/asm-parisc/psw.h b/include/asm-parisc/psw.h
index 5132302..4334d6c 100644
--- a/include/asm-parisc/psw.h
+++ b/include/asm-parisc/psw.h
@@ -1,4 +1,7 @@
 #ifndef _PARISC_PSW_H
+
+#include <linux/config.h>
+
 #define	PSW_I	0x00000001
 #define	PSW_D	0x00000002
 #define	PSW_P	0x00000004
@@ -9,6 +12,16 @@
 #define	PSW_G	0x00000040	/* PA1.x only */
 #define PSW_O	0x00000080	/* PA2.0 only */
 
+/* ssm/rsm instructions number PSW_W and PSW_E differently */
+#define PSW_SM_I	PSW_I	/* Enable External Interrupts */
+#define PSW_SM_D	PSW_D
+#define PSW_SM_P	PSW_P
+#define PSW_SM_Q	PSW_Q	/* Enable Interrupt State Collection */
+#define PSW_SM_R	PSW_R	/* Enable Recover Counter Trap */
+#define PSW_SM_W	0x200	/* PA2.0 only : Enable Wide Mode */
+
+#define PSW_SM_QUIET	PSW_SM_R+PSW_SM_Q+PSW_SM_P+PSW_SM_D+PSW_SM_I
+
 #define PSW_CB	0x0000ff00
 
 #define	PSW_M	0x00010000
@@ -30,33 +43,21 @@
 #define	PSW_Z	0x40000000	/* PA1.x only */
 #define	PSW_Y	0x80000000	/* PA1.x only */
 
-#ifdef __LP64__
-#define PSW_HI_CB 0x000000ff    /* PA2.0 only */
+#ifdef CONFIG_64BIT
+#  define PSW_HI_CB 0x000000ff    /* PA2.0 only */
 #endif
 
-/* PSW bits to be used with ssm/rsm */
-#define PSW_SM_I        0x1
-#define PSW_SM_D        0x2
-#define PSW_SM_P        0x4
-#define PSW_SM_Q        0x8
-#define PSW_SM_R        0x10
-#define PSW_SM_F        0x20
-#define PSW_SM_G        0x40
-#define PSW_SM_O        0x80
-#define PSW_SM_E        0x100
-#define PSW_SM_W        0x200
-
-#ifdef __LP64__
-#  define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
-#  define KERNEL_PSW    (PSW_W | PSW_C | PSW_Q | PSW_P | PSW_D)
-#  define REAL_MODE_PSW (PSW_W | PSW_Q)
-#  define USER_PSW_MASK (PSW_W | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
-#  define USER_PSW_HI_MASK (PSW_HI_CB)
-#else
-#  define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
-#  define KERNEL_PSW    (PSW_C | PSW_Q | PSW_P | PSW_D)
-#  define REAL_MODE_PSW (PSW_Q)
-#  define USER_PSW_MASK (PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
+#ifdef CONFIG_64BIT
+#  define USER_PSW_HI_MASK	PSW_HI_CB
+#  define WIDE_PSW		PSW_W
+#else 
+#  define WIDE_PSW		0
 #endif
 
+/* Used when setting up for rfi */
+#define KERNEL_PSW    (WIDE_PSW | PSW_C | PSW_Q | PSW_P | PSW_D)
+#define REAL_MODE_PSW (WIDE_PSW | PSW_Q)
+#define USER_PSW_MASK (WIDE_PSW | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
+#define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
+
 #endif
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index eb27b78..efbb2d8 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -64,29 +64,26 @@
 {
 	unsigned long npages;
 
-	
 	npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	if (npages >= 512)  /* XXX arbitrary, should be tuned */
+	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
 		flush_tlb_all();
 	else {
 
 		mtsp(vma->vm_mm->context,1);
+		purge_tlb_start();
 		if (split_tlb) {
-			purge_tlb_start();
 			while (npages--) {
 				pdtlb(start);
 				pitlb(start);
 				start += PAGE_SIZE;
 			}
-			purge_tlb_end();
 		} else {
-			purge_tlb_start();
 			while (npages--) {
 				pdtlb(start);
 				start += PAGE_SIZE;
 			}
-			purge_tlb_end();
 		}
+		purge_tlb_end();
 	}
 }