[IA64] Multiple outstanding ptc.g instruction support

According to SDM2.2, Itanium supports multiple outstanding ptc.g instructions.
But current kernel function ia64_global_tlb_purge() uses a spinlock to serialize
ptc.g instructions issued by multiple processors. This serialization might have
scalability issue on a big SMP machine where many processors could purge TLB
in parallel.

The patch fixes this problem by issuing multiple ptc.g instructions in
ia64_global_tlb_purge(). It also adds support for the "PALO" table to get
a platform view of the max number of outstanding ptc.g instructions (which
may be different from the processor view found from PAL_VM_SUMMARY).

PALO specification can be found at: http://www.dig64.org/home/DIG64_PALO_R1_0.pdf

spinaphore implementation by Matthew Wilcox.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h
index f4904db..3cd637a 100644
--- a/include/asm-ia64/sal.h
+++ b/include/asm-ia64/sal.h
@@ -296,6 +296,9 @@
     EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
 #define SAL_PLAT_BUS_ERR_SECT_GUID  \
     EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
+    EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
+		0xca, 0x4d)
 
 #define MAX_CACHE_ERRORS	6
 #define MAX_TLB_ERRORS		6
@@ -879,6 +882,20 @@
 
 extern void ia64_sal_handler_init(void *entry_point, void *gpval);
 
+#define PALO_MAX_TLB_PURGES	0xFFFF
+#define PALO_SIG	"PALO"
+
+struct palo_table {
+	u8  signature[4];	/* Should be "PALO" */
+	u32 length;
+	u8  minor_revision;
+	u8  major_revision;
+	u8  checksum;
+	u8  reserved1[5];
+	u16 max_tlb_purges;
+	u8  reserved2[6];
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_IA64_SAL_H */