Merge branches 'x86/tracehook', 'x86/xsave' and 'x86/prototypes' into x86/signal

Conflicts:
	arch/x86/kernel/signal_64.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1150444..ba27dd9 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1425,6 +1425,12 @@
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
 
+	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
+
+	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
+			default x2apic cluster mode on platforms
+			supporting x2apic.
+
 	noltlbs		[PPC] Do not use large page/tlb entries for kernel
 			lowmem mapping on PPC40x.
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed92864..6b8fc90 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
@@ -1643,6 +1644,14 @@
 	 workaround will setup a 1:1 mapping for the first
 	 16M to make floppy (an ISA device) work.
 
+config INTR_REMAP
+	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+	depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+	help
+	 Supports Interrupt remapping for IO-APIC and MSI devices.
+	 To use x2apic mode in the CPU's which support x2APIC enhancements or
+	 to support platforms with CPU's having > 8 bit APIC ID, say Y.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2c518fb..6156ac2 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -415,3 +415,73 @@
 config X86_DEBUGCTLMSR
 	def_bool y
 	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+menuconfig PROCESSOR_SELECT
+	default y
+	bool "Supported processor vendors" if EMBEDDED
+	help
+	  This lets you choose what x86 vendor support code your kernel
+	  will include.
+
+config CPU_SUP_INTEL_32
+	default y
+	bool "Support Intel processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Intel processors
+
+config CPU_SUP_INTEL_64
+	default y
+	bool "Support Intel processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for Intel processors
+
+config CPU_SUP_CYRIX_32
+	default y
+	bool "Support Cyrix processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Cyrix processors
+
+config CPU_SUP_AMD_32
+	default y
+	bool "Support AMD processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for AMD processors
+
+config CPU_SUP_AMD_64
+	default y
+	bool "Support AMD processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for AMD processors
+
+config CPU_SUP_CENTAUR_32
+	default y
+	bool "Support Centaur processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Centaur processors
+
+config CPU_SUP_CENTAUR_64
+	default y
+	bool "Support Centaur processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for Centaur processors
+
+config CPU_SUP_TRANSMETA_32
+	default y
+	bool "Support Transmeta processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Transmeta processors
+
+config CPU_SUP_UMC_32
+	default y
+	bool "Support UMC processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for UMC processors
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea737..aaf5a21 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
  */
 #undef CONFIG_PARAVIRT
 #ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
 #endif
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4b9ae7c..4d3ff03 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -38,12 +38,12 @@
 {
 	REQUIRED_MASK0,
 	REQUIRED_MASK1,
-	REQUIRED_MASK2,
-	REQUIRED_MASK3,
+	0, /* REQUIRED_MASK2 not implemented in this file */
+	0, /* REQUIRED_MASK3 not implemented in this file */
 	REQUIRED_MASK4,
-	REQUIRED_MASK5,
+	0, /* REQUIRED_MASK5 not implemented in this file */
 	REQUIRED_MASK6,
-	REQUIRED_MASK7,
+	0, /* REQUIRED_MASK7 not implemented in this file */
 };
 
 #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index bbe7695..4589caa 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,7 +15,7 @@
 
 #include <stdio.h>
 
-#include "../kernel/cpu/feature_names.c"
+#include "../kernel/cpu/capflags.c"
 
 #if NCAPFLAGS > 8
 # error "Need to adjust the boot code handling of CPUID strings"
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/es7000/Makefile
similarity index 100%
rename from arch/x86/mach-es7000/Makefile
rename to arch/x86/es7000/Makefile
diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/es7000/es7000.h
similarity index 89%
rename from arch/x86/mach-es7000/es7000.h
rename to arch/x86/es7000/es7000.h
index c8d5aa1..4e62f6f 100644
--- a/arch/x86/mach-es7000/es7000.h
+++ b/arch/x86/es7000/es7000.h
@@ -1,7 +1,7 @@
 /*
  * Written by: Garry Forsgren, Unisys Corporation
  *             Natalie Protasevich, Unisys Corporation
- * This file contains the code to configure and interface 
+ * This file contains the code to configure and interface
  * with Unisys ES7000 series hardware system manager.
  *
  * Copyright (c) 2003 Unisys Corporation.  All Rights Reserved.
@@ -18,7 +18,7 @@
  * with this program; if not, write the Free Software Foundation, Inc., 59
  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
  *
- * Contact information: Unisys Corporation, Township Line & Union Meeting 
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
  * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
  *
  * http://www.unisys.com
@@ -41,7 +41,7 @@
 #define	MIP_VALID		0x0100000000000000ULL
 #define	MIP_PORT(VALUE)	((VALUE >> 32) & 0xffff)
 
-#define	MIP_RD_LO(VALUE)	(VALUE & 0xffffffff)   
+#define	MIP_RD_LO(VALUE)	(VALUE & 0xffffffff)
 
 struct mip_reg_info {
 	unsigned long long mip_info;
@@ -51,11 +51,11 @@
 };
 
 struct part_info {
-	unsigned char type;   
+	unsigned char type;
 	unsigned char length;
 	unsigned char part_id;
 	unsigned char apic_mode;
-	unsigned long snum;    
+	unsigned long snum;
 	char ptype[16];
 	char sname[64];
 	char pname[64];
@@ -68,11 +68,11 @@
 };
 
 struct es7000_mem_info {
-	unsigned char type;   
+	unsigned char type;
 	unsigned char length;
 	unsigned char resv[6];
-	unsigned long long  start; 
-	unsigned long long  size; 
+	unsigned long long  start;
+	unsigned long long  size;
 };
 
 struct es7000_oem_table {
@@ -106,7 +106,7 @@
 };
 
 #define	MIP_SW_APIC		0x1020b
-#define	MIP_FUNC(VALUE) 	(VALUE & 0xff)
+#define	MIP_FUNC(VALUE)		(VALUE & 0xff)
 
 extern int parse_unisys_oem (char *oemptr);
 extern void setup_unisys(void);
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/es7000/es7000plat.c
similarity index 99%
rename from arch/x86/mach-es7000/es7000plat.c
rename to arch/x86/es7000/es7000plat.c
index 50189af..7789fde 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/es7000/es7000plat.c
@@ -72,7 +72,7 @@
 			base += nr_ioapic_registers[i];
 	}
 
-	if (!ioapic && (gsi < 16)) 
+	if (!ioapic && (gsi < 16))
 		gsi += base;
 	return gsi;
 }
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c7..f25a101 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -179,9 +179,10 @@
 	u32 pretcode;
 	int sig;
 	struct sigcontext_ia32 sc;
-	struct _fpstate_ia32 fpstate;
+	struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
 	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
 	char retcode[8];
+	/* fp state follows here */
 };
 
 struct rt_sigframe
@@ -192,8 +193,8 @@
 	u32 puc;
 	compat_siginfo_t info;
 	struct ucontext_ia32 uc;
-	struct _fpstate_ia32 fpstate;
 	char retcode[8];
+	/* fp state follows here */
 };
 
 #define COPY(x)		{ 		\
@@ -215,7 +216,7 @@
 				   unsigned int *peax)
 {
 	unsigned int tmpflags, gs, oldgs, err = 0;
-	struct _fpstate_ia32 __user *buf;
+	void __user *buf;
 	u32 tmp;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -259,26 +260,12 @@
 
 	err |= __get_user(tmp, &sc->fpstate);
 	buf = compat_ptr(tmp);
-	if (buf) {
-		if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-			goto badframe;
-		err |= restore_i387_ia32(buf);
-	} else {
-		struct task_struct *me = current;
-
-		if (used_math()) {
-			clear_fpu(me);
-			clear_used_math();
-		}
-	}
+	err |= restore_i387_xstate_ia32(buf);
 
 	err |= __get_user(tmp, &sc->ax);
 	*peax = tmp;
 
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@@ -350,7 +337,7 @@
  */
 
 static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
-				 struct _fpstate_ia32 __user *fpstate,
+				 void __user *fpstate,
 				 struct pt_regs *regs, unsigned int mask)
 {
 	int tmp, err = 0;
@@ -381,7 +368,7 @@
 	err |= __put_user((u32)regs->flags, &sc->flags);
 	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
 
-	tmp = save_i387_ia32(fpstate);
+	tmp = save_i387_xstate_ia32(fpstate);
 	if (tmp < 0)
 		err = -EFAULT;
 	else {
@@ -402,7 +389,8 @@
  * Determine which stack to use..
  */
 static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
-				 size_t frame_size)
+				 size_t frame_size,
+				 void **fpstate)
 {
 	unsigned long sp;
 
@@ -421,6 +409,11 @@
 		 ka->sa.sa_restorer)
 		sp = (unsigned long) ka->sa.sa_restorer;
 
+	if (used_math()) {
+		sp = sp - sig_xstate_ia32_size;
+		*fpstate = (struct _fpstate_ia32 *) sp;
+	}
+
 	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
@@ -434,6 +427,7 @@
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
+	void __user *fpstate = NULL;
 
 	/* copy_to_user optimizes that into a single 8 byte store */
 	static const struct {
@@ -448,7 +442,7 @@
 		0,
 	};
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
@@ -457,8 +451,7 @@
 	if (err)
 		goto give_sigsegv;
 
-	err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs,
-					set->sig[0]);
+	err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
 	if (err)
 		goto give_sigsegv;
 
@@ -522,6 +515,7 @@
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
+	void __user *fpstate = NULL;
 
 	/* __copy_to_user optimizes that into a single 8 byte store */
 	static const struct {
@@ -537,7 +531,7 @@
 		0,
 	};
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
@@ -550,13 +544,16 @@
 		goto give_sigsegv;
 
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				     regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651f..d6ea91a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
-obj-y				+= i387.o
+obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
 obj-$(CONFIG_X86_32)		+= tls.o
@@ -104,6 +104,8 @@
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
 	obj-y				+= bios_uv.o
+        obj-y				+= genx2apic_cluster.o
+        obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd..27ef365 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -775,7 +775,7 @@
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
 	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid  = read_apic_id();
 #ifdef CONFIG_X86_32
 		apic_version[boot_cpu_physical_apicid] =
 			 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1351,7 +1351,9 @@
 				acpi_ioapic = 1;
 
 				smp_found_config = 1;
+#ifdef CONFIG_X86_32
 				setup_apic_routing();
+#endif
 			}
 		}
 		if (error == -EINVAL) {
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb3..65a0c1b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -145,35 +145,25 @@
 extern char __vsyscall_0;
 const unsigned char *const *find_nop_table(void)
 {
-	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_has(X86_FEATURE_NOPL))
+		return p6_nops;
+	else
+		return k8_nops;
 }
 
 #else /* CONFIG_X86_64 */
 
-static const struct nop {
-	int cpuid;
-	const unsigned char *const *noptable;
-} noptypes[] = {
-	{ X86_FEATURE_K8, k8_nops },
-	{ X86_FEATURE_K7, k7_nops },
-	{ X86_FEATURE_P4, p6_nops },
-	{ X86_FEATURE_P3, p6_nops },
-	{ -1, NULL }
-};
-
 const unsigned char *const *find_nop_table(void)
 {
-	const unsigned char *const *noptable = intel_nops;
-	int i;
-
-	for (i = 0; noptypes[i].cpuid >= 0; i++) {
-		if (boot_cpu_has(noptypes[i].cpuid)) {
-			noptable = noptypes[i].noptable;
-			break;
-		}
-	}
-	return noptable;
+	if (boot_cpu_has(X86_FEATURE_K8))
+		return k8_nops;
+	else if (boot_cpu_has(X86_FEATURE_K7))
+		return k7_nops;
+	else if (boot_cpu_has(X86_FEATURE_NOPL))
+		return p6_nops;
+	else
+		return intel_nops;
 }
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d..44cae65 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -167,6 +172,34 @@
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1205,7 +1238,7 @@
 	 * default configuration (or the MP table is broken).
 	 */
 	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 
 }
 
@@ -1242,7 +1275,7 @@
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
 #ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062..b625658 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -39,6 +40,7 @@
 #include <asm/proto.h>
 #include <asm/timex.h>
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -46,6 +48,11 @@
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
 
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
@@ -118,13 +125,13 @@
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -140,6 +147,69 @@
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, id << 24);
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -629,10 +699,10 @@
 	/*
 	 * The ID register is read/write in a real APIC.
 	 */
-	reg0 = read_apic_id();
+	reg0 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
 	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = read_apic_id();
+	reg1 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
 	apic_write(APIC_ID, reg0);
 	if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -833,6 +903,125 @@
 	apic_pm_activate();
 }
 
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -872,7 +1061,7 @@
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /**
@@ -880,6 +1069,11 @@
  */
 void __init init_apic_mappings(void)
 {
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -899,7 +1093,7 @@
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -918,6 +1112,9 @@
 		return -1;
 	}
 
+	enable_IR_x2apic();
+	setup_apic_routing();
+
 	verify_local_APIC();
 
 	connect_bsp_APIC();
@@ -1093,6 +1290,11 @@
 	cpu_set(cpu, cpu_present_map);
 }
 
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+
 /*
  * Power management
  */
@@ -1129,7 +1331,7 @@
 
 	maxlvt = lapic_get_maxlvt();
 
-	apic_pm_state.apic_id = read_apic_id();
+	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1164,10 +1366,14 @@
 	maxlvt = lapic_get_maxlvt();
 
 	local_irq_save(flags);
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	l &= ~MSR_IA32_APICBASE_BASE;
-	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-	wrmsr(MSR_IA32_APICBASE, l, h);
+	if (!x2apic) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	} else
+		enable_x2apic();
+
 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
 	apic_write(APIC_ID, apic_pm_state.apic_id);
 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1307,6 +1513,15 @@
 	return (clusters > 2);
 }
 
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
 /*
  * APIC command line parameters
  */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387..505543a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
 
 #define __NO_STUBS 1
 #undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #define __SYSCALL(nr, sym) [nr] = 1,
 static char syscalls[] = {
 #include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee76eaa..3ede19a 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,22 +3,32 @@
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
-obj-y			+= proc.o feature_names.o
+obj-y			+= proc.o capflags.o powerflags.o
 
-obj-$(CONFIG_X86_32)	+= common.o bugs.o
+obj-$(CONFIG_X86_32)	+= common.o bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= common_64.o bugs_64.o
-obj-$(CONFIG_X86_32)	+= amd.o
-obj-$(CONFIG_X86_64)	+= amd_64.o
-obj-$(CONFIG_X86_32)	+= cyrix.o
-obj-$(CONFIG_X86_32)	+= centaur.o
-obj-$(CONFIG_X86_64)	+= centaur_64.o
-obj-$(CONFIG_X86_32)	+= transmeta.o
-obj-$(CONFIG_X86_32)	+= intel.o
-obj-$(CONFIG_X86_64)	+= intel_64.o
-obj-$(CONFIG_X86_32)	+= umc.o
+
+obj-$(CONFIG_CPU_SUP_AMD_32)		+= amd.o
+obj-$(CONFIG_CPU_SUP_AMD_64)		+= amd_64.o
+obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_32)	+= centaur.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_64)	+= centaur_64.o
+obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
+obj-$(CONFIG_CPU_SUP_INTEL_32)		+= intel.o
+obj-$(CONFIG_CPU_SUP_INTEL_64)		+= intel_64.o
+obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
 obj-$(CONFIG_X86_MCE)	+= mcheck/
 obj-$(CONFIG_MTRR)	+= mtrr/
 obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+
+quiet_cmd_mkcapflags = MKCAP   $@
+      cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+
+cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
+
+targets += capflags.c
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+	$(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 0000000..2056ccf
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
+/*
+ * cmpxchg*() fallbacks for CPU not supporting these instructions
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+	u8 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u8 *)ptr;
+	if (prev == old)
+		*(u8 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+	u16 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u16 *)ptr;
+	if (prev == old)
+		*(u16 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+	u32 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u32 *)ptr;
+	if (prev == old)
+		*(u32 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+	u64 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u64 *)ptr;
+	if (prev == old)
+		*(u64 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d..c63ec65 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
 #include <asm/mtrr.h>
 #include <asm/mce.h>
 #include <asm/pat.h>
+#include <asm/asm.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
@@ -341,6 +342,35 @@
 	early_get_cap(c);
 }
 
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect.  Hence, probe for it based on first
+ * principles.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+	const u32 nopl_signature = 0x888c53b1; /* Random number */
+	u32 has_nopl = nopl_signature;
+
+	clear_cpu_cap(c, X86_FEATURE_NOPL);
+	if (c->x86 >= 6) {
+		asm volatile("\n"
+			     "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+			     "2:\n"
+			     "        .section .fixup,\"ax\"\n"
+			     "3:      xor %0,%0\n"
+			     "        jmp 2b\n"
+			     "        .previous\n"
+			     _ASM_EXTABLE(1b,3b)
+			     : "+a" (has_nopl));
+
+		if (has_nopl == nopl_signature)
+			set_cpu_cap(c, X86_FEATURE_NOPL);
+	}
+}
+
 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 {
 	u32 tfms, xlvl;
@@ -395,8 +425,8 @@
 		}
 
 		init_scattered_cpuid_features(c);
+		detect_nopl(c);
 	}
-
 }
 
 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
@@ -709,9 +739,20 @@
 	/*
 	 * Force FPU initialization:
 	 */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 	mxcsr_feature_mask_init();
+
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+
+	xsave_init();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f1..af569a9 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -18,6 +18,7 @@
 #include <asm/mtrr.h>
 #include <asm/mce.h>
 #include <asm/pat.h>
+#include <asm/asm.h>
 #include <asm/numa.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
@@ -215,6 +216,39 @@
 	}
 }
 
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect.  Hence, probe for it based on first
+ * principles.
+ *
+ * Note: no 64-bit chip is known to lack these, but put the code here
+ * for consistency with 32 bits, and to make it utterly trivial to
+ * diagnose the problem should it ever surface.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+	const u32 nopl_signature = 0x888c53b1; /* Random number */
+	u32 has_nopl = nopl_signature;
+
+	clear_cpu_cap(c, X86_FEATURE_NOPL);
+	if (c->x86 >= 6) {
+		asm volatile("\n"
+			     "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+			     "2:\n"
+			     "        .section .fixup,\"ax\"\n"
+			     "3:      xor %0,%0\n"
+			     "        jmp 2b\n"
+			     "        .previous\n"
+			     _ASM_EXTABLE(1b,3b)
+			     : "+a" (has_nopl));
+
+		if (has_nopl == nopl_signature)
+			set_cpu_cap(c, X86_FEATURE_NOPL);
+	}
+}
+
 static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
 
 void __init early_cpu_init(void)
@@ -313,6 +347,8 @@
 		c->x86_phys_bits = eax & 0xff;
 	}
 
+	detect_nopl(c);
+
 	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
 	    cpu_devs[c->x86_vendor]->c_early_init)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
@@ -597,6 +633,8 @@
 	barrier();
 
 	check_efer();
+	if (cpu != 0 && x2apic)
+		enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index e710a21..ada5050 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -116,7 +116,7 @@
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
 	/* Load/Store Serialize to mem access disable (=reorder it) */
-	setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+	setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
 	/* set load/store serialize from 1GB to 4GB */
 	ccr3 |= 0xe0;
 	setCx86(CX86_CCR3, ccr3);
@@ -127,11 +127,11 @@
 	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	write_cr0(read_cr0() | X86_CR0_NW);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
 }
 
 /*
@@ -145,14 +145,14 @@
 	local_irq_save(flags);
 
 	/* Suspend on halt power saving and enable #SUSP pin */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
 
 
 	/* FPU fast, DTE cache, Mem bypass */
-	setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
+	setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
 
 	set_cx86_memwb();
@@ -268,7 +268,7 @@
 		/* GXm supports extended cpuid levels 'ala' AMD */
 		if (c->cpuid_level == 2) {
 			/* Enable cxMMX extensions (GX1 Datasheet 54) */
-			setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
 
 			/*
 			 * GXm : 0x30 ... 0x5f GXm  datasheet 51
@@ -291,7 +291,7 @@
 		if (dir1 > 7) {
 			dir0_msn++;  /* M II */
 			/* Enable MMX extensions (App note 108) */
-			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
 		} else {
 			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
 		}
@@ -406,7 +406,7 @@
 			local_irq_save(flags);
 			ccr3 = getCx86(CX86_CCR3);
 			setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);       /* enable MAPEN  */
-			setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80);  /* enable cpuid  */
+			setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);  /* enable cpuid  */
 			setCx86(CX86_CCR3, ccr3);                       /* disable MAPEN */
 			local_irq_restore(flags);
 		}
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index e43ad4a..0000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Strings for the various x86 capability flags.
- *
- * This file must not contain any executable code.
- */
-
-#include <asm/cpufeature.h>
-
-/*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned.  Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
-const char * const x86_cap_flags[NCAPINTS*32] = {
-	/* Intel-defined */
-	"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-	"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-	"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-	"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-	/* AMD-defined */
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
-	NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-	"3dnowext", "3dnow",
-
-	/* Transmeta-defined */
-	"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Other (Linux-defined) */
-	"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-	NULL, NULL, NULL, NULL,
-	"constant_tsc", "up", NULL, "arch_perfmon",
-	"pebs", "bts", NULL, NULL,
-	"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Intel-defined (#2) */
-	"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-	"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-	NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* VIA/Cyrix/Centaur-defined */
-	NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-	"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* AMD-defined (#2) */
-	"lahf_lm", "cmp_legacy", "svm", "extapic",
-	"cr8_legacy", "abm", "sse4a", "misalignsse",
-	"3dnowprefetch", "osvw", "ibs", "sse5",
-	"skinit", "wdt", NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Auxiliary (Linux-defined) */
-	"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-};
-
-const char *const x86_power_flags[32] = {
-	"ts",	/* temperature sensor */
-	"fid",  /* frequency id control */
-	"vid",  /* voltage id control */
-	"ttp",  /* thermal trip */
-	"tm",
-	"stc",
-	"100mhzsteps",
-	"hwpstate",
-	"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
-};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f256..77618c7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -23,13 +23,6 @@
 #include <mach_apic.h>
 #endif
 
-#ifdef CONFIG_X86_INTEL_USERCOPY
-/*
- * Alignment at which movsl is preferred for bulk memory copies.
- */
-struct movsl_mask movsl_mask __read_mostly;
-#endif
-
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
 	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
@@ -314,69 +307,5 @@
 
 cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
 
-#ifndef CONFIG_X86_CMPXCHG
-unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
-{
-	u8 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u8 *)ptr;
-	if (prev == old)
-		*(u8 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u8);
-
-unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
-{
-	u16 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u16 *)ptr;
-	if (prev == old)
-		*(u16 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u16);
-
-unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
-{
-	u32 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u32 *)ptr;
-	if (prev == old)
-		*(u32 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u32);
-#endif
-
-#ifndef CONFIG_X86_CMPXCHG64
-unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
-{
-	u64 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u64 *)ptr;
-	if (prev == old)
-		*(u64 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_486_u64);
-#endif
-
 /* arch_initcall(intel_cpu_init); */
 
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b0a10b..3f46afb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
 /*
- *      Routines to indentify caches on Intel CPU.
+ *	Routines to indentify caches on Intel CPU.
  *
- *      Changes:
- *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
+ *	Changes:
+ *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  */
@@ -13,6 +13,7 @@
 #include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/pci.h>
 
 #include <asm/processor.h>
 #include <asm/smp.h>
@@ -130,9 +131,18 @@
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
+	unsigned long can_disable;
 	cpumask_t shared_cpu_map;	/* future?: only cpus/node is needed */
 };
 
+#ifdef CONFIG_PCI
+static struct pci_device_id k8_nb_id[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+	{}
+};
+#endif
+
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@
 static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
 static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
 
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
-		       union _cpuid4_leaf_ebx *ebx,
-		       union _cpuid4_leaf_ecx *ecx)
+static void __cpuinit
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+		     union _cpuid4_leaf_ebx *ebx,
+		     union _cpuid4_leaf_ecx *ecx)
 {
 	unsigned dummy;
 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+	if (index < 3)
+		return;
+	this_leaf->can_disable = 1;
+}
+
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 {
 	union _cpuid4_leaf_eax 	eax;
 	union _cpuid4_leaf_ebx 	ebx;
 	union _cpuid4_leaf_ecx 	ecx;
 	unsigned		edx;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 		amd_cpuid4(index, &eax, &ebx, &ecx);
-	else
-		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+		if (boot_cpu_data.x86 >= 0x10)
+			amd_check_l3_disable(index, this_leaf);
+	} else {
+		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+	}
+
 	if (eax.split.type == CACHE_TYPE_NULL)
 		return -EIO; /* better error ? */
 
 	this_leaf->eax = eax;
 	this_leaf->ebx = ebx;
 	this_leaf->ecx = ecx;
-	this_leaf->size = (ecx.split.number_of_sets + 1) *
-		(ebx.split.coherency_line_size + 1) *
-		(ebx.split.physical_line_partition + 1) *
-		(ebx.split.ways_of_associativity + 1);
+	this_leaf->size = (ecx.split.number_of_sets          + 1) *
+			  (ebx.split.coherency_line_size     + 1) *
+			  (ebx.split.physical_line_partition + 1) *
+			  (ebx.split.ways_of_associativity   + 1);
 	return 0;
 }
 
@@ -453,7 +477,7 @@
 
 /* pointer to _cpuid4_info array (for each cache leaf) */
 static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)    (&((per_cpu(cpuid4_info, x))[y]))
+#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
-		sibling_leaf = CPUID4_INFO_IDX(sibling, index);	
+		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
 	}
 }
@@ -572,7 +596,7 @@
 
 /* pointer to array of kobjects for cpuX/cache/indexY */
 static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)    (&((per_cpu(index_kobject, x))[y]))
+#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(index_kobject, x))[y]))
 
 #define show_one_plus(file_name, object, val)				\
 static ssize_t show_##file_name						\
@@ -637,6 +661,99 @@
 	}
 }
 
+#define to_object(k)	container_of(k, struct _index_kobject, kobj)
+#define to_attr(a)	container_of(a, struct _cache_attr, attr)
+
+#ifdef CONFIG_PCI
+static struct pci_dev *get_k8_northbridge(int node)
+{
+	struct pci_dev *dev = NULL;
+	int i;
+
+	for (i = 0; i <= node; i++) {
+		do {
+			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+			if (!dev)
+				break;
+		} while (!pci_match_id(&k8_nb_id[0], dev));
+		if (!dev)
+			break;
+	}
+	return dev;
+}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+	return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	struct pci_dev *dev = NULL;
+	ssize_t ret = 0;
+	int i;
+
+	if (!this_leaf->can_disable)
+		return sprintf(buf, "Feature not enabled\n");
+
+	dev = get_k8_northbridge(node);
+	if (!dev) {
+		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 2; i++) {
+		unsigned int reg;
+
+		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+		ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",  
+			buf,
+			reg & 0x80000000 ? "Disabled" : "Allowed",
+			reg & 0x40000000 ? "Disabled" : "Allowed");
+		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
+			buf, (reg & 0x30000) >> 16, reg & 0xfff);
+	}
+	return ret;
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+		    size_t count)
+{
+	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	struct pci_dev *dev = NULL;
+	unsigned int ret, index, val;
+
+	if (!this_leaf->can_disable)
+		return 0;
+
+	if (strlen(buf) > 15)
+		return -EINVAL;
+
+	ret = sscanf(buf, "%x %x", &index, &val);
+	if (ret != 2)
+		return -EINVAL;
+	if (index > 1)
+		return -EINVAL;
+
+	val |= 0xc0000000;
+	dev = get_k8_northbridge(node);
+	if (!dev) {
+		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+		return -EINVAL;
+	}
+
+	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+	wbinvd();
+	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+	return 1;
+}
+
 struct _cache_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+
 static struct attribute * default_attrs[] = {
 	&type.attr,
 	&level.attr,
@@ -667,12 +786,10 @@
 	&size.attr,
 	&shared_cpu_map.attr,
 	&shared_cpu_list.attr,
+	&cache_disable.attr,
 	NULL
 };
 
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
 static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
 {
 	struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@
 	ret = fattr->show ?
 		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
 			buf) :
-	       	0;
+		0;
 	return ret;
 }
 
 static ssize_t store(struct kobject * kobj, struct attribute * attr,
 		     const char * buf, size_t count)
 {
-	return 0;
+	struct _cache_attr *fattr = to_attr(attr);
+	struct _index_kobject *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->store ?
+		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+			buf, count) :
+		0;
+	return ret;
 }
 
 static struct sysfs_ops sysfs_ops = {
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 0000000..dfea390
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+#
+# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
+#
+
+($in, $out) = @ARGV;
+
+open(IN, "< $in\0")   or die "$0: cannot open: $in: $!\n";
+open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
+
+print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+
+while (defined($line = <IN>)) {
+	if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
+		$macro = $1;
+		$feature = $2;
+		$tail = $3;
+		if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
+			$feature = $1;
+		}
+
+		if ($feature ne '') {
+			printf OUT "\t%-32s = \"%s\",\n",
+				"[$macro]", "\L$feature";
+		}
+	}
+}
+print OUT "};\n";
+
+close(IN);
+close(OUT);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f..58ac5d3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@
 	mtrr_type type;
 };
 
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 static int __initdata debug_print;
 
 static int __init
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 0000000..5abbea2
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
+/*
+ * Strings for the various x86 power flags
+ *
+ * This file must not contain any executable code.
+ */
+
+#include <asm/cpufeature.h>
+
+const char *const x86_power_flags[32] = {
+	"ts",	/* temperature sensor */
+	"fid",  /* frequency id control */
+	"vid",  /* voltage id control */
+	"ttp",  /* thermal trip */
+	"tm",
+	"stc",
+	"100mhzsteps",
+	"hwpstate",
+	"",	/* tsc invariant mapped to constant_tsc */
+		/* nothing */
+};
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bb..6c9bfc9 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/hardirq.h>
+#include <linux/dmar.h>
 
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
 
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
+extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
+extern struct genapic apic_x2xpic_uv_x;
+extern struct genapic apic_x2apic_phys;
+extern struct genapic apic_x2apic_cluster;
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
-static enum uv_system_type uv_system_type;
+static struct genapic *apic_probe[] __initdata = {
+	&apic_x2apic_uv_x,
+	&apic_x2apic_phys,
+	&apic_x2apic_cluster,
+	&apic_physflat,
+	NULL,
+};
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
 void __init setup_apic_routing(void)
 {
-	if (uv_system_type == UV_NON_UNIQUE_APIC)
-		genapic = &apic_x2apic_uv_x;
-	else
-#ifdef CONFIG_ACPI
-	/*
-	 * Quirk: some x86_64 machines can only use physical APIC mode
-	 * regardless of how many processors are present (x86_64 ES7000
-	 * is an example).
-	 */
-	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-			(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
-		genapic = &apic_physflat;
-	else
-#endif
+	if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
+		if (!intr_remapping_enabled)
+			genapic = &apic_flat;
+	}
 
-	if (max_physical_apicid < 8)
-		genapic = &apic_flat;
-	else
-		genapic = &apic_physflat;
-
-	printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+	if (genapic == &apic_flat) {
+		if (max_physical_apicid >= 8)
+			genapic = &apic_physflat;
+		printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+	}
 }
 
 /* Same for both flat and physical. */
 
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
 {
 	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
 int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (!strcmp(oem_id, "SGI")) {
-		if (!strcmp(oem_table_id, "UVL"))
-			uv_system_type = UV_LEGACY_APIC;
-		else if (!strcmp(oem_table_id, "UVX"))
-			uv_system_type = UV_X2APIC;
-		else if (!strcmp(oem_table_id, "UVH"))
-			uv_system_type = UV_NON_UNIQUE_APIC;
+	int i;
+
+	for (i = 0; apic_probe[i]; ++i) {
+		if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+			genapic = apic_probe[i];
+			printk(KERN_INFO "Setting APIC routing to %s.\n",
+				genapic->name);
+			return 1;
+		}
 	}
 	return 0;
 }
-
-unsigned int read_apic_id(void)
-{
-	unsigned int id;
-
-	WARN_ON(preemptible() && num_online_cpus() > 1);
-	id = apic_read(APIC_ID);
-	if (uv_system_type >= UV_X2APIC)
-		id  |= __get_cpu_var(x2apic_extra_bits);
-	return id;
-}
-
-enum uv_system_type get_uv_system_type(void)
-{
-	return uv_system_type;
-}
-
-int is_uv_system(void)
-{
-	return uv_system_type != UV_NONE;
-}
-EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a..9eca5ba 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <mach_apicdef.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return 1;
+}
 
 static cpumask_t flat_target_cpus(void)
 {
@@ -95,9 +106,33 @@
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = (((x)>>24) & 0xFFu);
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xFFu)<<24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	unsigned int id;
+
+	id = get_apic_id(apic_read(APIC_ID));
+	return id;
+}
+
 static int flat_apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
 static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@
 
 struct genapic apic_flat =  {
 	.name = "flat",
+	.acpi_madt_oem_check = flat_acpi_madt_oem_check,
 	.int_delivery_mode = dest_LowestPrio,
 	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
 	.target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
 
 /*
@@ -130,6 +170,21 @@
  * We cannot use logical delivery in this case because the mask
  * overflows, so use physical mode.
  */
+static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * Quirk: some x86_64 machines can only use physical APIC mode
+	 * regardless of how many processors are present (x86_64 ES7000
+	 * is an example).
+	 */
+	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+		return 1;
+#endif
+
+	return 0;
+}
 
 static cpumask_t physflat_target_cpus(void)
 {
@@ -176,6 +231,7 @@
 
 struct genapic apic_physflat =  {
 	.name = "physical flat",
+	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
 	.int_delivery_mode = dest_Fixed,
 	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
 	.target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 0000000..fed9f68
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (cpu_has_x2apic)
+		return 1;
+
+	return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				       vector, APIC_DEST_LOGICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+	return;
+}
+
+struct genapic apic_x2apic_cluster = {
+	.name = "cluster x2apic",
+	.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+	.int_delivery_mode = dest_LowestPrio,
+	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 0000000..958d537
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+static int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+	x2apic_phys = 1;
+	return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (cpu_has_x2apic && x2apic_phys)
+		return 1;
+
+	return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+				       vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+	return;
+}
+
+struct genapic apic_x2apic_phys = {
+	.name = "physical x2apic",
+	.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+	.int_delivery_mode = dest_Fixed,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837c..99269be 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
-#include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -26,6 +26,35 @@
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/bios.h>
 
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+static enum uv_system_type uv_system_type;
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strcmp(oem_id, "SGI")) {
+		if (!strcmp(oem_table_id, "UVL"))
+			uv_system_type = UV_LEGACY_APIC;
+		else if (!strcmp(oem_table_id, "UVX"))
+			uv_system_type = UV_X2APIC;
+		else if (!strcmp(oem_table_id, "UVH")) {
+			uv_system_type = UV_NON_UNIQUE_APIC;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+	return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+	return uv_system_type != UV_NONE;
+}
+
 DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
 
@@ -123,6 +152,10 @@
 	return 1;
 }
 
+static void uv_init_apic_ldr(void)
+{
+}
+
 static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 {
 	int cpu;
@@ -138,9 +171,34 @@
 		return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	WARN_ON(preemptible() && num_online_cpus() > 1);
+	id = x | __get_cpu_var(x2apic_extra_bits);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	/* maskout x2apic_extra_bits ? */
+	x = id;
+	return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+	return get_apic_id(apic_read(APIC_ID));
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
-	return GET_APIC_ID(read_apic_id()) >> index_msb;
+	return uv_read_apic_id() >> index_msb;
 }
 
 #ifdef ZZZ		/* Needs x2apic patch */
@@ -152,17 +210,22 @@
 
 struct genapic apic_x2apic_uv_x = {
 	.name = "UV large system",
+	.acpi_madt_oem_check = uv_acpi_madt_oem_check,
 	.int_delivery_mode = dest_Fixed,
 	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
 	.target_cpus = uv_target_cpus,
 	.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
 	.apic_id_registered = uv_apic_id_registered,
+	.init_apic_ldr = uv_init_apic_ldr,
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
 	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +464,5 @@
 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
 		set_x2apic_extra_bits(uv_hub_info->pnode);
 }
+
+
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8..45723f1 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
 # include <asm/sigcontext32.h>
 # include <asm/user32.h>
 #else
-# define save_i387_ia32		save_i387
-# define restore_i387_ia32	restore_i387
+# define save_i387_xstate_ia32		save_i387_xstate
+# define restore_i387_xstate_ia32	restore_i387_xstate
 # define _fpstate_ia32		_fpstate
+# define _xstate_ia32		_xstate
+# define sig_xstate_ia32_size   sig_xstate_size
+# define fx_sw_reserved_ia32	fx_sw_reserved
 # define user_i387_ia32_struct	user_i387_struct
 # define user32_fxsr_struct	user_fxsr_struct
 #endif
@@ -36,6 +39,7 @@
 
 static unsigned int		mxcsr_feature_mask __read_mostly = 0xffffffffu;
 unsigned int xstate_size;
+unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
 static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
 void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@
 		return;
 	}
 
+	if (cpu_has_xsave) {
+		xsave_cntxt_init();
+		return;
+	}
+
 	if (cpu_has_fxsr)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 #ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@
 
 	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
 
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+	xsave_init();
+
 	mxcsr_feature_mask_init();
 	/* clean state in init */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 }
 #endif	/* CONFIG_X86_64 */
@@ -195,6 +214,13 @@
 	 */
 	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
+	/*
+	 * update the header bits in the xsave header, indicating the
+	 * presence of FP and SSE state.
+	 */
+	if (cpu_has_xsave)
+		target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
 	return ret;
 }
 
@@ -395,6 +421,12 @@
 	if (!ret)
 		convert_to_fxsr(target, &env);
 
+	/*
+	 * update the header bit in the xsave header, indicating the
+	 * presence of FP.
+	 */
+	if (cpu_has_xsave)
+		target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
 	return ret;
 }
 
@@ -407,7 +439,6 @@
 	struct task_struct *tsk = current;
 	struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
-	unlazy_fpu(tsk);
 	fp->status = fp->swd;
 	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
 		return -1;
@@ -421,8 +452,6 @@
 	struct user_i387_ia32_struct env;
 	int err = 0;
 
-	unlazy_fpu(tsk);
-
 	convert_from_fxsr(&env, tsk);
 	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;
@@ -432,16 +461,40 @@
 	if (err)
 		return -1;
 
-	if (__copy_to_user(&buf->_fxsr_env[0], fx,
-			   sizeof(struct i387_fxsave_struct)))
+	if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
 		return -1;
 	return 1;
 }
 
-int save_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int save_i387_xsave(void __user *buf)
 {
+	struct _fpstate_ia32 __user *fx = buf;
+	int err = 0;
+
+	if (save_i387_fxsave(fx) < 0)
+		return -1;
+
+	err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
+			     sizeof(struct _fpx_sw_bytes));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 __user *) (buf + sig_xstate_ia32_size
+					    - FP_XSTATE_MAGIC2_SIZE));
+	if (err)
+		return -1;
+
+	return 1;
+}
+
+int save_i387_xstate_ia32(void __user *buf)
+{
+	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
+	struct task_struct *tsk = current;
+
 	if (!used_math())
 		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
+		return -EACCES;
 	/*
 	 * This will cause a "finit" to be triggered by the next
 	 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@
 	if (!HAVE_HWFP) {
 		return fpregs_soft_get(current, NULL,
 				       0, sizeof(struct user_i387_ia32_struct),
-				       NULL, buf) ? -1 : 1;
+				       NULL, fp) ? -1 : 1;
 	}
 
+	unlazy_fpu(tsk);
+
+	if (cpu_has_xsave)
+		return save_i387_xsave(fp);
 	if (cpu_has_fxsr)
-		return save_i387_fxsave(buf);
+		return save_i387_fxsave(fp);
 	else
-		return save_i387_fsave(buf);
+		return save_i387_fsave(fp);
 }
 
 static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@
 				sizeof(struct i387_fsave_struct));
 }
 
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
+			       unsigned int size)
 {
 	struct task_struct *tsk = current;
 	struct user_i387_ia32_struct env;
 	int err;
 
 	err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
-			       sizeof(struct i387_fxsave_struct));
+			       size);
 	/* mxcsr reserved bits must be masked to zero for security reasons */
 	tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@
 	return 0;
 }
 
-int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int restore_i387_xsave(void __user *buf)
+{
+	struct _fpx_sw_bytes fx_sw_user;
+	struct _fpstate_ia32 __user *fx_user =
+			((struct _fpstate_ia32 __user *) buf);
+	struct i387_fxsave_struct __user *fx =
+		(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
+	struct xsave_hdr_struct *xsave_hdr =
+				&current->thread.xstate->xsave.xsave_hdr;
+	u64 mask;
+	int err;
+
+	if (check_for_xstate(fx, buf, &fx_sw_user))
+		goto fx_only;
+
+	mask = fx_sw_user.xstate_bv;
+
+	err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
+
+	xsave_hdr->xstate_bv &= pcntxt_mask;
+	/*
+	 * These bits must be zero.
+	 */
+	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+	/*
+	 * Init the state that is not present in the memory layout
+	 * and enabled by the OS.
+	 */
+	mask = ~(pcntxt_mask & ~mask);
+	xsave_hdr->xstate_bv &= mask;
+
+	return err;
+fx_only:
+	/*
+	 * Couldn't find the extended state information in the memory
+	 * layout. Restore the FP/SSE and init the other extended state
+	 * enabled by the OS.
+	 */
+	xsave_hdr->xstate_bv = XSTATE_FPSSE;
+	return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
+}
+
+int restore_i387_xstate_ia32(void __user *buf)
 {
 	int err;
 	struct task_struct *tsk = current;
+	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
 
 	if (HAVE_HWFP)
 		clear_fpu(tsk);
 
+	if (!buf) {
+		if (used_math()) {
+			clear_fpu(tsk);
+			clear_used_math();
+		}
+
+		return 0;
+	} else
+		if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
+			return -EACCES;
+
 	if (!used_math()) {
 		err = init_fpu(tsk);
 		if (err)
@@ -500,14 +613,17 @@
 	}
 
 	if (HAVE_HWFP) {
-		if (cpu_has_fxsr)
-			err = restore_i387_fxsave(buf);
+		if (cpu_has_xsave)
+			err = restore_i387_xsave(buf);
+		else if (cpu_has_fxsr)
+			err = restore_i387_fxsave(fp, sizeof(struct
+							   i387_fxsave_struct));
 		else
-			err = restore_i387_fsave(buf);
+			err = restore_i387_fsave(fp);
 	} else {
 		err = fpregs_soft_set(current, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
-				      NULL, buf) != 0;
+				      NULL, fp) != 0;
 	}
 	set_used_math();
 
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49..4b8a53d 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@
 
 device_initcall(i8259A_init_sysfs);
 
+void mask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
 void init_8259A(int auto_eoi)
 {
 	unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb5..26ea3ea 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,6 +46,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -1490,7 +1491,7 @@
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(read_apic_id()));
+			GET_APIC_ID(v));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1698,8 +1699,7 @@
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest =
-					GET_APIC_ID(read_apic_id());
+		entry.dest.physical.physical_dest = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1725,8 @@
 	unsigned char old_id;
 	unsigned long flags;
 
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
 		return;
-#endif
 
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b7..e63282e 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -49,6 +50,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -108,6 +110,9 @@
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -303,7 +308,12 @@
 		pin = entry->pin;
 		if (pin == -1)
 			break;
-		io_apic_write(apic, 0x11 + pin*2, dest);
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -440,6 +450,69 @@
 			clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -839,18 +912,98 @@
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-	if (trigger) {
+	if (trigger)
 		irq_desc[irq].status |= IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq, "fasteoi");
-	} else {
+	else
 		irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						     "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
+	if (trigger)
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq,
+					      "fasteoi");
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
+{
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
 	}
+
+	entry->mask = 0;				/* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+	return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@
 		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(&entry,0,sizeof(entry));
 
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest = cpu_mask_to_apicid(mask);
-	entry.mask = 0;				/* enable IRQ */
-	entry.trigger = trigger;
-	entry.polarity = polarity;
-	entry.vector = cfg->vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry.mask = 1;
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
 
 	ioapic_register_intr(irq, trigger);
 	if (irq < 16)
@@ -944,6 +1088,9 @@
 {
 	struct IO_APIC_route_entry entry;
 
+	if (intr_remapping_enabled)
+		return;
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1090,6 +1237,7 @@
 void __apicdebuginit print_local_APIC(void * dummy)
 {
 	unsigned int v, ver, maxlvt;
+	unsigned long icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1097,7 +1245,7 @@
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1133,10 +1281,9 @@
 	v = apic_read(APIC_ESR);
 	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-	v = apic_read(APIC_ICR);
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-	v = apic_read(APIC_ICR2);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+	icr = apic_icr_read();
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1291,7 +1438,7 @@
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest          = GET_APIC_ID(read_apic_id());
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1544,147 @@
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_desc *desc = irq_desc + irq;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	unsigned int dest;
+	unsigned long flags;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+	 	 * Interrupt in progress. Migrating irq now will change the
+		 * vector information in the IO-APIC RTE and that will confuse
+		 * the EOI broadcast performed by cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everthing is clear. we have right of way */
+	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+	ret = 0;
+	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_desc + irq;
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq,
+					         irq_desc[irq].pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	if (irq_desc[irq].status & IRQ_LEVEL) {
+		irq_desc[irq].status |= IRQ_MOVE_PENDING;
+		irq_desc[irq].pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -1453,6 +1741,17 @@
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1527,6 +1826,21 @@
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name 		= "IR-IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_x2apic_edge,
+	.eoi 		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
@@ -1712,6 +2026,8 @@
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -1738,6 +2054,8 @@
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1977,6 +2295,9 @@
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,10 +2316,41 @@
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+	if (err)
+		return err;
 
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset (&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
+
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
 		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_lo =
 			MSI_ADDR_BASE_LO |
@@ -2049,6 +2401,55 @@
 	write_msi_msg(irq, &msg);
 	irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2066,28 +2467,159 @@
 	.retrigger	= ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 {
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		        pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+	int ret;
 	struct msi_msg msg;
-	int irq, ret;
-	irq = create_irq();
-	if (irq < 0)
-		return irq;
 
 	ret = msi_compose_msg(dev, irq, &msg);
-	if (ret < 0) {
-		destroy_irq(irq);
+	if (ret < 0)
 		return ret;
-	}
 
 	set_irq_msi(irq, desc);
 	write_msi_msg(irq, &msg);
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_desc + irq;
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
 	return 0;
 }
 
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+	int irq, ret;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
+	ret = setup_msi_irq(dev, desc, irq);
+	if (ret < 0) {
+		destroy_irq(irq);
+		return ret;
+	}
+	return 0;
+
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
+}
+
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, sub_handle;
+	struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
+
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq();
+		if (irq < 0)
+			return irq;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
+	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
+}
+
 void arch_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
@@ -2333,6 +2865,10 @@
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+			else if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
 			else
 				set_ioapic_affinity_irq(irq, TARGET_CPUS);
 		}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a..1919143 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b..f1c688e 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
 
 #ifdef CONFIG_X86_32
 #include <mach_apic.h>
+#include <mach_ipi.h>
+
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,7 +149,6 @@
 }
 
 /* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 static int convert_apicid_to_cpu(int apic_id)
 {
 	int i;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f..0ed5f93 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430..f98f4e1 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@
        generic_bigsmp_probe();
 #endif
 
+#ifdef CONFIG_X86_32
 	setup_apic_routing();
+#endif
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
 	return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c1..4caff39 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@
 	}
 }
 
+static int __init numaq_setup_ioapic_ids(void)
+{
+	/* so can skip it */
+	return 1;
+}
+
 static struct x86_quirks numaq_x86_quirks __initdata = {
 	.arch_pre_time_init	= numaq_pre_time_init,
 	.arch_time_init		= NULL,
@@ -243,6 +249,7 @@
 	.mpc_oem_bus_info	= mpc_oem_bus_info,
 	.mpc_oem_pci_bus	= mpc_oem_pci_bus,
 	.smp_read_mpc_oem	= smp_read_mpc_oem,
+	.setup_ioapic_ids	= numaq_setup_ioapic_ids,
 };
 
 void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17..4090cd6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@
 
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.apic_write = native_apic_write,
-	.apic_read = native_apic_read,
 	.setup_boot_clock = setup_boot_APIC_clock,
 	.setup_secondary_clock = setup_secondary_APIC_clock,
 	.startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1dd..2c9abc9 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,8 @@
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b6..00263c9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
 
 asmlinkage extern void ret_from_fork(void);
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccc..9e43a48 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
@@ -69,7 +70,7 @@
 
 #define FLAG_MASK		FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
 	regno >>= 2;
@@ -1375,30 +1376,6 @@
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-static void syscall_trace(struct pt_regs *regs)
-{
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-#if 0
-	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-	       current->comm,
-	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace);
-#endif
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
 
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
@@ -1432,8 +1409,9 @@
 	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
 		ret = -1L;
 
-	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+	    tracehook_report_syscall_entry(regs))
+		ret = -1L;
 
 	if (unlikely(current->audit_context)) {
 		if (IS_IA32)
@@ -1459,7 +1437,7 @@
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+		tracehook_report_syscall_exit(regs, 0);
 
 	/*
 	 * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1453,6 @@
 	 * system call instruction.
 	 */
 	if (test_thread_flag(TIF_SINGLESTEP) &&
-	    (current->ptrace & PT_PTRACED))
+	    tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
 		send_sigtrap(current, regs, 0);
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7..673f12c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -739,6 +739,8 @@
 	num_physpages = max_pfn;
 
 	check_efer();
+ 	if (cpu_has_x2apic)
+ 		check_x2apic();
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb51..6dd7e2b 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@
 	char __user *pretcode;
 	int sig;
 	struct sigcontext sc;
-	struct _fpstate fpstate;
+	/*
+	 * fpstate is unused. fpstate is moved/allocated after
+	 * retcode[] below. This movement allows to have the FP state and the
+	 * future state extensions (xsave) stay together.
+	 * And at the same time retaining the unused fpstate, prevents changing
+	 * the offset of extramask[] in the sigframe and thus prevent any
+	 * legacy application accessing/modifying it.
+	 */
+	struct _fpstate fpstate_unused;
 	unsigned long extramask[_NSIG_WORDS-1];
 	char retcode[8];
+	/* fp state follows here */
 };
 
 struct rt_sigframe {
@@ -15,13 +24,14 @@
 	void __user *puc;
 	struct siginfo info;
 	struct ucontext uc;
-	struct _fpstate fpstate;
 	char retcode[8];
+	/* fp state follows here */
 };
 #else
 struct rt_sigframe {
 	char __user *pretcode;
 	struct ucontext uc;
 	struct siginfo info;
+	/* fp state follows here */
 };
 #endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcd..b21070e 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/tracehook.h>
 #include <linux/elf.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -26,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/syscalls.h>
 
 #include "sigframe.h"
 
@@ -159,28 +161,14 @@
 	}
 
 	{
-		struct _fpstate __user *buf;
+		void __user *buf;
 
 		err |= __get_user(buf, &sc->fpstate);
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
-		} else {
-			struct task_struct *me = current;
-
-			if (used_math()) {
-				clear_fpu(me);
-				clear_used_math();
-			}
-		}
+		err |= restore_i387_xstate(buf);
 	}
 
 	err |= __get_user(*pax, &sc->ax);
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -262,7 +250,7 @@
  * Set up a signal frame.
  */
 static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
 	int tmp, err = 0;
@@ -289,7 +277,7 @@
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
 	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
-	tmp = save_i387(fpstate);
+	tmp = save_i387_xstate(fpstate);
 	if (tmp < 0)
 		err = 1;
 	else
@@ -306,7 +294,8 @@
  * Determine which stack to use..
  */
 static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+	     void **fpstate)
 {
 	unsigned long sp;
 
@@ -332,6 +321,11 @@
 			sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
+	if (used_math()) {
+		sp = sp - sig_xstate_size;
+		*fpstate = (struct _fpstate *) sp;
+	}
+
 	sp -= frame_size;
 	/*
 	 * Align the stack pointer according to the i386 ABI,
@@ -350,8 +344,9 @@
 	void __user *restorer;
 	int err = 0;
 	int usig;
+	void __user *fpstate = NULL;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
@@ -366,7 +361,7 @@
 	if (err)
 		goto give_sigsegv;
 
-	err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+	err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
 	if (err)
 		goto give_sigsegv;
 
@@ -427,8 +422,9 @@
 	void __user *restorer;
 	int err = 0;
 	int usig;
+	void __user *fpstate = NULL;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
@@ -447,13 +443,16 @@
 		goto give_sigsegv;
 
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
@@ -558,8 +557,6 @@
 	 * handler too.
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,6 +565,9 @@
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 0;
 }
 
@@ -661,5 +661,10 @@
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
 	clear_thread_flag(TIF_IRET);
 }
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5..2f28252 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
@@ -26,6 +27,8 @@
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -54,69 +57,6 @@
 }
 
 /*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.xstate->fxsave));
-
-	if ((unsigned long)buf % 16)
-		printk("save_i387: bad fpstate %p\n", buf);
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)
-					 buf);
-		if (err)
-			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err;
-
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
-
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
-	err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-	if (unlikely(err)) {
-		/*
-		 * Encountered an error while doing the restore from the
-		 * user buffer, clear the fpu state.
-		 */
-		clear_fpu(tsk);
-		clear_used_math();
-	}
-	return err;
-}
-
-/*
  * Do a signal return; undo the signal stack.
  */
 static int
@@ -160,25 +100,11 @@
 	{
 		struct _fpstate __user * buf;
 		err |= __get_user(buf, &sc->fpstate);
-
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
-		} else {
-			struct task_struct *me = current;
-			if (used_math()) {
-				clear_fpu(me);
-				clear_used_math();
-			}
-		}
+		err |= restore_i387_xstate(buf);
 	}
 
 	err |= __get_user(*pax, &sc->ax);
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +195,23 @@
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
-	return (void __user *)round_down(sp - size, 16);
+	return (void __user *)round_down(sp - size, 64);
 }
 
 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			   sigset_t *set, struct pt_regs * regs)
 {
 	struct rt_sigframe __user *frame;
-	struct _fpstate __user *fp = NULL; 
+	void __user *fp = NULL;
 	int err = 0;
 	struct task_struct *me = current;
 
 	if (used_math()) {
-		fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
+		fp = get_stack(ka, regs, sig_xstate_size);
 		frame = (void __user *)round_down(
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
-		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
-			goto give_sigsegv;
-
-		if (save_i387(fp) < 0) 
+		if (save_i387_xstate(fp) < 0)
 			err |= -1; 
 	} else
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +226,10 @@
 	}
 		
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
@@ -355,35 +281,6 @@
 }
 
 /*
- * Return -1L or the syscall number that @regs is executing.
- */
-static long current_syscall(struct pt_regs *regs)
-{
-	/*
-	 * We always sign-extend a -1 value being set here,
-	 * so this is always either -1L or a syscall number.
-	 */
-	return regs->orig_ax;
-}
-
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error.  This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		/*
-		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
-		 * and will match correctly in comparisons.
-		 */
-		return (int) regs->ax;
-#endif
-	return regs->ax;
-}
-
-/*
  * OK, we're invoking a handler
  */	
 
@@ -394,9 +291,9 @@
 	int ret;
 
 	/* Are we from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
@@ -453,8 +350,6 @@
 		 * handler too.
 		 */
 		regs->flags &= ~X86_EFLAGS_TF;
-		if (test_thread_flag(TIF_SINGLESTEP))
-			ptrace_notify(SIGTRAP);
 
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
@@ -462,6 +357,9 @@
 			sigaddset(&current->blocked,sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
+
+		tracehook_signal_handler(sig, info, ka, regs,
+					 test_thread_flag(TIF_SINGLESTEP));
 	}
 
 	return ret;
@@ -518,9 +416,9 @@
 	}
 
 	/* Did we come from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -558,6 +456,11 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b..aa804c6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
 #else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
@@ -123,13 +123,12 @@
 
 static atomic_t init_deasserted;
 
-static int boot_cpu_logical_apicid;
 
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -165,6 +164,8 @@
 #endif
 
 #ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
 					{ [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -210,7 +211,7 @@
 	/*
 	 * (This works even if the APIC is not enabled.)
 	 */
-	phys_id = GET_APIC_ID(read_apic_id());
+	phys_id = read_apic_id();
 	cpuid = smp_processor_id();
 	if (cpu_isset(cpuid, cpu_callin_map)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@
 			printk(KERN_CONT
 			       "a previous APIC delivery may have failed\n");
 
-		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
 
 		timeout = 0;
 		do {
@@ -583,11 +583,9 @@
 	int maxlvt;
 
 	/* Target chip */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
 	/* Boot on the stack */
 	/* Kick the second */
-	apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+	apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@
 	/*
 	 * Turn INIT on target chip
 	 */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 	/*
 	 * Send IPI
 	 */
-	apic_write(APIC_ICR,
-		   APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+		       phys_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@
 	pr_debug("Deasserting INIT.\n");
 
 	/* Target chip */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 	/* Send IPI */
-	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@
 		 */
 
 		/* Target chip */
-		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 		/* Boot on the stack */
 		/* Kick the second */
-		apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+			       phys_apicid);
 
 		/*
 		 * Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
 	boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
 	current_thread_info()->cpu = 0;  /* needed? */
 	set_cpu_sibling_map(0);
 
+#ifdef CONFIG_X86_64
+	enable_IR_x2apic();
+	setup_apic_routing();
+#endif
+
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
 		disable_smp();
@@ -1186,9 +1186,9 @@
 	}
 
 	preempt_disable();
-	if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-		     GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
 	preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f..7b98785 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/bios_ebda.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/mpparse.h>
 
 static struct rio_table_hdr *rio_table_hdr __initdata;
 static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb8..1884a8d 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
 
+#include <asm/syscalls.h>
+
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef..c9288c8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -16,6 +16,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/ia32.h>
+#include <asm/syscalls.h>
 
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long fd, unsigned long off)
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c..3d1be4f 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
 #define __NO_STUBS
 
 #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #include <asm/unistd_64.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 
 typedef void (*sys_call_ptr_t)(void);
 
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c66..bbecf8b 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
+#include <asm/timer.h>
 
 #include "do_timer.h"
 
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf37..6bb7b85 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
+#include <asm/syscalls.h>
 
 #include "tls.h"
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e4..da5a596 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@
 
 	set_bit(SYSCALL_VECTOR, used_vectors);
 
-	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caac..38eb761 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1134,7 +1134,7 @@
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
-	if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+	if (unlikely(restore_fpu_checking(me))) {
 		stts();
 		force_sig(SIGSEGV, me);
 		return;
@@ -1173,10 +1173,6 @@
 	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
 	/*
-	 * initialize the per thread extended state:
-	 */
-	init_thread_xstate();
-	/*
 	 * Should be a barrier for any external CPU state:
 	 */
 	cpu_init();
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566f..4eeb5cf 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
+#include <asm/syscalls.h>
 
 /*
  * Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d..61531d5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	para_fill(pv_apic_ops.apic_read, APICRead);
-	para_fill(pv_apic_ops.apic_write, APICWrite);
+       para_fill(apic_ops->read, APICRead);
+       para_fill(apic_ops->write, APICWrite);
 #endif
 
 	/*
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 0000000..07713d6
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/i387.h>
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/sigcontext32.h>
+#endif
+#include <asm/xcr.h>
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+u64 pcntxt_mask;
+
+struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+struct _fpx_sw_bytes fx_sw_reserved_ia32;
+#endif
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+int check_for_xstate(struct i387_fxsave_struct __user *buf,
+		     void __user *fpstate,
+		     struct _fpx_sw_bytes *fx_sw_user)
+{
+	int min_xstate_size = sizeof(struct i387_fxsave_struct) +
+			      sizeof(struct xsave_hdr_struct);
+	unsigned int magic2;
+	int err;
+
+	err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
+			       sizeof(struct _fpx_sw_bytes));
+
+	if (err)
+		return err;
+
+	/*
+	 * First Magic check failed.
+	 */
+	if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
+		return -1;
+
+	/*
+	 * Check for error scenarios.
+	 */
+	if (fx_sw_user->xstate_size < min_xstate_size ||
+	    fx_sw_user->xstate_size > xstate_size ||
+	    fx_sw_user->xstate_size > fx_sw_user->extended_size)
+		return -1;
+
+	err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
+					    fx_sw_user->extended_size -
+					    FP_XSTATE_MAGIC2_SIZE));
+	/*
+	 * Check for the presence of second magic word at the end of memory
+	 * layout. This detects the case where the user just copied the legacy
+	 * fpstate layout with out copying the extended state information
+	 * in the memory layout.
+	 */
+	if (err || magic2 != FP_XSTATE_MAGIC2)
+		return -1;
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Signal frame handlers.
+ */
+
+int save_i387_xstate(void __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
+		return -EACCES;
+
+	BUG_ON(sig_xstate_size < xstate_size);
+
+	if ((unsigned long)buf % 64)
+		printk("save_i387_xstate: bad fpstate %p\n", buf);
+
+	if (!used_math())
+		return 0;
+	clear_used_math(); /* trigger finit */
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		/*
+	 	 * Start with clearing the user buffer. This will present a
+	 	 * clean context for the bytes not touched by the fxsave/xsave.
+		 */
+		__clear_user(buf, sig_xstate_size);
+
+		if (task_thread_info(tsk)->status & TS_XSAVE)
+			err = xsave_user(buf);
+		else
+			err = fxsave_user(buf);
+
+		if (err)
+			return err;
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	} else {
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+				   xstate_size))
+			return -1;
+	}
+
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		struct _fpstate __user *fx = buf;
+
+		err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
+				     sizeof(struct _fpx_sw_bytes));
+
+		err |= __put_user(FP_XSTATE_MAGIC2,
+				  (__u32 __user *) (buf + sig_xstate_size
+						    - FP_XSTATE_MAGIC2_SIZE));
+	}
+
+	return 1;
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE
+ * state.
+ */
+int restore_user_xstate(void __user *buf)
+{
+	struct _fpx_sw_bytes fx_sw_user;
+	u64 mask;
+	int err;
+
+	if (((unsigned long)buf % 64) ||
+	     check_for_xstate(buf, buf, &fx_sw_user))
+		goto fx_only;
+
+	mask = fx_sw_user.xstate_bv;
+
+	/*
+	 * restore the state passed by the user.
+	 */
+	err = xrestore_user(buf, mask);
+	if (err)
+		return err;
+
+	/*
+	 * init the state skipped by the user.
+	 */
+	mask = pcntxt_mask & ~mask;
+
+	xrstor_state(init_xstate_buf, mask);
+
+	return 0;
+
+fx_only:
+	/*
+	 * couldn't find the extended state information in the
+	 * memory layout. Restore just the FP/SSE and init all
+	 * the other extended state.
+	 */
+	xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
+	return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+int restore_i387_xstate(void __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	if (!buf) {
+		if (used_math())
+			goto clear;
+		return 0;
+	} else
+		if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
+			return -EACCES;
+
+	if (!used_math()) {
+		err = init_fpu(tsk);
+		if (err)
+			return err;
+	}
+
+	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+		clts();
+		task_thread_info(current)->status |= TS_USEDFPU;
+	}
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		err = restore_user_xstate(buf);
+	else
+		err = fxrstor_checking((__force struct i387_fxsave_struct *)
+				       buf);
+	if (unlikely(err)) {
+		/*
+		 * Encountered an error while doing the restore from the
+		 * user buffer, clear the fpu state.
+		 */
+clear:
+		clear_fpu(tsk);
+		clear_used_math();
+	}
+	return err;
+}
+#endif
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed by the fpstate pointer in the sigcontext.
+ * This will be saved when ever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+void prepare_fx_sw_frame(void)
+{
+	int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
+			     FP_XSTATE_MAGIC2_SIZE;
+
+	sig_xstate_size = sizeof(struct _fpstate) + size_extended;
+
+#ifdef CONFIG_IA32_EMULATION
+	sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
+#endif
+
+	memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+
+	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+	fx_sw_reserved.extended_size = sig_xstate_size;
+	fx_sw_reserved.xstate_bv = pcntxt_mask;
+	fx_sw_reserved.xstate_size = xstate_size;
+#ifdef CONFIG_IA32_EMULATION
+	memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
+	       sizeof(struct _fpx_sw_bytes));
+	fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
+#endif
+}
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+#ifdef CONFIG_X86_64
+unsigned int sig_xstate_size = sizeof(struct _fpstate);
+#endif
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+void __cpuinit xsave_init(void)
+{
+	if (!cpu_has_xsave)
+		return;
+
+	set_in_cr4(X86_CR4_OSXSAVE);
+
+	/*
+	 * Enable all the features that the HW is capable of
+	 * and the Linux kernel is aware of.
+	 */
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+}
+
+/*
+ * setup the xstate image representing the init state
+ */
+void setup_xstate_init(void)
+{
+	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ */
+void __init xsave_cntxt_init(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	pcntxt_mask = eax + ((u64)edx << 32);
+
+	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+		       pcntxt_mask);
+		BUG();
+	}
+
+	/*
+	 * for now OS knows only about FP/SSE
+	 */
+	pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
+	xsave_init();
+
+	/*
+	 * Recompute the context size for enabled features
+	 */
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	xstate_size = ebx;
+
+	prepare_fx_sw_frame();
+
+	setup_xstate_init();
+
+	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
+	       "cntxt size 0x%x\n",
+	       pcntxt_mask, xstate_size);
+}
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a8..65f0b8a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
 #include <linux/lguest_launcher.h>
 #include <linux/virtio_console.h>
 #include <linux/pm.h>
+#include <asm/apic.h>
 #include <asm/lguest.h>
 #include <asm/paravirt.h>
 #include <asm/param.h>
@@ -783,14 +784,44 @@
  * code qualifies for Advanced.  It will also never interrupt anything.  It
  * does, however, allow us to get through the Linux boot code. */
 #ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, u32 v)
+static void lguest_apic_write(u32 reg, u32 v)
 {
 }
 
-static u32 lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(u32 reg)
 {
 	return 0;
 }
+
+static u64 lguest_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void lguest_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static void lguest_apic_wait_icr_idle(void)
+{
+	return;
+}
+
+static u32 lguest_apic_safe_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static struct apic_ops lguest_basic_apic_ops = {
+	.read = lguest_apic_read,
+	.write = lguest_apic_write,
+	.icr_read = lguest_apic_icr_read,
+	.icr_write = lguest_apic_icr_write,
+	.wait_icr_idle = lguest_apic_wait_icr_idle,
+	.safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+};
 #endif
 
 /* STOP!  Until an interrupt comes in. */
@@ -990,8 +1021,7 @@
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* apic read/write intercepts */
-	pv_apic_ops.apic_write = lguest_apic_write;
-	pv_apic_ops.apic_read = lguest_apic_read;
+	apic_ops = &lguest_basic_apic_ops;
 #endif
 
 	/* time operations */
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 24e6094..9e68075 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -14,6 +14,13 @@
 #include <asm/uaccess.h>
 #include <asm/mmx.h>
 
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Alignment at which movsl is preferred for bulk memory copies.
+ */
+struct movsl_mask movsl_mask __read_mostly;
+#endif
+
 static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
 {
 #ifdef CONFIG_X86_INTEL_USERCOPY
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d31783..3f2cf11 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
+
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
 #define DEFAULT_SEND_IPI	(0)
 #endif
 
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
 
 /**
  * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 0dbd780..4706de7 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -9,4 +9,4 @@
 obj-$(CONFIG_X86_SUMMIT)	+= summit.o
 obj-$(CONFIG_X86_BIGSMP)	+= bigsmp.o
 obj-$(CONFIG_X86_ES7000)	+= es7000.o
-obj-$(CONFIG_X86_ES7000)	+= ../../x86/mach-es7000/
+obj-$(CONFIG_X86_ES7000)	+= ../../x86/es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d7717..df37fc9 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,18 +5,17 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include <asm/mach-bigsmp/mach_apic.h>
-#include <asm/mach-bigsmp/mach_apicdef.h>
-#include <asm/mach-bigsmp/mach_ipi.h>
+#include <asm/bigsmp/apicdef.h>
+#include <linux/smp.h>
+#include <asm/bigsmp/apic.h>
+#include <asm/bigsmp/ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
 
 static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626..520cca0 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,20 +4,19 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-es7000/mach_apicdef.h>
-#include <asm/mach-es7000/mach_apic.h>
-#include <asm/mach-es7000/mach_ipi.h>
-#include <asm/mach-es7000/mach_mpparse.h>
-#include <asm/mach-es7000/mach_wakecpu.h>
+#include <asm/es7000/apicdef.h>
+#include <linux/smp.h>
+#include <asm/es7000/apic.h>
+#include <asm/es7000/ipi.h>
+#include <asm/es7000/mpparse.h>
+#include <asm/es7000/wakecpu.h>
 
 static int probe_es7000(void)
 {
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68..8cf5839 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <linux/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
@@ -12,11 +11,12 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <asm/mach-numaq/mach_apic.h>
-#include <asm/mach-numaq/mach_apicdef.h>
-#include <asm/mach-numaq/mach_ipi.h>
-#include <asm/mach-numaq/mach_mpparse.h>
-#include <asm/mach-numaq/mach_wakecpu.h>
+#include <asm/numaq/apicdef.h>
+#include <linux/smp.h>
+#include <asm/numaq/apic.h>
+#include <asm/numaq/ipi.h>
+#include <asm/numaq/mpparse.h>
+#include <asm/numaq/wakecpu.h>
 #include <asm/numaq.h>
 
 static int mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f..6ad6b67 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,19 +4,18 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-summit/mach_apic.h>
-#include <asm/mach-summit/mach_apicdef.h>
-#include <asm/mach-summit/mach_ipi.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/apicdef.h>
+#include <linux/smp.h>
+#include <asm/summit/apic.h>
+#include <asm/summit/ipi.h>
+#include <asm/summit/mpparse.h>
 
 static int probe_summit(void)
 {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe..8f92cac 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f293..4974e97 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a..cac6da5 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -421,7 +421,7 @@
 	return;
 }
 
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
 
 static int __init early_ioremap_debug_setup(char *str)
 {
@@ -547,7 +547,7 @@
 }
 
 
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
 
 static int __init check_early_ioremap_leak(void)
 {
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index d3e083d..274d060 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -11,6 +11,7 @@
 #include <linux/suspend.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/xcr.h>
 
 static struct saved_context saved_context;
 
@@ -126,6 +127,12 @@
 	if (boot_cpu_has(X86_FEATURE_SEP))
 		enable_sep_cpu();
 
+	/*
+	 * restore XCR0 for xsave capable cpu's.
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
 	fix_processor_context();
 	do_fpu_end();
 	mtrr_ap_init();
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 66bdfb5..e3b6cf7 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -14,6 +14,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mtrr.h>
+#include <asm/xcr.h>
 
 static void fix_processor_context(void);
 
@@ -122,6 +123,12 @@
 	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 
+	/*
+	 * restore XCR0 for xsave capable cpu's.
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
 	fix_processor_context();
 
 	do_fpu_end();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3c..8d28925 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
 #include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
+#include <asm/apic.h>
 #include <asm/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static u32 xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(u32 reg)
 {
 	return 0;
 }
 
-static void xen_apic_write(unsigned long reg, u32 val)
+static void xen_apic_write(u32 reg, u32 val)
 {
 	/* Warn to see if there's any stray references */
 	WARN_ON(1);
 }
+
+static u64 xen_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void xen_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static void xen_apic_wait_icr_idle(void)
+{
+        return;
+}
+
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+        return 0;
+}
+
+static struct apic_ops xen_basic_apic_ops = {
+	.read = xen_apic_read,
+	.write = xen_apic_write,
+	.icr_read = xen_apic_icr_read,
+	.icr_write = xen_apic_icr_write,
+	.wait_icr_idle = xen_apic_wait_icr_idle,
+	.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+};
+
 #endif
 
 static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.apic_write = xen_apic_write,
-	.apic_read = xen_apic_read,
 	.setup_boot_clock = paravirt_nop,
 	.setup_secondary_clock = paravirt_nop,
 	.startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * set up the basic apic ops.
+	 */
+	apic_ops = &xen_basic_apic_ops;
+#endif
+
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
 		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
 		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8c..4b47f4e 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@
 # Build Intel IOMMU support
 obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
+obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+
 #
 # Some architectures use the generic PCI setup functions
 #
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 0000000..bff5c65
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K		(12)
+#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & PAGE_MASK_4K):
+		NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+extern int dmar_disabled;
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..bd2c016 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
  * Author: Shaohua Li <shaohua.li@intel.com>
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
- * This file implements early detection/parsing of DMA Remapping Devices
+ * This file implements early detection/parsing of Remapping Devices
  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  * tables.
+ *
+ * These routines are used by both DMA-remapping and Interrupt-remapping
  */
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/timer.h>
 #include "iova.h"
 #include "intel-iommu.h"
 
@@ -37,7 +40,6 @@
  * these units are not supported by the architecture.
  */
 LIST_HEAD(dmar_drhd_units);
-LIST_HEAD(dmar_rmrr_units);
 
 static struct acpi_table_header * __initdata dmar_tbl;
 
@@ -53,11 +55,6 @@
 		list_add(&drhd->list, &dmar_drhd_units);
 }
 
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
-	list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 					   struct pci_dev **dev, u16 segment)
 {
@@ -172,19 +169,37 @@
 	struct acpi_dmar_hardware_unit *drhd;
 	struct dmar_drhd_unit *dmaru;
 	int ret = 0;
-	static int include_all;
 
 	dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
 	if (!dmaru)
 		return -ENOMEM;
 
+	dmaru->hdr = header;
 	drhd = (struct acpi_dmar_hardware_unit *)header;
 	dmaru->reg_base_addr = drhd->address;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 
+	ret = alloc_iommu(dmaru);
+	if (ret) {
+		kfree(dmaru);
+		return ret;
+	}
+	dmar_register_drhd_unit(dmaru);
+	return 0;
+}
+
+static int __init
+dmar_parse_dev(struct dmar_drhd_unit *dmaru)
+{
+	struct acpi_dmar_hardware_unit *drhd;
+	static int include_all;
+	int ret;
+
+	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
+
 	if (!dmaru->include_all)
 		ret = dmar_parse_dev_scope((void *)(drhd + 1),
-				((void *)drhd) + header->length,
+				((void *)drhd) + drhd->header.length,
 				&dmaru->devices_cnt, &dmaru->devices,
 				drhd->segment);
 	else {
@@ -197,37 +212,59 @@
 		include_all = 1;
 	}
 
-	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
+	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+		list_del(&dmaru->list);
 		kfree(dmaru);
-	else
-		dmar_register_drhd_unit(dmaru);
+	}
 	return ret;
 }
 
+#ifdef CONFIG_DMAR
+LIST_HEAD(dmar_rmrr_units);
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+	list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+
 static int __init
 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 {
 	struct acpi_dmar_reserved_memory *rmrr;
 	struct dmar_rmrr_unit *rmrru;
-	int ret = 0;
 
 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
 	if (!rmrru)
 		return -ENOMEM;
 
+	rmrru->hdr = header;
 	rmrr = (struct acpi_dmar_reserved_memory *)header;
 	rmrru->base_address = rmrr->base_address;
 	rmrru->end_address = rmrr->end_address;
+
+	dmar_register_rmrr_unit(rmrru);
+	return 0;
+}
+
+static int __init
+rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+{
+	struct acpi_dmar_reserved_memory *rmrr;
+	int ret;
+
+	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
-		((void *)rmrr) + header->length,
+		((void *)rmrr) + rmrr->header.length,
 		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
 
-	if (ret || (rmrru->devices_cnt == 0))
+	if (ret || (rmrru->devices_cnt == 0)) {
+		list_del(&rmrru->list);
 		kfree(rmrru);
-	else
-		dmar_register_rmrr_unit(rmrru);
+	}
 	return ret;
 }
+#endif
 
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@
 	}
 }
 
+
 /**
  * parse_dmar_table - parses the DMA reporting table
  */
@@ -284,7 +322,9 @@
 			ret = dmar_parse_one_drhd(entry_header);
 			break;
 		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+#ifdef CONFIG_DMAR
 			ret = dmar_parse_one_rmrr(entry_header);
+#endif
 			break;
 		default:
 			printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@
 	return ret;
 }
 
+int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+			  struct pci_dev *dev)
+{
+	int index;
+
+	while (dev) {
+		for (index = 0; index < cnt; index++)
+			if (dev == devices[index])
+				return 1;
+
+		/* Check our parent */
+		dev = dev->bus->self;
+	}
+
+	return 0;
+}
+
+struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
+{
+	struct dmar_drhd_unit *drhd = NULL;
+
+	list_for_each_entry(drhd, &dmar_drhd_units, list) {
+		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+						drhd->devices_cnt, dev))
+			return drhd;
+	}
+
+	return NULL;
+}
+
+int __init dmar_dev_scope_init(void)
+{
+	struct dmar_drhd_unit *drhd;
+	int ret = -ENODEV;
+
+	for_each_drhd_unit(drhd) {
+		ret = dmar_parse_dev(drhd);
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_DMAR
+	{
+		struct dmar_rmrr_unit *rmrr;
+		for_each_rmrr_units(rmrr) {
+			ret = rmrr_parse_dev(rmrr);
+			if (ret)
+				return ret;
+		}
+	}
+#endif
+
+	return ret;
+}
+
 
 int __init dmar_table_init(void)
 {
-
+	static int dmar_table_initialized;
 	int ret;
 
+	if (dmar_table_initialized)
+		return 0;
+
+	dmar_table_initialized = 1;
+
 	ret = parse_dmar_table();
 	if (ret) {
-		printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+		if (ret != -ENODEV)
+			printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
 		return ret;
 	}
 
@@ -317,9 +419,14 @@
 		return -ENODEV;
 	}
 
+#ifdef CONFIG_DMAR
 	if (list_empty(&dmar_rmrr_units))
 		printk(KERN_INFO PREFIX "No RMRR found\n");
+#endif
 
+#ifdef CONFIG_INTR_REMAP
+	parse_ioapics_under_ir();
+#endif
 	return 0;
 }
 
@@ -341,3 +448,255 @@
 
 	return (ACPI_SUCCESS(status) ? 1 : 0);
 }
+
+void __init detect_intel_iommu(void)
+{
+	int ret;
+
+	ret = early_dmar_detect();
+
+#ifdef CONFIG_DMAR
+	{
+		struct acpi_table_dmar *dmar;
+		/*
+		 * for now we will disable dma-remapping when interrupt
+		 * remapping is enabled.
+		 * When support for queued invalidation for IOTLB invalidation
+		 * is added, we will not need this any more.
+		 */
+		dmar = (struct acpi_table_dmar *) dmar_tbl;
+		if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
+			printk(KERN_INFO
+			       "Queued invalidation will be enabled to support "
+			       "x2apic and Intr-remapping.\n");
+			printk(KERN_INFO
+			       "Disabling IOMMU detection, because of missing "
+			       "queued invalidation support for IOTLB "
+			       "invalidation\n");
+			printk(KERN_INFO
+			       "Use \"nox2apic\", if you want to use Intel "
+			       " IOMMU for DMA-remapping and don't care about "
+			       " x2apic support\n");
+
+			dmar_disabled = 1;
+			return;
+		}
+
+		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
+		    !dmar_disabled)
+			iommu_detected = 1;
+	}
+#endif
+}
+
+
+int alloc_iommu(struct dmar_drhd_unit *drhd)
+{
+	struct intel_iommu *iommu;
+	int map_size;
+	u32 ver;
+	static int iommu_allocated = 0;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	iommu->seq_id = iommu_allocated++;
+
+	iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+	if (!iommu->reg) {
+		printk(KERN_ERR "IOMMU: can't map the region\n");
+		goto error;
+	}
+	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+	/* the registers might be more than one page */
+	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+		cap_max_fault_reg_offset(iommu->cap));
+	map_size = PAGE_ALIGN_4K(map_size);
+	if (map_size > PAGE_SIZE_4K) {
+		iounmap(iommu->reg);
+		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+		if (!iommu->reg) {
+			printk(KERN_ERR "IOMMU: can't map the region\n");
+			goto error;
+		}
+	}
+
+	ver = readl(iommu->reg + DMAR_VER_REG);
+	pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+		drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+		iommu->cap, iommu->ecap);
+
+	spin_lock_init(&iommu->register_lock);
+
+	drhd->iommu = iommu;
+	return 0;
+error:
+	kfree(iommu);
+	return -1;
+}
+
+void free_iommu(struct intel_iommu *iommu)
+{
+	if (!iommu)
+		return;
+
+#ifdef CONFIG_DMAR
+	free_dmar_iommu(iommu);
+#endif
+
+	if (iommu->reg)
+		iounmap(iommu->reg);
+	kfree(iommu);
+}
+
+/*
+ * Reclaim all the submitted descriptors which have completed its work.
+ */
+static inline void reclaim_free_desc(struct q_inval *qi)
+{
+	while (qi->desc_status[qi->free_tail] == QI_DONE) {
+		qi->desc_status[qi->free_tail] = QI_FREE;
+		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
+		qi->free_cnt++;
+	}
+}
+
+/*
+ * Submit the queued invalidation descriptor to the remapping
+ * hardware unit and wait for its completion.
+ */
+void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+{
+	struct q_inval *qi = iommu->qi;
+	struct qi_desc *hw, wait_desc;
+	int wait_index, index;
+	unsigned long flags;
+
+	if (!qi)
+		return;
+
+	hw = qi->desc;
+
+	spin_lock(&qi->q_lock);
+	while (qi->free_cnt < 3) {
+		spin_unlock(&qi->q_lock);
+		cpu_relax();
+		spin_lock(&qi->q_lock);
+	}
+
+	index = qi->free_head;
+	wait_index = (index + 1) % QI_LENGTH;
+
+	qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+
+	hw[index] = *desc;
+
+	wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+
+	hw[wait_index] = wait_desc;
+
+	__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
+	__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
+
+	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
+	qi->free_cnt -= 2;
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+	/*
+	 * update the HW tail register indicating the presence of
+	 * new descriptors.
+	 */
+	writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	while (qi->desc_status[wait_index] != QI_DONE) {
+		spin_unlock(&qi->q_lock);
+		cpu_relax();
+		spin_lock(&qi->q_lock);
+	}
+
+	qi->desc_status[index] = QI_DONE;
+
+	reclaim_free_desc(qi);
+	spin_unlock(&qi->q_lock);
+}
+
+/*
+ * Flush the global interrupt entry cache.
+ */
+void qi_global_iec(struct intel_iommu *iommu)
+{
+	struct qi_desc desc;
+
+	desc.low = QI_IEC_TYPE;
+	desc.high = 0;
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Enable Queued Invalidation interface. This is a must to support
+ * interrupt-remapping. Also used by DMA-remapping, which replaces
+ * register based IOTLB invalidation.
+ */
+int dmar_enable_qi(struct intel_iommu *iommu)
+{
+	u32 cmd, sts;
+	unsigned long flags;
+	struct q_inval *qi;
+
+	if (!ecap_qis(iommu->ecap))
+		return -ENOENT;
+
+	/*
+	 * queued invalidation is already setup and enabled.
+	 */
+	if (iommu->qi)
+		return 0;
+
+	iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+	if (!iommu->qi)
+		return -ENOMEM;
+
+	qi = iommu->qi;
+
+	qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+	if (!qi->desc) {
+		kfree(qi);
+		iommu->qi = 0;
+		return -ENOMEM;
+	}
+
+	qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+	if (!qi->desc_status) {
+		free_page((unsigned long) qi->desc);
+		kfree(qi);
+		iommu->qi = 0;
+		return -ENOMEM;
+	}
+
+	qi->free_head = qi->free_tail = 0;
+	qi->free_cnt = QI_LENGTH;
+
+	spin_lock_init(&qi->q_lock);
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+	/* write zero to the tail reg */
+	writel(0, iommu->reg + DMAR_IQT_REG);
+
+	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+	cmd = iommu->gcmd | DMA_GCMD_QIE;
+	iommu->gcmd |= DMA_GCMD_QIE;
+	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+	/* Make sure hardware complete it */
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	return 0;
+}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60a..389fdd6 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
 
@@ -58,8 +56,6 @@
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 
-static struct intel_iommu *g_iommus;
-
 #define HIGH_WATER_MARK 250
 struct deferred_flush_tables {
 	int next;
@@ -80,7 +76,7 @@
 
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
-static int dmar_disabled;
+int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
@@ -185,13 +181,6 @@
 	kmem_cache_free(iommu_iova_cache, iova);
 }
 
-static inline void __iommu_flush_cache(
-	struct intel_iommu *iommu, void *addr, int size)
-{
-	if (!ecap_coherent(iommu->ecap))
-		clflush_cache_range(addr, size);
-}
-
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
@@ -488,19 +477,6 @@
 	return 0;
 }
 
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
-	cycles_t start_time = get_cycles();\
-	while (1) {\
-		sts = op (iommu->reg + offset);\
-		if (cond)\
-			break;\
-		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
-			panic("DMAR hardware is malfunctioning\n");\
-		cpu_relax();\
-	}\
-}
-
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
 	void *addr;
@@ -990,6 +966,8 @@
 		return -ENOMEM;
 	}
 
+	spin_lock_init(&iommu->lock);
+
 	/*
 	 * if Caching mode is set, then invalid translations are tagged
 	 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@
 		set_bit(0, iommu->domain_ids);
 	return 0;
 }
-static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
-					struct dmar_drhd_unit *drhd)
-{
-	int ret;
-	int map_size;
-	u32 ver;
 
-	iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
-	if (!iommu->reg) {
-		printk(KERN_ERR "IOMMU: can't map the region\n");
-		goto error;
-	}
-	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
-	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
-	/* the registers might be more than one page */
-	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
-		cap_max_fault_reg_offset(iommu->cap));
-	map_size = PAGE_ALIGN_4K(map_size);
-	if (map_size > PAGE_SIZE_4K) {
-		iounmap(iommu->reg);
-		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
-		if (!iommu->reg) {
-			printk(KERN_ERR "IOMMU: can't map the region\n");
-			goto error;
-		}
-	}
-
-	ver = readl(iommu->reg + DMAR_VER_REG);
-	pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
-		drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
-		iommu->cap, iommu->ecap);
-	ret = iommu_init_domains(iommu);
-	if (ret)
-		goto error_unmap;
-	spin_lock_init(&iommu->lock);
-	spin_lock_init(&iommu->register_lock);
-
-	drhd->iommu = iommu;
-	return iommu;
-error_unmap:
-	iounmap(iommu->reg);
-error:
-	kfree(iommu);
-	return NULL;
-}
 
 static void domain_exit(struct dmar_domain *domain);
-static void free_iommu(struct intel_iommu *iommu)
+
+void free_dmar_iommu(struct intel_iommu *iommu)
 {
 	struct dmar_domain *domain;
 	int i;
 
-	if (!iommu)
-		return;
-
 	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
 	for (; i < cap_ndoms(iommu->cap); ) {
 		domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@
 
 	/* free context mapping */
 	free_context_table(iommu);
-
-	if (iommu->reg)
-		iounmap(iommu->reg);
-	kfree(iommu);
 }
 
 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@
 	return NULL;
 }
 
-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
-     struct pci_dev *dev)
-{
-	int index;
-
-	while (dev) {
-		for (index = 0; index < cnt; index++)
-			if (dev == devices[index])
-				return 1;
-
-		/* Check our parent */
-		dev = dev->bus->self;
-	}
-
-	return 0;
-}
-
-static struct dmar_drhd_unit *
-dmar_find_matched_drhd_unit(struct pci_dev *dev)
-{
-	struct dmar_drhd_unit *drhd = NULL;
-
-	list_for_each_entry(drhd, &dmar_drhd_units, list) {
-		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-						drhd->devices_cnt, dev))
-			return drhd;
-	}
-
-	return NULL;
-}
-
 /* domain is initialized */
 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 {
@@ -1729,8 +1625,6 @@
 	 * endfor
 	 */
 	for_each_drhd_unit(drhd) {
-		if (drhd->ignored)
-			continue;
 		g_num_of_iommus++;
 		/*
 		 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@
 		 */
 	}
 
-	g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
-	if (!g_iommus) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
 	deferred_flush = kzalloc(g_num_of_iommus *
 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
 	if (!deferred_flush) {
@@ -1752,16 +1640,15 @@
 		goto error;
 	}
 
-	i = 0;
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
 			continue;
-		iommu = alloc_iommu(&g_iommus[i], drhd);
-		i++;
-		if (!iommu) {
-			ret = -ENOMEM;
+
+		iommu = drhd->iommu;
+
+		ret = iommu_init_domains(iommu);
+		if (ret)
 			goto error;
-		}
 
 		/*
 		 * TBD:
@@ -1845,7 +1732,6 @@
 		iommu = drhd->iommu;
 		free_iommu(iommu);
 	}
-	kfree(g_iommus);
 	return ret;
 }
 
@@ -2002,7 +1888,10 @@
 	/* just flush them all */
 	for (i = 0; i < g_num_of_iommus; i++) {
 		if (deferred_flush[i].next) {
-			iommu_flush_iotlb_global(&g_iommus[i], 0);
+			struct intel_iommu *iommu =
+				deferred_flush[i].domain[0]->iommu;
+
+			iommu_flush_iotlb_global(iommu, 0);
 			for (j = 0; j < deferred_flush[i].next; j++) {
 				__free_iova(&deferred_flush[i].domain[j]->iovad,
 						deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@
 	if (list_size == HIGH_WATER_MARK)
 		flush_unmaps();
 
-	iommu_id = dom->iommu - g_iommus;
+	iommu_id = dom->iommu->seq_id;
+
 	next = deferred_flush[iommu_id].next;
 	deferred_flush[iommu_id].domain[next] = dom;
 	deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,15 +2238,6 @@
 
 }
 
-void __init detect_intel_iommu(void)
-{
-	if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
-		return;
-	if (early_dmar_detect()) {
-		iommu_detected = 1;
-	}
-}
-
 static void __init init_no_remapping_devices(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -2403,12 +2284,19 @@
 {
 	int ret = 0;
 
-	if (no_iommu || swiotlb || dmar_disabled)
-		return -ENODEV;
-
 	if (dmar_table_init())
 		return 	-ENODEV;
 
+	if (dmar_dev_scope_init())
+		return 	-ENODEV;
+
+	/*
+	 * Check the need for DMA-remapping initialization now.
+	 * Above initialization will also be used by Interrupt-remapping.
+	 */
+	if (no_iommu || swiotlb || dmar_disabled)
+		return -ENODEV;
+
 	iommu_init_mempool();
 	dmar_init_reserved_ranges();
 
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad9..2142c01 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
 #include <linux/sysdev.h>
 #include "iova.h"
 #include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K		(12)
-#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+#include <asm/cacheflush.h>
+#include "dma_remapping.h"
 
 /*
  * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
 #define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
 #define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
 #define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
+#define DMAR_IQH_REG	0x80	/* Invalidation queue head register */
+#define DMAR_IQT_REG	0x88	/* Invalidation queue tail register */
+#define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
+#define DMAR_ICS_REG	0x98	/* Invalidation complete status register */
+#define DMAR_IRTA_REG	0xb8    /* Interrupt remapping table addr register */
 
 #define OFFSET_STRIDE		(9)
 /*
@@ -126,6 +120,10 @@
 #define ecap_max_iotlb_offset(e) \
 	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
 #define ecap_coherent(e)	((e) & 0x1)
+#define ecap_qis(e)		((e) & 0x2)
+#define ecap_eim_support(e)	((e >> 4) & 0x1)
+#define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
 
 
 /* IOTLB_REG */
@@ -141,6 +139,17 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
+#define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF	(((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr)	(addr)
+#define DMA_ID_TLB_ADDR_MASK(mask)	(mask)
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM (((u32)1)<<31)
 #define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@
 #define DMA_GCMD_SFL (((u32)1) << 29)
 #define DMA_GCMD_EAFL (((u32)1) << 28)
 #define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
 
 /* GSTS_REG */
 #define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@
 #define DMA_GSTS_FLS (((u32)1) << 29)
 #define DMA_GSTS_AFLS (((u32)1) << 28)
 #define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
 
 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@
 #define dma_frcd_source_id(c) (c & 0xffff)
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & PAGE_MASK_4K;
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+	cycles_t start_time = get_cycles();\
+	while (1) {\
+		sts = op (iommu->reg + offset);\
+		if (cond)\
+			break;\
+		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+			panic("DMAR hardware is malfunctioning\n");\
+		cpu_relax();\
+	}\
 }
 
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & PAGE_MASK_4K):
-		NULL);
-}
+#define QI_LENGTH	256	/* queue length */
 
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
-struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
+enum {
+	QI_FREE,
+	QI_IN_USE,
+	QI_DONE
 };
 
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
+#define QI_CC_TYPE		0x1
+#define QI_IOTLB_TYPE		0x2
+#define QI_DIOTLB_TYPE		0x3
+#define QI_IEC_TYPE		0x4
+#define QI_IWD_TYPE		0x5
+
+#define QI_IEC_SELECTIVE	(((u64)1) << 4)
+#define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m)		(((u64)(m & 0x1f) << 27))
+
+#define QI_IWD_STATUS_DATA(d)	(((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE	(((u64)1) << 5)
+
+struct qi_desc {
+	u64 low, high;
 };
 
-extern int init_dmars(void);
+struct q_inval {
+	spinlock_t      q_lock;
+	struct qi_desc  *desc;          /* invalidation queue */
+	int             *desc_status;   /* desc status */
+	int             free_head;      /* first free entry */
+	int             free_tail;      /* last free entry */
+	int             free_cnt;
+};
+
+#ifdef CONFIG_INTR_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER	8
+#define INTR_REMAP_TABLE_REG_SIZE	0xf
+
+#define INTR_REMAP_TABLE_ENTRIES	65536
+
+struct ir_table {
+	struct irte *base;
+};
+#endif
 
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		cap;
 	u64		ecap;
-	unsigned long 	*domain_ids; /* bitmap of domains */
-	struct dmar_domain **domains; /* ptr to domains */
 	int		seg;
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-	spinlock_t	lock; /* protect context, domain ids */
 	spinlock_t	register_lock; /* protect register handling */
+	int		seq_id;	/* sequence id of the iommu */
+
+#ifdef CONFIG_DMAR
+	unsigned long 	*domain_ids; /* bitmap of domains */
+	struct dmar_domain **domains; /* ptr to domains */
+	spinlock_t	lock; /* protect context, domain ids */
 	struct root_entry *root_entry; /* virtual address */
 
 	unsigned int irq;
 	unsigned char name[7];    /* Device Name */
 	struct msi_msg saved_msg;
 	struct sys_device sysdev;
+#endif
+	struct q_inval  *qi;            /* Queued invalidation info */
+#ifdef CONFIG_INTR_REMAP
+	struct ir_table *ir_table;	/* Interrupt remapping info */
+#endif
 };
 
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
+static inline void __iommu_flush_cache(
+	struct intel_iommu *iommu, void *addr, int size)
 {
-	return;
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(addr, size);
 }
-#endif /* !CONFIG_DMAR_GFX_WA */
 
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int alloc_iommu(struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 #endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 0000000..bb642cc
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
+#include <linux/dmar.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <asm/io_apic.h>
+#include "intel-iommu.h"
+#include "intr_remapping.h"
+
+static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
+static int ir_ioapic_num;
+int intr_remapping_enabled;
+
+static struct {
+	struct intel_iommu *iommu;
+	u16 irte_index;
+	u16 sub_handle;
+	u8  irte_mask;
+} irq_2_iommu[NR_IRQS];
+
+static DEFINE_SPINLOCK(irq_2_ir_lock);
+
+int irq_remapped(int irq)
+{
+	if (irq > NR_IRQS)
+		return 0;
+
+	if (!irq_2_iommu[irq].iommu)
+		return 0;
+
+	return 1;
+}
+
+int get_irte(int irq, struct irte *entry)
+{
+	int index;
+
+	if (!entry || irq > NR_IRQS)
+		return -1;
+
+	spin_lock(&irq_2_ir_lock);
+	if (!irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	*entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+
+	spin_unlock(&irq_2_ir_lock);
+	return 0;
+}
+
+int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+{
+	struct ir_table *table = iommu->ir_table;
+	u16 index, start_index;
+	unsigned int mask = 0;
+	int i;
+
+	if (!count)
+		return -1;
+
+	/*
+	 * start the IRTE search from index 0.
+	 */
+	index = start_index = 0;
+
+	if (count > 1) {
+		count = __roundup_pow_of_two(count);
+		mask = ilog2(count);
+	}
+
+	if (mask > ecap_max_handle_mask(iommu->ecap)) {
+		printk(KERN_ERR
+		       "Requested mask %x exceeds the max invalidation handle"
+		       " mask value %Lx\n", mask,
+		       ecap_max_handle_mask(iommu->ecap));
+		return -1;
+	}
+
+	spin_lock(&irq_2_ir_lock);
+	do {
+		for (i = index; i < index + count; i++)
+			if  (table->base[i].present)
+				break;
+		/* empty index found */
+		if (i == index + count)
+			break;
+
+		index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
+
+		if (index == start_index) {
+			spin_unlock(&irq_2_ir_lock);
+			printk(KERN_ERR "can't allocate an IRTE\n");
+			return -1;
+		}
+	} while (1);
+
+	for (i = index; i < index + count; i++)
+		table->base[i].present = 1;
+
+	irq_2_iommu[irq].iommu = iommu;
+	irq_2_iommu[irq].irte_index =  index;
+	irq_2_iommu[irq].sub_handle = 0;
+	irq_2_iommu[irq].irte_mask = mask;
+
+	spin_unlock(&irq_2_ir_lock);
+
+	return index;
+}
+
+static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
+{
+	struct qi_desc desc;
+
+	desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+		   | QI_IEC_SELECTIVE;
+	desc.high = 0;
+
+	qi_submit_sync(&desc, iommu);
+}
+
+int map_irq_to_irte_handle(int irq, u16 *sub_handle)
+{
+	int index;
+
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	*sub_handle = irq_2_iommu[irq].sub_handle;
+	index = irq_2_iommu[irq].irte_index;
+	spin_unlock(&irq_2_ir_lock);
+	return index;
+}
+
+int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
+{
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	irq_2_iommu[irq].iommu = iommu;
+	irq_2_iommu[irq].irte_index = index;
+	irq_2_iommu[irq].sub_handle = subhandle;
+	irq_2_iommu[irq].irte_mask = 0;
+
+	spin_unlock(&irq_2_ir_lock);
+
+	return 0;
+}
+
+int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
+{
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	irq_2_iommu[irq].iommu = NULL;
+	irq_2_iommu[irq].irte_index = 0;
+	irq_2_iommu[irq].sub_handle = 0;
+	irq_2_iommu[irq].irte_mask = 0;
+
+	spin_unlock(&irq_2_ir_lock);
+
+	return 0;
+}
+
+int modify_irte(int irq, struct irte *irte_modified)
+{
+	int index;
+	struct irte *irte;
+	struct intel_iommu *iommu;
+
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	iommu = irq_2_iommu[irq].iommu;
+
+	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	irte = &iommu->ir_table->base[index];
+
+	set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
+	__iommu_flush_cache(iommu, irte, sizeof(*irte));
+
+	qi_flush_iec(iommu, index, 0);
+
+	spin_unlock(&irq_2_ir_lock);
+	return 0;
+}
+
+int flush_irte(int irq)
+{
+	int index;
+	struct intel_iommu *iommu;
+
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	iommu = irq_2_iommu[irq].iommu;
+
+	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+
+	qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+	spin_unlock(&irq_2_ir_lock);
+
+	return 0;
+}
+
+struct intel_iommu *map_ioapic_to_ir(int apic)
+{
+	int i;
+
+	for (i = 0; i < MAX_IO_APICS; i++)
+		if (ir_ioapic[i].id == apic)
+			return ir_ioapic[i].iommu;
+	return NULL;
+}
+
+struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+{
+	struct dmar_drhd_unit *drhd;
+
+	drhd = dmar_find_matched_drhd_unit(dev);
+	if (!drhd)
+		return NULL;
+
+	return drhd->iommu;
+}
+
+int free_irte(int irq)
+{
+	int index, i;
+	struct irte *irte;
+	struct intel_iommu *iommu;
+
+	spin_lock(&irq_2_ir_lock);
+	if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		return -1;
+	}
+
+	iommu = irq_2_iommu[irq].iommu;
+
+	index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+	irte = &iommu->ir_table->base[index];
+
+	if (!irq_2_iommu[irq].sub_handle) {
+		for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+			set_64bit((unsigned long *)irte, 0);
+		qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+	}
+
+	irq_2_iommu[irq].iommu = NULL;
+	irq_2_iommu[irq].irte_index = 0;
+	irq_2_iommu[irq].sub_handle = 0;
+	irq_2_iommu[irq].irte_mask = 0;
+
+	spin_unlock(&irq_2_ir_lock);
+
+	return 0;
+}
+
+static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+	u64 addr;
+	u32 cmd, sts;
+	unsigned long flags;
+
+	addr = virt_to_phys((void *)iommu->ir_table->base);
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+
+	dmar_writeq(iommu->reg + DMAR_IRTA_REG,
+		    (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
+
+	/* Set interrupt-remapping table pointer */
+	cmd = iommu->gcmd | DMA_GCMD_SIRTP;
+	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+		      readl, (sts & DMA_GSTS_IRTPS), sts);
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	/*
+	 * global invalidation of interrupt entry cache before enabling
+	 * interrupt-remapping.
+	 */
+	qi_global_iec(iommu);
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+
+	/* Enable interrupt-remapping */
+	cmd = iommu->gcmd | DMA_GCMD_IRE;
+	iommu->gcmd |= DMA_GCMD_IRE;
+	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+		      readl, (sts & DMA_GSTS_IRES), sts);
+
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+
+static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+	struct ir_table *ir_table;
+	struct page *pages;
+
+	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
+					     GFP_KERNEL);
+
+	if (!iommu->ir_table)
+		return -ENOMEM;
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+
+	if (!pages) {
+		printk(KERN_ERR "failed to allocate pages of order %d\n",
+		       INTR_REMAP_PAGE_ORDER);
+		kfree(iommu->ir_table);
+		return -ENOMEM;
+	}
+
+	ir_table->base = page_address(pages);
+
+	iommu_set_intr_remapping(iommu, mode);
+	return 0;
+}
+
+int __init enable_intr_remapping(int eim)
+{
+	struct dmar_drhd_unit *drhd;
+	int setup = 0;
+
+	/*
+	 * check for the Interrupt-remapping support
+	 */
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		if (eim && !ecap_eim_support(iommu->ecap)) {
+			printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
+			       " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
+			return -1;
+		}
+	}
+
+	/*
+	 * Enable queued invalidation for all the DRHD's.
+	 */
+	for_each_drhd_unit(drhd) {
+		int ret;
+		struct intel_iommu *iommu = drhd->iommu;
+		ret = dmar_enable_qi(iommu);
+
+		if (ret) {
+			printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
+			       " invalidation, ecap %Lx, ret %d\n",
+			       drhd->reg_base_addr, iommu->ecap, ret);
+			return -1;
+		}
+	}
+
+	/*
+	 * Setup Interrupt-remapping for all the DRHD's now.
+	 */
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		if (setup_intr_remapping(iommu, eim))
+			goto error;
+
+		setup = 1;
+	}
+
+	if (!setup)
+		goto error;
+
+	intr_remapping_enabled = 1;
+
+	return 0;
+
+error:
+	/*
+	 * handle error condition gracefully here!
+	 */
+	return -1;
+}
+
+static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
+				 struct intel_iommu *iommu)
+{
+	struct acpi_dmar_hardware_unit *drhd;
+	struct acpi_dmar_device_scope *scope;
+	void *start, *end;
+
+	drhd = (struct acpi_dmar_hardware_unit *)header;
+
+	start = (void *)(drhd + 1);
+	end = ((void *)drhd) + header->length;
+
+	while (start < end) {
+		scope = start;
+		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
+			if (ir_ioapic_num == MAX_IO_APICS) {
+				printk(KERN_WARNING "Exceeded Max IO APICS\n");
+				return -1;
+			}
+
+			printk(KERN_INFO "IOAPIC id %d under DRHD base"
+			       " 0x%Lx\n", scope->enumeration_id,
+			       drhd->address);
+
+			ir_ioapic[ir_ioapic_num].iommu = iommu;
+			ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
+			ir_ioapic_num++;
+		}
+		start += scope->length;
+	}
+
+	return 0;
+}
+
+/*
+ * Finds the assocaition between IOAPIC's and its Interrupt-remapping
+ * hardware unit.
+ */
+int __init parse_ioapics_under_ir(void)
+{
+	struct dmar_drhd_unit *drhd;
+	int ir_supported = 0;
+
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (ecap_ir_support(iommu->ecap)) {
+			if (ir_parse_ioapic_scope(drhd->hdr, iommu))
+				return -1;
+
+			ir_supported = 1;
+		}
+	}
+
+	if (ir_supported && ir_ioapic_num != nr_ioapics) {
+		printk(KERN_WARNING
+		       "Not all IO-APIC's listed under remapping hardware\n");
+		return -1;
+	}
+
+	return ir_supported;
+}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 0000000..05f2635
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
+#include "intel-iommu.h"
+
+struct ioapic_scope {
+	struct intel_iommu *iommu;
+	unsigned int id;
+};
+
+#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h
index 714207a..f570576 100644
--- a/include/asm-x86/a.out-core.h
+++ b/include/asm-x86/a.out-core.h
@@ -9,8 +9,8 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
+#ifndef ASM_X86__A_OUT_CORE_H
+#define ASM_X86__A_OUT_CORE_H
 
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
@@ -70,4 +70,4 @@
 
 #endif /* CONFIG_X86_32 */
 #endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
+#endif /* ASM_X86__A_OUT_CORE_H */
diff --git a/include/asm-x86/a.out.h b/include/asm-x86/a.out.h
index 4684f97..0948748 100644
--- a/include/asm-x86/a.out.h
+++ b/include/asm-x86/a.out.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_A_OUT_H
-#define _ASM_X86_A_OUT_H
+#ifndef ASM_X86__A_OUT_H
+#define ASM_X86__A_OUT_H
 
 struct exec
 {
@@ -17,4 +17,4 @@
 #define N_DRSIZE(a)	((a).a_drsize)
 #define N_SYMSIZE(a)	((a).a_syms)
 
-#endif /* _ASM_X86_A_OUT_H */
+#endif /* ASM_X86__A_OUT_H */
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 635d764..bd76299 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ACPI_H
-#define _ASM_X86_ACPI_H
+#ifndef ASM_X86__ACPI_H
+#define ASM_X86__ACPI_H
 
 /*
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -173,4 +173,4 @@
 
 #define acpi_unlazy_tlb(x)	leave_mm(x)
 
-#endif /*__X86_ASM_ACPI_H*/
+#endif /* ASM_X86__ACPI_H */
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index e4004a9..3617fd4 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AGP_H
-#define _ASM_X86_AGP_H
+#ifndef ASM_X86__AGP_H
+#define ASM_X86__AGP_H
 
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -32,4 +32,4 @@
 #define free_gatt_pages(table, order)	\
 	free_pages((unsigned long)(table), (order))
 
-#endif
+#endif /* ASM_X86__AGP_H */
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index f6aa18e..22d3c98 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ALTERNATIVE_H
-#define _ASM_X86_ALTERNATIVE_H
+#ifndef ASM_X86__ALTERNATIVE_H
+#define ASM_X86__ALTERNATIVE_H
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -180,4 +180,4 @@
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-#endif /* _ASM_X86_ALTERNATIVE_H */
+#endif /* ASM_X86__ALTERNATIVE_H */
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
index 30a1204..783f43e 100644
--- a/include/asm-x86/amd_iommu.h
+++ b/include/asm-x86/amd_iommu.h
@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_AMD_IOMMU_H
-#define _ASM_X86_AMD_IOMMU_H
+#ifndef ASM_X86__AMD_IOMMU_H
+#define ASM_X86__AMD_IOMMU_H
 
 #ifdef CONFIG_AMD_IOMMU
 extern int amd_iommu_init(void);
@@ -29,4 +29,4 @@
 static inline void amd_iommu_detect(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_H */
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index dcc8120..1ffa4e5 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef __AMD_IOMMU_TYPES_H__
-#define __AMD_IOMMU_TYPES_H__
+#ifndef ASM_X86__AMD_IOMMU_TYPES_H
+#define ASM_X86__AMD_IOMMU_TYPES_H
 
 #include <linux/types.h>
 #include <linux/list.h>
@@ -341,4 +341,4 @@
 	return (((u16)bus) << 8) | devfn;
 }
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_TYPES_H */
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 133c998..1311c82 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APIC_H
-#define _ASM_X86_APIC_H
+#ifndef ASM_X86__APIC_H
+#define ASM_X86__APIC_H
 
 #include <linux/pm.h>
 #include <linux/delay.h>
@@ -9,6 +9,8 @@
 #include <asm/apicdef.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
@@ -47,15 +49,18 @@
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define apic_write native_apic_write
-#define apic_read native_apic_read
 #define setup_boot_clock setup_boot_APIC_clock
 #define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 extern int is_vsmp_box(void);
+extern void xapic_wait_icr_idle(void);
+extern u32 safe_xapic_wait_icr_idle(void);
+extern u64 xapic_icr_read(void);
+extern void xapic_icr_write(u32, u32);
+extern int setup_profiling_timer(unsigned int);
 
-static inline void native_apic_write(unsigned long reg, u32 v)
+static inline void native_apic_mem_write(u32 reg, u32 v)
 {
 	volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
@@ -64,15 +69,68 @@
 		       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
 
-static inline u32 native_apic_read(unsigned long reg)
+static inline u32 native_apic_mem_read(u32 reg)
 {
 	return *((volatile u32 *)(APIC_BASE + reg));
 }
 
-extern void apic_wait_icr_idle(void);
-extern u32 safe_apic_wait_icr_idle(void);
+static inline void native_apic_msr_write(u32 reg, u32 v)
+{
+	if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
+	    reg == APIC_LVR)
+		return;
+
+	wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
+}
+
+static inline u32 native_apic_msr_read(u32 reg)
+{
+	u32 low, high;
+
+	if (reg == APIC_DFR)
+		return -1;
+
+	rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
+	return low;
+}
+
+#ifndef CONFIG_X86_32
+extern int x2apic, x2apic_preenabled;
+extern void check_x2apic(void);
+extern void enable_x2apic(void);
+extern void enable_IR_x2apic(void);
+extern void x2apic_icr_write(u32 low, u32 id);
+#endif
+
+struct apic_ops {
+	u32 (*read)(u32 reg);
+	void (*write)(u32 reg, u32 v);
+	u64 (*icr_read)(void);
+	void (*icr_write)(u32 low, u32 high);
+	void (*wait_icr_idle)(void);
+	u32 (*safe_wait_icr_idle)(void);
+};
+
+extern struct apic_ops *apic_ops;
+
+#define apic_read (apic_ops->read)
+#define apic_write (apic_ops->write)
+#define apic_icr_read (apic_ops->icr_read)
+#define apic_icr_write (apic_ops->icr_write)
+#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
+#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
+
 extern int get_physical_broadcast(void);
 
+#ifdef CONFIG_X86_64
+static inline void ack_x2APIC_irq(void)
+{
+	/* Docs say use 0 for future compatibility */
+	native_apic_msr_write(APIC_EOI, 0);
+}
+#endif
+
+
 static inline void ack_APIC_irq(void)
 {
 	/*
@@ -128,4 +186,4 @@
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
-#endif /* __ASM_APIC_H */
+#endif /* ASM_X86__APIC_H */
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 6b9008c..b922c85 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APICDEF_H
-#define _ASM_X86_APICDEF_H
+#ifndef ASM_X86__APICDEF_H
+#define ASM_X86__APICDEF_H
 
 /*
  * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
@@ -105,6 +105,7 @@
 #define	APIC_TMICT	0x380
 #define	APIC_TMCCT	0x390
 #define	APIC_TDCR	0x3E0
+#define APIC_SELF_IPI	0x3F0
 #define		APIC_TDR_DIV_TMBASE	(1 << 2)
 #define		APIC_TDR_DIV_1		0xB
 #define		APIC_TDR_DIV_2		0x0
@@ -128,6 +129,8 @@
 #define	APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+#define APIC_BASE_MSR	0x800
+#define X2APIC_ENABLE	(1UL << 10)
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
@@ -411,4 +414,4 @@
 #else
  #define BAD_APICID 0xFFFFu
 #endif
-#endif
+#endif /* ASM_X86__APICDEF_H */
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 8411750..72adc3a 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_ARCH_HOOKS_H
-#define _ASM_ARCH_HOOKS_H
+#ifndef ASM_X86__ARCH_HOOKS_H
+#define ASM_X86__ARCH_HOOKS_H
 
 #include <linux/interrupt.h>
 
@@ -25,4 +25,4 @@
 extern void time_init_hook(void);
 extern void mca_nmi_hook(void);
 
-#endif
+#endif /* ASM_X86__ARCH_HOOKS_H */
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 9722032..2439ae4 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ASM_H
-#define _ASM_X86_ASM_H
+#ifndef ASM_X86__ASM_H
+#define ASM_X86__ASM_H
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
@@ -39,4 +39,4 @@
 	_ASM_PTR #from "," #to "\n" \
 	" .previous\n"
 
-#endif /* _ASM_X86_ASM_H */
+#endif /* ASM_X86__ASM_H */
diff --git a/include/asm-x86/atomic_32.h b/include/asm-x86/atomic_32.h
index 21a4825..14d3f0b 100644
--- a/include/asm-x86/atomic_32.h
+++ b/include/asm-x86/atomic_32.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
+#ifndef ASM_X86__ATOMIC_32_H
+#define ASM_X86__ATOMIC_32_H
 
 #include <linux/compiler.h>
 #include <asm/processor.h>
@@ -256,4 +256,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_32_H */
diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 91c7d03..2cb218c 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_X86_64_ATOMIC__
-#define __ARCH_X86_64_ATOMIC__
+#ifndef ASM_X86__ATOMIC_64_H
+#define ASM_X86__ATOMIC_64_H
 
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
@@ -470,4 +470,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_64_H */
diff --git a/include/asm-x86/auxvec.h b/include/asm-x86/auxvec.h
index 87f5e6d..12c7cac 100644
--- a/include/asm-x86/auxvec.h
+++ b/include/asm-x86/auxvec.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AUXVEC_H
-#define _ASM_X86_AUXVEC_H
+#ifndef ASM_X86__AUXVEC_H
+#define ASM_X86__AUXVEC_H
 /*
  * Architecture-neutral AT_ values in 0-17, leave some room
  * for more of them, start the x86-specific ones at 32.
@@ -9,4 +9,4 @@
 #endif
 #define AT_SYSINFO_EHDR		33
 
-#endif
+#endif /* ASM_X86__AUXVEC_H */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/bigsmp/apic.h
similarity index 98%
rename from include/asm-x86/mach-bigsmp/mach_apic.h
rename to include/asm-x86/bigsmp/apic.h
index c3b9dc6..0a9cd7c 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -11,7 +11,7 @@
 
 /* Round robin the irqs amoung the online cpus */
 static inline cpumask_t target_cpus(void)
-{ 
+{
 	static unsigned long cpu = NR_CPUS;
 	do {
 		if (cpu >= NR_CPUS)
@@ -23,7 +23,7 @@
 }
 
 #undef APIC_DEST_LOGICAL
-#define APIC_DEST_LOGICAL 	0
+#define APIC_DEST_LOGICAL	0
 #define TARGET_CPUS		(target_cpus())
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/bigsmp/apicdef.h
similarity index 73%
rename from include/asm-x86/mach-bigsmp/mach_apicdef.h
rename to include/asm-x86/bigsmp/apicdef.h
index a58ab5a..392c3f5 100644
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ b/include/asm-x86/bigsmp/apicdef.h
@@ -3,10 +3,10 @@
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
-static inline unsigned get_apic_id(unsigned long x) 
-{ 
+static inline unsigned get_apic_id(unsigned long x)
+{
 	return (((x)>>24)&0xFF);
-} 
+}
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/bigsmp/ipi.h
similarity index 100%
rename from include/asm-x86/mach-bigsmp/mach_ipi.h
rename to include/asm-x86/bigsmp/ipi.h
diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index 0033e50..ec42ed8 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_BIOS_EBDA_H
-#define _MACH_BIOS_EBDA_H
+#ifndef ASM_X86__BIOS_EBDA_H
+#define ASM_X86__BIOS_EBDA_H
 
 #include <asm/io.h>
 
@@ -16,4 +16,4 @@
 
 void reserve_ebda_region(void);
 
-#endif /* _MACH_BIOS_EBDA_H */
+#endif /* ASM_X86__BIOS_EBDA_H */
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index cfb2b64..61989b9 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BITOPS_H
-#define _ASM_X86_BITOPS_H
+#ifndef ASM_X86__BITOPS_H
+#define ASM_X86__BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -458,4 +458,4 @@
 #include <asm-generic/bitops/minix.h>
 
 #endif /* __KERNEL__ */
-#endif	/* _ASM_X86_BITOPS_H */
+#endif /* ASM_X86__BITOPS_H */
diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h
index 2faed7e..825de5d 100644
--- a/include/asm-x86/boot.h
+++ b/include/asm-x86/boot.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOT_H
-#define _ASM_BOOT_H
+#ifndef ASM_X86__BOOT_H
+#define ASM_X86__BOOT_H
 
 /* Don't touch these, unless you really know what you're doing. */
 #define DEF_INITSEG	0x9000
@@ -25,4 +25,4 @@
 #define BOOT_STACK_SIZE	0x1000
 #endif
 
-#endif /* _ASM_BOOT_H */
+#endif /* ASM_X86__BOOT_H */
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index ae22bdf..ccf027e 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOTPARAM_H
-#define _ASM_BOOTPARAM_H
+#ifndef ASM_X86__BOOTPARAM_H
+#define ASM_X86__BOOTPARAM_H
 
 #include <linux/types.h>
 #include <linux/screen_info.h>
@@ -108,4 +108,4 @@
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-#endif /* _ASM_BOOTPARAM_H */
+#endif /* ASM_X86__BOOTPARAM_H */
diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h
index b69aa64..91ad43a 100644
--- a/include/asm-x86/bug.h
+++ b/include/asm-x86/bug.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BUG_H
-#define _ASM_X86_BUG_H
+#ifndef ASM_X86__BUG_H
+#define ASM_X86__BUG_H
 
 #ifdef CONFIG_BUG
 #define HAVE_ARCH_BUG
@@ -36,4 +36,4 @@
 #endif /* !CONFIG_BUG */
 
 #include <asm-generic/bug.h>
-#endif
+#endif /* ASM_X86__BUG_H */
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 021cbdd..ae514c7 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -1,7 +1,12 @@
-#ifndef _ASM_X86_BUGS_H
-#define _ASM_X86_BUGS_H
+#ifndef ASM_X86__BUGS_H
+#define ASM_X86__BUGS_H
 
 extern void check_bugs(void);
-int ppro_with_ram_bug(void);
 
-#endif /* _ASM_X86_BUGS_H */
+#ifdef CONFIG_CPU_SUP_INTEL_32
+int ppro_with_ram_bug(void);
+#else
+static inline int ppro_with_ram_bug(void) { return 0; }
+#endif
+
+#endif /* ASM_X86__BUGS_H */
diff --git a/include/asm-x86/byteorder.h b/include/asm-x86/byteorder.h
index e02ae2d..722f27d 100644
--- a/include/asm-x86/byteorder.h
+++ b/include/asm-x86/byteorder.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BYTEORDER_H
-#define _ASM_X86_BYTEORDER_H
+#ifndef ASM_X86__BYTEORDER_H
+#define ASM_X86__BYTEORDER_H
 
 #include <asm/types.h>
 #include <linux/compiler.h>
@@ -78,4 +78,4 @@
 
 #include <linux/byteorder/little_endian.h>
 
-#endif /* _ASM_X86_BYTEORDER_H */
+#endif /* ASM_X86__BYTEORDER_H */
diff --git a/include/asm-x86/cache.h b/include/asm-x86/cache.h
index 1e0bac8..ea3f1cc 100644
--- a/include/asm-x86/cache.h
+++ b/include/asm-x86/cache.h
@@ -1,5 +1,5 @@
-#ifndef _ARCH_X86_CACHE_H
-#define _ARCH_X86_CACHE_H
+#ifndef ASM_X86__CACHE_H
+#define ASM_X86__CACHE_H
 
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
@@ -17,4 +17,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__CACHE_H */
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index f4c0ab5..59859cb 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_CACHEFLUSH_H
-#define _ASM_X86_CACHEFLUSH_H
+#ifndef ASM_X86__CACHEFLUSH_H
+#define ASM_X86__CACHEFLUSH_H
 
 /* Keep includes the same across arches.  */
 #include <linux/mm.h>
@@ -112,4 +112,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__CACHEFLUSH_H */
diff --git a/include/asm-x86/calgary.h b/include/asm-x86/calgary.h
index 67f6040..933fd27 100644
--- a/include/asm-x86/calgary.h
+++ b/include/asm-x86/calgary.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_CALGARY_H
-#define _ASM_X86_64_CALGARY_H
+#ifndef ASM_X86__CALGARY_H
+#define ASM_X86__CALGARY_H
 
 #include <linux/spinlock.h>
 #include <linux/device.h>
@@ -69,4 +69,4 @@
 static inline void detect_calgary(void) { return; }
 #endif
 
-#endif /* _ASM_X86_64_CALGARY_H */
+#endif /* ASM_X86__CALGARY_H */
diff --git a/include/asm-x86/checksum_32.h b/include/asm-x86/checksum_32.h
index 52bbb0d..d041e8c 100644
--- a/include/asm-x86/checksum_32.h
+++ b/include/asm-x86/checksum_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_CHECKSUM_H
-#define _I386_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_32_H
+#define ASM_X86__CHECKSUM_32_H
 
 #include <linux/in6.h>
 
@@ -186,4 +186,4 @@
 	return (__force __wsum)-1; /* invalid checksum */
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_32_H */
diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h
index 8bd861c..110f403 100644
--- a/include/asm-x86/checksum_64.h
+++ b/include/asm-x86/checksum_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_CHECKSUM_H
-#define _X86_64_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_64_H
+#define ASM_X86__CHECKSUM_64_H
 
 /*
  * Checksums for x86-64
@@ -188,4 +188,4 @@
 	return a;
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_64_H */
diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index bf5a69d..0622e45 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_32_H
+#define ASM_X86__CMPXCHG_32_H
 
 #include <linux/bitops.h> /* for LOCK_PREFIX */
 
@@ -341,4 +341,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__CMPXCHG_32_H */
diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h
index 17463cc..63c1a5e 100644
--- a/include/asm-x86/cmpxchg_64.h
+++ b/include/asm-x86/cmpxchg_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_64_H
+#define ASM_X86__CMPXCHG_64_H
 
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
@@ -182,4 +182,4 @@
 	cmpxchg_local((ptr), (o), (n));					\
 })
 
-#endif
+#endif /* ASM_X86__CMPXCHG_64_H */
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 1793ac3..6732b15 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_COMPAT_H
-#define _ASM_X86_64_COMPAT_H
+#ifndef ASM_X86__COMPAT_H
+#define ASM_X86__COMPAT_H
 
 /*
  * Architecture specific compatibility types
@@ -215,4 +215,4 @@
 	return current_thread_info()->status & TS_COMPAT;
 }
 
-#endif /* _ASM_X86_64_COMPAT_H */
+#endif /* ASM_X86__COMPAT_H */
diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h
index 73f2ea8..83a1150 100644
--- a/include/asm-x86/cpu.h
+++ b/include/asm-x86/cpu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_CPU_H_
-#define _ASM_I386_CPU_H_
+#ifndef ASM_X86__CPU_H
+#define ASM_X86__CPU_H
 
 #include <linux/device.h>
 #include <linux/cpu.h>
@@ -17,4 +17,4 @@
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
-#endif /* _ASM_I386_CPU_H_ */
+#endif /* ASM_X86__CPU_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 762f6a6..6dfa2b3 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -1,20 +1,26 @@
 /*
  * Defines x86 CPU feature bits
  */
-#ifndef _ASM_X86_CPUFEATURE_H
-#define _ASM_X86_CPUFEATURE_H
+#ifndef ASM_X86__CPUFEATURE_H
+#define ASM_X86__CPUFEATURE_H
 
 #include <asm/required-features.h>
 
 #define NCAPINTS	8	/* N 32-bit words worth of info */
 
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name.  If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
 /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
 #define X86_FEATURE_FPU		(0*32+ 0) /* Onboard FPU */
 #define X86_FEATURE_VME		(0*32+ 1) /* Virtual Mode Extensions */
 #define X86_FEATURE_DE		(0*32+ 2) /* Debugging Extensions */
 #define X86_FEATURE_PSE		(0*32+ 3) /* Page Size Extensions */
 #define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
 #define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
 #define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
@@ -23,22 +29,23 @@
 #define X86_FEATURE_MTRR	(0*32+12) /* Memory Type Range Registers */
 #define X86_FEATURE_PGE		(0*32+13) /* Page Global Enable */
 #define X86_FEATURE_MCA		(0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	(0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_CMOV	(0*32+15) /* CMOV instructions */
+					  /* (plus FCMOVcc, FCOMI with FPU) */
 #define X86_FEATURE_PAT		(0*32+16) /* Page Attribute Table */
 #define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
+#define X86_FEATURE_CLFLSH	(0*32+19) /* "clflush" CLFLUSH instruction */
+#define X86_FEATURE_DS		(0*32+21) /* "dts" Debug Store */
 #define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
-					  /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
+#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM		(0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2	(0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP	(0*32+27) /* "ss" CPU self snoop */
 #define X86_FEATURE_HT		(0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
+#define X86_FEATURE_ACC		(0*32+29) /* "tm" Automatic clock control */
 #define X86_FEATURE_IA64	(0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE		(0*32+31) /* Pending Break Enable */
 
 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
@@ -46,7 +53,8 @@
 #define X86_FEATURE_MP		(1*32+19) /* MP Capable. */
 #define X86_FEATURE_NX		(1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_GBPAGES	(1*32+26) /* GB pages */
+#define X86_FEATURE_FXSR_OPT	(1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES	(1*32+26) /* "pdpe1gb" GB pages */
 #define X86_FEATURE_RDTSCP	(1*32+27) /* RDTSCP */
 #define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
 #define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
@@ -64,51 +72,76 @@
 #define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
 #define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
 /* cpu types for specific tunings: */
-#define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3		(3*32+ 6) /* P3 */
-#define X86_FEATURE_P4		(3*32+ 7) /* P4 */
+#define X86_FEATURE_K8		(3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7		(3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3		(3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4		(3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	(3*32+14)  /* syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	(3*32+15)  /* sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
-#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
-#define X86_FEATURE_11AP	(3*32+19)  /* Bad local APIC aka 11AP */
+#define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32	(3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
+#define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
-#define X86_FEATURE_MWAIT	(4*32+ 3) /* Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	(4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ	(4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64	(4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT	(4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL	(4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX		(4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX		(4*32+ 6) /* Safer mode */
 #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
 #define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_CID		(4*32+10) /* Context ID */
+#define X86_FEATURE_FMA		(4*32+12) /* Fused multiply-add */
 #define X86_FEATURE_CX16	(4*32+13) /* CMPXCHG16B */
 #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM	(4*32+15) /* Performance Capabilities */
 #define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_2	(4*32+20) /* Streaming SIMD Extensions-4.2 */
+#define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
+#define X86_FEATURE_AES		(4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
-#define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
-#define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
-#define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
+#define X86_FEATURE_XSTORE	(5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT	(5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* "ace_en" on-CPU crypto enabled */
 #define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
 #define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
-#define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
+#define X86_FEATURE_PHE		(5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN	(5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM		(5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN	(5*32+13) /* PMM enabled */
 
 /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
 #define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */
 #define X86_FEATURE_CMP_LEGACY	(6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_IBS		(6*32+ 10) /* Instruction Based Sampling */
+#define X86_FEATURE_SVM		(6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC	(6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY	(6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM		(6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A	(6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW	(6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS		(6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_SSE5	(6*32+11) /* SSE-5 */
+#define X86_FEATURE_SKINIT	(6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT		(6*32+13) /* Watchdog timer */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -149,7 +182,7 @@
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
-	clear_bit(bit, (unsigned long *)cleared_cpu_caps); 	\
+	clear_bit(bit, (unsigned long *)cleared_cpu_caps);	\
 } while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
@@ -190,7 +223,10 @@
 #define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
 #define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
+#define cpu_has_xmm4_1		boot_cpu_has(X86_FEATURE_XMM4_1)
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
+#define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
+#define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
@@ -222,4 +258,4 @@
 
 #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
 
-#endif /* _ASM_X86_CPUFEATURE_H */
+#endif /* ASM_X86__CPUFEATURE_H */
diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h
index 7515c19..a863ead 100644
--- a/include/asm-x86/current.h
+++ b/include/asm-x86/current.h
@@ -1,5 +1,5 @@
-#ifndef _X86_CURRENT_H
-#define _X86_CURRENT_H
+#ifndef ASM_X86__CURRENT_H
+#define ASM_X86__CURRENT_H
 
 #ifdef CONFIG_X86_32
 #include <linux/compiler.h>
@@ -36,4 +36,4 @@
 
 #define current get_current()
 
-#endif /* X86_CURRENT_H */
+#endif /* ASM_X86__CURRENT_H */
diff --git a/include/asm-x86/debugreg.h b/include/asm-x86/debugreg.h
index c6344d5..ecb6907 100644
--- a/include/asm-x86/debugreg.h
+++ b/include/asm-x86/debugreg.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEBUGREG_H
-#define _ASM_X86_DEBUGREG_H
+#ifndef ASM_X86__DEBUGREG_H
+#define ASM_X86__DEBUGREG_H
 
 
 /* Indicate the register numbers for a number of the specific
@@ -67,4 +67,4 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
-#endif
+#endif /* ASM_X86__DEBUGREG_H */
diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h
index 409a649..8a0da95 100644
--- a/include/asm-x86/delay.h
+++ b/include/asm-x86/delay.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DELAY_H
-#define _ASM_X86_DELAY_H
+#ifndef ASM_X86__DELAY_H
+#define ASM_X86__DELAY_H
 
 /*
  * Copyright (C) 1993 Linus Torvalds
@@ -28,4 +28,4 @@
 
 void use_tsc_delay(void);
 
-#endif /* _ASM_X86_DELAY_H */
+#endif /* ASM_X86__DELAY_H */
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc..b73fea5 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_DESC_H_
-#define _ASM_DESC_H_
+#ifndef ASM_X86__DESC_H
+#define ASM_X86__DESC_H
 
 #ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
@@ -397,4 +397,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_H */
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index f7bacf3..b881db6 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -1,6 +1,6 @@
 /* Written 2000 by Andi Kleen */
-#ifndef __ARCH_DESC_DEFS_H
-#define __ARCH_DESC_DEFS_H
+#ifndef ASM_X86__DESC_DEFS_H
+#define ASM_X86__DESC_DEFS_H
 
 /*
  * Segment descriptor structure definitions, usable from both x86_64 and i386
@@ -92,4 +92,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_DEFS_H */
diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h
index 3c034f4..1bece04 100644
--- a/include/asm-x86/device.h
+++ b/include/asm-x86/device.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEVICE_H
-#define _ASM_X86_DEVICE_H
+#ifndef ASM_X86__DEVICE_H
+#define ASM_X86__DEVICE_H
 
 struct dev_archdata {
 #ifdef CONFIG_ACPI
@@ -13,4 +13,4 @@
 #endif
 };
 
-#endif /* _ASM_X86_DEVICE_H */
+#endif /* ASM_X86__DEVICE_H */
diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h
index 9a2d644..f9530f2 100644
--- a/include/asm-x86/div64.h
+++ b/include/asm-x86/div64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DIV64_H
-#define _ASM_X86_DIV64_H
+#ifndef ASM_X86__DIV64_H
+#define ASM_X86__DIV64_H
 
 #ifdef CONFIG_X86_32
 
@@ -57,4 +57,4 @@
 # include <asm-generic/div64.h>
 #endif /* CONFIG_X86_32 */
 
-#endif /* _ASM_X86_DIV64_H */
+#endif /* ASM_X86__DIV64_H */
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index ad9cd6d..5d200e7 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_DMA_MAPPING_H_
-#define _ASM_DMA_MAPPING_H_
+#ifndef ASM_X86__DMA_MAPPING_H
+#define ASM_X86__DMA_MAPPING_H
 
 /*
  * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
@@ -250,4 +250,4 @@
 #define dma_is_consistent(d, h)	(1)
 
 #include <asm-generic/dma-coherent.h>
-#endif
+#endif /* ASM_X86__DMA_MAPPING_H */
diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h
index ca1098a..c9f7a4e 100644
--- a/include/asm-x86/dma.h
+++ b/include/asm-x86/dma.h
@@ -5,8 +5,8 @@
  * and John Boyd, Nov. 1992.
  */
 
-#ifndef _ASM_X86_DMA_H
-#define _ASM_X86_DMA_H
+#ifndef ASM_X86__DMA_H
+#define ASM_X86__DMA_H
 
 #include <linux/spinlock.h>	/* And spinlocks */
 #include <asm/io.h>		/* need byte IO */
@@ -315,4 +315,4 @@
 #define isa_dma_bridge_buggy	(0)
 #endif
 
-#endif /* _ASM_X86_DMA_H */
+#endif /* ASM_X86__DMA_H */
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 58a8657..1cff6fe 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DMI_H
-#define _ASM_X86_DMI_H
+#ifndef ASM_X86__DMI_H
+#define ASM_X86__DMI_H
 
 #include <asm/io.h>
 
@@ -23,4 +23,4 @@
 #define dmi_ioremap early_ioremap
 #define dmi_iounmap early_iounmap
 
-#endif
+#endif /* ASM_X86__DMI_H */
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index 7881368..6b27c68 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -17,8 +17,8 @@
  * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
  */
 
-#ifndef _ASM_X86_DS_H
-#define _ASM_X86_DS_H
+#ifndef ASM_X86__DS_H
+#define ASM_X86__DS_H
 
 #include <linux/types.h>
 #include <linux/init.h>
@@ -69,4 +69,4 @@
 extern unsigned long ds_debugctl_mask(void);
 extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
 
-#endif /* _ASM_X86_DS_H */
+#endif /* ASM_X86__DS_H */
diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index 738bb9f..21d1bc3 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -1,5 +1,5 @@
-#ifndef _DWARF2_H
-#define _DWARF2_H
+#ifndef ASM_X86__DWARF2_H
+#define ASM_X86__DWARF2_H
 
 #ifndef __ASSEMBLY__
 #warning "asm/dwarf2.h should be only included in pure assembly files"
@@ -58,4 +58,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__DWARF2_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 16a31e2..f52daf1 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_E820_H
-#define __ASM_E820_H
+#ifndef ASM_X86__E820_H
+#define ASM_X86__E820_H
 #define E820MAP	0x2d0		/* our map */
 #define E820MAX	128		/* number of entries in E820MAP */
 
@@ -64,6 +64,7 @@
 extern struct e820map e820;
 extern struct e820map e820_saved;
 
+extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void e820_add_region(u64 start, u64 size, int type);
@@ -140,4 +141,4 @@
 #define HIGH_MEMORY	(1024*1024)
 #endif /* __KERNEL__ */
 
-#endif  /* __ASM_E820_H */
+#endif /* ASM_X86__E820_H */
diff --git a/include/asm-x86/edac.h b/include/asm-x86/edac.h
index a8088f6..9493c5b 100644
--- a/include/asm-x86/edac.h
+++ b/include/asm-x86/edac.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EDAC_H
-#define _ASM_X86_EDAC_H
+#ifndef ASM_X86__EDAC_H
+#define ASM_X86__EDAC_H
 
 /* ECC atomic, DMA, SMP and interrupt safe scrub function */
 
@@ -15,4 +15,4 @@
 		asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
 }
 
-#endif
+#endif /* ASM_X86__EDAC_H */
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index d4f2b0a..ed2de22 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EFI_H
-#define _ASM_X86_EFI_H
+#ifndef ASM_X86__EFI_H
+#define ASM_X86__EFI_H
 
 #ifdef CONFIG_X86_32
 
@@ -94,4 +94,4 @@
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
-#endif
+#endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 7be4733..cd678b2 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ELF_H
-#define _ASM_X86_ELF_H
+#ifndef ASM_X86__ELF_H
+#define ASM_X86__ELF_H
 
 /*
  * ELF register definitions..
@@ -332,4 +332,4 @@
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
-#endif
+#endif /* ASM_X86__ELF_H */
diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 8e6aef1..190d0d8 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
+#ifndef ASM_X86__EMERGENCY_RESTART_H
+#define ASM_X86__EMERGENCY_RESTART_H
 
 enum reboot_type {
 	BOOT_TRIPLE = 't',
@@ -15,4 +15,4 @@
 
 extern void machine_emergency_restart(void);
 
-#endif /* _ASM_EMERGENCY_RESTART_H */
+#endif /* ASM_X86__EMERGENCY_RESTART_H */
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/es7000/apic.h
similarity index 91%
rename from include/asm-x86/mach-es7000/mach_apic.h
rename to include/asm-x86/es7000/apic.h
index 0a3fdf9..bd2c44d 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef __ASM_ES7000_APIC_H
+#define __ASM_ES7000_APIC_H
 
 #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
 #define esr_disable (1)
@@ -10,7 +10,7 @@
 }
 
 static inline cpumask_t target_cpus(void)
-{ 
+{
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 	return CPU_MASK_ALL;
 #else
@@ -23,24 +23,24 @@
 #define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
 #define INT_DELIVERY_MODE	(dest_LowestPrio)
 #define INT_DEST_MODE		(1)    /* logical delivery broadcast to all procs */
-#define NO_BALANCE_IRQ 		(1)
+#define NO_BALANCE_IRQ		(1)
 #undef  WAKE_SECONDARY_VIA_INIT
 #define WAKE_SECONDARY_VIA_MIP
 #else
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target procs */
-#define NO_BALANCE_IRQ 		(0)
+#define NO_BALANCE_IRQ		(0)
 #undef  APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0x0
 #define WAKE_SECONDARY_VIA_INIT
 #endif
 
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{ 
+{
 	return 0;
-} 
-static inline unsigned long check_apicid_present(int bit) 
+}
+static inline unsigned long check_apicid_present(int bit)
 {
 	return physid_isset(bit, phys_cpu_present_map);
 }
@@ -80,7 +80,7 @@
 {
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
 	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
-		(apic_version[apic] == 0x14) ? 
+		(apic_version[apic] == 0x14) ?
 		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
 }
 
@@ -141,7 +141,7 @@
 extern unsigned int boot_cpu_physical_apicid;
 static inline int check_phys_apicid_present(int cpu_physical_apicid)
 {
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 	return (1);
 }
 
@@ -150,7 +150,7 @@
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
-	int apicid;	
+	int apicid;
 
 	num_bits_set = cpus_weight(cpumask);
 	/* Return id to all */
@@ -160,16 +160,16 @@
 #else
 		return cpu_to_logical_apicid(0);
 #endif
-	/* 
-	 * The cpus in the mask must all be on the apic cluster.  If are not 
-	 * on the same apicid cluster return default value of TARGET_CPUS. 
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
 	cpu = first_cpu(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
 		if (cpu_isset(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) != 
+			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
 				printk ("%s: Not a valid mask!\n",__FUNCTION__);
 #if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -191,4 +191,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* __ASM_ES7000_APIC_H */
diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h
new file mode 100644
index 0000000..8b234a3
--- /dev/null
+++ b/include/asm-x86/es7000/apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_ES7000_APICDEF_H
+#define __ASM_ES7000_APICDEF_H
+
+#define		APIC_ID_MASK		(0xFF<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+	return (((x)>>24)&0xFF);
+}
+
+#define		GET_APIC_ID(x)	get_apic_id(x)
+
+#endif
diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/es7000/ipi.h
similarity index 83%
rename from include/asm-x86/mach-es7000/mach_ipi.h
rename to include/asm-x86/es7000/ipi.h
index 5e61bd2..632a955 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/es7000/ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef __ASM_ES7000_IPI_H
+#define __ASM_ES7000_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -21,4 +21,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* __ASM_ES7000_IPI_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/es7000/mpparse.h
similarity index 90%
rename from include/asm-x86/mach-es7000/mach_mpparse.h
rename to include/asm-x86/es7000/mpparse.h
index ef26d35..7b5c889 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/es7000/mpparse.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef __ASM_ES7000_MPPARSE_H
+#define __ASM_ES7000_MPPARSE_H
 
 #include <linux/acpi.h>
 
diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/es7000/wakecpu.h
similarity index 94%
rename from include/asm-x86/mach-es7000/mach_wakecpu.h
rename to include/asm-x86/es7000/wakecpu.h
index 84ff583..3ffc5a7 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/es7000/wakecpu.h
@@ -1,7 +1,7 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef __ASM_ES7000_WAKECPU_H
+#define __ASM_ES7000_WAKECPU_H
 
-/* 
+/*
  * This file copes with machines that wakeup secondary CPUs by the
  * INIT, INIT, STARTUP sequence.
  */
diff --git a/include/asm-x86/fb.h b/include/asm-x86/fb.h
index 5301846..aca38dbd 100644
--- a/include/asm-x86/fb.h
+++ b/include/asm-x86/fb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
+#ifndef ASM_X86__FB_H
+#define ASM_X86__FB_H
 
 #include <linux/fb.h>
 #include <linux/fs.h>
@@ -18,4 +18,4 @@
 static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
 #endif
 
-#endif /* _ASM_X86_FB_H */
+#endif /* ASM_X86__FB_H */
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 44d4f82..78e33a1 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
+#ifndef ASM_X86__FIXMAP_H
+#define ASM_X86__FIXMAP_H
 
 #ifdef CONFIG_X86_32
 # include "fixmap_32.h"
@@ -65,4 +65,4 @@
 	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
 	return __virt_to_fix(vaddr);
 }
-#endif
+#endif /* ASM_X86__FIXMAP_H */
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index f1ac2b2..784e3e7 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -10,8 +10,8 @@
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
-#ifndef _ASM_FIXMAP_32_H
-#define _ASM_FIXMAP_32_H
+#ifndef ASM_X86__FIXMAP_32_H
+#define ASM_X86__FIXMAP_32_H
 
 
 /* used by vmalloc.c, vsyscall.lds.S.
@@ -120,4 +120,4 @@
 #define FIXADDR_BOOT_START	(FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__FIXMAP_32_H */
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 00f3d74..dafb24b 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -8,8 +8,8 @@
  * Copyright (C) 1998 Ingo Molnar
  */
 
-#ifndef _ASM_FIXMAP_64_H
-#define _ASM_FIXMAP_64_H
+#ifndef ASM_X86__FIXMAP_64_H
+#define ASM_X86__FIXMAP_64_H
 
 #include <linux/kernel.h>
 #include <asm/acpi.h>
@@ -80,4 +80,4 @@
 #define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL)
 #define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
 
-#endif
+#endif /* ASM_X86__FIXMAP_64_H */
diff --git a/include/asm-x86/floppy.h b/include/asm-x86/floppy.h
index dbe82a5..7d83a3a 100644
--- a/include/asm-x86/floppy.h
+++ b/include/asm-x86/floppy.h
@@ -7,8 +7,8 @@
  *
  * Copyright (C) 1995
  */
-#ifndef _ASM_X86_FLOPPY_H
-#define _ASM_X86_FLOPPY_H
+#ifndef ASM_X86__FLOPPY_H
+#define ASM_X86__FLOPPY_H
 
 #include <linux/vmalloc.h>
 
@@ -278,4 +278,4 @@
 
 #define EXTRA_FLOPPY_PARAMS
 
-#endif /* _ASM_X86_FLOPPY_H */
+#endif /* ASM_X86__FLOPPY_H */
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index 5c68b32..be0e004 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FTRACE
-#define _ASM_X86_FTRACE
+#ifndef ASM_X86__FTRACE_H
+#define ASM_X86__FTRACE_H
 
 #ifdef CONFIG_FTRACE
 #define MCOUNT_ADDR		((long)(mcount))
@@ -11,4 +11,4 @@
 
 #endif /* CONFIG_FTRACE */
 
-#endif /* _ASM_X86_FTRACE */
+#endif /* ASM_X86__FTRACE_H */
diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index e7a76b3..45dc24d 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FUTEX_H
-#define _ASM_X86_FUTEX_H
+#ifndef ASM_X86__FUTEX_H
+#define ASM_X86__FUTEX_H
 
 #ifdef __KERNEL__
 
@@ -137,4 +137,4 @@
 }
 
 #endif
-#endif
+#endif /* ASM_X86__FUTEX_H */
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 3f62a83..07f4458 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_GART_H
-#define _ASM_X8664_GART_H 1
+#ifndef ASM_X86__GART_H
+#define ASM_X86__GART_H
 
 #include <asm/e820.h>
 
@@ -68,4 +68,4 @@
 	return 1;
 }
 
-#endif
+#endif /* ASM_X86__GART_H */
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 754d635..34280f0 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_32_H
+#define ASM_X86__GENAPIC_32_H
 
 #include <asm/mpspec.h>
 
@@ -121,4 +121,4 @@
 #define uv_system_init()		do {} while (0)
 
 
-#endif
+#endif /* ASM_X86__GENAPIC_32_H */
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index a47d631..ed6a488 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_64_H
+#define ASM_X86__GENAPIC_64_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -14,6 +14,7 @@
 
 struct genapic {
 	char *name;
+	int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
@@ -24,17 +25,24 @@
 	void (*send_IPI_mask)(cpumask_t mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
+	void (*send_IPI_self)(int vector);
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
 	unsigned int (*phys_pkg_id)(int index_msb);
+	unsigned int (*get_apic_id)(unsigned long x);
+	unsigned long (*set_apic_id)(unsigned int id);
+	unsigned long apic_id_mask;
 };
 
 extern struct genapic *genapic;
 
 extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
+extern struct genapic apic_x2apic_cluster;
+extern struct genapic apic_x2apic_phys;
 extern int acpi_madt_oem_check(char *, char *);
 
+extern void apic_send_IPI_self(int vector);
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 extern enum uv_system_type get_uv_system_type(void);
 extern int is_uv_system(void);
@@ -47,4 +55,4 @@
 
 extern void setup_apic_routing(void);
 
-#endif
+#endif /* ASM_X86__GENAPIC_64_H */
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 2c1cda0..3f3444b 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -7,8 +7,8 @@
  * as published by the Free Software Foundation.
  */
 
-#ifndef _ASM_GEODE_H_
-#define _ASM_GEODE_H_
+#ifndef ASM_X86__GEODE_H
+#define ASM_X86__GEODE_H
 
 #include <asm/processor.h>
 #include <linux/io.h>
@@ -250,4 +250,4 @@
 static inline int mfgpt_timer_setup(void) { return 0; }
 #endif
 
-#endif
+#endif /* ASM_X86__GEODE_H */
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index c4c91b3..497fb98 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -53,4 +53,4 @@
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* _ASM_I386_GPIO_H */
+#endif /* ASM_X86__GPIO_H */
diff --git a/include/asm-x86/hardirq_32.h b/include/asm-x86/hardirq_32.h
index 4f85f0f..700fe23 100644
--- a/include/asm-x86/hardirq_32.h
+++ b/include/asm-x86/hardirq_32.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_32_H
+#define ASM_X86__HARDIRQ_32_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -25,4 +25,4 @@
 void ack_bad_irq(unsigned int irq);
 #include <linux/irq_cpustat.h>
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_32_H */
diff --git a/include/asm-x86/hardirq_64.h b/include/asm-x86/hardirq_64.h
index 95d5e09..f8bd291 100644
--- a/include/asm-x86/hardirq_64.h
+++ b/include/asm-x86/hardirq_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_64_H
+#define ASM_X86__HARDIRQ_64_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -20,4 +20,4 @@
 
 extern void ack_bad_irq(unsigned int irq);
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_64_H */
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index 4514b16..bc3f6a2 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -15,8 +15,8 @@
  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  */
 
-#ifndef _ASM_HIGHMEM_H
-#define _ASM_HIGHMEM_H
+#ifndef ASM_X86__HIGHMEM_H
+#define ASM_X86__HIGHMEM_H
 
 #ifdef __KERNEL__
 
@@ -79,4 +79,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_HIGHMEM_H */
+#endif /* ASM_X86__HIGHMEM_H */
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 82f1ac6..cbbbb6d 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,5 +1,5 @@
-#ifndef ASM_X86_HPET_H
-#define ASM_X86_HPET_H
+#ifndef ASM_X86__HPET_H
+#define ASM_X86__HPET_H
 
 #ifdef CONFIG_HPET_TIMER
 
@@ -90,4 +90,4 @@
 #define hpet_readl(a) 0
 
 #endif
-#endif /* ASM_X86_HPET_H */
+#endif /* ASM_X86__HPET_H */
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 439a9ac..0b7ec5d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_HUGETLB_H
-#define _ASM_X86_HUGETLB_H
+#ifndef ASM_X86__HUGETLB_H
+#define ASM_X86__HUGETLB_H
 
 #include <asm/page.h>
 
@@ -90,4 +90,4 @@
 {
 }
 
-#endif /* _ASM_X86_HUGETLB_H */
+#endif /* ASM_X86__HUGETLB_H */
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index edd0b95..278571a 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
+#ifndef ASM_X86__HW_IRQ_H
+#define ASM_X86__HW_IRQ_H
 
 /*
  * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
@@ -73,7 +73,9 @@
 #endif
 
 /* IPI functions */
+#ifdef CONFIG_X86_32
 extern void send_IPI_self(int vector);
+#endif
 extern void send_IPI(int dest, int vector);
 
 /* Statistics */
@@ -93,6 +95,26 @@
 extern asmlinkage void qic_enable_irq_interrupt(void);
 extern asmlinkage void qic_call_function_interrupt(void);
 
+/* SMP */
+extern void smp_apic_timer_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_error_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_spurious_interrupt(void);
+extern asmlinkage void smp_error_interrupt(void);
+#endif
+#ifdef CONFIG_X86_SMP
+extern void smp_reschedule_interrupt(struct pt_regs *);
+extern void smp_call_function_interrupt(struct pt_regs *);
+extern void smp_call_function_single_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_invalidate_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
+#endif
+#endif
+
 #ifdef CONFIG_X86_32
 extern void (*const interrupt[NR_IRQS])(void);
 #else
@@ -112,4 +134,4 @@
 
 #endif /* !ASSEMBLY_ */
 
-#endif
+#endif /* ASM_X86__HW_IRQ_H */
diff --git a/include/asm-x86/hypertransport.h b/include/asm-x86/hypertransport.h
index d2bbd23..cc011a3 100644
--- a/include/asm-x86/hypertransport.h
+++ b/include/asm-x86/hypertransport.h
@@ -1,5 +1,5 @@
-#ifndef ASM_HYPERTRANSPORT_H
-#define ASM_HYPERTRANSPORT_H
+#ifndef ASM_X86__HYPERTRANSPORT_H
+#define ASM_X86__HYPERTRANSPORT_H
 
 /*
  * Constants for x86 Hypertransport Interrupts.
@@ -42,4 +42,4 @@
 #define HT_IRQ_HIGH_DEST_ID(v)						\
 	((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
 
-#endif /* ASM_HYPERTRANSPORT_H */
+#endif /* ASM_X86__HYPERTRANSPORT_H */
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 56d00e3..9ba862a 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -7,8 +7,8 @@
  * x86-64 work by Andi Kleen 2002
  */
 
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
+#ifndef ASM_X86__I387_H
+#define ASM_X86__I387_H
 
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
@@ -19,23 +19,32 @@
 #include <asm/sigcontext.h>
 #include <asm/user.h>
 #include <asm/uaccess.h>
+#include <asm/xsave.h>
 
+extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 extern void init_thread_xstate(void);
+extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
 
+extern struct _fpx_sw_bytes fx_sw_reserved;
 #ifdef CONFIG_IA32_EMULATION
+extern unsigned int sig_xstate_ia32_size;
+extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
 struct _fpstate_ia32;
-extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
-extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
+struct _xstate_ia32;
+extern int save_i387_xstate_ia32(void __user *buf);
+extern int restore_i387_xstate_ia32(void __user *buf);
 #endif
 
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
 #ifdef CONFIG_X86_64
 
 /* Ignore delayed exceptions from user space */
@@ -46,7 +55,7 @@
 		     _ASM_EXTABLE(1b, 2b));
 }
 
-static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
 	int err;
 
@@ -66,15 +75,31 @@
 	return err;
 }
 
-#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
+	else
+		return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
 
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
    new user value. Both should be ok.
    Use the PDA as safe address because it should be already in L1. */
-static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+static inline void clear_fpu_state(struct task_struct *tsk)
 {
+	struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
+	struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+	/*
+	 * xsave header may indicate the init state of the FP.
+	 */
+	if ((task_thread_info(tsk)->status & TS_XSAVE) &&
+	    !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
+		return;
+
 	if (unlikely(fx->swd & X87_FSW_ES))
 		asm volatile("fnclex");
 	alternative_input(ASM_NOP8 ASM_NOP2,
@@ -83,7 +108,7 @@
 			  X86_FEATURE_FXSAVE_LEAK);
 }
 
-static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
+static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 {
 	int err;
 
@@ -107,7 +132,7 @@
 	return err;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline void fxsave(struct task_struct *tsk)
 {
 	/* Using "rex64; fxsave %0" is broken because, if the memory operand
 	   uses any extended registers for addressing, a second REX prefix
@@ -132,7 +157,16 @@
 			     : "=m" (tsk->thread.xstate->fxsave)
 			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.xstate->fxsave);
+}
+
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		xsave(tsk);
+	else
+		fxsave(tsk);
+
+	clear_fpu_state(tsk);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -147,6 +181,10 @@
 
 static inline void restore_fpu(struct task_struct *tsk)
 {
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		xrstor_checking(&tsk->thread.xstate->xsave);
+		return;
+	}
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -172,6 +210,27 @@
  */
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
+		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+		xsave(tsk);
+
+		/*
+		 * xsave header may indicate the init state of the FP.
+		 */
+		if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
+			goto end;
+
+		if (unlikely(fx->swd & X87_FSW_ES))
+			asm volatile("fnclex");
+
+		/*
+		 * we can do a simple return here or be paranoid :)
+		 */
+		goto clear_state;
+	}
+
 	/* Use more nops than strictly needed in case the compiler
 	   varies code */
 	alternative_input(
@@ -181,6 +240,7 @@
 		X86_FEATURE_FXSR,
 		[fx] "m" (tsk->thread.xstate->fxsave),
 		[fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
+clear_state:
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
@@ -190,16 +250,17 @@
 		"fildl %[addr]", 	/* set F?P to defined value */
 		X86_FEATURE_FXSAVE_LEAK,
 		[addr] "m" (safe_address));
+end:
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
+#endif	/* CONFIG_X86_64 */
+
 /*
  * Signal frame handlers...
  */
-extern int save_i387(struct _fpstate __user *buf);
-extern int restore_i387(struct _fpstate __user *buf);
-
-#endif	/* CONFIG_X86_64 */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
 
 static inline void __unlazy_fpu(struct task_struct *tsk)
 {
@@ -336,4 +397,4 @@
 	}
 }
 
-#endif	/* _ASM_X86_I387_H */
+#endif /* ASM_X86__I387_H */
diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h
index b51c048..15a5b53 100644
--- a/include/asm-x86/i8253.h
+++ b/include/asm-x86/i8253.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I8253_H__
-#define __ASM_I8253_H__
+#ifndef ASM_X86__I8253_H
+#define ASM_X86__I8253_H
 
 /* i8253A PIT registers */
 #define PIT_MODE		0x43
@@ -15,4 +15,4 @@
 #define inb_pit		inb_p
 #define outb_pit	outb_p
 
-#endif	/* __ASM_I8253_H__ */
+#endif /* ASM_X86__I8253_H */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 2f98df9..23c1b3b 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I8259_H__
-#define __ASM_I8259_H__
+#ifndef ASM_X86__I8259_H
+#define ASM_X86__I8259_H
 
 #include <linux/delay.h>
 
@@ -57,4 +57,7 @@
 
 extern struct irq_chip i8259A_chip;
 
-#endif	/* __ASM_I8259_H__ */
+extern void mask_8259A(void);
+extern void unmask_8259A(void);
+
+#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h
index 55d3abe..f932f7a 100644
--- a/include/asm-x86/ia32.h
+++ b/include/asm-x86/ia32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_H
-#define _ASM_X86_64_IA32_H
+#ifndef ASM_X86__IA32_H
+#define ASM_X86__IA32_H
 
 
 #ifdef CONFIG_IA32_EMULATION
@@ -167,4 +167,4 @@
 
 #endif /* !CONFIG_IA32_SUPPORT */
 
-#endif
+#endif /* ASM_X86__IA32_H */
diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
index 61cea9e..dbd887d 100644
--- a/include/asm-x86/ia32_unistd.h
+++ b/include/asm-x86/ia32_unistd.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_UNISTD_H_
-#define _ASM_X86_64_IA32_UNISTD_H_
+#ifndef ASM_X86__IA32_UNISTD_H
+#define ASM_X86__IA32_UNISTD_H
 
 /*
  * This file contains the system call numbers of the ia32 port,
@@ -15,4 +15,4 @@
 #define __NR_ia32_sigreturn	119
 #define __NR_ia32_rt_sigreturn	173
 
-#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
+#endif /* ASM_X86__IA32_UNISTD_H */
diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h
index d240e5b..dc9c794 100644
--- a/include/asm-x86/idle.h
+++ b/include/asm-x86/idle.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IDLE_H
-#define _ASM_X86_64_IDLE_H 1
+#ifndef ASM_X86__IDLE_H
+#define ASM_X86__IDLE_H
 
 #define IDLE_START 1
 #define IDLE_END 2
@@ -10,4 +10,4 @@
 void enter_idle(void);
 void exit_idle(void);
 
-#endif
+#endif /* ASM_X86__IDLE_H */
diff --git a/include/asm-x86/intel_arch_perfmon.h b/include/asm-x86/intel_arch_perfmon.h
index fa0fd06..07c03c6 100644
--- a/include/asm-x86/intel_arch_perfmon.h
+++ b/include/asm-x86/intel_arch_perfmon.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
+#ifndef ASM_X86__INTEL_ARCH_PERFMON_H
+#define ASM_X86__INTEL_ARCH_PERFMON_H
 
 #define MSR_ARCH_PERFMON_PERFCTR0		0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1		0xc2
@@ -28,4 +28,4 @@
 	unsigned int full;
 };
 
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
+#endif /* ASM_X86__INTEL_ARCH_PERFMON_H */
diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index 0f954dc..72b7719 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IO_H
-#define _ASM_X86_IO_H
+#ifndef ASM_X86__IO_H
+#define ASM_X86__IO_H
 
 #define ARCH_HAS_IOREMAP_WC
 
@@ -73,6 +73,8 @@
 #define writeq writeq
 #endif
 
+extern int iommu_bio_merge;
+
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
@@ -99,4 +101,4 @@
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 
-#endif /* _ASM_X86_IO_H */
+#endif /* ASM_X86__IO_H */
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index e876d89..4f7d878 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_32_H
+#define ASM_X86__IO_32_H
 
 #include <linux/string.h>
 #include <linux/compiler.h>
@@ -281,4 +281,4 @@
 BUILDIO(w, w, short)
 BUILDIO(l, , int)
 
-#endif
+#endif /* ASM_X86__IO_32_H */
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 22995c5..64429e9 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_64_H
+#define ASM_X86__IO_64_H
 
 
 /*
@@ -235,7 +235,6 @@
 
 #define flush_write_buffers()
 
-extern int iommu_bio_merge;
 #define BIO_VMERGE_BOUNDARY iommu_bio_merge
 
 /*
@@ -245,4 +244,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__IO_64_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 14f82bb..8ec68a5 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
+#ifndef ASM_X86__IO_APIC_H
+#define ASM_X86__IO_APIC_H
 
 #include <linux/types.h>
 #include <asm/mpspec.h>
@@ -107,6 +107,20 @@
 
 } __attribute__ ((packed));
 
+struct IR_IO_APIC_route_entry {
+	__u64	vector		: 8,
+		zero		: 3,
+		index2		: 1,
+		delivery_status : 1,
+		polarity	: 1,
+		irr		: 1,
+		trigger		: 1,
+		mask		: 1,
+		reserved	: 31,
+		format		: 1,
+		index		: 15;
+} __attribute__ ((packed));
+
 #ifdef CONFIG_X86_IO_APIC
 
 /*
@@ -183,10 +197,16 @@
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
+#ifdef CONFIG_X86_64
+extern int save_mask_IO_APIC_setup(void);
+extern void restore_IO_APIC_setup(void);
+extern void reinit_intr_remapped_IO_APIC(int);
+#endif
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/ioctls.h b/include/asm-x86/ioctls.h
index c0c338b..3366035 100644
--- a/include/asm-x86/ioctls.h
+++ b/include/asm-x86/ioctls.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IOCTLS_H
-#define _ASM_X86_IOCTLS_H
+#ifndef ASM_X86__IOCTLS_H
+#define ASM_X86__IOCTLS_H
 
 #include <asm/ioctl.h>
 
@@ -85,4 +85,4 @@
 
 #define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
 
-#endif
+#endif /* ASM_X86__IOCTLS_H */
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 5f888cc..e86f441 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_IOMMU_H
-#define _ASM_X8664_IOMMU_H 1
+#ifndef ASM_X86__IOMMU_H
+#define ASM_X86__IOMMU_H
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
@@ -42,4 +42,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__IOMMU_H */
diff --git a/include/asm-x86/ipcbuf.h b/include/asm-x86/ipcbuf.h
index ee678fd..910304f 100644
--- a/include/asm-x86/ipcbuf.h
+++ b/include/asm-x86/ipcbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IPCBUF_H
-#define _ASM_X86_IPCBUF_H
+#ifndef ASM_X86__IPCBUF_H
+#define ASM_X86__IPCBUF_H
 
 /*
  * The ipc64_perm structure for x86 architecture.
@@ -25,4 +25,4 @@
 	unsigned long		__unused2;
 };
 
-#endif /* _ASM_X86_IPCBUF_H */
+#endif /* ASM_X86__IPCBUF_H */
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index bb1c09f..30a692c 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_IPI_H
-#define __ASM_IPI_H
+#ifndef ASM_X86__IPI_H
+#define ASM_X86__IPI_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -49,6 +49,12 @@
 	return SET_APIC_DEST_FIELD(mask);
 }
 
+static inline void __xapic_wait_icr_idle(void)
+{
+	while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
+		cpu_relax();
+}
+
 static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
 				       unsigned int dest)
 {
@@ -64,7 +70,7 @@
 	/*
 	 * Wait for idle.
 	 */
-	apic_wait_icr_idle();
+	__xapic_wait_icr_idle();
 
 	/*
 	 * No need to touch the target chip field
@@ -74,7 +80,7 @@
 	/*
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
-	apic_write(APIC_ICR, cfg);
+	native_apic_mem_write(APIC_ICR, cfg);
 }
 
 /*
@@ -92,13 +98,13 @@
 	if (unlikely(vector == NMI_VECTOR))
 		safe_apic_wait_icr_idle();
 	else
-		apic_wait_icr_idle();
+		__xapic_wait_icr_idle();
 
 	/*
 	 * prepare target chip field
 	 */
 	cfg = __prepare_ICR2(mask);
-	apic_write(APIC_ICR2, cfg);
+	native_apic_mem_write(APIC_ICR2, cfg);
 
 	/*
 	 * program the ICR
@@ -108,7 +114,7 @@
 	/*
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
-	apic_write(APIC_ICR, cfg);
+	native_apic_mem_write(APIC_ICR, cfg);
 }
 
 static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
@@ -129,4 +135,4 @@
 	local_irq_restore(flags);
 }
 
-#endif /* __ASM_IPI_H */
+#endif /* ASM_X86__IPI_H */
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 1a29257..1e5f290 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_H
-#define _ASM_IRQ_H
+#ifndef ASM_X86__IRQ_H
+#define ASM_X86__IRQ_H
 /*
  *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
  *
@@ -47,4 +47,4 @@
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
 
-#endif /* _ASM_IRQ_H */
+#endif /* ASM_X86__IRQ_H */
diff --git a/include/asm-x86/irq_regs_32.h b/include/asm-x86/irq_regs_32.h
index 3368b20..316a3b2 100644
--- a/include/asm-x86/irq_regs_32.h
+++ b/include/asm-x86/irq_regs_32.h
@@ -4,8 +4,8 @@
  *
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-#ifndef _ASM_I386_IRQ_REGS_H
-#define _ASM_I386_IRQ_REGS_H
+#ifndef ASM_X86__IRQ_REGS_32_H
+#define ASM_X86__IRQ_REGS_32_H
 
 #include <asm/percpu.h>
 
@@ -26,4 +26,4 @@
 	return old_regs;
 }
 
-#endif /* _ASM_I386_IRQ_REGS_H */
+#endif /* ASM_X86__IRQ_REGS_32_H */
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 0000000..78242c6
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_IRQ_REMAPPING_H
+#define _ASM_IRQ_REMAPPING_H
+
+extern int x2apic;
+
+#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+
+#endif
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index a48c7f2..c5d2d76 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
+#ifndef ASM_X86__IRQ_VECTORS_H
+#define ASM_X86__IRQ_VECTORS_H
 
 #include <linux/threads.h>
 
@@ -179,4 +179,4 @@
 #define VIC_CPU_BOOT_ERRATA_CPI		(VIC_CPI_LEVEL0 + 8)
 
 
-#endif /* _ASM_IRQ_VECTORS_H */
+#endif /* ASM_X86__IRQ_VECTORS_H */
diff --git a/include/asm-x86/ist.h b/include/asm-x86/ist.h
index 6ec6cee..35a2fe9 100644
--- a/include/asm-x86/ist.h
+++ b/include/asm-x86/ist.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IST_H
-#define _ASM_IST_H
+#ifndef ASM_X86__IST_H
+#define ASM_X86__IST_H
 
 /*
  * Include file for the interface to IST BIOS
@@ -31,4 +31,4 @@
 extern struct ist_info ist_info;
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_IST_H */
+#endif /* ASM_X86__IST_H */
diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h
index 452e2b6..2bbaf43 100644
--- a/include/asm-x86/k8.h
+++ b/include/asm-x86/k8.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_K8_H
-#define _ASM_K8_H 1
+#ifndef ASM_X86__K8_H
+#define ASM_X86__K8_H
 
 #include <linux/pci.h>
 
@@ -12,4 +12,4 @@
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
-#endif
+#endif /* ASM_X86__K8_H */
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb..5ec3ad3 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KDEBUG_H
-#define _ASM_X86_KDEBUG_H
+#ifndef ASM_X86__KDEBUG_H
+#define ASM_X86__KDEBUG_H
 
 #include <linux/notifier.h>
 
@@ -35,4 +35,4 @@
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
-#endif
+#endif /* ASM_X86__KDEBUG_H */
diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 4246ab7..ea09600 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -1,5 +1,5 @@
-#ifndef _KEXEC_H
-#define _KEXEC_H
+#ifndef ASM_X86__KEXEC_H
+#define ASM_X86__KEXEC_H
 
 #ifdef CONFIG_X86_32
 # define PA_CONTROL_PAGE	0
@@ -172,4 +172,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* _KEXEC_H */
+#endif /* ASM_X86__KEXEC_H */
diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h
index 484c475..83a7ee2 100644
--- a/include/asm-x86/kgdb.h
+++ b/include/asm-x86/kgdb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_KGDB_H_
-#define _ASM_KGDB_H_
+#ifndef ASM_X86__KGDB_H
+#define ASM_X86__KGDB_H
 
 /*
  * Copyright (C) 2001-2004 Amit S. Kale
@@ -78,4 +78,4 @@
 #define BREAK_INSTR_SIZE	1
 #define CACHE_FLUSH_IS_SAFE	1
 
-#endif				/* _ASM_KGDB_H_ */
+#endif /* ASM_X86__KGDB_H */
diff --git a/include/asm-x86/kmap_types.h b/include/asm-x86/kmap_types.h
index 5f41741..89f4449 100644
--- a/include/asm-x86/kmap_types.h
+++ b/include/asm-x86/kmap_types.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KMAP_TYPES_H
-#define _ASM_X86_KMAP_TYPES_H
+#ifndef ASM_X86__KMAP_TYPES_H
+#define ASM_X86__KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
 # define D(n) __KM_FENCE_##n ,
@@ -26,4 +26,4 @@
 
 #undef D
 
-#endif
+#endif /* ASM_X86__KMAP_TYPES_H */
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index 54980b0..bd840786 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
+#ifndef ASM_X86__KPROBES_H
+#define ASM_X86__KPROBES_H
 /*
  *  Kernel Probes (KProbes)
  *
@@ -94,4 +94,4 @@
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
-#endif				/* _ASM_KPROBES_H */
+#endif /* ASM_X86__KPROBES_H */
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..78e954d 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -1,5 +1,5 @@
-#ifndef __LINUX_KVM_X86_H
-#define __LINUX_KVM_X86_H
+#ifndef ASM_X86__KVM_H
+#define ASM_X86__KVM_H
 
 /*
  * KVM x86 specific structures and definitions
@@ -230,4 +230,4 @@
 #define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
 #define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
 
-#endif
+#endif /* ASM_X86__KVM_H */
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c2e34c2..6979454 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -1,4 +1,4 @@
-#/*
+/*
  * Kernel-based Virtual Machine driver for Linux
  *
  * This header defines architecture specific interfaces, x86 version
@@ -8,8 +8,8 @@
  *
  */
 
-#ifndef ASM_KVM_HOST_H
-#define ASM_KVM_HOST_H
+#ifndef ASM_X86__KVM_HOST_H
+#define ASM_X86__KVM_HOST_H
 
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -735,4 +735,4 @@
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 
-#endif
+#endif /* ASM_X86__KVM_HOST_H */
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 76f3921..30054fd 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -1,5 +1,5 @@
-#ifndef __X86_KVM_PARA_H
-#define __X86_KVM_PARA_H
+#ifndef ASM_X86__KVM_PARA_H
+#define ASM_X86__KVM_PARA_H
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
@@ -144,4 +144,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__KVM_PARA_H */
diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
index 4e8c1e4..e2d9b03 100644
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h
@@ -8,8 +8,8 @@
  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
  */
 
-#ifndef __X86_EMULATE_H__
-#define __X86_EMULATE_H__
+#ifndef ASM_X86__KVM_X86_EMULATE_H
+#define ASM_X86__KVM_X86_EMULATE_H
 
 struct x86_emulate_ctxt;
 
@@ -181,4 +181,4 @@
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
 		     struct x86_emulate_ops *ops);
 
-#endif				/* __X86_EMULATE_H__ */
+#endif /* ASM_X86__KVM_X86_EMULATE_H */
diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h
index 20c5972..a522850 100644
--- a/include/asm-x86/ldt.h
+++ b/include/asm-x86/ldt.h
@@ -3,8 +3,8 @@
  *
  * Definitions of structures used with the modify_ldt system call.
  */
-#ifndef _ASM_X86_LDT_H
-#define _ASM_X86_LDT_H
+#ifndef ASM_X86__LDT_H
+#define ASM_X86__LDT_H
 
 /* Maximum number of LDT entries supported. */
 #define LDT_ENTRIES	8192
@@ -37,4 +37,4 @@
 #define MODIFY_LDT_CONTENTS_CODE	2
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__LDT_H */
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index be4a724..7505e94 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -1,5 +1,5 @@
-#ifndef _X86_LGUEST_H
-#define _X86_LGUEST_H
+#ifndef ASM_X86__LGUEST_H
+#define ASM_X86__LGUEST_H
 
 #define GDT_ENTRY_LGUEST_CS	10
 #define GDT_ENTRY_LGUEST_DS	11
@@ -91,4 +91,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__LGUEST_H */
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index a3241f2..8f034ba 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
@@ -1,6 +1,6 @@
 /* Architecture specific portion of the lguest hypercalls */
-#ifndef _X86_LGUEST_HCALL_H
-#define _X86_LGUEST_HCALL_H
+#ifndef ASM_X86__LGUEST_HCALL_H
+#define ASM_X86__LGUEST_HCALL_H
 
 #define LHCALL_FLUSH_ASYNC	0
 #define LHCALL_LGUEST_INIT	1
@@ -68,4 +68,4 @@
 };
 
 #endif /* !__ASSEMBLY__ */
-#endif	/* _I386_LGUEST_HCALL_H */
+#endif /* ASM_X86__LGUEST_HCALL_H */
diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 64e444f..42d8b62 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
+#ifndef ASM_X86__LINKAGE_H
+#define ASM_X86__LINKAGE_H
 
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
@@ -57,5 +57,5 @@
 #define __ALIGN_STR ".align 16,0x90"
 #endif
 
-#endif
+#endif /* ASM_X86__LINKAGE_H */
 
diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index 330a724..ae91994 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h
@@ -1,5 +1,5 @@
-#ifndef _ARCH_LOCAL_H
-#define _ARCH_LOCAL_H
+#ifndef ASM_X86__LOCAL_H
+#define ASM_X86__LOCAL_H
 
 #include <linux/percpu.h>
 
@@ -232,4 +232,4 @@
 #define __cpu_local_add(i, l)	cpu_local_add((i), (l))
 #define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
 
-#endif /* _ARCH_LOCAL_H */
+#endif /* ASM_X86__LOCAL_H */
diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h
index 989f34c..2aa61b5 100644
--- a/include/asm-x86/mach-default/apm.h
+++ b/include/asm-x86/mach-default/apm.h
@@ -3,8 +3,8 @@
  *  Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp>
  */
 
-#ifndef _ASM_APM_H
-#define _ASM_APM_H
+#ifndef ASM_X86__MACH_DEFAULT__APM_H
+#define ASM_X86__MACH_DEFAULT__APM_H
 
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
@@ -70,4 +70,4 @@
 	return error;
 }
 
-#endif /* _ASM_APM_H */
+#endif /* ASM_X86__MACH_DEFAULT__APM_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index f3226b9..2a330a4 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APIC_H
+#define ASM_X86__MACH_DEFAULT__MACH_APIC_H
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -30,6 +30,8 @@
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
 #define phys_pkg_id	(genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
+#define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
+#define send_IPI_self (genapic->send_IPI_self)
 extern void setup_apic_routing(void);
 #else
 #define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@
 
 static inline int apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
 static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
@@ -138,4 +140,4 @@
 }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index e4b29ba..0c2d41c 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -1,12 +1,12 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
+#define ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
 
 #include <asm/apic.h>
 
 #ifdef CONFIG_X86_64
-#define	APIC_ID_MASK		(0xFFu<<24)
-#define GET_APIC_ID(x)          (((x)>>24)&0xFFu)
-#define	SET_APIC_ID(x)		(((x)<<24))
+#define	APIC_ID_MASK		(genapic->apic_id_mask)
+#define GET_APIC_ID(x)		(genapic->get_apic_id(x))
+#define	SET_APIC_ID(x)		(genapic->set_apic_id(x))
 #else
 #define		APIC_ID_MASK		(0xF<<24)
 static inline unsigned get_apic_id(unsigned long x) 
@@ -21,4 +21,4 @@
 #define		GET_APIC_ID(x)	get_apic_id(x)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-default/mach_ipi.h b/include/asm-x86/mach-default/mach_ipi.h
index be32336..674bc7e 100644
--- a/include/asm-x86/mach-default/mach_ipi.h
+++ b/include/asm-x86/mach-default/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H
+#define ASM_X86__MACH_DEFAULT__MACH_IPI_H
 
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
@@ -61,4 +61,4 @@
 }
 #endif
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_IPI_H */
diff --git a/include/asm-x86/mach-default/mach_mpparse.h b/include/asm-x86/mach-default/mach_mpparse.h
index d141085..9c381f2 100644
--- a/include/asm-x86/mach-default/mach_mpparse.h
+++ b/include/asm-x86/mach-default/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
 
 static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 
 		char *productid)
@@ -14,4 +14,4 @@
 }
 
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-default/mach_mpspec.h b/include/asm-x86/mach-default/mach_mpspec.h
index 51c9a97..d77646f 100644
--- a/include/asm-x86/mach-default/mach_mpspec.h
+++ b/include/asm-x86/mach-default/mach_mpspec.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 #define MAX_MP_BUSSES 32
 #endif
 
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h
index 4b76e53..990b158 100644
--- a/include/asm-x86/mach-default/mach_timer.h
+++ b/include/asm-x86/mach-default/mach_timer.h
@@ -10,8 +10,8 @@
  * directly because of the awkward 8-bit access mechanism of the 82C54
  * device.
  */
-#ifndef _MACH_TIMER_H
-#define _MACH_TIMER_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TIMER_H
+#define ASM_X86__MACH_DEFAULT__MACH_TIMER_H
 
 #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
 #define CALIBRATE_LATCH	\
@@ -45,4 +45,4 @@
 	*count_p = count;
 }
 
-#endif /* !_MACH_TIMER_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TIMER_H */
diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h
index 2fe7705..de9ac3f 100644
--- a/include/asm-x86/mach-default/mach_traps.h
+++ b/include/asm-x86/mach-default/mach_traps.h
@@ -2,8 +2,8 @@
  *  Machine specific NMI handling for generic.
  *  Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp>
  */
-#ifndef _MACH_TRAPS_H
-#define _MACH_TRAPS_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
+#define ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
 
 #include <asm/mc146818rtc.h>
 
@@ -36,4 +36,4 @@
 		unlock_cmos();
 }
 
-#endif /* !_MACH_TRAPS_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TRAPS_H */
diff --git a/include/asm-x86/mach-default/mach_wakecpu.h b/include/asm-x86/mach-default/mach_wakecpu.h
index 3ebb178..361b810 100644
--- a/include/asm-x86/mach-default/mach_wakecpu.h
+++ b/include/asm-x86/mach-default/mach_wakecpu.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
+#define ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
 
 /* 
  * This file copes with machines that wakeup secondary CPUs by the
@@ -39,4 +39,4 @@
  #define inquire_remote_apic(apicid) {}
 #endif
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H */
diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
deleted file mode 100644
index a58ab5a..0000000
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
-
-#define		APIC_ID_MASK		(0xFF<<24)
-
-static inline unsigned get_apic_id(unsigned long x) 
-{ 
-	return (((x)>>24)&0xFF);
-} 
-
-#define		GET_APIC_ID(x)	get_apic_id(x)
-
-#endif
diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h
index 5305dcb..6ce0f77 100644
--- a/include/asm-x86/mach-generic/gpio.h
+++ b/include/asm-x86/mach-generic/gpio.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_GENERIC_GPIO_H
-#define __ASM_MACH_GENERIC_GPIO_H
+#ifndef ASM_X86__MACH_GENERIC__GPIO_H
+#define ASM_X86__MACH_GENERIC__GPIO_H
 
 int gpio_request(unsigned gpio, const char *label);
 void gpio_free(unsigned gpio);
@@ -12,4 +12,4 @@
 
 #include <asm-generic/gpio.h>           /* cansleep wrappers */
 
-#endif /* __ASM_MACH_GENERIC_GPIO_H */
+#endif /* ASM_X86__MACH_GENERIC__GPIO_H */
diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
index 890ce3f..f7870e1 100644
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ b/include/asm-x86/mach-generic/irq_vectors_limits.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
+#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
+#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
 
 /*
  * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
 #define NR_IRQS	224
 #define NR_IRQ_VECTORS	1024
 
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
+#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 6eff343..5d010c6 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_APIC_H
+#define ASM_X86__MACH_GENERIC__MACH_APIC_H
 
 #include <asm/genapic.h>
 
@@ -29,4 +29,4 @@
 
 extern void generic_bigsmp_probe(void);
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/mach_apicdef.h b/include/asm-x86/mach-generic/mach_apicdef.h
index 28ed989..1657f38 100644
--- a/include/asm-x86/mach-generic/mach_apicdef.h
+++ b/include/asm-x86/mach-generic/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef _GENAPIC_MACH_APICDEF_H
-#define _GENAPIC_MACH_APICDEF_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_APICDEF_H
+#define ASM_X86__MACH_GENERIC__MACH_APICDEF_H
 
 #ifndef APIC_DEFINITION
 #include <asm/genapic.h>
@@ -8,4 +8,4 @@
 #define APIC_ID_MASK (genapic->apic_id_mask)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-generic/mach_ipi.h b/include/asm-x86/mach-generic/mach_ipi.h
index 441b0fe..f67433d 100644
--- a/include/asm-x86/mach-generic/mach_ipi.h
+++ b/include/asm-x86/mach-generic/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_IPI_H
-#define _MACH_IPI_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_IPI_H
+#define ASM_X86__MACH_GENERIC__MACH_IPI_H
 
 #include <asm/genapic.h>
 
@@ -7,4 +7,4 @@
 #define send_IPI_allbutself (genapic->send_IPI_allbutself)
 #define send_IPI_all (genapic->send_IPI_all)
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_IPI_H */
diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h
index 586cadb..3115564 100644
--- a/include/asm-x86/mach-generic/mach_mpparse.h
+++ b/include/asm-x86/mach-generic/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_MPPARSE_H
-#define _MACH_MPPARSE_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
+#define ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
 
 
 extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
@@ -7,4 +7,4 @@
 
 extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h
index c83c120..6061b15 100644
--- a/include/asm-x86/mach-generic/mach_mpspec.h
+++ b/include/asm-x86/mach-generic/mach_mpspec.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
+#define ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 
 extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				char *productid);
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
deleted file mode 100644
index 626aef6..0000000
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
-
-extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-				char *productid);
-
-#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h
index acce0b7..6184561 100644
--- a/include/asm-x86/mach-rdc321x/gpio.h
+++ b/include/asm-x86/mach-rdc321x/gpio.h
@@ -1,5 +1,5 @@
-#ifndef _RDC321X_GPIO_H
-#define _RDC321X_GPIO_H
+#ifndef ASM_X86__MACH_RDC321X__GPIO_H
+#define ASM_X86__MACH_RDC321X__GPIO_H
 
 extern int rdc_gpio_get_value(unsigned gpio);
 extern void rdc_gpio_set_value(unsigned gpio, int value);
@@ -54,4 +54,4 @@
 /* For cansleep */
 #include <asm-generic/gpio.h>
 
-#endif /* _RDC321X_GPIO_H_ */
+#endif /* ASM_X86__MACH_RDC321X__GPIO_H */
diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
deleted file mode 100644
index a58ab5a..0000000
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
-
-#define		APIC_ID_MASK		(0xFF<<24)
-
-static inline unsigned get_apic_id(unsigned long x) 
-{ 
-	return (((x)>>24)&0xFF);
-} 
-
-#define		GET_APIC_ID(x)	get_apic_id(x)
-
-#endif
diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h
index 9bf4ae9..5768d8e 100644
--- a/include/asm-x86/math_emu.h
+++ b/include/asm-x86/math_emu.h
@@ -1,5 +1,5 @@
-#ifndef _I386_MATH_EMU_H
-#define _I386_MATH_EMU_H
+#ifndef ASM_X86__MATH_EMU_H
+#define ASM_X86__MATH_EMU_H
 
 /* This structure matches the layout of the data saved to the stack
    following a device-not-present interrupt, part of it saved
@@ -28,4 +28,4 @@
 	long ___vm86_fs;
 	long ___vm86_gs;
 };
-#endif
+#endif /* ASM_X86__MATH_EMU_H */
diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index daf1ccd..a995f33 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h
@@ -1,8 +1,8 @@
 /*
  * Machine dependent access functions for RTC registers.
  */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
+#ifndef ASM_X86__MC146818RTC_H
+#define ASM_X86__MC146818RTC_H
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -101,4 +101,4 @@
 
 #define RTC_IRQ 8
 
-#endif /* _ASM_MC146818RTC_H */
+#endif /* ASM_X86__MC146818RTC_H */
diff --git a/include/asm-x86/mca.h b/include/asm-x86/mca.h
index 09adf2e..60d1ed2 100644
--- a/include/asm-x86/mca.h
+++ b/include/asm-x86/mca.h
@@ -1,8 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8 -*- */
 
 /* Platform specific MCA defines */
-#ifndef _ASM_MCA_H
-#define _ASM_MCA_H
+#ifndef ASM_X86__MCA_H
+#define ASM_X86__MCA_H
 
 /* Maximal number of MCA slots - actually, some machines have less, but
  * they all have sufficient number of POS registers to cover 8.
@@ -40,4 +40,4 @@
  */
 #define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3)
 
-#endif
+#endif /* ASM_X86__MCA_H */
diff --git a/include/asm-x86/mca_dma.h b/include/asm-x86/mca_dma.h
index c3dca6e..49f22be 100644
--- a/include/asm-x86/mca_dma.h
+++ b/include/asm-x86/mca_dma.h
@@ -1,5 +1,5 @@
-#ifndef MCA_DMA_H
-#define MCA_DMA_H
+#ifndef ASM_X86__MCA_DMA_H
+#define ASM_X86__MCA_DMA_H
 
 #include <asm/io.h>
 #include <linux/ioport.h>
@@ -198,4 +198,4 @@
 	outb(mode, MCA_DMA_REG_EXE);
 }
 
-#endif /* MCA_DMA_H */
+#endif /* ASM_X86__MCA_DMA_H */
diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index 531eaa5..036133e 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MCE_H
-#define _ASM_X86_MCE_H
+#ifndef ASM_X86__MCE_H
+#define ASM_X86__MCE_H
 
 #ifdef __x86_64__
 
@@ -127,4 +127,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__MCE_H */
diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h
index 90bc410..4ef28e6 100644
--- a/include/asm-x86/mman.h
+++ b/include/asm-x86/mman.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMAN_H
-#define _ASM_X86_MMAN_H
+#ifndef ASM_X86__MMAN_H
+#define ASM_X86__MMAN_H
 
 #include <asm-generic/mman.h>
 
@@ -17,4 +17,4 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#endif /* _ASM_X86_MMAN_H */
+#endif /* ASM_X86__MMAN_H */
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index e293ab8..fb79b1c 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MMCONFIG_H
-#define _ASM_MMCONFIG_H
+#ifndef ASM_X86__MMCONFIG_H
+#define ASM_X86__MMCONFIG_H
 
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
@@ -9,4 +9,4 @@
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__MMCONFIG_H */
diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 00e8867..a30d7a9 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMU_H
-#define _ASM_X86_MMU_H
+#ifndef ASM_X86__MMU_H
+#define ASM_X86__MMU_H
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
@@ -28,4 +28,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_MMU_H */
+#endif /* ASM_X86__MMU_H */
diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h
index fac5701..8ec940b 100644
--- a/include/asm-x86/mmu_context.h
+++ b/include/asm-x86/mmu_context.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MMU_CONTEXT_H
-#define __ASM_X86_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_H
+#define ASM_X86__MMU_CONTEXT_H
 
 #include <asm/desc.h>
 #include <asm/atomic.h>
@@ -34,4 +34,4 @@
 } while (0);
 
 
-#endif /* __ASM_X86_MMU_CONTEXT_H */
+#endif /* ASM_X86__MMU_CONTEXT_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 824fc57..cce6f6e 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SCHED_H
-#define __I386_SCHED_H
+#ifndef ASM_X86__MMU_CONTEXT_32_H
+#define ASM_X86__MMU_CONTEXT_32_H
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -53,4 +53,4 @@
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_32_H */
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index c700063..2675867 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -1,5 +1,5 @@
-#ifndef __X86_64_MMU_CONTEXT_H
-#define __X86_64_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_64_H
+#define ASM_X86__MMU_CONTEXT_64_H
 
 #include <asm/pda.h>
 
@@ -51,4 +51,4 @@
 	asm volatile("movl %0,%%fs"::"r"(0));	\
 } while (0)
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_64_H */
diff --git a/include/asm-x86/mmx.h b/include/asm-x86/mmx.h
index 9408812..2e7299b 100644
--- a/include/asm-x86/mmx.h
+++ b/include/asm-x86/mmx.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MMX_H
-#define _ASM_MMX_H
+#ifndef ASM_X86__MMX_H
+#define ASM_X86__MMX_H
 
 /*
  *	MMX 3Dnow! helper operations
@@ -11,4 +11,4 @@
 extern void mmx_clear_page(void *page);
 extern void mmx_copy_page(void *to, void *from);
 
-#endif
+#endif /* ASM_X86__MMX_H */
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 5862e64..121b65d 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -3,8 +3,8 @@
  *
  */
 
-#ifndef _ASM_MMZONE_H_
-#define _ASM_MMZONE_H_
+#ifndef ASM_X86__MMZONE_32_H
+#define ASM_X86__MMZONE_32_H
 
 #include <asm/smp.h>
 
@@ -131,4 +131,4 @@
 })
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
-#endif /* _ASM_MMZONE_H_ */
+#endif /* ASM_X86__MMZONE_32_H */
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 594bd0d..626b03a 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -1,8 +1,8 @@
 /* K8 NUMA support */
 /* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
 /* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_64_MMZONE_H
-#define _ASM_X86_64_MMZONE_H 1
+#ifndef ASM_X86__MMZONE_64_H
+#define ASM_X86__MMZONE_64_H
 
 
 #ifdef CONFIG_NUMA
@@ -49,4 +49,4 @@
 #endif
 
 #endif
-#endif
+#endif /* ASM_X86__MMZONE_64_H */
diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h
index bfedb24..48dc3e0 100644
--- a/include/asm-x86/module.h
+++ b/include/asm-x86/module.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MODULE_H
-#define _ASM_MODULE_H
+#ifndef ASM_X86__MODULE_H
+#define ASM_X86__MODULE_H
 
 /* x86_32/64 are simple */
 struct mod_arch_specific {};
@@ -79,4 +79,4 @@
 # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
 #endif
 
-#endif /* _ASM_MODULE_H */
+#endif /* ASM_X86__MODULE_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index b6995e5..118da36 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -1,5 +1,5 @@
-#ifndef _AM_X86_MPSPEC_H
-#define _AM_X86_MPSPEC_H
+#ifndef ASM_X86__MPSPEC_H
+#define ASM_X86__MPSPEC_H
 
 #include <linux/init.h>
 
@@ -141,4 +141,4 @@
 
 extern physid_mask_t phys_cpu_present_map;
 
-#endif
+#endif /* ASM_X86__MPSPEC_H */
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index 38d1e73..79166b0 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MPSPEC_DEF_H
-#define __ASM_MPSPEC_DEF_H
+#ifndef ASM_X86__MPSPEC_DEF_H
+#define ASM_X86__MPSPEC_DEF_H
 
 /*
  * Structure definitions for SMP machines following the
@@ -177,4 +177,4 @@
 	MP_BUS_PCI,
 	MP_BUS_MCA,
 };
-#endif
+#endif /* ASM_X86__MPSPEC_DEF_H */
diff --git a/include/asm-x86/msgbuf.h b/include/asm-x86/msgbuf.h
index 7e4e948..1b538c9 100644
--- a/include/asm-x86/msgbuf.h
+++ b/include/asm-x86/msgbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MSGBUF_H
-#define _ASM_X86_MSGBUF_H
+#ifndef ASM_X86__MSGBUF_H
+#define ASM_X86__MSGBUF_H
 
 /*
  * The msqid64_ds structure for i386 architecture.
@@ -36,4 +36,4 @@
 	unsigned long  __unused5;
 };
 
-#endif /* _ASM_X86_MSGBUF_H */
+#endif /* ASM_X86__MSGBUF_H */
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 296f29c..ed91902 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -1,5 +1,5 @@
-#ifndef ASM_MSIDEF_H
-#define ASM_MSIDEF_H
+#ifndef ASM_X86__MSIDEF_H
+#define ASM_X86__MSIDEF_H
 
 /*
  * Constants for Intel APIC based MSI messages.
@@ -48,4 +48,8 @@
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 					 MSI_ADDR_DEST_ID_MASK)
 
-#endif /* ASM_MSIDEF_H */
+#define MSI_ADDR_IR_EXT_INT		(1 << 4)
+#define MSI_ADDR_IR_SHV			(1 << 3)
+#define MSI_ADDR_IR_INDEX1(index)	((index & 0x8000) >> 13)
+#define MSI_ADDR_IR_INDEX2(index)	((index & 0x7fff) << 5)
+#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 44bce77..3052f05 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MSR_INDEX_H
-#define __ASM_MSR_INDEX_H
+#ifndef ASM_X86__MSR_INDEX_H
+#define ASM_X86__MSR_INDEX_H
 
 /* CPU model specific register (MSR) numbers */
 
@@ -310,4 +310,4 @@
 /* Geode defined MSRs */
 #define MSR_GEODE_BUSCONT_CONF0		0x00001900
 
-#endif /* __ASM_MSR_INDEX_H */
+#endif /* ASM_X86__MSR_INDEX_H */
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 2362cfd..eee83f7 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MSR_H_
-#define __ASM_X86_MSR_H_
+#ifndef ASM_X86__MSR_H
+#define ASM_X86__MSR_H
 
 #include <asm/msr-index.h>
 
@@ -221,4 +221,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif
+#endif /* ASM_X86__MSR_H */
diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index a69a01a..23a7f83 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h
@@ -20,8 +20,8 @@
     The postal address is:
       Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
 */
-#ifndef _ASM_X86_MTRR_H
-#define _ASM_X86_MTRR_H
+#ifndef ASM_X86__MTRR_H
+#define ASM_X86__MTRR_H
 
 #include <linux/ioctl.h>
 #include <linux/errno.h>
@@ -170,4 +170,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif  /*  _ASM_X86_MTRR_H  */
+#endif /* ASM_X86__MTRR_H */
diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h
index 73e928e..25c16d8 100644
--- a/include/asm-x86/mutex_32.h
+++ b/include/asm-x86/mutex_32.h
@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_32_H
+#define ASM_X86__MUTEX_32_H
 
 #include <asm/alternative.h>
 
@@ -122,4 +122,4 @@
 #endif
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_32_H */
diff --git a/include/asm-x86/mutex_64.h b/include/asm-x86/mutex_64.h
index f3fae9b..918ba21 100644
--- a/include/asm-x86/mutex_64.h
+++ b/include/asm-x86/mutex_64.h
@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_64_H
+#define ASM_X86__MUTEX_64_H
 
 /**
  * __mutex_fastpath_lock - decrement and call function if negative
@@ -97,4 +97,4 @@
 		return 0;
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_64_H */
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 21f8d02..f8b76f3 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_NMI_H_
-#define _ASM_X86_NMI_H_
+#ifndef ASM_X86__NMI_H
+#define ASM_X86__NMI_H
 
 #include <linux/pm.h>
 #include <asm/irq.h>
@@ -81,4 +81,4 @@
 void stop_nmi(void);
 void restart_nmi(void);
 
-#endif
+#endif /* ASM_X86__NMI_H */
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index ad0bedd..ae74272 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_NOPS_H
-#define _ASM_NOPS_H 1
+#ifndef ASM_X86__NOPS_H
+#define ASM_X86__NOPS_H
 
 /* Define nops for use with alternative() */
 
@@ -115,4 +115,4 @@
 
 #define ASM_NOP_MAX 8
 
-#endif
+#endif /* ASM_X86__NOPS_H */
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 220d7b7..44cb078 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_32_NUMA_H
-#define _ASM_X86_32_NUMA_H 1
+#ifndef ASM_X86__NUMA_32_H
+#define ASM_X86__NUMA_32_H
 
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
@@ -8,4 +8,4 @@
 extern void set_highmem_pages_init(void);
 #endif
 
-#endif /* _ASM_X86_32_NUMA_H */
+#endif /* ASM_X86__NUMA_32_H */
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 3830094..15c9903 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_NUMA_H
-#define _ASM_X8664_NUMA_H 1
+#ifndef ASM_X86__NUMA_64_H
+#define ASM_X86__NUMA_64_H
 
 #include <linux/nodemask.h>
 #include <asm/apicdef.h>
@@ -40,4 +40,4 @@
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
-#endif
+#endif /* ASM_X86__NUMA_64_H */
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 34b92d5..124bf7d 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -23,8 +23,8 @@
  * Send feedback to <gone@us.ibm.com>
  */
 
-#ifndef NUMAQ_H
-#define NUMAQ_H
+#ifndef ASM_X86__NUMAQ_H
+#define ASM_X86__NUMAQ_H
 
 #ifdef CONFIG_X86_NUMAQ
 
@@ -165,5 +165,5 @@
 	return 0;
 }
 #endif /* CONFIG_X86_NUMAQ */
-#endif /* NUMAQ_H */
+#endif /* ASM_X86__NUMAQ_H */
 
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/numaq/apic.h
similarity index 96%
rename from include/asm-x86/mach-numaq/mach_apic.h
rename to include/asm-x86/numaq/apic.h
index d802465..a8344ba 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef __ASM_NUMAQ_APIC_H
+#define __ASM_NUMAQ_APIC_H
 
 #include <asm/io.h>
 #include <linux/mmzone.h>
@@ -135,4 +135,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* __ASM_NUMAQ_APIC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/numaq/apicdef.h
similarity index 74%
rename from include/asm-x86/mach-numaq/mach_apicdef.h
rename to include/asm-x86/numaq/apicdef.h
index bf439d0..e012a46 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/numaq/apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef __ASM_NUMAQ_APICDEF_H
+#define __ASM_NUMAQ_APICDEF_H
 
 
 #define APIC_ID_MASK (0xF<<24)
diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/numaq/ipi.h
similarity index 83%
rename from include/asm-x86/mach-numaq/mach_ipi.h
rename to include/asm-x86/numaq/ipi.h
index c604448..935588d 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/numaq/ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef __ASM_NUMAQ_IPI_H
+#define __ASM_NUMAQ_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h
new file mode 100644
index 0000000..252292e
--- /dev/null
+++ b/include/asm-x86/numaq/mpparse.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_NUMAQ_MPPARSE_H
+#define __ASM_NUMAQ_MPPARSE_H
+
+extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+				char *productid);
+
+#endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/numaq/wakecpu.h
similarity index 91%
rename from include/asm-x86/mach-numaq/mach_wakecpu.h
rename to include/asm-x86/numaq/wakecpu.h
index 0053004..c577bda 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/numaq/wakecpu.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef __ASM_NUMAQ_WAKECPU_H
+#define __ASM_NUMAQ_WAKECPU_H
 
 /* This file copes with machines that wakeup secondary CPUs by NMIs */
 
@@ -40,4 +40,4 @@
 
 #define inquire_remote_apic(apicid) {}
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h
index 97d4713..d7328b1 100644
--- a/include/asm-x86/olpc.h
+++ b/include/asm-x86/olpc.h
@@ -1,7 +1,7 @@
 /* OLPC machine specific definitions */
 
-#ifndef ASM_OLPC_H_
-#define ASM_OLPC_H_
+#ifndef ASM_X86__OLPC_H
+#define ASM_X86__OLPC_H
 
 #include <asm/geode.h>
 
@@ -129,4 +129,4 @@
 #define OLPC_GPIO_LID		geode_gpio(26)
 #define OLPC_GPIO_ECSCI		geode_gpio(27)
 
-#endif
+#endif /* ASM_X86__OLPC_H */
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 4998211..79544e6 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_H
-#define _ASM_X86_PAGE_H
+#ifndef ASM_X86__PAGE_H
+#define ASM_X86__PAGE_H
 
 #include <linux/const.h>
 
@@ -199,4 +199,4 @@
 #define __HAVE_ARCH_GATE_AREA 1
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_X86_PAGE_H */
+#endif /* ASM_X86__PAGE_H */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index ab85287..f32062a 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_32_H
-#define _ASM_X86_PAGE_32_H
+#ifndef ASM_X86__PAGE_32_H
+#define ASM_X86__PAGE_32_H
 
 /*
  * This handles the memory map.
@@ -96,6 +96,7 @@
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
+extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 
@@ -126,4 +127,4 @@
 #endif	/* CONFIG_X86_3DNOW */
 #endif	/* !__ASSEMBLY__ */
 
-#endif /* _ASM_X86_PAGE_32_H */
+#endif /* ASM_X86__PAGE_32_H */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c6916c8..5e64acf 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_PAGE_H
-#define _X86_64_PAGE_H
+#ifndef ASM_X86__PAGE_64_H
+#define ASM_X86__PAGE_64_H
 
 #define PAGETABLE_LEVELS	4
 
@@ -91,6 +91,7 @@
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
 
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
@@ -102,4 +103,4 @@
 #endif
 
 
-#endif /* _X86_64_PAGE_H */
+#endif /* ASM_X86__PAGE_64_H */
diff --git a/include/asm-x86/param.h b/include/asm-x86/param.h
index 6f0d042..0009cfb 100644
--- a/include/asm-x86/param.h
+++ b/include/asm-x86/param.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARAM_H
-#define _ASM_X86_PARAM_H
+#ifndef ASM_X86__PARAM_H
+#define ASM_X86__PARAM_H
 
 #ifdef __KERNEL__
 # define HZ		CONFIG_HZ	/* Internal kernel timer frequency */
@@ -19,4 +19,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-#endif /* _ASM_X86_PARAM_H */
+#endif /* ASM_X86__PARAM_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93..ff9a7e4 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_PARAVIRT_H
-#define __ASM_PARAVIRT_H
+#ifndef ASM_X86__PARAVIRT_H
+#define ASM_X86__PARAVIRT_H
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 
@@ -200,12 +200,6 @@
 
 struct pv_apic_ops {
 #ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Direct APIC operations, principally for VMI.  Ideally
-	 * these shouldn't be in this interface.
-	 */
-	void (*apic_write)(unsigned long reg, u32 v);
-	u32 (*apic_read)(unsigned long reg);
 	void (*setup_boot_clock)(void);
 	void (*setup_secondary_clock)(void);
 
@@ -898,19 +892,6 @@
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Basic functions accessing APICs.
- */
-static inline void apic_write(unsigned long reg, u32 v)
-{
-	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
-}
-
-static inline u32 apic_read(unsigned long reg)
-{
-	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
-}
-
 static inline void setup_boot_clock(void)
 {
 	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
@@ -1634,4 +1615,4 @@
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
-#endif	/* __ASM_PARAVIRT_H */
+#endif /* ASM_X86__PARAVIRT_H */
diff --git a/include/asm-x86/parport.h b/include/asm-x86/parport.h
index 3c4ffeb..2e3dda4 100644
--- a/include/asm-x86/parport.h
+++ b/include/asm-x86/parport.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARPORT_H
-#define _ASM_X86_PARPORT_H
+#ifndef ASM_X86__PARPORT_H
+#define ASM_X86__PARPORT_H
 
 static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
 static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
@@ -7,4 +7,4 @@
 	return parport_pc_find_isa_ports(autoirq, autodma);
 }
 
-#endif /* _ASM_X86_PARPORT_H */
+#endif /* ASM_X86__PARPORT_H */
diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 7edc473..482c3e3 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_PAT_H
-#define _ASM_PAT_H
+#ifndef ASM_X86__PAT_H
+#define ASM_X86__PAT_H
 
 #include <linux/types.h>
 
@@ -19,4 +19,4 @@
 
 extern void pat_disable(char *reason);
 
-#endif
+#endif /* ASM_X86__PAT_H */
diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h
index 80c775d..da42be0 100644
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h
@@ -1,5 +1,5 @@
-#ifndef ASM_PCI_DIRECT_H
-#define ASM_PCI_DIRECT_H 1
+#ifndef ASM_X86__PCI_DIRECT_H
+#define ASM_X86__PCI_DIRECT_H
 
 #include <linux/types.h>
 
@@ -18,4 +18,4 @@
 extern unsigned int pci_early_dump_regs;
 extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
 extern void early_dump_pci_devices(void);
-#endif
+#endif /* ASM_X86__PCI_DIRECT_H */
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 2db14cf..6025831 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -1,5 +1,5 @@
-#ifndef __x86_PCI_H
-#define __x86_PCI_H
+#ifndef ASM_X86__PCI_H
+#define ASM_X86__PCI_H
 
 #include <linux/mm.h> /* for struct page */
 #include <linux/types.h>
@@ -111,4 +111,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__PCI_H */
diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h
index a50d468..3f22882 100644
--- a/include/asm-x86/pci_32.h
+++ b/include/asm-x86/pci_32.h
@@ -1,5 +1,5 @@
-#ifndef __i386_PCI_H
-#define __i386_PCI_H
+#ifndef ASM_X86__PCI_32_H
+#define ASM_X86__PCI_32_H
 
 
 #ifdef __KERNEL__
@@ -31,4 +31,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif /* __i386_PCI_H */
+#endif /* ASM_X86__PCI_32_H */
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index f330234..f72e12d 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -1,5 +1,5 @@
-#ifndef __x8664_PCI_H
-#define __x8664_PCI_H
+#ifndef ASM_X86__PCI_64_H
+#define ASM_X86__PCI_64_H
 
 #ifdef __KERNEL__
 
@@ -63,4 +63,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* __x8664_PCI_H */
+#endif /* ASM_X86__PCI_64_H */
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index b34e9a7..80860af 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -1,5 +1,5 @@
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
+#ifndef ASM_X86__PDA_H
+#define ASM_X86__PDA_H
 
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
@@ -134,4 +134,4 @@
 
 #define PDA_STACKOFFSET (5*8)
 
-#endif
+#endif /* ASM_X86__PDA_H */
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index f643a3a..e10a1d0 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PERCPU_H_
-#define _ASM_X86_PERCPU_H_
+#ifndef ASM_X86__PERCPU_H
+#define ASM_X86__PERCPU_H
 
 #ifdef CONFIG_X86_64
 #include <linux/compiler.h>
@@ -215,4 +215,4 @@
 
 #endif	/* !CONFIG_SMP */
 
-#endif /* _ASM_X86_PERCPU_H_ */
+#endif /* ASM_X86__PERCPU_H */
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index d63ea43..3cd23ad 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGALLOC_H
-#define _ASM_X86_PGALLOC_H
+#ifndef ASM_X86__PGALLOC_H
+#define ASM_X86__PGALLOC_H
 
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
@@ -111,4 +111,4 @@
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
-#endif	/* _ASM_X86_PGALLOC_H */
+#endif /* ASM_X86__PGALLOC_H */
diff --git a/include/asm-x86/pgtable-2level-defs.h b/include/asm-x86/pgtable-2level-defs.h
index 0f71c9f..7ec48f4 100644
--- a/include/asm-x86/pgtable-2level-defs.h
+++ b/include/asm-x86/pgtable-2level-defs.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
-#define _I386_PGTABLE_2LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_2LEVEL_DEFS_H
 
 #define SHARED_KERNEL_PMD	0
 
@@ -17,4 +17,4 @@
 
 #define PTRS_PER_PTE	1024
 
-#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_DEFS_H */
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 46bc52c..60440b1 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_H
-#define _I386_PGTABLE_2LEVEL_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_H
+#define ASM_X86__PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
@@ -78,4 +78,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
-#endif /* _I386_PGTABLE_2LEVEL_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_H */
diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h
index 448ac95..c05fe6f 100644
--- a/include/asm-x86/pgtable-3level-defs.h
+++ b/include/asm-x86/pgtable-3level-defs.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
-#define _I386_PGTABLE_3LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_3LEVEL_DEFS_H
 
 #ifdef CONFIG_PARAVIRT
 #define SHARED_KERNEL_PMD	(pv_info.shared_kernel_pmd)
@@ -25,4 +25,4 @@
  */
 #define PTRS_PER_PTE	512
 
-#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_DEFS_H */
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 105057f..e713bd5 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_H
-#define _I386_PGTABLE_3LEVEL_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_H
+#define ASM_X86__PGTABLE_3LEVEL_H
 
 /*
  * Intel Physical Address Extension (PAE) Mode - three-level page
@@ -179,4 +179,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
-#endif /* _I386_PGTABLE_3LEVEL_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_H */
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 04caa2f..57d919a 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGTABLE_H
-#define _ASM_X86_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_H
+#define ASM_X86__PGTABLE_H
 
 #define FIRST_USER_ADDRESS	0
 
@@ -313,6 +313,8 @@
 static inline void native_pagetable_setup_done(pgd_t *base) {}
 #endif
 
+extern int arch_report_meminfo(char *page);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -521,4 +523,4 @@
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
-#endif	/* _ASM_X86_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_H */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 5c3b265..45c8235 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_H
-#define _I386_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_32_H
+#define ASM_X86__PGTABLE_32_H
 
 
 /*
@@ -31,6 +31,7 @@
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
 
+extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -186,4 +187,4 @@
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
-#endif /* _I386_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_32_H */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 549144d..e3dcf7a 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_PGTABLE_H
-#define _X86_64_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_64_H
+#define ASM_X86__PGTABLE_64_H
 
 #include <linux/const.h>
 #ifndef __ASSEMBLY__
@@ -284,4 +284,4 @@
 #define __HAVE_ARCH_PTE_SAME
 #endif /* !__ASSEMBLY__ */
 
-#endif /* _X86_64_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_64_H */
diff --git a/include/asm-x86/posix_types_32.h b/include/asm-x86/posix_types_32.h
index b031efd..70cf2bb 100644
--- a/include/asm-x86/posix_types_32.h
+++ b/include/asm-x86/posix_types_32.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_POSIX_TYPES_H
-#define __ARCH_I386_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_32_H
+#define ASM_X86__POSIX_TYPES_32_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -82,4 +82,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_32_H */
diff --git a/include/asm-x86/posix_types_64.h b/include/asm-x86/posix_types_64.h
index d6624c9..388b4e7 100644
--- a/include/asm-x86/posix_types_64.h
+++ b/include/asm-x86/posix_types_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_POSIX_TYPES_H
-#define _ASM_X86_64_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_64_H
+#define ASM_X86__POSIX_TYPES_64_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -116,4 +116,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_64_H */
diff --git a/include/asm-x86/prctl.h b/include/asm-x86/prctl.h
index 52952ad..e7ae34e 100644
--- a/include/asm-x86/prctl.h
+++ b/include/asm-x86/prctl.h
@@ -1,5 +1,5 @@
-#ifndef X86_64_PRCTL_H
-#define X86_64_PRCTL_H 1
+#ifndef ASM_X86__PRCTL_H
+#define ASM_X86__PRCTL_H
 
 #define ARCH_SET_GS 0x1001
 #define ARCH_SET_FS 0x1002
@@ -7,4 +7,4 @@
 #define ARCH_GET_GS 0x1004
 
 
-#endif
+#endif /* ASM_X86__PRCTL_H */
diff --git a/include/asm-x86/processor-cyrix.h b/include/asm-x86/processor-cyrix.h
index 97568ad..1198f2a 100644
--- a/include/asm-x86/processor-cyrix.h
+++ b/include/asm-x86/processor-cyrix.h
@@ -28,3 +28,11 @@
 	outb(reg, 0x22);
 	outb(data, 0x23);
 }
+
+#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86_old(reg, data) do { \
+	outb((reg), 0x22); \
+	outb((data), 0x23); \
+} while (0)
+
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index eff2ecd..dc5f071 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I386_PROCESSOR_FLAGS_H
-#define __ASM_I386_PROCESSOR_FLAGS_H
+#ifndef ASM_X86__PROCESSOR_FLAGS_H
+#define ASM_X86__PROCESSOR_FLAGS_H
 /* Various flags defined: can be included from assembler. */
 
 /*
@@ -59,6 +59,7 @@
 #define X86_CR4_OSFXSR	0x00000200 /* enable fast FPU save and restore */
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
+#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
 
 /*
  * x86-64 Task Priority Register, CR8
@@ -96,4 +97,4 @@
 #endif
 #endif
 
-#endif	/* __ASM_I386_PROCESSOR_FLAGS_H */
+#endif /* ASM_X86__PROCESSOR_FLAGS_H */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 4df3e2f..df03c98 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_PROCESSOR_H
-#define __ASM_X86_PROCESSOR_H
+#ifndef ASM_X86__PROCESSOR_H
+#define ASM_X86__PROCESSOR_H
 
 #include <asm/processor-flags.h>
 
@@ -140,6 +140,8 @@
 #define current_cpu_data	boot_cpu_data
 #endif
 
+extern const struct seq_operations cpuinfo_op;
+
 static inline int hlt_works(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -153,6 +155,8 @@
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+extern struct pt_regs *idle_regs(struct pt_regs *);
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -322,7 +326,12 @@
 	/* 16*16 bytes for each XMM-reg = 256 bytes:			*/
 	u32			xmm_space[64];
 
-	u32			padding[24];
+	u32			padding[12];
+
+	union {
+		u32		padding1[12];
+		u32		sw_reserved[12];
+	};
 
 } __attribute__((aligned(16)));
 
@@ -346,10 +355,23 @@
 	u32			entry_eip;
 };
 
+struct xsave_hdr_struct {
+	u64 xstate_bv;
+	u64 reserved1[2];
+	u64 reserved2[5];
+} __attribute__((packed));
+
+struct xsave_struct {
+	struct i387_fxsave_struct i387;
+	struct xsave_hdr_struct xsave_hdr;
+	/* new processor state extensions will go here */
+} __attribute__ ((packed, aligned (64)));
+
 union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
+	struct xsave_struct		xsave;
 };
 
 #ifdef CONFIG_X86_64
@@ -943,4 +965,4 @@
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-#endif
+#endif /* ASM_X86__PROCESSOR_H */
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 3dd458c..6e89e8b 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_PROTO_H
-#define _ASM_X8664_PROTO_H 1
+#ifndef ASM_X86__PROTO_H
+#define ASM_X86__PROTO_H
 
 #include <asm/ldt.h>
 
@@ -29,4 +29,4 @@
 #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
 #define round_down(x, y) ((x) & ~((y) - 1))
 
-#endif
+#endif /* ASM_X86__PROTO_H */
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 72e7b9d..d0cf334 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_ABI_H
-#define _ASM_X86_PTRACE_ABI_H
+#ifndef ASM_X86__PTRACE_ABI_H
+#define ASM_X86__PTRACE_ABI_H
 
 #ifdef __i386__
 
@@ -140,4 +140,4 @@
    Returns number of BTS records drained.
 */
 
-#endif
+#endif /* ASM_X86__PTRACE_ABI_H */
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 8a71db8..a33f027 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_H
-#define _ASM_X86_PTRACE_H
+#ifndef ASM_X86__PTRACE_H
+#define ASM_X86__PTRACE_H
 
 #include <linux/compiler.h>	/* For __user */
 #include <asm/ptrace-abi.h>
@@ -148,6 +148,9 @@
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 #endif
 
+extern long syscall_trace_enter(struct pt_regs *);
+extern void syscall_trace_leave(struct pt_regs *);
+
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
 	return regs->ax;
@@ -213,6 +216,11 @@
 	return regs->bp;
 }
 
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	return regs->sp;
+}
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
@@ -239,4 +247,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__PTRACE_H */
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
index 6857f84..edb3b4e 100644
--- a/include/asm-x86/pvclock-abi.h
+++ b/include/asm-x86/pvclock-abi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_ABI_H_
-#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef ASM_X86__PVCLOCK_ABI_H
+#define ASM_X86__PVCLOCK_ABI_H
 #ifndef __ASSEMBLY__
 
 /*
@@ -39,4 +39,4 @@
 } __attribute__((__packed__));
 
 #endif /* __ASSEMBLY__ */
-#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
+#endif /* ASM_X86__PVCLOCK_ABI_H */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
index 85b1bba..1a38f68 100644
--- a/include/asm-x86/pvclock.h
+++ b/include/asm-x86/pvclock.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_H_
-#define _ASM_X86_PVCLOCK_H_
+#ifndef ASM_X86__PVCLOCK_H
+#define ASM_X86__PVCLOCK_H
 
 #include <linux/clocksource.h>
 #include <asm/pvclock-abi.h>
@@ -10,4 +10,4 @@
 			    struct pvclock_vcpu_time_info *vcpu,
 			    struct timespec *ts);
 
-#endif /* _ASM_X86_PVCLOCK_H_ */
+#endif /* ASM_X86__PVCLOCK_H */
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index 206f355..1c2f0ce 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_REBOOT_H
-#define _ASM_REBOOT_H
+#ifndef ASM_X86__REBOOT_H
+#define ASM_X86__REBOOT_H
 
 struct pt_regs;
 
@@ -18,4 +18,4 @@
 void native_machine_shutdown(void);
 void machine_real_restart(const unsigned char *code, int length);
 
-#endif	/* _ASM_REBOOT_H */
+#endif /* ASM_X86__REBOOT_H */
diff --git a/include/asm-x86/reboot_fixups.h b/include/asm-x86/reboot_fixups.h
index 0cb7d87..2c2987d 100644
--- a/include/asm-x86/reboot_fixups.h
+++ b/include/asm-x86/reboot_fixups.h
@@ -1,6 +1,6 @@
-#ifndef _LINUX_REBOOT_FIXUPS_H
-#define _LINUX_REBOOT_FIXUPS_H
+#ifndef ASM_X86__REBOOT_FIXUPS_H
+#define ASM_X86__REBOOT_FIXUPS_H
 
 extern void mach_reboot_fixups(void);
 
-#endif /* _LINUX_REBOOT_FIXUPS_H */
+#endif /* ASM_X86__REBOOT_FIXUPS_H */
diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index adec887..a01c4e3 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_REQUIRED_FEATURES_H
-#define _ASM_REQUIRED_FEATURES_H 1
+#ifndef ASM_X86__REQUIRED_FEATURES_H
+#define ASM_X86__REQUIRED_FEATURES_H
 
 /* Define minimum CPUID feature set for kernel These bits are checked
    really early to actually display a visible error message before the
@@ -41,6 +41,12 @@
 # define NEED_3DNOW	0
 #endif
 
+#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
+# define NEED_NOPL	(1<<(X86_FEATURE_NOPL & 31))
+#else
+# define NEED_NOPL	0
+#endif
+
 #ifdef CONFIG_X86_64
 #define NEED_PSE	0
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
@@ -67,10 +73,10 @@
 #define REQUIRED_MASK1	(NEED_LM|NEED_3DNOW)
 
 #define REQUIRED_MASK2	0
-#define REQUIRED_MASK3	0
+#define REQUIRED_MASK3	(NEED_NOPL)
 #define REQUIRED_MASK4	0
 #define REQUIRED_MASK5	0
 #define REQUIRED_MASK6	0
 #define REQUIRED_MASK7	0
 
-#endif
+#endif /* ASM_X86__REQUIRED_FEATURES_H */
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 8d9f0b4..519a8ec 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_RESUME_TRACE_H
-#define _ASM_X86_RESUME_TRACE_H
+#ifndef ASM_X86__RESUME_TRACE_H
+#define ASM_X86__RESUME_TRACE_H
 
 #include <asm/asm.h>
 
@@ -18,4 +18,4 @@
 	}							\
 } while (0)
 
-#endif
+#endif /* ASM_X86__RESUME_TRACE_H */
diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h
index c9448bd..5e1256b 100644
--- a/include/asm-x86/rio.h
+++ b/include/asm-x86/rio.h
@@ -5,8 +5,8 @@
  * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  */
 
-#ifndef __ASM_RIO_H
-#define __ASM_RIO_H
+#ifndef ASM_X86__RIO_H
+#define ASM_X86__RIO_H
 
 #define RIO_TABLE_VERSION	3
 
@@ -60,4 +60,4 @@
 	ALT_CALGARY	= 5,	/* Second Planar Calgary      */
 };
 
-#endif /* __ASM_RIO_H */
+#endif /* ASM_X86__RIO_H */
diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h
index 6a8c0d6..48a3109 100644
--- a/include/asm-x86/rwlock.h
+++ b/include/asm-x86/rwlock.h
@@ -1,8 +1,8 @@
-#ifndef _ASM_X86_RWLOCK_H
-#define _ASM_X86_RWLOCK_H
+#ifndef ASM_X86__RWLOCK_H
+#define ASM_X86__RWLOCK_H
 
 #define RW_LOCK_BIAS		 0x01000000
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
-#endif /* _ASM_X86_RWLOCK_H */
+#endif /* ASM_X86__RWLOCK_H */
diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index 750f2a3..3ff3015 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h
@@ -29,8 +29,8 @@
  * front, then they'll all be woken up, but no other readers will be.
  */
 
-#ifndef _I386_RWSEM_H
-#define _I386_RWSEM_H
+#ifndef ASM_X86__RWSEM_H
+#define ASM_X86__RWSEM_H
 
 #ifndef _LINUX_RWSEM_H
 #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
@@ -262,4 +262,4 @@
 }
 
 #endif /* __KERNEL__ */
-#endif /* _I386_RWSEM_H */
+#endif /* ASM_X86__RWSEM_H */
diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index c043206..ee48f88 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
+#ifndef ASM_X86__SCATTERLIST_H
+#define ASM_X86__SCATTERLIST_H
 
 #include <asm/types.h>
 
@@ -30,4 +30,4 @@
 # define sg_dma_len(sg)		((sg)->dma_length)
 #endif
 
-#endif
+#endif /* ASM_X86__SCATTERLIST_H */
diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h
index 36e71c5..cf9ab2d 100644
--- a/include/asm-x86/seccomp_32.h
+++ b/include/asm-x86/seccomp_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_32_H
+#define ASM_X86__SECCOMP_32_H
 
 #include <linux/thread_info.h>
 
@@ -14,4 +14,4 @@
 #define __NR_seccomp_exit __NR_exit
 #define __NR_seccomp_sigreturn __NR_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_32_H */
diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h
index 76cfe69..03274ce 100644
--- a/include/asm-x86/seccomp_64.h
+++ b/include/asm-x86/seccomp_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_64_H
+#define ASM_X86__SECCOMP_64_H
 
 #include <linux/thread_info.h>
 
@@ -22,4 +22,4 @@
 #define __NR_seccomp_exit_32 __NR_ia32_exit
 #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_64_H */
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 646452e..ea5f0a8 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEGMENT_H_
-#define _ASM_X86_SEGMENT_H_
+#ifndef ASM_X86__SEGMENT_H
+#define ASM_X86__SEGMENT_H
 
 /* Constructor for a conventional segment GDT (or LDT) entry */
 /* This is a macro so it can be used in initializers */
@@ -212,4 +212,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__SEGMENT_H */
diff --git a/include/asm-x86/sembuf.h b/include/asm-x86/sembuf.h
index ee50c80..81f06b7 100644
--- a/include/asm-x86/sembuf.h
+++ b/include/asm-x86/sembuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEMBUF_H
-#define _ASM_X86_SEMBUF_H
+#ifndef ASM_X86__SEMBUF_H
+#define ASM_X86__SEMBUF_H
 
 /*
  * The semid64_ds structure for x86 architecture.
@@ -21,4 +21,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SEMBUF_H */
+#endif /* ASM_X86__SEMBUF_H */
diff --git a/include/asm-x86/serial.h b/include/asm-x86/serial.h
index 628c801..303660b 100644
--- a/include/asm-x86/serial.h
+++ b/include/asm-x86/serial.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SERIAL_H
-#define _ASM_X86_SERIAL_H
+#ifndef ASM_X86__SERIAL_H
+#define ASM_X86__SERIAL_H
 
 /*
  * This assumes you have a 1.8432 MHz clock for your UART.
@@ -26,4 +26,4 @@
 	{ 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS },	/* ttyS2 */	\
 	{ 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS },	/* ttyS3 */
 
-#endif /* _ASM_X86_SERIAL_H */
+#endif /* ASM_X86__SERIAL_H */
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index a07c6f1..11b6cc1 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SETUP_H
-#define _ASM_X86_SETUP_H
+#ifndef ASM_X86__SETUP_H
+#define ASM_X86__SETUP_H
 
 #define COMMAND_LINE_SIZE 2048
 
@@ -38,9 +38,11 @@
 	void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                     unsigned short oemsize);
+	int (*setup_ioapic_ids)(void);
 };
 
 extern struct x86_quirks *x86_quirks;
+extern unsigned long saved_video_mode;
 
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
@@ -100,4 +102,4 @@
 #endif /* __ASSEMBLY__ */
 #endif  /*  __KERNEL__  */
 
-#endif /* _ASM_X86_SETUP_H */
+#endif /* ASM_X86__SETUP_H */
diff --git a/include/asm-x86/shmbuf.h b/include/asm-x86/shmbuf.h
index b51413b..f51aec2 100644
--- a/include/asm-x86/shmbuf.h
+++ b/include/asm-x86/shmbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SHMBUF_H
-#define _ASM_X86_SHMBUF_H
+#ifndef ASM_X86__SHMBUF_H
+#define ASM_X86__SHMBUF_H
 
 /*
  * The shmid64_ds structure for x86 architecture.
@@ -48,4 +48,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SHMBUF_H */
+#endif /* ASM_X86__SHMBUF_H */
diff --git a/include/asm-x86/shmparam.h b/include/asm-x86/shmparam.h
index 0880cf0..a83a1fd 100644
--- a/include/asm-x86/shmparam.h
+++ b/include/asm-x86/shmparam.h
@@ -1,6 +1,6 @@
-#ifndef _ASM_X86_SHMPARAM_H
-#define _ASM_X86_SHMPARAM_H
+#ifndef ASM_X86__SHMPARAM_H
+#define ASM_X86__SHMPARAM_H
 
 #define SHMLBA PAGE_SIZE	 /* attach addr a multiple of this */
 
-#endif /* _ASM_X86_SHMPARAM_H */
+#endif /* ASM_X86__SHMPARAM_H */
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 2f9c884..ee813f4 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -1,9 +1,43 @@
-#ifndef _ASM_X86_SIGCONTEXT_H
-#define _ASM_X86_SIGCONTEXT_H
+#ifndef ASM_X86__SIGCONTEXT_H
+#define ASM_X86__SIGCONTEXT_H
 
 #include <linux/compiler.h>
 #include <asm/types.h>
 
+#define FP_XSTATE_MAGIC1	0x46505853U
+#define FP_XSTATE_MAGIC2	0x46505845U
+#define FP_XSTATE_MAGIC2_SIZE	sizeof(FP_XSTATE_MAGIC2)
+
+/*
+ * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
+ * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
+ * are used to extended the fpstate pointer in the sigcontext, which now
+ * includes the extended state information along with fpstate information.
+ *
+ * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
+ * area and FP_XSTATE_MAGIC2 at the end of memory layout
+ * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
+ * extended state information in the memory layout pointed by the fpstate
+ * pointer in sigcontext.
+ */
+struct _fpx_sw_bytes {
+	__u32 magic1;		/* FP_XSTATE_MAGIC1 */
+	__u32 extended_size;	/* total size of the layout referred by
+				 * fpstate pointer in the sigcontext.
+				 */
+	__u64 xstate_bv;
+				/* feature bit mask (including fp/sse/extended
+				 * state) that is present in the memory
+				 * layout.
+				 */
+	__u32 xstate_size;	/* actual xsave state size, based on the
+				 * features saved in the layout.
+				 * 'extended_size' will be greater than
+				 * 'xstate_size'.
+				 */
+	__u32 padding[7];	/*  for future use. */
+};
+
 #ifdef __i386__
 /*
  * As documented in the iBCS2 standard..
@@ -53,7 +87,13 @@
 	unsigned long	reserved;
 	struct _fpxreg	_fxsr_st[8];	/* FXSR FPU reg data is ignored */
 	struct _xmmreg	_xmm[8];
-	unsigned long	padding[56];
+	unsigned long	padding1[44];
+
+	union {
+		unsigned long	padding2[12];
+		struct _fpx_sw_bytes sw_reserved; /* represents the extended
+						   * state info */
+	};
 };
 
 #define X86_FXSR_MAGIC		0x0000
@@ -79,7 +119,15 @@
 	unsigned long flags;
 	unsigned long sp_at_signal;
 	unsigned short ss, __ssh;
-	struct _fpstate __user *fpstate;
+
+	/*
+	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
+	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+	 * of extended memory layout. See comments at the defintion of
+	 * (struct _fpx_sw_bytes)
+	 */
+	void __user *fpstate;		/* zero when no FPU/extended context */
 	unsigned long oldmask;
 	unsigned long cr2;
 };
@@ -130,7 +178,12 @@
 	__u32	mxcsr_mask;
 	__u32	st_space[32];	/* 8*16 bytes for each FP-reg */
 	__u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg  */
-	__u32	reserved2[24];
+	__u32	reserved2[12];
+	union {
+		__u32	reserved3[12];
+		struct _fpx_sw_bytes sw_reserved; /* represents the extended
+						   * state information */
+	};
 };
 
 #ifdef __KERNEL__
@@ -161,7 +214,15 @@
 	unsigned long trapno;
 	unsigned long oldmask;
 	unsigned long cr2;
-	struct _fpstate __user *fpstate;	/* zero when no FPU context */
+
+	/*
+	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
+	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+	 * of extended memory layout. See comments at the defintion of
+	 * (struct _fpx_sw_bytes)
+	 */
+	void __user *fpstate;		/* zero when no FPU/extended context */
 	unsigned long reserved1[8];
 };
 #else /* __KERNEL__ */
@@ -202,4 +263,22 @@
 
 #endif /* !__i386__ */
 
-#endif
+struct _xsave_hdr {
+	__u64 xstate_bv;
+	__u64 reserved1[2];
+	__u64 reserved2[5];
+};
+
+/*
+ * Extended state pointed by the fpstate pointer in the sigcontext.
+ * In addition to the fpstate, information encoded in the xstate_hdr
+ * indicates the presence of other extended state information
+ * supported by the processor and OS.
+ */
+struct _xstate {
+	struct _fpstate fpstate;
+	struct _xsave_hdr xstate_hdr;
+	/* new processor state extensions go here */
+};
+
+#endif /* ASM_X86__SIGCONTEXT_H */
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 57a9686..8c34703 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -1,5 +1,5 @@
-#ifndef _SIGCONTEXT32_H
-#define _SIGCONTEXT32_H 1
+#ifndef ASM_X86__SIGCONTEXT32_H
+#define ASM_X86__SIGCONTEXT32_H
 
 /* signal context for 32bit programs. */
 
@@ -40,7 +40,11 @@
 	__u32	reserved;
 	struct _fpxreg	_fxsr_st[8];
 	struct _xmmreg	_xmm[8];	/* It's actually 16 */
-	__u32	padding[56];
+	__u32	padding[44];
+	union {
+		__u32 padding2[12];
+		struct _fpx_sw_bytes sw_reserved;
+	};
 };
 
 struct sigcontext_ia32 {
@@ -68,4 +72,4 @@
        unsigned int cr2;
 };
 
-#endif
+#endif /* ASM_X86__SIGCONTEXT32_H */
diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h
index a477bea..808bdfb 100644
--- a/include/asm-x86/siginfo.h
+++ b/include/asm-x86/siginfo.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGINFO_H
-#define _ASM_X86_SIGINFO_H
+#ifndef ASM_X86__SIGINFO_H
+#define ASM_X86__SIGINFO_H
 
 #ifdef __x86_64__
 # define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
@@ -7,4 +7,4 @@
 
 #include <asm-generic/siginfo.h>
 
-#endif
+#endif /* ASM_X86__SIGINFO_H */
diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index 6dac493..65acc82 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGNAL_H
-#define _ASM_X86_SIGNAL_H
+#ifndef ASM_X86__SIGNAL_H
+#define ASM_X86__SIGNAL_H
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
@@ -140,6 +140,9 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
+
+extern void do_notify_resume(struct pt_regs *, void *, __u32);
+
 # else /* __KERNEL__ */
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
@@ -256,4 +259,4 @@
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__SIGNAL_H */
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 3c877f7..29324c1 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SMP_H_
-#define _ASM_X86_SMP_H_
+#ifndef ASM_X86__SMP_H
+#define ASM_X86__SMP_H
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
 #include <linux/init.h>
@@ -34,6 +34,9 @@
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(int, cpu_number);
+#endif
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
@@ -142,7 +145,6 @@
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-DECLARE_PER_CPU(int, cpu_number);
 #define raw_smp_processor_id() (x86_read_percpu(cpu_number))
 extern int safe_smp_processor_id(void);
 
@@ -165,30 +167,33 @@
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
+#ifndef CONFIG_X86_64
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
 	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
 }
 
-#ifndef CONFIG_X86_64
+#include <mach_apicdef.h>
 static inline unsigned int read_apic_id(void)
 {
-	return *(u32 *)(APIC_BASE + APIC_ID);
+	unsigned int reg;
+
+	reg = *(u32 *)(APIC_BASE + APIC_ID);
+
+	return GET_APIC_ID(reg);
 }
-#else
-extern unsigned int read_apic_id(void);
 #endif
 
 
-# ifdef APIC_DEFINITION
+# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
 extern int hard_smp_processor_id(void);
 # else
-#  include <mach_apicdef.h>
+#include <mach_apicdef.h>
 static inline int hard_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(read_apic_id());
+	return read_apic_id();
 }
 # endif /* APIC_DEFINITION */
 
@@ -205,4 +210,4 @@
 #endif
 
 #endif /* __ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__SMP_H */
diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h
index 80af9c4..db73274 100644
--- a/include/asm-x86/socket.h
+++ b/include/asm-x86/socket.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef ASM_X86__SOCKET_H
+#define ASM_X86__SOCKET_H
 
 #include <asm/sockios.h>
 
@@ -54,4 +54,4 @@
 
 #define SO_MARK			36
 
-#endif /* _ASM_SOCKET_H */
+#endif /* ASM_X86__SOCKET_H */
diff --git a/include/asm-x86/sockios.h b/include/asm-x86/sockios.h
index 49cc72b..a006704 100644
--- a/include/asm-x86/sockios.h
+++ b/include/asm-x86/sockios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SOCKIOS_H
-#define _ASM_X86_SOCKIOS_H
+#ifndef ASM_X86__SOCKIOS_H
+#define ASM_X86__SOCKIOS_H
 
 /* Socket-level I/O control calls. */
 #define FIOSETOWN	0x8901
@@ -10,4 +10,4 @@
 #define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
 #define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
-#endif /* _ASM_X86_SOCKIOS_H */
+#endif /* ASM_X86__SOCKIOS_H */
diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h
index 9bd48b0..38f8e6b 100644
--- a/include/asm-x86/sparsemem.h
+++ b/include/asm-x86/sparsemem.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SPARSEMEM_H
-#define _ASM_X86_SPARSEMEM_H
+#ifndef ASM_X86__SPARSEMEM_H
+#define ASM_X86__SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
 /*
@@ -31,4 +31,4 @@
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
-#endif
+#endif /* ASM_X86__SPARSEMEM_H */
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e39c790..5d08fa2 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,5 +1,5 @@
-#ifndef _X86_SPINLOCK_H_
-#define _X86_SPINLOCK_H_
+#ifndef ASM_X86__SPINLOCK_H
+#define ASM_X86__SPINLOCK_H
 
 #include <asm/atomic.h>
 #include <asm/rwlock.h>
@@ -366,4 +366,4 @@
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
-#endif
+#endif /* ASM_X86__SPINLOCK_H */
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 06c071c..6aa9b56 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_SPINLOCK_TYPES_H
-#define __ASM_SPINLOCK_TYPES_H
+#ifndef ASM_X86__SPINLOCK_TYPES_H
+#define ASM_X86__SPINLOCK_TYPES_H
 
 #ifndef __LINUX_SPINLOCK_TYPES_H
 # error "please don't include this file directly"
@@ -17,4 +17,4 @@
 
 #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
-#endif
+#endif /* ASM_X86__SPINLOCK_TYPES_H */
diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index 774c919..5363e4f 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h
@@ -24,8 +24,8 @@
  * Send feedback to Pat Gaughen <gone@us.ibm.com>
  */
 
-#ifndef _ASM_SRAT_H_
-#define _ASM_SRAT_H_
+#ifndef ASM_X86__SRAT_H
+#define ASM_X86__SRAT_H
 
 #ifdef CONFIG_ACPI_NUMA
 extern int get_memcfg_from_srat(void);
@@ -36,4 +36,4 @@
 }
 #endif
 
-#endif /* _ASM_SRAT_H_ */
+#endif /* ASM_X86__SRAT_H */
diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h
index 30f8252..f43517e 100644
--- a/include/asm-x86/stacktrace.h
+++ b/include/asm-x86/stacktrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_STACKTRACE_H
-#define _ASM_STACKTRACE_H 1
+#ifndef ASM_X86__STACKTRACE_H
+#define ASM_X86__STACKTRACE_H
 
 extern int kstack_depth_to_print;
 
@@ -18,4 +18,4 @@
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
-#endif
+#endif /* ASM_X86__STACKTRACE_H */
diff --git a/include/asm-x86/stat.h b/include/asm-x86/stat.h
index 5c22dcb..1e120f6 100644
--- a/include/asm-x86/stat.h
+++ b/include/asm-x86/stat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STAT_H
-#define _ASM_X86_STAT_H
+#ifndef ASM_X86__STAT_H
+#define ASM_X86__STAT_H
 
 #define STAT_HAVE_NSEC 1
 
@@ -111,4 +111,4 @@
 #endif
 };
 
-#endif
+#endif /* ASM_X86__STAT_H */
diff --git a/include/asm-x86/statfs.h b/include/asm-x86/statfs.h
index 7c651aa..3f005bc 100644
--- a/include/asm-x86/statfs.h
+++ b/include/asm-x86/statfs.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STATFS_H
-#define _ASM_X86_STATFS_H
+#ifndef ASM_X86__STATFS_H
+#define ASM_X86__STATFS_H
 
 #ifdef __i386__
 #include <asm-generic/statfs.h>
@@ -60,4 +60,4 @@
 } __attribute__((packed));
 
 #endif /* !__i386__ */
-#endif
+#endif /* ASM_X86__STATFS_H */
diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index 193578c..487843e 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_STRING_H_
-#define _I386_STRING_H_
+#ifndef ASM_X86__STRING_32_H
+#define ASM_X86__STRING_32_H
 
 #ifdef __KERNEL__
 
@@ -323,4 +323,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_32_H */
diff --git a/include/asm-x86/string_64.h b/include/asm-x86/string_64.h
index 52b5ab3..a2add11d 100644
--- a/include/asm-x86/string_64.h
+++ b/include/asm-x86/string_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_STRING_H_
-#define _X86_64_STRING_H_
+#ifndef ASM_X86__STRING_64_H
+#define ASM_X86__STRING_64_H
 
 #ifdef __KERNEL__
 
@@ -57,4 +57,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_64_H */
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/summit/apic.h
similarity index 93%
rename from include/asm-x86/mach-summit/mach_apic.h
rename to include/asm-x86/summit/apic.h
index c47e2ab..c5b2e4b 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef __ASM_SUMMIT_APIC_H
+#define __ASM_SUMMIT_APIC_H
 
 #include <asm/smp.h>
 
@@ -21,7 +21,7 @@
 	 * Just start on cpu 0.  IRQ balancing will spread load
 	 */
 	return cpumask_of_cpu(0);
-} 
+}
 #define TARGET_CPUS	(target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -30,10 +30,10 @@
 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
 {
 	return 0;
-} 
+}
 
 /* we don't use the phys_cpu_present_map to indicate apicid presence */
-static inline unsigned long check_apicid_present(int bit) 
+static inline unsigned long check_apicid_present(int bit)
 {
 	return 1;
 }
@@ -122,7 +122,7 @@
 
 static inline physid_mask_t apicid_to_cpu_present(int apicid)
 {
-	return physid_mask_of_physid(apicid);
+	return physid_mask_of_physid(0);
 }
 
 static inline void setup_portio_remap(void)
@@ -143,22 +143,22 @@
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
-	int apicid;	
+	int apicid;
 
 	num_bits_set = cpus_weight(cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
-	/* 
-	 * The cpus in the mask must all be on the apic cluster.  If are not 
-	 * on the same apicid cluster return default value of TARGET_CPUS. 
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
 	cpu = first_cpu(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
 		if (cpu_isset(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) != 
+			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
 				printk ("%s: Not a valid mask!\n",__FUNCTION__);
 				return 0xFF;
@@ -182,4 +182,4 @@
 	return hard_smp_processor_id() >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* __ASM_SUMMIT_APIC_H */
diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h
new file mode 100644
index 0000000..f3fbca1
--- /dev/null
+++ b/include/asm-x86/summit/apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_SUMMIT_APICDEF_H
+#define __ASM_SUMMIT_APICDEF_H
+
+#define		APIC_ID_MASK		(0xFF<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+	return (x>>24)&0xFF;
+}
+
+#define		GET_APIC_ID(x)	get_apic_id(x)
+
+#endif
diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/summit/ipi.h
similarity index 83%
rename from include/asm-x86/mach-summit/mach_ipi.h
rename to include/asm-x86/summit/ipi.h
index 9404c53..53bd1e7 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/summit/ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef __ASM_SUMMIT_IPI_H
+#define __ASM_SUMMIT_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
similarity index 100%
rename from include/asm-x86/mach-summit/irq_vectors_limits.h
rename to include/asm-x86/summit/irq_vectors_limits.h
diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/summit/mpparse.h
similarity index 95%
rename from include/asm-x86/mach-summit/mach_mpparse.h
rename to include/asm-x86/summit/mpparse.h
index fdf5917..013ce6f 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/summit/mpparse.h
@@ -1,7 +1,6 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef __ASM_SUMMIT_MPPARSE_H
+#define __ASM_SUMMIT_MPPARSE_H
 
-#include <mach_apic.h>
 #include <asm/tsc.h>
 
 extern int use_cyclone;
@@ -12,11 +11,11 @@
 #define setup_summit()	{}
 #endif
 
-static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 
+static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
 		char *productid)
 {
-	if (!strncmp(oem, "IBM ENSW", 8) && 
-			(!strncmp(productid, "VIGIL SMP", 9) 
+	if (!strncmp(oem, "IBM ENSW", 8) &&
+			(!strncmp(productid, "VIGIL SMP", 9)
 			 || !strncmp(productid, "EXA", 3)
 			 || !strncmp(productid, "RUTHLESS SMP", 12))){
 		mark_tsc_unstable("Summit based system");
@@ -107,4 +106,4 @@
 		rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* __ASM_SUMMIT_MPPARSE_H */
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index 8675c67..acb6d4d 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_32_SUSPEND_H
-#define __ASM_X86_32_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_32_H
+#define ASM_X86__SUSPEND_32_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -48,4 +48,4 @@
 extern int acpi_save_state_mem(void);
 #endif
 
-#endif /* __ASM_X86_32_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_32_H */
diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index dc3262b..cf821dd 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h
@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_64_SUSPEND_H
-#define __ASM_X86_64_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_64_H
+#define ASM_X86__SUSPEND_64_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -49,4 +49,4 @@
 extern char core_restore_code;
 extern char restore_registers;
 
-#endif /* __ASM_X86_64_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_64_H */
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index 2730b35..1e20adb 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SWIOTLB_H
-#define _ASM_SWIOTLB_H 1
+#ifndef ASM_X86__SWIOTLB_H
+#define ASM_X86__SWIOTLB_H
 
 #include <asm/dma-mapping.h>
 
@@ -55,4 +55,4 @@
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
-#endif /* _ASM_SWIOTLB_H */
+#endif /* ASM_X86__SWIOTLB_H */
diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h
index b47a1d0..b689bee 100644
--- a/include/asm-x86/sync_bitops.h
+++ b/include/asm-x86/sync_bitops.h
@@ -1,5 +1,5 @@
-#ifndef _I386_SYNC_BITOPS_H
-#define _I386_SYNC_BITOPS_H
+#ifndef ASM_X86__SYNC_BITOPS_H
+#define ASM_X86__SYNC_BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -127,4 +127,4 @@
 
 #undef ADDR
 
-#endif /* _I386_SYNC_BITOPS_H */
+#endif /* ASM_X86__SYNC_BITOPS_H */
diff --git a/include/asm-x86/syscall.h b/include/asm-x86/syscall.h
new file mode 100644
index 0000000..04c47dc
--- /dev/null
+++ b/include/asm-x86/syscall.h
@@ -0,0 +1,211 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	/*
+	 * We always sign-extend a -1 value being set here,
+	 * so this is always either -1L or a syscall number.
+	 */
+	return regs->orig_ax;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->ax = regs->orig_ax;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->ax;
+#ifdef CONFIG_IA32_EMULATION
+	/*
+	 * TS_COMPAT is set for 32-bit syscall entries and then
+	 * remains set until we return to user mode.
+	 */
+	if (task_thread_info(task)->status & TS_COMPAT)
+		/*
+		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+		 * and will match correctly in comparisons.
+		 */
+		error = (long) (int) error;
+#endif
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ax;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->ax = (long) error ?: val;
+}
+
+#ifdef CONFIG_X86_32
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+}
+
+#else	 /* CONFIG_X86_64 */
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->bp;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->cx;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->bx;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->r9;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->r8;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->r10;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->bp = *args++;
+		case 5:
+			if (!n--) break;
+			regs->di = *args++;
+		case 4:
+			if (!n--) break;
+			regs->si = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->cx = *args++;
+		case 1:
+			if (!n--) break;
+			regs->bx = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->r9 = *args++;
+		case 5:
+			if (!n--) break;
+			regs->r8 = *args++;
+		case 4:
+			if (!n--) break;
+			regs->r10 = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->si = *args++;
+		case 1:
+			if (!n--) break;
+			regs->di = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+}
+
+#endif	/* CONFIG_X86_32 */
+
+#endif	/* _ASM_SYSCALL_H */
diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h
new file mode 100644
index 0000000..87803da
--- /dev/null
+++ b/include/asm-x86/syscalls.h
@@ -0,0 +1,93 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYSCALLS_H
+#define _ASM_X86_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* Common in X86_32 and X86_64 */
+/* kernel/ioport.c */
+asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+
+/* X86_32 only */
+#ifdef CONFIG_X86_32
+/* kernel/process_32.c */
+asmlinkage int sys_fork(struct pt_regs);
+asmlinkage int sys_clone(struct pt_regs);
+asmlinkage int sys_vfork(struct pt_regs);
+asmlinkage int sys_execve(struct pt_regs);
+
+/* kernel/signal_32.c */
+asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
+asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
+			     struct old_sigaction __user *);
+asmlinkage int sys_sigaltstack(unsigned long);
+asmlinkage unsigned long sys_sigreturn(unsigned long);
+asmlinkage int sys_rt_sigreturn(unsigned long);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned long);
+
+/* kernel/ldt.c */
+asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+
+/* kernel/sys_i386_32.c */
+asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
+			  unsigned long, unsigned long, unsigned long);
+struct mmap_arg_struct;
+asmlinkage int old_mmap(struct mmap_arg_struct __user *);
+struct sel_arg_struct;
+asmlinkage int old_select(struct sel_arg_struct __user *);
+asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
+struct old_utsname;
+asmlinkage int sys_uname(struct old_utsname __user *);
+struct oldold_utsname;
+asmlinkage int sys_olduname(struct oldold_utsname __user *);
+
+/* kernel/tls.c */
+asmlinkage int sys_set_thread_area(struct user_desc __user *);
+asmlinkage int sys_get_thread_area(struct user_desc __user *);
+
+/* kernel/vm86_32.c */
+asmlinkage int sys_vm86old(struct pt_regs);
+asmlinkage int sys_vm86(struct pt_regs);
+
+#else /* CONFIG_X86_32 */
+
+/* X86_64 only */
+/* kernel/process_64.c */
+asmlinkage long sys_fork(struct pt_regs *);
+asmlinkage long sys_clone(unsigned long, unsigned long,
+			  void __user *, void __user *,
+			  struct pt_regs *);
+asmlinkage long sys_vfork(struct pt_regs *);
+asmlinkage long sys_execve(char __user *, char __user * __user *,
+			   char __user * __user *,
+			   struct pt_regs *);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
+/* kernel/signal_64.c */
+asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+				struct pt_regs *);
+asmlinkage long sys_rt_sigreturn(struct pt_regs *);
+
+/* kernel/sys_x86_64.c */
+asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
+			 unsigned long, unsigned long, unsigned long);
+struct new_utsname;
+asmlinkage long sys_uname(struct new_utsname __user *);
+
+#endif /* CONFIG_X86_32 */
+#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 983ce37..34505dd 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
+#ifndef ASM_X86__SYSTEM_H
+#define ASM_X86__SYSTEM_H
 
 #include <asm/asm.h>
 #include <asm/segment.h>
@@ -419,4 +419,4 @@
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
-#endif
+#endif /* ASM_X86__SYSTEM_H */
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 97fa251..5aedb8b 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
+#ifndef ASM_X86__SYSTEM_64_H
+#define ASM_X86__SYSTEM_64_H
 
 #include <asm/segment.h>
 #include <asm/cmpxchg.h>
@@ -19,4 +19,4 @@
 
 #include <linux/irqflags.h>
 
-#endif
+#endif /* ASM_X86__SYSTEM_64_H */
diff --git a/include/asm-x86/tce.h b/include/asm-x86/tce.h
index b1a4ea0..e7932d7 100644
--- a/include/asm-x86/tce.h
+++ b/include/asm-x86/tce.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_TCE_H
-#define _ASM_X86_64_TCE_H
+#ifndef ASM_X86__TCE_H
+#define ASM_X86__TCE_H
 
 extern unsigned int specified_table_size;
 struct iommu_table;
@@ -45,4 +45,4 @@
 extern void __init free_tce_table(void *tbl);
 extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
 
-#endif /* _ASM_X86_64_TCE_H */
+#endif /* ASM_X86__TCE_H */
diff --git a/include/asm-x86/termbits.h b/include/asm-x86/termbits.h
index af1b70e..3d00dc5 100644
--- a/include/asm-x86/termbits.h
+++ b/include/asm-x86/termbits.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMBITS_H
-#define _ASM_X86_TERMBITS_H
+#ifndef ASM_X86__TERMBITS_H
+#define ASM_X86__TERMBITS_H
 
 #include <linux/posix_types.h>
 
@@ -195,4 +195,4 @@
 #define	TCSADRAIN	1
 #define	TCSAFLUSH	2
 
-#endif /* _ASM_X86_TERMBITS_H */
+#endif /* ASM_X86__TERMBITS_H */
diff --git a/include/asm-x86/termios.h b/include/asm-x86/termios.h
index f729563..e235db2 100644
--- a/include/asm-x86/termios.h
+++ b/include/asm-x86/termios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMIOS_H
-#define _ASM_X86_TERMIOS_H
+#ifndef ASM_X86__TERMIOS_H
+#define ASM_X86__TERMIOS_H
 
 #include <asm/termbits.h>
 #include <asm/ioctls.h>
@@ -110,4 +110,4 @@
 
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_TERMIOS_H */
+#endif /* ASM_X86__TERMIOS_H */
diff --git a/include/asm-x86/therm_throt.h b/include/asm-x86/therm_throt.h
index 399bf60..1c7f57b 100644
--- a/include/asm-x86/therm_throt.h
+++ b/include/asm-x86/therm_throt.h
@@ -1,9 +1,9 @@
-#ifndef __ASM_I386_THERM_THROT_H__
-#define __ASM_I386_THERM_THROT_H__ 1
+#ifndef ASM_X86__THERM_THROT_H
+#define ASM_X86__THERM_THROT_H
 
 #include <asm/atomic.h>
 
 extern atomic_t therm_throt_en;
 int therm_throt_process(int curr);
 
-#endif /* __ASM_I386_THERM_THROT_H__ */
+#endif /* ASM_X86__THERM_THROT_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index da0a675..3f4e52b 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -4,8 +4,8 @@
  * - Incorporating suggestions made by Linus Torvalds and Dave Miller
  */
 
-#ifndef _ASM_X86_THREAD_INFO_H
-#define _ASM_X86_THREAD_INFO_H
+#ifndef ASM_X86__THREAD_INFO_H
+#define ASM_X86__THREAD_INFO_H
 
 #include <linux/compiler.h>
 #include <asm/page.h>
@@ -71,6 +71,7 @@
  * Warning: layout of LSW is hardcoded in entry.S
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
@@ -93,6 +94,7 @@
 #define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -133,7 +135,7 @@
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
@@ -239,6 +241,7 @@
 #define TS_POLLING		0x0004	/* true if in idle loop
 					   and not sleeping */
 #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
+#define TS_XSAVE		0x0010	/* Use xsave/xrstor */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
@@ -258,4 +261,4 @@
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define arch_task_cache_init arch_task_cache_init
 #endif
-#endif /* _ASM_X86_THREAD_INFO_H */
+#endif /* ASM_X86__THREAD_INFO_H */
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index a17fa47..3e724ee 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -1,5 +1,5 @@
-#ifndef _ASMX86_TIME_H
-#define _ASMX86_TIME_H
+#ifndef ASM_X86__TIME_H
+#define ASM_X86__TIME_H
 
 extern void hpet_time_init(void);
 
@@ -46,6 +46,8 @@
 
 #endif
 
+extern void time_init(void);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else /* !CONFIG_PARAVIRT */
@@ -58,4 +60,4 @@
 
 extern unsigned long __init calibrate_cpu(void);
 
-#endif
+#endif /* ASM_X86__TIME_H */
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index fb2a4dd..d0babce 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -1,5 +1,5 @@
-#ifndef _ASMi386_TIMER_H
-#define _ASMi386_TIMER_H
+#ifndef ASM_X86__TIMER_H
+#define ASM_X86__TIMER_H
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/percpu.h>
@@ -9,9 +9,12 @@
 unsigned long long native_sched_clock(void);
 unsigned long native_calibrate_tsc(void);
 
+#ifdef CONFIG_X86_32
 extern int timer_ack;
-extern int no_timer_check;
 extern int recalibrate_cpu_khz(void);
+#endif /* CONFIG_X86_32 */
+
+extern int no_timer_check;
 
 #ifndef CONFIG_PARAVIRT
 #define calibrate_tsc() native_calibrate_tsc()
@@ -60,4 +63,4 @@
 	return ns;
 }
 
-#endif
+#endif /* ASM_X86__TIMER_H */
diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 43e5a78..d1ce241 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h
@@ -1,6 +1,6 @@
 /* x86 architecture timex specifications */
-#ifndef _ASM_X86_TIMEX_H
-#define _ASM_X86_TIMEX_H
+#ifndef ASM_X86__TIMEX_H
+#define ASM_X86__TIMEX_H
 
 #include <asm/processor.h>
 #include <asm/tsc.h>
@@ -16,4 +16,4 @@
 
 #define ARCH_HAS_READ_CURRENT_TIMER
 
-#endif
+#endif /* ASM_X86__TIMEX_H */
diff --git a/include/asm-x86/tlb.h b/include/asm-x86/tlb.h
index e4e9e2d..db36e9e 100644
--- a/include/asm-x86/tlb.h
+++ b/include/asm-x86/tlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLB_H
-#define _ASM_X86_TLB_H
+#ifndef ASM_X86__TLB_H
+#define ASM_X86__TLB_H
 
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
@@ -8,4 +8,4 @@
 
 #include <asm-generic/tlb.h>
 
-#endif
+#endif /* ASM_X86__TLB_H */
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 35c76ce..ef68b76 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLBFLUSH_H
-#define _ASM_X86_TLBFLUSH_H
+#ifndef ASM_X86__TLBFLUSH_H
+#define ASM_X86__TLBFLUSH_H
 
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -165,4 +165,4 @@
 	flush_tlb_all();
 }
 
-#endif /* _ASM_X86_TLBFLUSH_H */
+#endif /* ASM_X86__TLBFLUSH_H */
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 90ac771..7eca9bc 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -22,8 +22,8 @@
  *
  * Send feedback to <colpatch@us.ibm.com>
  */
-#ifndef _ASM_X86_TOPOLOGY_H
-#define _ASM_X86_TOPOLOGY_H
+#ifndef ASM_X86__TOPOLOGY_H
+#define ASM_X86__TOPOLOGY_H
 
 #ifdef CONFIG_X86_32
 # ifdef CONFIG_X86_HT
@@ -255,4 +255,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_TOPOLOGY_H */
+#endif /* ASM_X86__TOPOLOGY_H */
diff --git a/include/asm-x86/trampoline.h b/include/asm-x86/trampoline.h
index b156b08..0406bbd 100644
--- a/include/asm-x86/trampoline.h
+++ b/include/asm-x86/trampoline.h
@@ -1,5 +1,5 @@
-#ifndef __TRAMPOLINE_HEADER
-#define __TRAMPOLINE_HEADER
+#ifndef ASM_X86__TRAMPOLINE_H
+#define ASM_X86__TRAMPOLINE_H
 
 #ifndef __ASSEMBLY__
 
@@ -18,4 +18,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* __TRAMPOLINE_HEADER */
+#endif /* ASM_X86__TRAMPOLINE_H */
diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
index a4b65a7..2ccebc6 100644
--- a/include/asm-x86/traps.h
+++ b/include/asm-x86/traps.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TRAPS_H
-#define _ASM_X86_TRAPS_H
+#ifndef ASM_X86__TRAPS_H
+#define ASM_X86__TRAPS_H
 
 /* Common in X86_32 and X86_64 */
 asmlinkage void divide_error(void);
@@ -51,6 +51,8 @@
 unsigned long patch_espfix_desc(unsigned long, unsigned long);
 asmlinkage void math_emulate(long);
 
+void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #else /* CONFIG_X86_32 */
 
 asmlinkage void double_fault(void);
@@ -62,5 +64,7 @@
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *);
 asmlinkage void do_spurious_interrupt_bug(struct pt_regs *);
 
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #endif /* CONFIG_X86_32 */
-#endif /* _ASM_X86_TRAPS_H */
+#endif /* ASM_X86__TRAPS_H */
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index cb6f6ee..ad0f5c4 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -1,8 +1,8 @@
 /*
  * x86 TSC related functions
  */
-#ifndef _ASM_X86_TSC_H
-#define _ASM_X86_TSC_H
+#ifndef ASM_X86__TSC_H
+#define ASM_X86__TSC_H
 
 #include <asm/processor.h>
 
@@ -59,4 +59,4 @@
 
 extern int notsc_setup(char *);
 
-#endif
+#endif /* ASM_X86__TSC_H */
diff --git a/include/asm-x86/types.h b/include/asm-x86/types.h
index 1ac80cd..e78b52e 100644
--- a/include/asm-x86/types.h
+++ b/include/asm-x86/types.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
+#ifndef ASM_X86__TYPES_H
+#define ASM_X86__TYPES_H
 
 #include <asm-generic/int-ll64.h>
 
@@ -33,4 +33,4 @@
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__TYPES_H */
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 5f702d1..48ebc0a 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_UACCES_H_
-#define _ASM_UACCES_H_
+#ifndef ASM_X86__UACCESS_H
+#define ASM_X86__UACCESS_H
 /*
  * User space memory access functions
  */
@@ -450,5 +450,5 @@
 # include "uaccess_64.h"
 #endif
 
-#endif
+#endif /* ASM_X86__UACCESS_H */
 
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 6fdef39..6b5b57d 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -1,5 +1,5 @@
-#ifndef __i386_UACCESS_H
-#define __i386_UACCESS_H
+#ifndef ASM_X86__UACCESS_32_H
+#define ASM_X86__UACCESS_32_H
 
 /*
  * User space memory access functions
@@ -215,4 +215,4 @@
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
-#endif /* __i386_UACCESS_H */
+#endif /* ASM_X86__UACCESS_32_H */
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 515d4dc..5cfd295 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -1,5 +1,5 @@
-#ifndef __X86_64_UACCESS_H
-#define __X86_64_UACCESS_H
+#ifndef ASM_X86__UACCESS_64_H
+#define ASM_X86__UACCESS_64_H
 
 /*
  * User space memory access functions
@@ -198,4 +198,4 @@
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
 
-#endif /* __X86_64_UACCESS_H */
+#endif /* ASM_X86__UACCESS_64_H */
diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h
index 50a79f7..89eaa54 100644
--- a/include/asm-x86/ucontext.h
+++ b/include/asm-x86/ucontext.h
@@ -1,5 +1,11 @@
-#ifndef _ASM_X86_UCONTEXT_H
-#define _ASM_X86_UCONTEXT_H
+#ifndef ASM_X86__UCONTEXT_H
+#define ASM_X86__UCONTEXT_H
+
+#define UC_FP_XSTATE	0x1	/* indicates the presence of extended state
+				 * information in the memory layout pointed
+				 * by the fpstate pointer in the ucontext's
+				 * sigcontext struct (uc_mcontext).
+				 */
 
 struct ucontext {
 	unsigned long	  uc_flags;
@@ -9,4 +15,4 @@
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
-#endif /* _ASM_X86_UCONTEXT_H */
+#endif /* ASM_X86__UCONTEXT_H */
diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h
index a7bd416..59dcdec 100644
--- a/include/asm-x86/unaligned.h
+++ b/include/asm-x86/unaligned.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNALIGNED_H
-#define _ASM_X86_UNALIGNED_H
+#ifndef ASM_X86__UNALIGNED_H
+#define ASM_X86__UNALIGNED_H
 
 /*
  * The x86 can do unaligned accesses itself.
@@ -11,4 +11,4 @@
 #define get_unaligned __get_unaligned_le
 #define put_unaligned __put_unaligned_le
 
-#endif /* _ASM_X86_UNALIGNED_H */
+#endif /* ASM_X86__UNALIGNED_H */
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index d739467..017f4a8 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_UNISTD_H_
-#define _ASM_I386_UNISTD_H_
+#ifndef ASM_X86__UNISTD_32_H
+#define ASM_X86__UNISTD_32_H
 
 /*
  * This file contains the system call numbers.
@@ -376,4 +376,4 @@
 #endif
 
 #endif /* __KERNEL__ */
-#endif /* _ASM_I386_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_32_H */
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 3a341d7..ace83f1 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_UNISTD_H_
-#define _ASM_X86_64_UNISTD_H_
+#ifndef ASM_X86__UNISTD_64_H
+#define ASM_X86__UNISTD_64_H
 
 #ifndef __SYSCALL
 #define __SYSCALL(a, b)
@@ -690,4 +690,4 @@
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_64_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_64_H */
diff --git a/include/asm-x86/unwind.h b/include/asm-x86/unwind.h
index 8b064bd..a215156 100644
--- a/include/asm-x86/unwind.h
+++ b/include/asm-x86/unwind.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
+#ifndef ASM_X86__UNWIND_H
+#define ASM_X86__UNWIND_H
 
 #define UNW_PC(frame) ((void)(frame), 0UL)
 #define UNW_SP(frame) ((void)(frame), 0UL)
@@ -10,4 +10,4 @@
 	return 0;
 }
 
-#endif /* _ASM_X86_UNWIND_H */
+#endif /* ASM_X86__UNWIND_H */
diff --git a/include/asm-x86/user32.h b/include/asm-x86/user32.h
index a3d9100..aa66c18 100644
--- a/include/asm-x86/user32.h
+++ b/include/asm-x86/user32.h
@@ -1,5 +1,5 @@
-#ifndef USER32_H
-#define USER32_H 1
+#ifndef ASM_X86__USER32_H
+#define ASM_X86__USER32_H
 
 /* IA32 compatible user structures for ptrace.
  * These should be used for 32bit coredumps too. */
@@ -67,4 +67,4 @@
 };
 
 
-#endif
+#endif /* ASM_X86__USER32_H */
diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h
index d6e51ed..e0fe2f5 100644
--- a/include/asm-x86/user_32.h
+++ b/include/asm-x86/user_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_USER_H
-#define _I386_USER_H
+#ifndef ASM_X86__USER_32_H
+#define ASM_X86__USER_32_H
 
 #include <asm/page.h>
 /* Core file format: The core file is written in such a way that gdb
@@ -128,4 +128,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _I386_USER_H */
+#endif /* ASM_X86__USER_32_H */
diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h
index 6037b63..38b5799 100644
--- a/include/asm-x86/user_64.h
+++ b/include/asm-x86/user_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_USER_H
-#define _X86_64_USER_H
+#ifndef ASM_X86__USER_64_H
+#define ASM_X86__USER_64_H
 
 #include <asm/types.h>
 #include <asm/page.h>
@@ -134,4 +134,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _X86_64_USER_H */
+#endif /* ASM_X86__USER_64_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index aa73362..7cd6d7e 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BIOS_H
-#define _ASM_X86_BIOS_H
+#ifndef ASM_X86__UV__BIOS_H
+#define ASM_X86__UV__BIOS_H
 
 /*
  * BIOS layer definitions.
@@ -65,4 +65,4 @@
 		   unsigned long *drift_info);
 extern const char *x86_bios_strerror(long status);
 
-#endif /* _ASM_X86_BIOS_H */
+#endif /* ASM_X86__UV__BIOS_H */
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index 610b6b3..77153fb 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_BAU__
-#define __ASM_X86_UV_BAU__
+#ifndef ASM_X86__UV__UV_BAU_H
+#define ASM_X86__UV__UV_BAU_H
 
 #include <linux/bitmap.h>
 #define BITSPERBYTE 8
@@ -329,4 +329,4 @@
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
-#endif /* __ASM_X86_UV_BAU__ */
+#endif /* ASM_X86__UV__UV_BAU_H */
diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index a4ef26e..bdb5b01 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_HUB_H__
-#define __ASM_X86_UV_HUB_H__
+#ifndef ASM_X86__UV__UV_HUB_H
+#define ASM_X86__UV__UV_HUB_H
 
 #include <linux/numa.h>
 #include <linux/percpu.h>
@@ -350,5 +350,5 @@
 	return uv_possible_blades;
 }
 
-#endif /* __ASM_X86_UV_HUB__ */
+#endif /* ASM_X86__UV__UV_HUB_H */
 
diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 151fd7f..8b03d89 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_MMRS__
-#define __ASM_X86_UV_MMRS__
+#ifndef ASM_X86__UV__UV_MMRS_H
+#define ASM_X86__UV__UV_MMRS_H
 
 #define UV_MMR_ENABLE		(1UL << 63)
 
@@ -1292,4 +1292,4 @@
 };
 
 
-#endif /* __ASM_X86_UV_MMRS__ */
+#endif /* ASM_X86__UV__UV_MMRS_H */
diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
index 8e18fb8..4ab3209 100644
--- a/include/asm-x86/vdso.h
+++ b/include/asm-x86/vdso.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_VDSO_H
-#define _ASM_X86_VDSO_H	1
+#ifndef ASM_X86__VDSO_H
+#define ASM_X86__VDSO_H
 
 #ifdef CONFIG_X86_64
 extern const char VDSO64_PRELINK[];
@@ -44,4 +44,4 @@
 extern const char vdso32_syscall_start, vdso32_syscall_end;
 extern const char vdso32_sysenter_start, vdso32_sysenter_end;
 
-#endif	/* asm-x86/vdso.h */
+#endif /* ASM_X86__VDSO_H */
diff --git a/include/asm-x86/vga.h b/include/asm-x86/vga.h
index 0ccf804..b9e493d 100644
--- a/include/asm-x86/vga.h
+++ b/include/asm-x86/vga.h
@@ -4,8 +4,8 @@
  *	(c) 1998 Martin Mares <mj@ucw.cz>
  */
 
-#ifndef _LINUX_ASM_VGA_H_
-#define _LINUX_ASM_VGA_H_
+#ifndef ASM_X86__VGA_H
+#define ASM_X86__VGA_H
 
 /*
  *	On the PC, we can just recalculate addresses and then
@@ -17,4 +17,4 @@
 #define vga_readb(x) (*(x))
 #define vga_writeb(x, y) (*(y) = (x))
 
-#endif
+#endif /* ASM_X86__VGA_H */
diff --git a/include/asm-x86/vgtod.h b/include/asm-x86/vgtod.h
index 3301f09..38fd133 100644
--- a/include/asm-x86/vgtod.h
+++ b/include/asm-x86/vgtod.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_VGTOD_H
-#define _ASM_VGTOD_H 1
+#ifndef ASM_X86__VGTOD_H
+#define ASM_X86__VGTOD_H
 
 #include <asm/vsyscall.h>
 #include <linux/clocksource.h>
@@ -26,4 +26,4 @@
 __section_vsyscall_gtod_data;
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
-#endif
+#endif /* ASM_X86__VGTOD_H */
diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h
index 9952588..9627a8f 100644
--- a/include/asm-x86/visws/cobalt.h
+++ b/include/asm-x86/visws/cobalt.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_COBALT_H
-#define __I386_SGI_COBALT_H
+#ifndef ASM_X86__VISWS__COBALT_H
+#define ASM_X86__VISWS__COBALT_H
 
 #include <asm/fixmap.h>
 
@@ -122,4 +122,4 @@
 
 extern char visws_board_rev;
 
-#endif /* __I386_SGI_COBALT_H */
+#endif /* ASM_X86__VISWS__COBALT_H */
diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h
index dfcd4f0..b36d3b3 100644
--- a/include/asm-x86/visws/lithium.h
+++ b/include/asm-x86/visws/lithium.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_LITHIUM_H
-#define __I386_SGI_LITHIUM_H
+#ifndef ASM_X86__VISWS__LITHIUM_H
+#define ASM_X86__VISWS__LITHIUM_H
 
 #include <asm/fixmap.h>
 
@@ -49,5 +49,5 @@
 	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
 }
 
-#endif
+#endif /* ASM_X86__VISWS__LITHIUM_H */
 
diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h
index 83ea4f4..61c9380 100644
--- a/include/asm-x86/visws/piix4.h
+++ b/include/asm-x86/visws/piix4.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_PIIX_H
-#define __I386_SGI_PIIX_H
+#ifndef ASM_X86__VISWS__PIIX4_H
+#define ASM_X86__VISWS__PIIX4_H
 
 /*
  * PIIX4 as used on SGI Visual Workstations
@@ -104,4 +104,4 @@
  */
 #define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in
 
-#endif
+#endif /* ASM_X86__VISWS__PIIX4_H */
diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index 5ce3513..998bd18 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -1,5 +1,5 @@
-#ifndef _LINUX_VM86_H
-#define _LINUX_VM86_H
+#ifndef ASM_X86__VM86_H
+#define ASM_X86__VM86_H
 
 /*
  * I'm guessing at the VIF/VIP flag usage, but hope that this is how
@@ -205,4 +205,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__VM86_H */
diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h
index c3118c3..b2d39e6 100644
--- a/include/asm-x86/vmi_time.h
+++ b/include/asm-x86/vmi_time.h
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __VMI_TIME_H
-#define __VMI_TIME_H
+#ifndef ASM_X86__VMI_TIME_H
+#define ASM_X86__VMI_TIME_H
 
 /*
  * Raw VMI call indices for timer functions
@@ -95,4 +95,4 @@
 
 #define CONFIG_VMI_ALARM_HZ	100
 
-#endif
+#endif /* ASM_X86__VMI_TIME_H */
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 6b66ff9..dcd4682 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_VSYSCALL_H_
-#define _ASM_X86_64_VSYSCALL_H_
+#ifndef ASM_X86__VSYSCALL_H
+#define ASM_X86__VSYSCALL_H
 
 enum vsyscall_num {
 	__NR_vgettimeofday,
@@ -41,4 +41,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_X86_64_VSYSCALL_H_ */
+#endif /* ASM_X86__VSYSCALL_H */
diff --git a/include/asm-x86/xcr.h b/include/asm-x86/xcr.h
new file mode 100644
index 0000000..f2cba4e7
--- /dev/null
+++ b/include/asm-x86/xcr.h
@@ -0,0 +1,49 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * asm-x86/xcr.h
+ *
+ * Definitions for the eXtended Control Register instructions
+ */
+
+#ifndef _ASM_X86_XCR_H
+#define _ASM_X86_XCR_H
+
+#define XCR_XFEATURE_ENABLED_MASK	0x00000000
+
+#ifdef __KERNEL__
+# ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+static inline u64 xgetbv(u32 index)
+{
+	u32 eax, edx;
+
+	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (index));
+	return eax + ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+	u32 eax = value;
+	u32 edx = value >> 32;
+
+	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
+		     : : "a" (eax), "d" (edx), "c" (index));
+}
+
+# endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_XCR_H */
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index 8ded747..8151f5b 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -1,5 +1,5 @@
-#ifndef __XEN_EVENTS_H
-#define __XEN_EVENTS_H
+#ifndef ASM_X86__XEN__EVENTS_H
+#define ASM_X86__XEN__EVENTS_H
 
 enum ipi_vector {
 	XEN_RESCHEDULE_VECTOR,
@@ -21,4 +21,4 @@
 	do_IRQ(regs);
 }
 
-#endif /* __XEN_EVENTS_H */
+#endif /* ASM_X86__XEN__EVENTS_H */
diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h
index 2444d45..c4baab4 100644
--- a/include/asm-x86/xen/grant_table.h
+++ b/include/asm-x86/xen/grant_table.h
@@ -1,7 +1,7 @@
-#ifndef __XEN_GRANT_TABLE_H
-#define __XEN_GRANT_TABLE_H
+#ifndef ASM_X86__XEN__GRANT_TABLE_H
+#define ASM_X86__XEN__GRANT_TABLE_H
 
 #define xen_alloc_vm_area(size)	alloc_vm_area(size)
 #define xen_free_vm_area(area)	free_vm_area(area)
 
-#endif /* __XEN_GRANT_TABLE_H */
+#endif /* ASM_X86__XEN__GRANT_TABLE_H */
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 91cb7fd..44f4259 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
+#ifndef ASM_X86__XEN__HYPERCALL_H
+#define ASM_X86__XEN__HYPERCALL_H
 
 #include <linux/errno.h>
 #include <linux/string.h>
@@ -524,4 +524,4 @@
 	mcl->args[1] = esp;
 }
 
-#endif /* __HYPERCALL_H__ */
+#endif /* ASM_X86__XEN__HYPERCALL_H */
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 04ee061..0ef3a88 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERVISOR_H__
-#define __HYPERVISOR_H__
+#ifndef ASM_X86__XEN__HYPERVISOR_H
+#define ASM_X86__XEN__HYPERVISOR_H
 
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -69,4 +69,4 @@
 
 #define is_running_on_xen()	(xen_start_info ? 1 : 0)
 
-#endif /* __HYPERVISOR_H__ */
+#endif /* ASM_X86__XEN__HYPERVISOR_H */
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 9d810f2..d077bba 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_H
-#define __ASM_X86_XEN_INTERFACE_H
+#ifndef ASM_X86__XEN__INTERFACE_H
+#define ASM_X86__XEN__INTERFACE_H
 
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
@@ -172,4 +172,4 @@
 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
 #endif
 
-#endif	/* __ASM_X86_XEN_INTERFACE_H */
+#endif /* ASM_X86__XEN__INTERFACE_H */
diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h
index d8ac41d..08167e1 100644
--- a/include/asm-x86/xen/interface_32.h
+++ b/include/asm-x86/xen/interface_32.h
@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_32_H
-#define __ASM_X86_XEN_INTERFACE_32_H
+#ifndef ASM_X86__XEN__INTERFACE_32_H
+#define ASM_X86__XEN__INTERFACE_32_H
 
 
 /*
@@ -94,4 +94,4 @@
 #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
 #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
 
-#endif	/* __ASM_X86_XEN_INTERFACE_32_H */
+#endif /* ASM_X86__XEN__INTERFACE_32_H */
diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h
index 842266c..046c0f1 100644
--- a/include/asm-x86/xen/interface_64.h
+++ b/include/asm-x86/xen/interface_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_XEN_INTERFACE_64_H
-#define __ASM_X86_XEN_INTERFACE_64_H
+#ifndef ASM_X86__XEN__INTERFACE_64_H
+#define ASM_X86__XEN__INTERFACE_64_H
 
 /*
  * 64-bit segment selectors
@@ -156,4 +156,4 @@
 #endif /* !__ASSEMBLY__ */
 
 
-#endif	/* __ASM_X86_XEN_INTERFACE_64_H */
+#endif /* ASM_X86__XEN__INTERFACE_64_H */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 7b3835d..c50185d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -1,5 +1,5 @@
-#ifndef __XEN_PAGE_H
-#define __XEN_PAGE_H
+#ifndef ASM_X86__XEN__PAGE_H
+#define ASM_X86__XEN__PAGE_H
 
 #include <linux/pfn.h>
 
@@ -162,4 +162,4 @@
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
-#endif /* __XEN_PAGE_H */
+#endif /* ASM_X86__XEN__PAGE_H */
diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h
new file mode 100644
index 0000000..08e9a1a
--- /dev/null
+++ b/include/asm-x86/xsave.h
@@ -0,0 +1,118 @@
+#ifndef __ASM_X86_XSAVE_H
+#define __ASM_X86_XSAVE_H
+
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+#define XSTATE_FP	0x1
+#define XSTATE_SSE	0x2
+
+#define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+
+#define FXSAVE_SIZE	512
+
+/*
+ * These are the features that the OS can handle currently.
+ */
+#define XCNTXT_MASK	(XSTATE_FP | XSTATE_SSE)
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX	"0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+extern unsigned int xstate_size;
+extern u64 pcntxt_mask;
+extern struct xsave_struct *init_xstate_buf;
+
+extern void xsave_cntxt_init(void);
+extern void xsave_init(void);
+extern int init_fpu(struct task_struct *child);
+extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
+			    void __user *fpstate,
+			    struct _fpx_sw_bytes *sw);
+
+static inline int xrstor_checking(struct xsave_struct *fx)
+{
+	int err;
+
+	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [err] "=r" (err)
+		     : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
+		     : "memory");
+
+	return err;
+}
+
+static inline int xsave_user(struct xsave_struct __user *buf)
+{
+	int err;
+	__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
+			     "2:\n"
+			     ".section .fixup,\"ax\"\n"
+			     "3:  movl $-1,%[err]\n"
+			     "    jmp  2b\n"
+			     ".previous\n"
+			     ".section __ex_table,\"a\"\n"
+			     _ASM_ALIGN "\n"
+			     _ASM_PTR "1b,3b\n"
+			     ".previous"
+			     : [err] "=r" (err)
+			     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
+			     : "memory");
+	if (unlikely(err) && __clear_user(buf, xstate_size))
+		err = -EFAULT;
+	/* No need to clear here because the caller clears USED_MATH */
+	return err;
+}
+
+static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
+{
+	int err;
+	struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+
+	__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
+			     "2:\n"
+			     ".section .fixup,\"ax\"\n"
+			     "3:  movl $-1,%[err]\n"
+			     "    jmp  2b\n"
+			     ".previous\n"
+			     ".section __ex_table,\"a\"\n"
+			     _ASM_ALIGN "\n"
+			     _ASM_PTR "1b,3b\n"
+			     ".previous"
+			     : [err] "=r" (err)
+			     : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
+			     : "memory");	/* memory required? */
+	return err;
+}
+
+static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
+{
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+
+	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+		     :   "memory");
+}
+
+static inline void xsave(struct task_struct *tsk)
+{
+	/* This, however, we can work around by forcing the compiler to select
+	   an addressing mode that doesn't require extended registers. */
+	__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
+			     : : "D" (&(tsk->thread.xstate->xsave)),
+				 "a" (-1), "d"(-1) : "memory");
+}
+#endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b8..c360c55 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
 #include <linux/types.h>
 #include <linux/msi.h>
 
-#ifdef CONFIG_DMAR
+#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
 struct intel_iommu;
 
+struct dmar_drhd_unit {
+	struct list_head list;		/* list of drhd units	*/
+	struct  acpi_dmar_header *hdr;	/* ACPI header		*/
+	u64	reg_base_addr;		/* register base address*/
+	struct	pci_dev **devices; 	/* target device array	*/
+	int	devices_cnt;		/* target device count	*/
+	u8	ignored:1; 		/* ignore drhd		*/
+	u8	include_all:1;
+	struct intel_iommu *iommu;
+};
+
+extern struct list_head dmar_drhd_units;
+
+#define for_each_drhd_unit(drhd) \
+	list_for_each_entry(drhd, &dmar_drhd_units, list)
+
+extern int dmar_table_init(void);
+extern int early_dmar_detect(void);
+extern int dmar_dev_scope_init(void);
+
+/* Intel IOMMU detection */
+extern void detect_intel_iommu(void);
+
+
+extern int parse_ioapics_under_ir(void);
+extern int alloc_iommu(struct dmar_drhd_unit *);
+#else
+static inline void detect_intel_iommu(void)
+{
+	return;
+}
+
+static inline int dmar_table_init(void)
+{
+	return -ENODEV;
+}
+#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
+
+#ifdef CONFIG_INTR_REMAP
+extern int intr_remapping_enabled;
+extern int enable_intr_remapping(int);
+
+struct irte {
+	union {
+		struct {
+			__u64	present 	: 1,
+				fpd		: 1,
+				dst_mode	: 1,
+				redir_hint	: 1,
+				trigger_mode	: 1,
+				dlvry_mode	: 3,
+				avail		: 4,
+				__reserved_1	: 4,
+				vector		: 8,
+				__reserved_2	: 8,
+				dest_id		: 32;
+		};
+		__u64 low;
+	};
+
+	union {
+		struct {
+			__u64	sid		: 16,
+				sq		: 2,
+				svt		: 2,
+				__reserved_3	: 44;
+		};
+		__u64 high;
+	};
+};
+extern int get_irte(int irq, struct irte *entry);
+extern int modify_irte(int irq, struct irte *irte_modified);
+extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
+extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
+   			u16 sub_handle);
+extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
+extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
+extern int flush_irte(int irq);
+extern int free_irte(int irq);
+
+extern int irq_remapped(int irq);
+extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
+extern struct intel_iommu *map_ioapic_to_ir(int apic);
+#else
+#define irq_remapped(irq)		(0)
+#define enable_intr_remapping(mode)	(-1)
+#define intr_remapping_enabled		(0)
+#endif
+
+#ifdef CONFIG_DMAR
 extern const char *dmar_get_fault_reason(u8 fault_reason);
 
 /* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
 extern int arch_setup_dmar_msi(unsigned int irq);
 
-/* Intel IOMMU detection and initialization functions */
-extern void detect_intel_iommu(void);
-extern int intel_iommu_init(void);
-
-extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
-
-extern struct list_head dmar_drhd_units;
+extern int iommu_detected, no_iommu;
 extern struct list_head dmar_rmrr_units;
-
-struct dmar_drhd_unit {
-	struct list_head list;		/* list of drhd units	*/
-	u64	reg_base_addr;		/* register base address*/
-	struct	pci_dev **devices; 	/* target device array	*/
-	int	devices_cnt;		/* target device count	*/
-	u8	ignored:1; 		/* ignore drhd		*/
-	u8	include_all:1;
-	struct intel_iommu *iommu;
-};
-
 struct dmar_rmrr_unit {
 	struct list_head list;		/* list of rmrr units	*/
+	struct acpi_dmar_header *hdr;	/* ACPI header		*/
 	u64	base_address;		/* reserved base address*/
 	u64	end_address;		/* reserved end address */
 	struct pci_dev **devices;	/* target devices */
 	int	devices_cnt;		/* target device count */
 };
 
-#define for_each_drhd_unit(drhd) \
-	list_for_each_entry(drhd, &dmar_drhd_units, list)
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
+/* Intel DMAR  initialization functions */
+extern int intel_iommu_init(void);
+extern int dmar_disabled;
 #else
-static inline void detect_intel_iommu(void)
-{
-	return;
-}
 static inline int intel_iommu_init(void)
 {
+#ifdef CONFIG_INTR_REMAP
+	return dmar_dev_scope_init();
+#else
 	return -ENODEV;
+#endif
 }
-
 #endif /* !CONFIG_DMAR */
 #endif /* __DMAR_H__ */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462..8d9411b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@
 #define IRQ_MOVE_PENDING	0x00200000	/* need to re-target IRQ destination */
 #define IRQ_NO_BALANCING	0x00400000	/* IRQ is excluded from balancing */
 #define IRQ_SPURIOUS_DISABLED	0x00800000	/* IRQ was disabled by the spurious trap */
+#define IRQ_MOVE_PCNTXT	0x01000000	/* IRQ migration from process context */
 
 #ifdef CONFIG_IRQ_PER_CPU
 # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0314074..60c49e3 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@
 	set_balance_irq_affinity(irq, cpumask);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	set_pending_irq(irq, cpumask);
+	if (desc->status & IRQ_MOVE_PCNTXT) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&desc->lock, flags);
+		desc->chip->set_affinity(irq, cpumask);
+		spin_unlock_irqrestore(&desc->lock, flags);
+	} else
+		set_pending_irq(irq, cpumask);
 #else
 	desc->affinity = cpumask;
 	desc->chip->set_affinity(irq, cpumask);