Merge branch 'master' into percpu

Conflicts:
	arch/powerpc/platforms/pseries/hvCall.S
	include/linux/percpu.h
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index b0ed0b4..01b2f58 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -816,8 +816,8 @@
 
 ENTRY(_ret_from_exception)
 #ifdef CONFIG_IPIPE
-	p2.l = _per_cpu__ipipe_percpu_domain;
-	p2.h = _per_cpu__ipipe_percpu_domain;
+	p2.l = _ipipe_percpu_domain;
+	p2.h = _ipipe_percpu_domain;
 	r0.l = _ipipe_root;
 	r0.h = _ipipe_root;
 	r2 = [p2];
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 2c18d08..c52bef3 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -358,7 +358,7 @@
 1:	btstq	12, $r1		   ; Refill?
 	bpl	2f
 	lsrq	24, $r1     ; Get PGD index (bit 24-31)
-	move.d  [per_cpu__current_pgd], $r0 ; PGD for the current process
+	move.d  [current_pgd], $r0 ; PGD for the current process
 	move.d	[$r0+$r1.d], $r0   ; Get PMD
 	beq	2f
 	nop
diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S
index 2238d15..f125d91 100644
--- a/arch/cris/arch-v32/mm/mmu.S
+++ b/arch/cris/arch-v32/mm/mmu.S
@@ -115,7 +115,7 @@
 #ifdef CONFIG_SMP
 	move    $s7, $acr	; PGD
 #else
-	move.d  per_cpu__current_pgd, $acr ; PGD
+	move.d  current_pgd, $acr ; PGD
 #endif
 	; Look up PMD in PGD
 	lsrq	24, $r0	; Get PMD index into PGD (bit 24-31)
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 30cf465..f7c00a5 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -9,7 +9,7 @@
 #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
 
 #ifdef __ASSEMBLY__
-# define THIS_CPU(var)	(per_cpu__##var)  /* use this to mark accesses to per-CPU variables... */
+# define THIS_CPU(var)	(var)  /* use this to mark accesses to per-CPU variables... */
 #else /* !__ASSEMBLY__ */
 
 
@@ -39,7 +39,7 @@
  * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
  * more efficient.
  */
-#define __ia64_per_cpu_var(var)	per_cpu__##var
+#define __ia64_per_cpu_var(var)	var
 
 #include <asm-generic/percpu.h>
 
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 461b999..7f4a0ed 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -30,9 +30,9 @@
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(per_cpu__ia64_cpu_info);
+EXPORT_SYMBOL(ia64_cpu_info);
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
+EXPORT_SYMBOL(local_per_cpu_offset);
 #endif
 
 #include <asm/uaccess.h>
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 19c4b21..8d586d1 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -459,7 +459,7 @@
 		cpu = 0;
 		node = node_cpuid[cpu].nid;
 		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
-			((char *)&per_cpu__ia64_cpu_info - __per_cpu_start));
+			((char *)&ia64_cpu_info - __per_cpu_start));
 		cpu0_cpu_info->node_data = mem_data[node].node_data;
 	}
 #endif /* CONFIG_SMP */
diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h
index 61abbd2..ec89f2a 100644
--- a/arch/microblaze/include/asm/entry.h
+++ b/arch/microblaze/include/asm/entry.h
@@ -21,7 +21,7 @@
  * places
  */
 
-#define PER_CPU(var) per_cpu__##var
+#define PER_CPU(var) var
 
 # ifndef __ASSEMBLY__
 DECLARE_PER_CPU(unsigned int, KSP); /* Saved kernel stack pointer */
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
index d172d42..f8c45cc 100644
--- a/arch/parisc/lib/fixup.S
+++ b/arch/parisc/lib/fixup.S
@@ -36,8 +36,8 @@
 #endif
 	/* t2 = &__per_cpu_offset[smp_processor_id()]; */
 	LDREGX \t2(\t1),\t2 
-	addil LT%per_cpu__exception_data,%r27
-	LDREG RT%per_cpu__exception_data(%r1),\t1
+	addil LT%exception_data,%r27
+	LDREG RT%exception_data(%r1),\t1
 	/* t1 = &__get_cpu_var(exception_data) */
 	add,l \t1,\t2,\t1
 	/* t1 = t1->fault_ip */
@@ -46,8 +46,8 @@
 #else
 	.macro  get_fault_ip t1 t2
 	/* t1 = &__get_cpu_var(exception_data) */
-	addil LT%per_cpu__exception_data,%r27
-	LDREG RT%per_cpu__exception_data(%r1),\t2
+	addil LT%exception_data,%r27
+	LDREG RT%exception_data(%r1),\t2
 	/* t1 = t2->fault_ip */
 	LDREG EXCDATA_IP(\t2), \t1
 	.endm
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index f30f4a1..2ad288f 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -112,13 +112,13 @@
 		touched = 1;
 	}
 	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-		__this_cpu_inc(per_cpu_var(alert_counter));
-		if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz)
+		__this_cpu_inc(alert_counter);
+		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
 			die_nmi("BUG: NMI Watchdog detected LOCKUP",
 				regs, panic_on_timeout);
 	} else {
 		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(per_cpu_var(alert_counter), 0);
+		__this_cpu_write(alert_counter, 0);
 	}
 	if (__get_cpu_var(wd_enabled)) {
 		write_pic(picl_value(nmi_hz));
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index fd3cee4..1ddec40 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -149,11 +149,11 @@
 rtrap_irq:
 rtrap:
 #ifndef CONFIG_SMP
-		sethi			%hi(per_cpu____cpu_data), %l0
-		lduw			[%l0 + %lo(per_cpu____cpu_data)], %l1
+		sethi			%hi(__cpu_data), %l0
+		lduw			[%l0 + %lo(__cpu_data)], %l1
 #else
-		sethi			%hi(per_cpu____cpu_data), %l0
-		or			%l0, %lo(per_cpu____cpu_data), %l0
+		sethi			%hi(__cpu_data), %l0
+		or			%l0, %lo(__cpu_data), %l0
 		lduw			[%l0 + %g5], %l1
 #endif
 		cmp			%l1, 0
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0c44196..4c170cc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -25,19 +25,18 @@
  */
 #ifdef CONFIG_SMP
 #define PER_CPU(var, reg)						\
-	__percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg;	\
-	lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var)	%__percpu_seg:per_cpu__##var
+	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
+	lea var(reg), reg
+#define PER_CPU_VAR(var)	%__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU(var, reg)						\
-	__percpu_mov_op $per_cpu__##var, reg
-#define PER_CPU_VAR(var)	per_cpu__##var
+#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
+#define PER_CPU_VAR(var)	var
 #endif	/* SMP */
 
 #ifdef CONFIG_X86_64_SMP
 #define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
 #else
-#define INIT_PER_CPU_VAR(var)  per_cpu__##var
+#define INIT_PER_CPU_VAR(var)  var
 #endif
 
 #else /* ...!ASSEMBLY */
@@ -60,12 +59,12 @@
  * There also must be an entry in vmlinux_64.lds.S
  */
 #define DECLARE_INIT_PER_CPU(var) \
-       extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
+       extern typeof(var) init_per_cpu_var(var)
 
 #ifdef CONFIG_X86_64_SMP
 #define init_per_cpu_var(var)  init_per_cpu__##var
 #else
-#define init_per_cpu_var(var)  per_cpu_var(var)
+#define init_per_cpu_var(var)  var
 #endif
 
 /* For arch-specific code, we can use direct single-insn ops (they
@@ -142,16 +141,14 @@
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var,	\
-					       "m" (per_cpu__##var))
-#define percpu_read_stable(var)	percpu_from_op("mov", per_cpu__##var,	\
-					       "p" (&per_cpu__##var))
-#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
-#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
-#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
-#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
-#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
-#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
+#define percpu_read(var)		percpu_from_op("mov", var, "m" (var))
+#define percpu_read_stable(var)		percpu_from_op("mov", var, "p" (&(var)))
+#define percpu_write(var, val)		percpu_to_op("mov", var, val)
+#define percpu_add(var, val)		percpu_to_op("add", var, val)
+#define percpu_sub(var, val)		percpu_to_op("sub", var, val)
+#define percpu_and(var, val)		percpu_to_op("and", var, val)
+#define percpu_or(var, val)		percpu_to_op("or", var, val)
+#define percpu_xor(var, val)		percpu_to_op("xor", var, val)
 
 #define __this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
 #define __this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
@@ -236,7 +233,7 @@
 ({									\
 	int old__;							\
 	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
-		     : "=r" (old__), "+m" (per_cpu__##var)		\
+		     : "=r" (old__), "+m" (var)				\
 		     : "dIr" (bit));					\
 	old__;								\
 })
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index ecb544e..e529f26 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -32,7 +32,7 @@
 	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
 	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
 #define __switch_canary_oparam						\
-	, [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
+	, [stack_canary] "=m" (stack_canary.canary)
 #define __switch_canary_iparam						\
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
@@ -114,7 +114,7 @@
 	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
 	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
 #define __switch_canary_oparam						  \
-	, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+	, [gs_canary] "=m" (irq_stack_union.stack_canary)
 #define __switch_canary_iparam						  \
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
@@ -133,7 +133,7 @@
 	     __switch_canary						  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     "movq %%rax,%%rdi\n\t" 					  \
-	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"	  \
+	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
 	     "jnz   ret_from_fork\n\t"					  \
 	     RESTORE_CONTEXT						  \
 	     : "=a" (last)					  	  \
@@ -143,7 +143,7 @@
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [_tif_fork] "i" (_TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [current_task] "m" (per_cpu_var(current_task))		  \
+	       [current_task] "m" (current_task)			  \
 	       __switch_canary_iparam					  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 0159a69..4ada42c 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -438,8 +438,8 @@
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
-		__this_cpu_inc(per_cpu_var(alert_counter));
-		if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
+		__this_cpu_inc(alert_counter);
+		if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
@@ -447,7 +447,7 @@
 				regs, panic_on_timeout);
 	} else {
 		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(per_cpu_var(alert_counter), 0);
+		__this_cpu_write(alert_counter, 0);
 	}
 
 	/* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318b..37c3d4b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -442,8 +442,8 @@
 	 */
 	cmpb $0,ready
 	jne 1f
-	movl $per_cpu__gdt_page,%eax
-	movl $per_cpu__stack_canary,%ecx
+	movl $gdt_page,%eax
+	movl $stack_canary,%ecx
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -706,7 +706,7 @@
 	.word 0				# 32 bit align gdt_desc.address
 ENTRY(early_gdt_descr)
 	.word GDT_ENTRIES*8-1
-	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */
+	.long gdt_page			/* Overwritten for secondary CPUs */
 
 /*
  * The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f92a0da..44879df 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@
  * Per-cpu symbols which need to be offset from __per_cpu_load
  * for the boot processor.
  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(irq_stack_union);
 
@@ -352,7 +352,7 @@
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");
 
 #ifdef CONFIG_SMP
-. = ASSERT((per_cpu__irq_stack_union == 0),
+. = ASSERT((irq_stack_union == 0),
            "irq_stack_union is not at start of per-cpu area");
 #endif
 
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 88e15de..22a2093 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -90,9 +90,9 @@
 	GET_THREAD_INFO(%eax)
 	movl TI_cpu(%eax), %eax
 	movl __per_cpu_offset(,%eax,4), %eax
-	mov per_cpu__xen_vcpu(%eax), %eax
+	mov xen_vcpu(%eax), %eax
 #else
-	movl per_cpu__xen_vcpu, %eax
+	movl xen_vcpu, %eax
 #endif
 
 	/* check IF state we're restoring */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 8087b90..04f91c2 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -41,7 +41,11 @@
  * Only S390 provides its own means of moving the pointer.
  */
 #ifndef SHIFT_PERCPU_PTR
-#define SHIFT_PERCPU_PTR(__p, __offset)	RELOC_HIDE((__p), (__offset))
+/* Weird cast keeps both GCC and sparse happy. */
+#define SHIFT_PERCPU_PTR(__p, __offset)	({				\
+	__verify_pcpu_ptr((__p));					\
+	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \
+})
 #endif
 
 /*
@@ -50,11 +54,11 @@
  * offset.
  */
 #define per_cpu(var, cpu) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+	(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
 #define __get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+	(*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
 #define __raw_get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
+	(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
 
 #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
@@ -66,9 +70,9 @@
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
-#define __get_cpu_var(var)			per_cpu_var(var)
-#define __raw_get_cpu_var(var)			per_cpu_var(var)
+#define per_cpu(var, cpu)			(*((void)(cpu), &(var)))
+#define __get_cpu_var(var)			(var)
+#define __raw_get_cpu_var(var)			(var)
 #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
 #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab..a5a472b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -5,7 +5,7 @@
 
 #ifdef __CHECKER__
 # define __user		__attribute__((noderef, address_space(1)))
-# define __kernel	/* default address space */
+# define __kernel	__attribute__((address_space(0)))
 # define __safe		__attribute__((safe))
 # define __force	__attribute__((force))
 # define __nocast	__attribute__((nocast))
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 5a5d6ce..68567c0 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -2,12 +2,6 @@
 #define _LINUX_PERCPU_DEFS_H
 
 /*
- * Determine the real variable name from the name visible in the
- * kernel sources.
- */
-#define per_cpu_var(var) per_cpu__##var
-
-/*
  * Base implementations of per-CPU variable declarations and definitions, where
  * the section in which the variable is to be placed is provided by the
  * 'sec' argument.  This may be used to affect the parameters governing the
@@ -18,13 +12,23 @@
  * that section.
  */
 #define __PCPU_ATTRS(sec)						\
-	__attribute__((section(PER_CPU_BASE_SECTION sec)))		\
+	__percpu __attribute__((section(PER_CPU_BASE_SECTION sec)))	\
 	PER_CPU_ATTRIBUTES
 
 #define __PCPU_DUMMY_ATTRS						\
 	__attribute__((section(".discard"), unused))
 
 /*
+ * Macro which verifies @ptr is a percpu pointer without evaluating
+ * @ptr.  This is to be used in percpu accessors to verify that the
+ * input parameter is a percpu pointer.
+ */
+#define __verify_pcpu_ptr(ptr)	do {					\
+	const void __percpu *__vpp_verify = (typeof(ptr))NULL;		\
+	(void)__vpp_verify;						\
+} while (0)
+
+/*
  * s390 and alpha modules require percpu variables to be defined as
  * weak to force the compiler to generate GOT based external
  * references for them.  This is necessary because percpu sections
@@ -56,24 +60,24 @@
  */
 #define DECLARE_PER_CPU_SECTION(type, name, sec)			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name;		\
-	extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
+	extern __PCPU_ATTRS(sec) __typeof__(type) name
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
 	__PCPU_DUMMY_ATTRS char __pcpu_scope_##name;			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
-	__typeof__(type) per_cpu__##name
+	__typeof__(type) name
 #else
 /*
  * Normal declaration and definition macros.
  */
 #define DECLARE_PER_CPU_SECTION(type, name, sec)			\
-	extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
+	extern __PCPU_ATTRS(sec) __typeof__(type) name
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES			\
-	__typeof__(type) per_cpu__##name
+	__typeof__(type) name
 #endif
 
 /*
@@ -135,10 +139,16 @@
 	__aligned(PAGE_SIZE)
 
 /*
- * Intermodule exports for per-CPU variables.
+ * Intermodule exports for per-CPU variables.  sparse forgets about
+ * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to
+ * noop if __CHECKER__.
  */
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
+#ifndef __CHECKER__
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var)
+#else
+#define EXPORT_PER_CPU_SYMBOL(var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var)
+#endif
 
 #endif /* _LINUX_PERCPU_DEFS_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cf5efbc..a93e5bf 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -27,10 +27,17 @@
  * we force a syntax error here if it isn't.
  */
 #define get_cpu_var(var) (*({				\
-	extern int simple_identifier_##var(void);	\
 	preempt_disable();				\
 	&__get_cpu_var(var); }))
-#define put_cpu_var(var) preempt_enable()
+
+/*
+ * The weird & is necessary because sparse considers (void)(var) to be
+ * a direct dereference of percpu variable (var).
+ */
+#define put_cpu_var(var) do {				\
+	(void)&(var);					\
+	preempt_enable();				\
+} while (0)
 
 #ifdef CONFIG_SMP
 
@@ -127,9 +134,9 @@
  */
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
 
-extern void *__alloc_reserved_percpu(size_t size, size_t align);
-extern void *__alloc_percpu(size_t size, size_t align);
-extern void free_percpu(void *__pdata);
+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_percpu(size_t size, size_t align);
+extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
@@ -140,7 +147,7 @@
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size, size_t align)
+static inline void __percpu *__alloc_percpu(size_t size, size_t align)
 {
 	/*
 	 * Can't easily make larger alignment work with kmalloc.  WARN
@@ -151,7 +158,7 @@
 	return kzalloc(size, GFP_KERNEL);
 }
 
-static inline void free_percpu(void *p)
+static inline void free_percpu(void __percpu *p)
 {
 	kfree(p);
 }
@@ -171,7 +178,7 @@
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	\
-	(typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
+	(typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
 
 /*
  * Optional methods for optimized non-lvalue per-cpu variable access.
@@ -188,17 +195,19 @@
 #ifndef percpu_read
 # define percpu_read(var)						\
   ({									\
-	typeof(per_cpu_var(var)) __tmp_var__;				\
-	__tmp_var__ = get_cpu_var(var);					\
-	put_cpu_var(var);						\
-	__tmp_var__;							\
+	typeof(var) *pr_ptr__ = &(var);					\
+	typeof(var) pr_ret__;						\
+	pr_ret__ = get_cpu_var(*pr_ptr__);				\
+	put_cpu_var(*pr_ptr__);						\
+	pr_ret__;							\
   })
 #endif
 
 #define __percpu_generic_to_op(var, val, op)				\
 do {									\
-	get_cpu_var(var) op val;					\
-	put_cpu_var(var);						\
+	typeof(var) *pgto_ptr__ = &(var);				\
+	get_cpu_var(*pgto_ptr__) op val;				\
+	put_cpu_var(*pgto_ptr__);					\
 } while (0)
 
 #ifndef percpu_write
@@ -234,6 +243,7 @@
 
 #define __pcpu_size_call_return(stem, variable)				\
 ({	typeof(variable) pscr_ret__;					\
+	__verify_pcpu_ptr(&(variable));					\
 	switch(sizeof(variable)) {					\
 	case 1: pscr_ret__ = stem##1(variable);break;			\
 	case 2: pscr_ret__ = stem##2(variable);break;			\
@@ -247,6 +257,7 @@
 
 #define __pcpu_size_call(stem, variable, ...)				\
 do {									\
+	__verify_pcpu_ptr(&(variable));					\
 	switch(sizeof(variable)) {					\
 		case 1: stem##1(variable, __VA_ARGS__);break;		\
 		case 2: stem##2(variable, __VA_ARGS__);break;		\
@@ -259,8 +270,7 @@
 
 /*
  * Optimized manipulation for memory allocated through the per cpu
- * allocator or for addresses of per cpu variables (can be determined
- * using per_cpu_var(xx).
+ * allocator or for addresses of per cpu variables.
  *
  * These operation guarantee exclusivity of access for other operations
  * on the *same* processor. The assumption is that per cpu data is only
@@ -311,7 +321,7 @@
 #define _this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
 	preempt_disable();						\
-	*__this_cpu_ptr(&pcp) op val;					\
+	*__this_cpu_ptr(&(pcp)) op val;					\
 	preempt_enable();						\
 } while (0)
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ee03bba..117f0dd 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -78,22 +78,22 @@
 
 static inline void __count_vm_event(enum vm_event_item item)
 {
-	__this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+	__this_cpu_inc(vm_event_states.event[item]);
 }
 
 static inline void count_vm_event(enum vm_event_item item)
 {
-	this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+	this_cpu_inc(vm_event_states.event[item]);
 }
 
 static inline void __count_vm_events(enum vm_event_item item, long delta)
 {
-	__this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+	__this_cpu_add(vm_event_states.event[item], delta);
 }
 
 static inline void count_vm_events(enum vm_event_item item, long delta)
 {
-	this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+	this_cpu_add(vm_event_states.event[item], delta);
 }
 
 extern void all_vm_events(unsigned long *);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9bb5217..0b52175 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -763,13 +763,13 @@
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	__this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+	__this_cpu_inc(rcu_torture_count[pipe_count]);
 	completed = cur_ops->completed() - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
 	}
-	__this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+	__this_cpu_inc(rcu_torture_batch[completed]);
 	preempt_enable();
 	cur_ops->readunlock(idx);
 }
@@ -818,13 +818,13 @@
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		__this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+		__this_cpu_inc(rcu_torture_count[pipe_count]);
 		completed = cur_ops->completed() - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
 		}
-		__this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+		__this_cpu_inc(rcu_torture_batch[completed]);
 		preempt_enable();
 		cur_ops->readunlock(idx);
 		schedule();
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f..ab2bbb0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -91,12 +91,12 @@
 static inline void ftrace_disable_cpu(void)
 {
 	preempt_disable();
-	__this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_inc(ftrace_cpu_disabled);
 }
 
 static inline void ftrace_enable_cpu(void)
 {
-	__this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_dec(ftrace_cpu_disabled);
 	preempt_enable();
 }
 
@@ -1084,7 +1084,7 @@
 	struct ftrace_entry *entry;
 
 	/* If we are reading the ring buffer, don't trace */
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5..9d976f3 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -187,7 +187,7 @@
 	struct ring_buffer *buffer = tr->buffer;
 	struct ftrace_graph_ent_entry *entry;
 
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return 0;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -251,7 +251,7 @@
 	struct ring_buffer *buffer = tr->buffer;
 	struct ftrace_graph_ret_entry *entry;
 
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
diff --git a/mm/percpu.c b/mm/percpu.c
index 442010c..626e43c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -913,11 +913,10 @@
 	int rs, re;
 
 	/* quick path, check whether it's empty already */
-	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
-		if (rs == page_start && re == page_end)
-			return;
-		break;
-	}
+	rs = page_start;
+	pcpu_next_unpop(chunk, &rs, &re, page_end);
+	if (rs == page_start && re == page_end)
+		return;
 
 	/* immutable chunks can't be depopulated */
 	WARN_ON(chunk->immutable);
@@ -968,11 +967,10 @@
 	int rs, re, rc;
 
 	/* quick path, check whether all pages are already there */
-	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
-		if (rs == page_start && re == page_end)
-			goto clear;
-		break;
-	}
+	rs = page_start;
+	pcpu_next_pop(chunk, &rs, &re, page_end);
+	if (rs == page_start && re == page_end)
+		goto clear;
 
 	/* need to allocate and map pages, this chunk can't be immutable */
 	WARN_ON(chunk->immutable);