[PATCH] x86_64: On Intel CPUs don't do an additional CPU sync before RDTSC

RDTSC serialization using cpuid is not needed for Intel platforms.
This increases gettimeofday performance.

Cc: vojtech@suse.cz
Cc: rohit.seth@intel.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index aea308c..72b39f5 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -63,6 +63,7 @@
 #define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
 #define X86_FEATURE_K8_C	(3*32+ 4) /* C stepping K8 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
+#define X86_FEATURE_SYNC_RDTSC  (3*32+6)  /* RDTSC syncs CPU core */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index f971f45..f18443f 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -10,6 +10,9 @@
 #include <asm/msr.h>
 #include <asm/vsyscall.h>
 #include <asm/hpet.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <linux/compiler.h>
 
 #define CLOCK_TICK_RATE	PIT_TICK_RATE	/* Underlying HZ */
 
@@ -23,6 +26,19 @@
 	return ret;
 }
 
+/* Like get_cycles, but make sure the CPU is synchronized. */
+static __always_inline cycles_t get_cycles_sync(void)
+{
+	unsigned long long ret;
+	unsigned eax;
+	/* Don't do an additional sync on CPUs where we know
+	   RDTSC is already synchronous. */
+	alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC,
+			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+	rdtscll(ret);
+	return ret;
+}
+
 extern unsigned int cpu_khz;
 
 extern int read_current_timer(unsigned long *timer_value);