| #ifdef CONFIG_CPU_SUP_INTEL |
| |
| /* |
| * Intel PerfMon v3. Used on Core2 and later. |
| */ |
| static const u64 intel_perfmon_event_map[] = |
| { |
| [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
| [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
| [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, |
| [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, |
| [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
| [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
| [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
| }; |
| |
| static struct event_constraint intel_core_event_constraints[] = |
| { |
| INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ |
| EVENT_CONSTRAINT_END |
| }; |
| |
| static struct event_constraint intel_core2_event_constraints[] = |
| { |
| FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ |
| FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ |
| INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
| INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ |
| INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ |
| INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ |
| EVENT_CONSTRAINT_END |
| }; |
| |
| static struct event_constraint intel_nehalem_event_constraints[] = |
| { |
| FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ |
| FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ |
| INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
| INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
| INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
| INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ |
| INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ |
| INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ |
| INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| EVENT_CONSTRAINT_END |
| }; |
| |
| static struct event_constraint intel_westmere_event_constraints[] = |
| { |
| FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ |
| FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ |
| INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
| INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| EVENT_CONSTRAINT_END |
| }; |
| |
| static struct event_constraint intel_gen_event_constraints[] = |
| { |
| FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ |
| FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ |
| EVENT_CONSTRAINT_END |
| }; |
| |
| static u64 intel_pmu_event_map(int hw_event) |
| { |
| return intel_perfmon_event_map[hw_event]; |
| } |
| |
| static __initconst u64 westmere_hw_cache_event_ids |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| { |
| [ C(L1D) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ |
| [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
| [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ |
| }, |
| }, |
| [ C(L1I ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0, |
| [ C(RESULT_MISS) ] = 0x0, |
| }, |
| }, |
| [ C(LL ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ |
| [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ |
| [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ |
| [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ |
| }, |
| }, |
| [ C(DTLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ |
| [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0, |
| [ C(RESULT_MISS) ] = 0x0, |
| }, |
| }, |
| [ C(ITLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ |
| [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| [ C(BPU ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| }; |
| |
| static __initconst u64 nehalem_hw_cache_event_ids |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| { |
| [ C(L1D) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ |
| [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ |
| [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
| [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ |
| }, |
| }, |
| [ C(L1I ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0, |
| [ C(RESULT_MISS) ] = 0x0, |
| }, |
| }, |
| [ C(LL ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ |
| [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ |
| [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ |
| [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ |
| }, |
| }, |
| [ C(DTLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0, |
| [ C(RESULT_MISS) ] = 0x0, |
| }, |
| }, |
| [ C(ITLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ |
| [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| [ C(BPU ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| }; |
| |
| static __initconst u64 core2_hw_cache_event_ids |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| { |
| [ C(L1D) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ |
| [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ |
| [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(L1I ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ |
| [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(LL ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ |
| [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ |
| [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(DTLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(ITLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ |
| [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| [ C(BPU ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ |
| [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| }; |
| |
| static __initconst u64 atom_hw_cache_event_ids |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| { |
| [ C(L1D) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(L1I ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(LL ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ |
| [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ |
| [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(DTLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ |
| [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = 0, |
| [ C(RESULT_MISS) ] = 0, |
| }, |
| }, |
| [ C(ITLB) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ |
| [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| [ C(BPU ) ] = { |
| [ C(OP_READ) ] = { |
| [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ |
| [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ |
| }, |
| [ C(OP_WRITE) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| [ C(RESULT_ACCESS) ] = -1, |
| [ C(RESULT_MISS) ] = -1, |
| }, |
| }, |
| }; |
| |
| static u64 intel_pmu_raw_event(u64 hw_event) |
| { |
| #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL |
| #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
| #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL |
| #define CORE_EVNTSEL_INV_MASK 0x00800000ULL |
| #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL |
| |
| #define CORE_EVNTSEL_MASK \ |
| (INTEL_ARCH_EVTSEL_MASK | \ |
| INTEL_ARCH_UNIT_MASK | \ |
| INTEL_ARCH_EDGE_MASK | \ |
| INTEL_ARCH_INV_MASK | \ |
| INTEL_ARCH_CNT_MASK) |
| |
| return hw_event & CORE_EVNTSEL_MASK; |
| } |
| |
| static void intel_pmu_enable_bts(u64 config) |
| { |
| unsigned long debugctlmsr; |
| |
| debugctlmsr = get_debugctlmsr(); |
| |
| debugctlmsr |= X86_DEBUGCTL_TR; |
| debugctlmsr |= X86_DEBUGCTL_BTS; |
| debugctlmsr |= X86_DEBUGCTL_BTINT; |
| |
| if (!(config & ARCH_PERFMON_EVENTSEL_OS)) |
| debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; |
| |
| if (!(config & ARCH_PERFMON_EVENTSEL_USR)) |
| debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; |
| |
| update_debugctlmsr(debugctlmsr); |
| } |
| |
| static void intel_pmu_disable_bts(void) |
| { |
| struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| unsigned long debugctlmsr; |
| |
| if (!cpuc->ds) |
| return; |
| |
| debugctlmsr = get_debugctlmsr(); |
| |
| debugctlmsr &= |
| ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | |
| X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); |
| |
| update_debugctlmsr(debugctlmsr); |
| } |
| |
| static void intel_pmu_disable_all(void) |
| { |
| struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| |
| wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| |
| if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
| intel_pmu_disable_bts(); |
| } |
| |
| static void intel_pmu_enable_all(void) |
| { |
| struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| |
| wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
| |
| if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| struct perf_event *event = |
| cpuc->events[X86_PMC_IDX_FIXED_BTS]; |
| |
| if (WARN_ON_ONCE(!event)) |
| return; |
| |
| intel_pmu_enable_bts(event->hw.config); |
| } |
| } |
| |
| static inline u64 intel_pmu_get_status(void) |
| { |
| u64 status; |
| |
| rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
| |
| return status; |
| } |
| |
| static inline void intel_pmu_ack_status(u64 ack) |
| { |
| wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
| } |
| |
| static inline void |
| intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) |
| { |
| int idx = __idx - X86_PMC_IDX_FIXED; |
| u64 ctrl_val, mask; |
| |
| mask = 0xfULL << (idx * 4); |
| |
| rdmsrl(hwc->config_base, ctrl_val); |
| ctrl_val &= ~mask; |
| (void)checking_wrmsrl(hwc->config_base, ctrl_val); |
| } |
| |
| static void intel_pmu_drain_bts_buffer(void) |
| { |
| struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| struct debug_store *ds = cpuc->ds; |
| struct bts_record { |
| u64 from; |
| u64 to; |
| u64 flags; |
| }; |
| struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; |
| struct bts_record *at, *top; |
| struct perf_output_handle handle; |
| struct perf_event_header header; |
| struct perf_sample_data data; |
| struct pt_regs regs; |
| |
| if (!event) |
| return; |
| |
| if (!ds) |
| return; |
| |
| at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
| top = (struct bts_record *)(unsigned long)ds->bts_index; |
| |
| if (top <= at) |
| return; |
| |
| ds->bts_index = ds->bts_buffer_base; |
| |
| |
| data.period = event->hw.last_period; |
| data.addr = 0; |
| data.raw = NULL; |
| regs.ip = 0; |
| |
| /* |
| * Prepare a generic sample, i.e. fill in the invariant fields. |
| * We will overwrite the from and to address before we output |
| * the sample. |
| */ |
| perf_prepare_sample(&header, &data, event, ®s); |
| |
| if (perf_output_begin(&handle, event, |
| header.size * (top - at), 1, 1)) |
| return; |
| |
| for (; at < top; at++) { |
| data.ip = at->from; |
| data.addr = at->to; |
| |
| perf_output_sample(&handle, &header, &data, event); |
| } |
| |
| perf_output_end(&handle); |
| |
| /* There's new data available. */ |
| event->hw.interrupts++; |
| event->pending_kill = POLL_IN; |
| } |
| |
| static inline void |
| intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) |
| { |
| if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { |
| intel_pmu_disable_bts(); |
| intel_pmu_drain_bts_buffer(); |
| return; |
| } |
| |
| if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| intel_pmu_disable_fixed(hwc, idx); |
| return; |
| } |
| |
| x86_pmu_disable_event(hwc, idx); |
| } |
| |
| static inline void |
| intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) |
| { |
| int idx = __idx - X86_PMC_IDX_FIXED; |
| u64 ctrl_val, bits, mask; |
| int err; |
| |
| /* |
| * Enable IRQ generation (0x8), |
| * and enable ring-3 counting (0x2) and ring-0 counting (0x1) |
| * if requested: |
| */ |
| bits = 0x8ULL; |
| if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) |
| bits |= 0x2; |
| if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) |
| bits |= 0x1; |
| |
| /* |
| * ANY bit is supported in v3 and up |
| */ |
| if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) |
| bits |= 0x4; |
| |
| bits <<= (idx * 4); |
| mask = 0xfULL << (idx * 4); |
| |
| rdmsrl(hwc->config_base, ctrl_val); |
| ctrl_val &= ~mask; |
| ctrl_val |= bits; |
| err = checking_wrmsrl(hwc->config_base, ctrl_val); |
| } |
| |
| static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
| { |
| if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { |
| if (!__get_cpu_var(cpu_hw_events).enabled) |
| return; |
| |
| intel_pmu_enable_bts(hwc->config); |
| return; |
| } |
| |
| if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| intel_pmu_enable_fixed(hwc, idx); |
| return; |
| } |
| |
| __x86_pmu_enable_event(hwc, idx); |
| } |
| |
| /* |
| * Save and restart an expired event. Called by NMI contexts, |
| * so it has to be careful about preempting normal event ops: |
| */ |
| static int intel_pmu_save_and_restart(struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| int ret; |
| |
| x86_perf_event_update(event, hwc, idx); |
| ret = x86_perf_event_set_period(event, hwc, idx); |
| |
| return ret; |
| } |
| |
| static void intel_pmu_reset(void) |
| { |
| struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; |
| unsigned long flags; |
| int idx; |
| |
| if (!x86_pmu.num_events) |
| return; |
| |
| local_irq_save(flags); |
| |
| printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
| |
| for (idx = 0; idx < x86_pmu.num_events; idx++) { |
| checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
| checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
| } |
| for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { |
| checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
| } |
| if (ds) |
| ds->bts_index = ds->bts_buffer_base; |
| |
| local_irq_restore(flags); |
| } |
| |
| /* |
| * This handler is triggered by the local APIC, so the APIC IRQ handling |
| * rules apply: |
| */ |
| static int intel_pmu_handle_irq(struct pt_regs *regs) |
| { |
| struct perf_sample_data data; |
| struct cpu_hw_events *cpuc; |
| int bit, loops; |
| u64 ack, status; |
| |
| data.addr = 0; |
| data.raw = NULL; |
| |
| cpuc = &__get_cpu_var(cpu_hw_events); |
| |
| perf_disable(); |
| intel_pmu_drain_bts_buffer(); |
| status = intel_pmu_get_status(); |
| if (!status) { |
| perf_enable(); |
| return 0; |
| } |
| |
| loops = 0; |
| again: |
| if (++loops > 100) { |
| WARN_ONCE(1, "perfevents: irq loop stuck!\n"); |
| perf_event_print_debug(); |
| intel_pmu_reset(); |
| perf_enable(); |
| return 1; |
| } |
| |
| inc_irq_stat(apic_perf_irqs); |
| ack = status; |
| for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
| struct perf_event *event = cpuc->events[bit]; |
| |
| clear_bit(bit, (unsigned long *) &status); |
| if (!test_bit(bit, cpuc->active_mask)) |
| continue; |
| |
| if (!intel_pmu_save_and_restart(event)) |
| continue; |
| |
| data.period = event->hw.last_period; |
| |
| if (perf_event_overflow(event, 1, &data, regs)) |
| intel_pmu_disable_event(&event->hw, bit); |
| } |
| |
| intel_pmu_ack_status(ack); |
| |
| /* |
| * Repeat if there is more work to be done: |
| */ |
| status = intel_pmu_get_status(); |
| if (status) |
| goto again; |
| |
| perf_enable(); |
| |
| return 1; |
| } |
| |
| static struct event_constraint bts_constraint = |
| EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); |
| |
| static struct event_constraint * |
| intel_special_constraints(struct perf_event *event) |
| { |
| unsigned int hw_event; |
| |
| hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; |
| |
| if (unlikely((hw_event == |
| x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && |
| (event->hw.sample_period == 1))) { |
| |
| return &bts_constraint; |
| } |
| return NULL; |
| } |
| |
/*
 * Pick the constraint for @event: an Intel-specific special constraint
 * if one applies, otherwise whatever the generic x86 lookup yields.
 */
static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *special = intel_special_constraints(event);

	return special ? special : x86_get_event_constraints(cpuc, event);
}
| |
| static __initconst struct x86_pmu core_pmu = { |
| .name = "core", |
| .handle_irq = x86_pmu_handle_irq, |
| .disable_all = x86_pmu_disable_all, |
| .enable_all = x86_pmu_enable_all, |
| .enable = x86_pmu_enable_event, |
| .disable = x86_pmu_disable_event, |
| .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| .event_map = intel_pmu_event_map, |
| .raw_event = intel_pmu_raw_event, |
| .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| .apic = 1, |
| /* |
| * Intel PMCs cannot be accessed sanely above 32 bit width, |
| * so we install an artificial 1<<31 period regardless of |
| * the generic event period: |
| */ |
| .max_period = (1ULL << 31) - 1, |
| .get_event_constraints = intel_get_event_constraints, |
| .event_constraints = intel_core_event_constraints, |
| }; |
| |
| static __initconst struct x86_pmu intel_pmu = { |
| .name = "Intel", |
| .handle_irq = intel_pmu_handle_irq, |
| .disable_all = intel_pmu_disable_all, |
| .enable_all = intel_pmu_enable_all, |
| .enable = intel_pmu_enable_event, |
| .disable = intel_pmu_disable_event, |
| .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| .event_map = intel_pmu_event_map, |
| .raw_event = intel_pmu_raw_event, |
| .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| .apic = 1, |
| /* |
| * Intel PMCs cannot be accessed sanely above 32 bit width, |
| * so we install an artificial 1<<31 period regardless of |
| * the generic event period: |
| */ |
| .max_period = (1ULL << 31) - 1, |
| .enable_bts = intel_pmu_enable_bts, |
| .disable_bts = intel_pmu_disable_bts, |
| .get_event_constraints = intel_get_event_constraints |
| }; |
| |
| static __init int intel_pmu_init(void) |
| { |
| union cpuid10_edx edx; |
| union cpuid10_eax eax; |
| unsigned int unused; |
| unsigned int ebx; |
| int version; |
| |
| if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| /* check for P6 processor family */ |
| if (boot_cpu_data.x86 == 6) { |
| return p6_pmu_init(); |
| } else { |
| return -ENODEV; |
| } |
| } |
| |
| /* |
| * Check whether the Architectural PerfMon supports |
| * Branch Misses Retired hw_event or not. |
| */ |
| cpuid(10, &eax.full, &ebx, &unused, &edx.full); |
| if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) |
| return -ENODEV; |
| |
| version = eax.split.version_id; |
| if (version < 2) |
| x86_pmu = core_pmu; |
| else |
| x86_pmu = intel_pmu; |
| |
| x86_pmu.version = version; |
| x86_pmu.num_events = eax.split.num_events; |
| x86_pmu.event_bits = eax.split.bit_width; |
| x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; |
| |
| /* |
| * Quirk: v2 perfmon does not report fixed-purpose events, so |
| * assume at least 3 events: |
| */ |
| if (version > 1) |
| x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); |
| |
| /* |
| * Install the hw-cache-events table: |
| */ |
| switch (boot_cpu_data.x86_model) { |
| case 14: /* 65 nm core solo/duo, "Yonah" */ |
| pr_cont("Core events, "); |
| break; |
| |
| case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
| case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
| case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
| case 29: /* six-core 45 nm xeon "Dunnington" */ |
| memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
| sizeof(hw_cache_event_ids)); |
| |
| x86_pmu.event_constraints = intel_core2_event_constraints; |
| pr_cont("Core2 events, "); |
| break; |
| |
| case 26: /* 45 nm nehalem, "Bloomfield" */ |
| case 30: /* 45 nm nehalem, "Lynnfield" */ |
| memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
| sizeof(hw_cache_event_ids)); |
| |
| x86_pmu.event_constraints = intel_nehalem_event_constraints; |
| pr_cont("Nehalem/Corei7 events, "); |
| break; |
| case 28: |
| memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
| sizeof(hw_cache_event_ids)); |
| |
| x86_pmu.event_constraints = intel_gen_event_constraints; |
| pr_cont("Atom events, "); |
| break; |
| |
| case 37: /* 32 nm nehalem, "Clarkdale" */ |
| case 44: /* 32 nm nehalem, "Gulftown" */ |
| memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
| sizeof(hw_cache_event_ids)); |
| |
| x86_pmu.event_constraints = intel_westmere_event_constraints; |
| pr_cont("Westmere events, "); |
| break; |
| default: |
| /* |
| * default constraints for v2 and up |
| */ |
| x86_pmu.event_constraints = intel_gen_event_constraints; |
| pr_cont("generic architected perfmon, "); |
| } |
| return 0; |
| } |
| |
| #else /* CONFIG_CPU_SUP_INTEL */ |
| |
/*
 * Stub used when Intel CPU support is compiled out: there is nothing to
 * probe, so report success unconditionally.
 */
static int intel_pmu_init(void)
{
	return 0;
}
| |
| #endif /* CONFIG_CPU_SUP_INTEL */ |