#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/apicdef.h>

#include "perf_event.h"

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};
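
/*
 * Editor's note (a sketch, not part of the original file): these are the raw
 * event-select codes written into the PERF_CTL MSRs; e.g. 0x0076 is the
 * "CPU clocks not halted" event and 0x00c0 counts retired instructions,
 * per the AMD BKDGs.
 */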

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret;

	/* pass precise event sampling to ibs: */
	if (event->attr.precise_ip && get_ibs_caps())
		return -ENOENT;

	ret = x86_pmu_hw_config(event);
	if (ret)
		return ret;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->attr.exclude_host && event->attr.exclude_guest)
		/*
		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
		 * and will count in both modes. We don't want to count in that
		 * case so we emulate no-counting by setting US = OS = 0.
		 */
		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
				      ARCH_PERFMON_EVENTSEL_OS);
	else if (event->attr.exclude_host)
		event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
	else if (event->attr.exclude_guest)
		event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}

/*
 * AMD64 events are detected based on their event codes.
 */
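/*
 * Worked example (illustrative, numbers taken from the Fam15h table further
 * down): the 12-bit event 0x1D6 is encoded with bits [7:0] in config[7:0]
 * and bits [11:8] in config[35:32], so amd_get_event_code() rebuilds it as
 * ((config >> 24) & 0x0f00) | (config & 0x00ff) = 0x100 | 0xD6 = 0x1D6.
 */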
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}
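
/*
 * Example (a sketch based on the NODE cache map above): the raw config
 * 0xb8e9 has event code 0xe9, and 0xe9 & 0xe0 == 0xe0, so it is treated
 * as a NorthBridge event by amd_is_nb_event().
 */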

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (cmpxchg(nb->owners + i, event, NULL) == event)
			break;
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache, HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners+i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}

static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			cpuc->kfree_on_online = cpuc->amd_nb;
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}

PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
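
/*
 * Editor's note (a sketch, paths as typically exposed by the core perf code):
 * these format strings usually show up under
 * /sys/bus/event_source/devices/cpu/format/, letting tools build raw events
 * such as "cpu/event=0x76,umask=0x00/" for perf stat or perf record.
 */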

static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
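
/*
 * Note: the "..." in the definitions above makes them usable as GCC case
 * ranges in the switch statement of amd_get_event_constraints_f15h() below.
 */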

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x031	LS	PERF_CTL[2:0] (**)
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1C0	EX	PERF_CTL[5:3]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */

static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
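
/*
 * The counter masks above, spelled out: 0x01 = PMC0, 0x07 = PMC0-2,
 * 0x08 = PMC3, 0x09 = PMC0 and PMC3 (hence the OVERLAP constraint),
 * 0x3F = PMC0-5, 0x38 = PMC3-5.
 */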

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int event_code = amd_get_event_code(hwc);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		switch (event_code) {
		case 0x000:
			if (!(hwc->config & 0x0000F000ULL))
				break;
			if (!(hwc->config & 0x00000F00ULL))
				break;
			return &amd_f15_PMC3;
		case 0x004:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				break;
			return &amd_f15_PMC3;
		case 0x003:
		case 0x00B:
		case 0x00D:
			return &amd_f15_PMC3;
		}
		return &amd_f15_PMC53;
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		switch (event_code) {
		case 0x023:
		case 0x043:
		case 0x045:
		case 0x046:
		case 0x054:
		case 0x055:
			return &amd_f15_PMC20;
		case 0x02D:
			return &amd_f15_PMC3;
		case 0x02E:
			return &amd_f15_PMC30;
		case 0x031:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				return &amd_f15_PMC20;
			return &emptyconstraint;
		case 0x1C0:
			return &amd_f15_PMC53;
		default:
			return &amd_f15_PMC50;
		}
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		switch (event_code) {
		case 0x08F:
		case 0x187:
		case 0x188:
			return &amd_f15_PMC0;
		case 0x0DB ... 0x0DF:
		case 0x1D6:
		case 0x1D8:
			return &amd_f15_PMC50;
		default:
			return &amd_f15_PMC20;
		}
	case AMD_EVENT_NB:
		/* not yet implemented */
		return &emptyconstraint;
	default:
		return &emptyconstraint;
	}
}

static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
		    (config & AMD64_EVENTSEL_EVENT) >> 24;

	return x86_event_sysfs_show(page, config, event);
}
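
/*
 * For example (a sketch): a config of 0x1000000d6 yields 0xd6 from the low
 * byte plus 0x100 from config[35:32] shifted down by 24, i.e. the 12-bit
 * event 0x1d6 that is then rendered by x86_event_sysfs_show().
 */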

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= AMD64_NUM_COUNTERS,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.format_attrs		= amd_format_attr,
	.events_sysfs_show	= amd_event_sysfs_show,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
};

static int setup_event_constraints(void)
{
	if (boot_cpu_data.x86 >= 0x15)
		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
	return 0;
}

static int setup_perfctr_core(void)
{
	if (!cpu_has_perfctr_core) {
		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
		return -ENODEV;
	}

	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
	     KERN_ERR "hw perf events core counters need constraints handler!");

	/*
	 * If core performance counter extensions exist, we must use
	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
	 * x86_pmu_addr_offset().
	 */
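	/*
	 * Layout sketch (assuming the usual Fam15h core-extension MSR
	 * numbering): control and counter registers are interleaved, so
	 * counter i lives at MSR_F15H_PERF_CTL + 2*i and
	 * MSR_F15H_PERF_CTR + 2*i, which is what x86_pmu_addr_offset()
	 * accounts for.
	 */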
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;

	printk(KERN_INFO "perf: AMD core performance counters detected\n");

	return 0;
}

__init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	setup_event_constraints();
	setup_perfctr_core();

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

void amd_pmu_enable_virt(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	cpuc->perf_ctr_virt_mask = 0;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);

void amd_pmu_disable_virt(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	/*
	 * We only mask out the Host-only bit so that host-only counting works
	 * when SVM is disabled. If someone sets up a guest-only counter when
	 * SVM is disabled, the Guest-only bit still gets set and the counter
	 * will not count anything.
	 */
	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);