blob: f12a7822fcf17f33c39f5a9336204ae86732fa76 [file] [log] [blame]
Peter Zijlstrade9ac072009-04-08 15:01:31 +02001
2
Peter Zijlstra1a482f32009-05-23 18:28:58 +02003#include "perf.h"
Thomas Gleixner6eda5832009-05-01 18:29:57 +02004#include "util/util.h"
Ingo Molnar0e9b20b2009-05-26 09:17:18 +02005#include "util/parse-options.h"
6#include "util/exec_cmd.h"
Thomas Gleixner6eda5832009-05-01 18:29:57 +02007
Peter Zijlstrade9ac072009-04-08 15:01:31 +02008#include <sys/types.h>
9#include <sys/stat.h>
10#include <sys/time.h>
11#include <unistd.h>
12#include <stdint.h>
13#include <stdlib.h>
14#include <string.h>
15#include <limits.h>
Peter Zijlstrade9ac072009-04-08 15:01:31 +020016#include <assert.h>
17#include <fcntl.h>
18#include <stdio.h>
19#include <errno.h>
Peter Zijlstrade9ac072009-04-08 15:01:31 +020020#include <time.h>
21#include <sched.h>
22#include <pthread.h>
23
24#include <sys/syscall.h>
25#include <sys/ioctl.h>
26#include <sys/poll.h>
27#include <sys/prctl.h>
28#include <sys/wait.h>
29#include <sys/uio.h>
30#include <sys/mman.h>
31
32#include <linux/unistd.h>
33#include <linux/types.h>
34
Peter Zijlstrade9ac072009-04-08 15:01:31 +020035
Peter Zijlstrade9ac072009-04-08 15:01:31 +020036
/*
 * ALIGN(x, a): round x up to the next multiple of a.  The mask arithmetic
 * only works when a is a power of two.
 *
 * __typeof__ is used rather than the bare GNU keyword `typeof` so that the
 * macro also builds in strict ISO modes (-std=c99 / -std=c11).
 */
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -030039
Peter Zijlstrade9ac072009-04-08 15:01:31 +020040static int nr_counters = 0;
41static __u64 event_id[MAX_COUNTERS] = { };
42static int default_interval = 100000;
43static int event_count[MAX_COUNTERS];
44static int fd[MAX_NR_CPUS][MAX_COUNTERS];
45static int nr_cpus = 0;
46static unsigned int page_size;
47static unsigned int mmap_pages = 16;
48static int output;
Ingo Molnar0e9b20b2009-05-26 09:17:18 +020049static const char *output_name = "output.perf";
Peter Zijlstrade9ac072009-04-08 15:01:31 +020050static int group = 0;
Peter Zijlstra16c8a102009-05-05 17:50:27 +020051static unsigned int realtime_prio = 0;
52static int system_wide = 0;
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -030053static pid_t target_pid = -1;
Peter Zijlstra16c8a102009-05-05 17:50:27 +020054static int inherit = 1;
55static int nmi = 1;
Peter Zijlstrade9ac072009-04-08 15:01:31 +020056
/*
 * Table of default counts.  Nothing in the visible part of this file
 * indexes it, so the meaning of each slot is not documented here.
 */
const unsigned int default_count[] = {
	1000000, 1000000,
	  10000,   10000,
	1000000,
	  10000,
};
65
/* Reader-side state for one counter's mmap()ed buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this buffer serves */
	void		*base;		/* address returned by mmap() */
	unsigned int	mask;		/* size of the data area in bytes, minus one */
	unsigned int	prev;		/* position mmap_read() has consumed up to */
};
72
73static unsigned int mmap_read_head(struct mmap_data *md)
74{
75 struct perf_counter_mmap_page *pc = md->base;
76 int head;
77
78 head = pc->data_head;
79 rmb();
80
81 return head;
82}
83
/* Incremented by mmap_read() each time it finds unread data in a buffer. */
static long events;

/* Time of the previous and of the current mmap_read() call. */
static struct timeval last_read;
static struct timeval this_read;
86
87static void mmap_read(struct mmap_data *md)
88{
89 unsigned int head = mmap_read_head(md);
90 unsigned int old = md->prev;
91 unsigned char *data = md->base + page_size;
92 unsigned long size;
93 void *buf;
94 int diff;
95
96 gettimeofday(&this_read, NULL);
97
98 /*
99 * If we're further behind than half the buffer, there's a chance
100 * the writer will bite our tail and screw up the events under us.
101 *
102 * If we somehow ended up ahead of the head, we got messed up.
103 *
104 * In either case, truncate and restart at head.
105 */
106 diff = head - old;
107 if (diff > md->mask / 2 || diff < 0) {
108 struct timeval iv;
109 unsigned long msecs;
110
111 timersub(&this_read, &last_read, &iv);
112 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
113
114 fprintf(stderr, "WARNING: failed to keep up with mmap data."
115 " Last read %lu msecs ago.\n", msecs);
116
117 /*
118 * head points to a known good entry, start there.
119 */
120 old = head;
121 }
122
123 last_read = this_read;
124
125 if (old != head)
126 events++;
127
128 size = head - old;
129
130 if ((old & md->mask) + size != (head & md->mask)) {
131 buf = &data[old & md->mask];
132 size = md->mask + 1 - (old & md->mask);
133 old += size;
134 while (size) {
135 int ret = write(output, buf, size);
136 if (ret < 0) {
137 perror("failed to write");
138 exit(-1);
139 }
140 size -= ret;
141 buf += ret;
142 }
143 }
144
145 buf = &data[old & md->mask];
146 size = head - old;
147 old += size;
148 while (size) {
149 int ret = write(output, buf, size);
150 if (ret < 0) {
151 perror("failed to write");
152 exit(-1);
153 }
154 size -= ret;
155 buf += ret;
156 }
157
158 md->prev = old;
159}
160
/*
 * Set to 1 by sig_handler(); the recording loop runs until it is non-zero.
 * sig_atomic_t (from <signal.h>, which this file already relies on for
 * signal() and SIGINT) is the type the C standard guarantees can be
 * written safely from a signal handler.
 */
static volatile sig_atomic_t done = 0;

/* Signal handler: request that the recording loop stop. */
static void sig_handler(int sig)
{
	(void)sig;	/* same action for every signal it is installed on */
	done = 1;
}
167
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200168static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
169static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
170
171static int nr_poll;
172static int nr_cpu;
173
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300174struct mmap_event {
175 struct perf_event_header header;
176 __u32 pid, tid;
177 __u64 start;
178 __u64 len;
179 __u64 pgoff;
180 char filename[PATH_MAX];
181};
182struct comm_event {
183 struct perf_event_header header;
184 __u32 pid,tid;
185 char comm[16];
186};
187
188static pid_t pid_synthesize_comm_event(pid_t pid)
189{
190 char filename[PATH_MAX];
191 char bf[BUFSIZ];
192 struct comm_event comm_ev;
193 size_t size;
194 int fd;
195
196 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
197
198 fd = open(filename, O_RDONLY);
199 if (fd < 0) {
200 fprintf(stderr, "couldn't open %s\n", filename);
201 exit(EXIT_FAILURE);
202 }
203 if (read(fd, bf, sizeof(bf)) < 0) {
204 fprintf(stderr, "couldn't read %s\n", filename);
205 exit(EXIT_FAILURE);
206 }
207 close(fd);
208
209 pid_t spid, ppid;
210 char state;
211 char comm[18];
212
213 memset(&comm_ev, 0, sizeof(comm_ev));
214 int nr = sscanf(bf, "%d %s %c %d %d ",
215 &spid, comm, &state, &ppid, &comm_ev.pid);
216 if (nr != 5) {
217 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
218 filename);
219 exit(EXIT_FAILURE);
220 }
221 comm_ev.header.type = PERF_EVENT_COMM;
222 comm_ev.tid = pid;
223 size = strlen(comm);
224 comm[--size] = '\0'; /* Remove the ')' at the end */
225 --size; /* Remove the '(' at the begin */
226 memcpy(comm_ev.comm, comm + 1, size);
227 size = ALIGN(size, sizeof(uint64_t));
228 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
229 int ret = write(output, &comm_ev, comm_ev.header.size);
230 if (ret < 0) {
231 perror("failed to write");
232 exit(-1);
233 }
234 return comm_ev.pid;
235}
236
237static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
238{
239 char filename[PATH_MAX];
240 FILE *fp;
241
242 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
243
244 fp = fopen(filename, "r");
245 if (fp == NULL) {
246 fprintf(stderr, "couldn't open %s\n", filename);
247 exit(EXIT_FAILURE);
248 }
249 while (1) {
250 char bf[BUFSIZ];
251 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
252 struct mmap_event mmap_ev = {
253 .header.type = PERF_EVENT_MMAP,
254 };
255 unsigned long ino;
256 int major, minor;
257 size_t size;
258 if (fgets(bf, sizeof(bf), fp) == NULL)
259 break;
260
261 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
262 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
263 &mmap_ev.start, &mmap_ev.len,
264 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
265 &mmap_ev.pgoff, &major, &minor, &ino);
266 if (vm_exec == 'x') {
267 char *execname = strrchr(bf, ' ');
268
269 if (execname == NULL || execname[1] != '/')
270 continue;
271
272 execname += 1;
273 size = strlen(execname);
274 execname[size - 1] = '\0'; /* Remove \n */
275 memcpy(mmap_ev.filename, execname, size);
276 size = ALIGN(size, sizeof(uint64_t));
277 mmap_ev.len -= mmap_ev.start;
278 mmap_ev.header.size = (sizeof(mmap_ev) -
279 (sizeof(mmap_ev.filename) - size));
280 mmap_ev.pid = pgid;
281 mmap_ev.tid = pid;
282
283 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
284 perror("failed to write");
285 exit(-1);
286 }
287 }
288 }
289
290 fclose(fp);
291}
292
293static void open_counters(int cpu, pid_t pid)
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200294{
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200295 struct perf_counter_hw_event hw_event;
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200296 int counter, group_fd;
297 int track = 1;
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200298
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300299 if (pid > 0) {
300 pid_t pgid = pid_synthesize_comm_event(pid);
301 pid_synthesize_mmap_events(pid, pgid);
302 }
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200303
304 group_fd = -1;
305 for (counter = 0; counter < nr_counters; counter++) {
306
307 memset(&hw_event, 0, sizeof(hw_event));
308 hw_event.config = event_id[counter];
309 hw_event.irq_period = event_count[counter];
310 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
311 hw_event.nmi = nmi;
312 hw_event.mmap = track;
313 hw_event.comm = track;
314 hw_event.inherit = (cpu < 0) && inherit;
315
316 track = 0; // only the first counter needs these
317
318 fd[nr_cpu][counter] =
319 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
320
321 if (fd[nr_cpu][counter] < 0) {
322 int err = errno;
323 printf("kerneltop error: syscall returned with %d (%s)\n",
324 fd[nr_cpu][counter], strerror(err));
325 if (err == EPERM)
326 printf("Are you root?\n");
327 exit(-1);
328 }
329 assert(fd[nr_cpu][counter] >= 0);
330 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
331
332 /*
333 * First counter acts as the group leader:
334 */
335 if (group && group_fd == -1)
336 group_fd = fd[nr_cpu][counter];
337
338 event_array[nr_poll].fd = fd[nr_cpu][counter];
339 event_array[nr_poll].events = POLLIN;
340 nr_poll++;
341
342 mmap_array[nr_cpu][counter].counter = counter;
343 mmap_array[nr_cpu][counter].prev = 0;
344 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
345 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
346 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
347 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
348 printf("kerneltop error: failed to mmap with %d (%s)\n",
349 errno, strerror(errno));
350 exit(-1);
351 }
352 }
353 nr_cpu++;
354}
355
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200356static int __cmd_record(int argc, const char **argv)
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200357{
358 int i, counter;
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200359 pid_t pid;
360 int ret;
361
362 page_size = sysconf(_SC_PAGE_SIZE);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200363 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
364 assert(nr_cpus <= MAX_NR_CPUS);
365 assert(nr_cpus >= 0);
366
367 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
368 if (output < 0) {
369 perror("failed to create output file");
370 exit(-1);
371 }
372
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300373 if (!system_wide) {
374 open_counters(-1, target_pid != -1 ? target_pid : 0);
375 } else for (i = 0; i < nr_cpus; i++)
376 open_counters(i, target_pid);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200377
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200378 signal(SIGCHLD, sig_handler);
379 signal(SIGINT, sig_handler);
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200380
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300381 if (target_pid == -1) {
382 pid = fork();
383 if (pid < 0)
384 perror("failed to fork");
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200385
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300386 if (!pid) {
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200387 if (execvp(argv[0], (char **)argv)) {
Arnaldo Carvalho de Melo1a853e32009-05-14 22:50:46 -0300388 perror(argv[0]);
389 exit(-1);
390 }
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200391 }
392 }
393
394 if (realtime_prio) {
395 struct sched_param param;
396
397 param.sched_priority = realtime_prio;
398 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
399 printf("Could not set realtime priority.\n");
400 exit(-1);
401 }
402 }
403
404 /*
405 * TODO: store the current /proc/$/maps information somewhere
406 */
407
408 while (!done) {
409 int hits = events;
410
Peter Zijlstra16c8a102009-05-05 17:50:27 +0200411 for (i = 0; i < nr_cpu; i++) {
Peter Zijlstrade9ac072009-04-08 15:01:31 +0200412 for (counter = 0; counter < nr_counters; counter++)
413 mmap_read(&mmap_array[i][counter]);
414 }
415
416 if (hits == events)
417 ret = poll(event_array, nr_poll, 100);
418 }
419
420 return 0;
421}
Ingo Molnar0e9b20b2009-05-26 09:17:18 +0200422
/*
 * Maps one symbolic event name to its event config word.  The names are
 * string literals, so the pointer is const-qualified.
 */
struct event_symbol {
	__u64		event;
	const char	*symbol;
};
427
428static struct event_symbol event_symbols[] = {
429 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
430 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
431 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
432 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
433 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
434 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
435 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
436 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
437 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
438
439 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
440 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
441 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
442 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
443 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
444 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
445 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
446 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
447 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
448 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
449};
450
451/*
452 * Each event can have multiple symbolic names.
453 * Symbolic names are (almost) exactly matched.
454 */
455static __u64 match_event_symbols(const char *str)
456{
457 __u64 config, id;
458 int type;
459 unsigned int i;
460
461 if (sscanf(str, "r%llx", &config) == 1)
462 return config | PERF_COUNTER_RAW_MASK;
463
464 if (sscanf(str, "%d:%llu", &type, &id) == 2)
465 return EID(type, id);
466
467 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
468 if (!strncmp(str, event_symbols[i].symbol,
469 strlen(event_symbols[i].symbol)))
470 return event_symbols[i].event;
471 }
472
473 return ~0ULL;
474}
475
476static int parse_events(const struct option *opt, const char *str, int unset)
477{
478 __u64 config;
479
480again:
481 if (nr_counters == MAX_COUNTERS)
482 return -1;
483
484 config = match_event_symbols(str);
485 if (config == ~0ULL)
486 return -1;
487
488 event_id[nr_counters] = config;
489 nr_counters++;
490
491 str = strstr(str, ",");
492 if (str) {
493 str++;
494 goto again;
495 }
496
497 return 0;
498}
499
/* Help text for -e/--event; filled in by create_events_help(). */
static char events_help[100000];

/*
 * Extract bit-field `name` from an event config word: mask it out, then
 * shift it down.  The argument is parenthesized so that expressions such
 * as `a | b` are masked as a whole.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
509
510
511
512static void create_events_help(void)
513{
514 unsigned int i;
515 char *str;
516 __u64 e;
517
518 str = events_help;
519
520 str += sprintf(str,
521 "event name: [");
522
523 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
524 int type, id;
525
526 e = event_symbols[i].event;
527 type = PERF_COUNTER_TYPE(e);
528 id = PERF_COUNTER_ID(e);
529
530 if (i)
531 str += sprintf(str, "|");
532
533 str += sprintf(str, "%s",
534 event_symbols[i].symbol);
535 }
536
537 str += sprintf(str, "|rNNN]");
538}
539
540static const char * const record_usage[] = {
541 "perf record [<options>] <command>",
542 NULL
543};
544
545const struct option options[] = {
546 OPT_CALLBACK('e', "event", NULL, "event",
547 events_help, parse_events),
548 OPT_INTEGER('c', "count", &default_interval,
549 "event period to sample"),
550 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
551 "number of mmap data pages"),
552 OPT_STRING('o', "output", &output_name, "file",
553 "output file name"),
554 OPT_BOOLEAN('i', "inherit", &inherit,
555 "child tasks inherit counters"),
556 OPT_INTEGER('p', "pid", &target_pid,
557 "record events on existing pid"),
558 OPT_INTEGER('r', "realtime", &realtime_prio,
559 "collect data with this RT SCHED_FIFO priority"),
560 OPT_BOOLEAN('a', "all-cpus", &system_wide,
561 "system-wide collection from all CPUs"),
562 OPT_END()
563};
564
565int cmd_record(int argc, const char **argv, const char *prefix)
566{
567 int counter;
568
569 create_events_help();
570
571 argc = parse_options(argc, argv, options, record_usage, 0);
572 if (!argc)
573 usage_with_options(record_usage, options);
574
575 if (!nr_counters) {
576 nr_counters = 1;
577 event_id[0] = 0;
578 }
579
580 for (counter = 0; counter < nr_counters; counter++) {
581 if (event_count[counter])
582 continue;
583
584 event_count[counter] = default_interval;
585 }
586
587 return __cmd_record(argc, argv);
588}