Merge tag 'sound-3.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
[cascardo/linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef NO_LIBUNWIND_SUPPORT
/* Without libunwind only frame-pointer based unwinding is available. */
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#else
/* Default amount of user stack captured per sample for dwarf unwinding. */
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#endif
42
/*
 * How to treat a pre-existing output file: overwrite it (the default,
 * WRITE_FORCE) or append new data to it (WRITE_APPEND, -A).
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
47
/* Per-invocation state of the 'perf record' command. */
struct perf_record {
	struct perf_tool	tool;		/* callbacks for synthesized events */
	struct perf_record_opts opts;		/* parsed command-line options */
	u64			bytes_written;	/* event payload bytes written so far */
	const char		*output_name;	/* output path; NULL means pick default */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used in diagnostics */
	int			output;		/* output file descriptor */
	unsigned int		page_size;	/* sysconf(_SC_PAGE_SIZE) */
	int			realtime_prio;	/* nonzero: switch to SCHED_FIFO at this prio */
	enum write_mode_t	write_mode;	/* overwrite vs. append */
	bool			no_buildid;	/* skip build-id post-processing */
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;	/* true unless appending to an existing file */
	bool			append_file;
	long			samples;	/* non-empty buffer drains (wakeup accounting) */
	off_t			post_processing_offset;	/* file offset where event data starts */
};
68
69 static void advance_output(struct perf_record *rec, size_t size)
70 {
71         rec->bytes_written += size;
72 }
73
74 static int write_output(struct perf_record *rec, void *buf, size_t size)
75 {
76         while (size) {
77                 int ret = write(rec->output, buf, size);
78
79                 if (ret < 0) {
80                         pr_err("failed to write\n");
81                         return -1;
82                 }
83
84                 size -= ret;
85                 buf += ret;
86
87                 rec->bytes_written += ret;
88         }
89
90         return 0;
91 }
92
93 static int process_synthesized_event(struct perf_tool *tool,
94                                      union perf_event *event,
95                                      struct perf_sample *sample __maybe_unused,
96                                      struct machine *machine __maybe_unused)
97 {
98         struct perf_record *rec = container_of(tool, struct perf_record, tool);
99         if (write_output(rec, event, event->header.size) < 0)
100                 return -1;
101
102         return 0;
103 }
104
/*
 * Drain one mmap'ed perf ring buffer into the output file.
 *
 * Reads everything between the last consumed position (md->prev) and the
 * kernel's head pointer, handling the case where the region wraps around
 * the end of the ring, then advances the tail so the kernel can reuse the
 * space.  Returns 0 on success, -1 if writing to the output failed.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;	/* skip control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	/* Nothing new since the last drain. */
	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	/* Region wraps past the end of the ring: write the tail part first. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Write the (remaining) contiguous part. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);	/* tell the kernel the data was consumed */

out:
	return rc;
}
148
/* Flags set from the signal handler and polled by the main record loop. */
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: remember which signal fired
 * and ask the capture loop to wind down.
 */
static void sig_handler(int sig)
{
	child_finished |= (sig == SIGCHLD);

	done = 1;
	signr = sig;
}
161
/*
 * on_exit() hook: reap (or terminate) the forked workload, then -- if we
 * are exiting because of a signal -- re-raise it with the default handler
 * so our parent sees the true termination status.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* Child still running: ask it to stop before we reap it. */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/* Normal exit, or SIGUSR1 (quiet stop): nothing to re-raise. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
182
183 static bool perf_evlist__equal(struct perf_evlist *evlist,
184                                struct perf_evlist *other)
185 {
186         struct perf_evsel *pos, *pair;
187
188         if (evlist->nr_entries != other->nr_entries)
189                 return false;
190
191         pair = perf_evlist__first(other);
192
193         list_for_each_entry(pos, &evlist->entries, node) {
194                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
195                         return false;
196                 pair = perf_evsel__next(pair);
197         }
198
199         return true;
200 }
201
/*
 * Configure and open all counters in the evlist, mmap their ring buffers
 * and attach any event filters.  Contains a ladder of fallbacks for older
 * kernels (no exclude_guest/exclude_host, no sample_id_all) and for
 * machines without a cycles PMU event (fall back to cpu-clock).
 *
 * Returns 0 on success, negative errno-style value on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config_attrs(evlist, opts);

	if (opts->group)
		perf_evlist__set_leader(evlist);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				/* Insufficient perf_event_paranoid level. */
				ui__error_paranoid();
				rc = -err;
				goto out;
			} else if (err ==  ENODEV && opts->target.cpu_list) {
				pr_err("No such device - did you specify"
				       " an out-of-range profile CPU?\n");
				rc = -err;
				goto out;
			} else if (err == EINVAL) {
				/*
				 * EINVAL from an old kernel: strip the attr
				 * fields it does not know about and retry.
				 */
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
					&& attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				/* Drop the cached name so it is regenerated. */
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				rc = -err;
				goto out;
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s) for event %s. /bin/dmesg may provide "
			      "additional information.\n",
			      err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE &&
			    err == EOPNOTSUPP) {
				pr_err("No hardware sampling interrupt available."
				       " No APIC? If so then you can boot the kernel"
				       " with the \"lapic\" boot parameter to"
				       " force-enable it.\n");
				rc = -err;
				goto out;
			}
#endif

			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			rc = -err;
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	/* When appending, the new evlist must match the recorded one. */
	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
362
363 static int process_buildids(struct perf_record *rec)
364 {
365         u64 size = lseek(rec->output, 0, SEEK_CUR);
366
367         if (size == 0)
368                 return 0;
369
370         rec->session->fd = rec->output;
371         return __perf_session__process_events(rec->session, rec->post_processing_offset,
372                                               size - rec->post_processing_offset,
373                                               size, &build_id__mark_dso_hit_ops);
374 }
375
/*
 * on_exit() hook: finalize the output on successful exit -- add the event
 * payload size to the header, run build-id post-processing, rewrite the
 * header in place and tear down the session/evlist/symbol machinery.
 * Nothing to do in pipe mode (no seekable header) or on error exits.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
395
/*
 * Callback for perf_session__process_machines(): synthesize module and
 * kernel mmap events for each guest machine.  The host machine is handled
 * separately, so it is skipped here.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
431
/*
 * Header-only marker event emitted after each full flush of the mmap
 * buffers; lets the report side sort events from different buffers in
 * rounds.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
436
437 static int perf_record__mmap_read_all(struct perf_record *rec)
438 {
439         int i;
440         int rc = 0;
441
442         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
443                 if (rec->evlist->mmap[i].base) {
444                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
445                                 rc = -1;
446                                 goto out;
447                         }
448                 }
449         }
450
451         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
452                 rc = write_output(rec, &finished_round_event,
453                                   sizeof(finished_round_event));
454
455 out:
456         return rc;
457 }
458
/*
 * Core of 'perf record': set up the output file and session, open the
 * counters, synthesize the initial metadata events (kernel/module maps,
 * thread maps, tracing data when piping) and then loop draining the mmap
 * buffers to disk until the workload finishes or the user interrupts us.
 *
 * Returns 0 on success, negative on error.  Final header fixup and
 * cleanup run from the on_exit() hooks registered here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/*
	 * No -o given: write to stdout when it is a pipe, otherwise fall
	 * back to "perf.data".
	 */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Overwriting a non-empty file: keep a .old backup. */
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			/* Nothing to append to: record a fresh file. */
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* Start with every header feature set, then clear what doesn't apply. */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	/* Appending: load the existing header so we can validate against it. */
	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* Event data starts here; build-id pass later begins at this offset. */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	/*
	 * In pipe mode the consumer cannot read attrs/tracing data from a
	 * header, so synthesize them as events up front.
	 */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	/* Prefer _text; fall back to _stext for kernels that lack it. */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	/* Main capture loop: drain buffers, sleep in poll() when idle. */
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * After 'done' is raised, stop the counters but keep looping
		 * once more so the final events get flushed.
		 */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
718
/* Helpers to build the -b/--branch-filter name -> flag table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;	/* user-visible filter keyword */
	int mode;		/* matching PERF_SAMPLE_BRANCH_* flag */
};

/* NULL-name terminated table of branch filters users may request. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};
739
740 static int
741 parse_branch_stack(const struct option *opt, const char *str, int unset)
742 {
743 #define ONLY_PLM \
744         (PERF_SAMPLE_BRANCH_USER        |\
745          PERF_SAMPLE_BRANCH_KERNEL      |\
746          PERF_SAMPLE_BRANCH_HV)
747
748         uint64_t *mode = (uint64_t *)opt->value;
749         const struct branch_mode *br;
750         char *s, *os = NULL, *p;
751         int ret = -1;
752
753         if (unset)
754                 return 0;
755
756         /*
757          * cannot set it twice, -b + --branch-filter for instance
758          */
759         if (*mode)
760                 return -1;
761
762         /* str may be NULL in case no arg is passed to -b */
763         if (str) {
764                 /* because str is read-only */
765                 s = os = strdup(str);
766                 if (!s)
767                         return -1;
768
769                 for (;;) {
770                         p = strchr(s, ',');
771                         if (p)
772                                 *p = '\0';
773
774                         for (br = branch_modes; br->name; br++) {
775                                 if (!strcasecmp(s, br->name))
776                                         break;
777                         }
778                         if (!br->name) {
779                                 ui__warning("unknown branch filter %s,"
780                                             " check man page\n", s);
781                                 goto error;
782                         }
783
784                         *mode |= br->mode;
785
786                         if (!p)
787                                 break;
788
789                         s = p + 1;
790                 }
791         }
792         ret = 0;
793
794         /* default to any branch */
795         if ((*mode & ~ONLY_PLM) == 0) {
796                 *mode = PERF_SAMPLE_BRANCH_ANY;
797         }
798 error:
799         free(os);
800         return ret;
801 }
802
803 #ifndef NO_LIBUNWIND_SUPPORT
804 static int get_stack_size(char *str, unsigned long *_size)
805 {
806         char *endptr;
807         unsigned long size;
808         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
809
810         size = strtoul(str, &endptr, 0);
811
812         do {
813                 if (*endptr)
814                         break;
815
816                 size = round_up(size, sizeof(u64));
817                 if (!size || size > max_size)
818                         break;
819
820                 *_size = size;
821                 return 0;
822
823         } while (0);
824
825         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
826                max_size, str);
827         return -1;
828 }
829 #endif /* !NO_LIBUNWIND_SUPPORT */
830
831 static int
832 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
833                     int unset)
834 {
835         struct perf_record *rec = (struct perf_record *)opt->value;
836         char *tok, *name, *saveptr = NULL;
837         char *buf;
838         int ret = -1;
839
840         /* --no-call-graph */
841         if (unset)
842                 return 0;
843
844         /* We specified default option if none is provided. */
845         BUG_ON(!arg);
846
847         /* We need buffer that we know we can write to. */
848         buf = malloc(strlen(arg) + 1);
849         if (!buf)
850                 return -ENOMEM;
851
852         strcpy(buf, arg);
853
854         tok = strtok_r((char *)buf, ",", &saveptr);
855         name = tok ? : (char *)buf;
856
857         do {
858                 /* Framepointer style */
859                 if (!strncmp(name, "fp", sizeof("fp"))) {
860                         if (!strtok_r(NULL, ",", &saveptr)) {
861                                 rec->opts.call_graph = CALLCHAIN_FP;
862                                 ret = 0;
863                         } else
864                                 pr_err("callchain: No more arguments "
865                                        "needed for -g fp\n");
866                         break;
867
868 #ifndef NO_LIBUNWIND_SUPPORT
869                 /* Dwarf style */
870                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
871                         ret = 0;
872                         rec->opts.call_graph = CALLCHAIN_DWARF;
873                         rec->opts.stack_dump_size = default_stack_dump_size;
874
875                         tok = strtok_r(NULL, ",", &saveptr);
876                         if (tok) {
877                                 unsigned long size = 0;
878
879                                 ret = get_stack_size(tok, &size);
880                                 rec->opts.stack_dump_size = size;
881                         }
882
883                         if (!ret)
884                                 pr_debug("callchain: stack dump size %d\n",
885                                          rec->opts.stack_dump_size);
886 #endif /* !NO_LIBUNWIND_SUPPORT */
887                 } else {
888                         pr_err("callchain: Unknown -g option "
889                                "value: %s\n", arg);
890                         break;
891                 }
892
893         } while (0);
894
895         free(buf);
896
897         if (!ret)
898                 pr_debug("callchain: type %d\n", rec->opts.call_graph);
899
900         return ret;
901 }
902
/* Usage lines printed by usage_with_options()/parse_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
908
909 /*
910  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
911  * because we need to have access to it in perf_record__exit, that is called
912  * after cmd_record() exits, but since record_options need to be accessible to
913  * builtin-script, leave it here.
914  *
915  * At least we don't touch it in all the other functions here directly.
916  *
917  * Just say no to tons of global variables, sigh.
918  */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,	/* sentinel: not set by -m */
		.user_freq	     = UINT_MAX,	/* sentinel: not set by -F */
		.user_interval	     = ULLONG_MAX,	/* sentinel: not set by -c */
		.freq		     = 4000,		/* default sample frequency (Hz) */
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,	/* overwrite perf.data unless -A given */
	.file_new   = true,
};
932
933 /*
934  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
935  * with it and switch to use the library functions in perf_evlist that came
936  * from builtin-record.c, i.e. use perf_record_opts,
937  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
938  * using pipes, etc.
939  */
/*
 * Command-line option table for 'perf record'.  Non-static (see comment
 * above): builtin-script.c also uses it.  Each entry writes directly into
 * the global 'record' instance or its embedded perf_record_opts.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* default value "fp" is used when -g is given with no argument */
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b takes no argument: enable sampling of any taken branch */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
1007
/*
 * Entry point for 'perf record': parse and validate options, set up the
 * event list and target, then hand off to __cmd_record() to do the actual
 * recording.  Returns 0 on success or a negative errno-style value.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* No workload command and no target (-p/-t/-u/-a/-C): nothing to record. */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	/* -f and -A are mutually exclusive; -A switches to append mode. */
	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	/* cgroup monitoring (-G) requires system-wide mode (-a). */
	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* Validation problems are only warned about; recording proceeds. */
	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		/* save errno before library calls below can clobber it */
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	/* -c (period) and -F (frequency) override the built-in defaults. */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}