tools/perf/builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
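/*
 * Fallback for C libraries that don't provide on_exit() (HAVE_ON_EXIT
 * unset): registered handlers are kept in a fixed-size table and run
 * from an atexit() hook, with the exit code captured by wrapping exit()
 * in a macro.
 */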
34 #ifndef HAVE_ON_EXIT
35 #ifndef ATEXIT_MAX
36 #define ATEXIT_MAX 32
37 #endif
38 static int __on_exit_count = 0;
39 typedef void (*on_exit_func_t) (int, void *);
40 static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
41 static void *__on_exit_args[ATEXIT_MAX];
42 static int __exitcode = 0;
43 static void __handle_on_exit_funcs(void);
44 static int on_exit(on_exit_func_t function, void *arg);
45 #define exit(x) (exit)(__exitcode = (x))
46
47 static int on_exit(on_exit_func_t function, void *arg)
48 {
49         if (__on_exit_count == ATEXIT_MAX)
50                 return -ENOMEM;
51         else if (__on_exit_count == 0)
52                 atexit(__handle_on_exit_funcs);
53         __on_exit_funcs[__on_exit_count] = function;
54         __on_exit_args[__on_exit_count++] = arg;
55         return 0;
56 }
57
58 static void __handle_on_exit_funcs(void)
59 {
60         int i;
61         for (i = 0; i < __on_exit_count; i++)
62                 __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
63 }
64 #endif
65
66 enum write_mode_t {
67         WRITE_FORCE,
68         WRITE_APPEND
69 };
70
71 struct perf_record {
72         struct perf_tool        tool;
73         struct perf_record_opts opts;
74         u64                     bytes_written;
75         const char              *output_name;
76         struct perf_evlist      *evlist;
77         struct perf_session     *session;
78         const char              *progname;
79         int                     output;
80         unsigned int            page_size;
81         int                     realtime_prio;
82         enum write_mode_t       write_mode;
83         bool                    no_buildid;
84         bool                    no_buildid_cache;
85         bool                    force;
86         bool                    file_new;
87         bool                    append_file;
88         long                    samples;
89         off_t                   post_processing_offset;
90 };
91
92 static void advance_output(struct perf_record *rec, size_t size)
93 {
94         rec->bytes_written += size;
95 }
96
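/*
 * write() may return a short count, so loop until the whole buffer has
 * been flushed to the output file, accounting the bytes written.
 */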
97 static int write_output(struct perf_record *rec, void *buf, size_t size)
98 {
99         while (size) {
100                 int ret = write(rec->output, buf, size);
101
102                 if (ret < 0) {
103                         pr_err("failed to write\n");
104                         return -1;
105                 }
106
107                 size -= ret;
108                 buf += ret;
109
110                 rec->bytes_written += ret;
111         }
112
113         return 0;
114 }
115
116 static int process_synthesized_event(struct perf_tool *tool,
117                                      union perf_event *event,
118                                      struct perf_sample *sample __maybe_unused,
119                                      struct machine *machine __maybe_unused)
120 {
121         struct perf_record *rec = container_of(tool, struct perf_record, tool);
122         if (write_output(rec, event, event->header.size) < 0)
123                 return -1;
124
125         return 0;
126 }
127
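/*
 * Drain one mmap'ed ring buffer: copy everything between the previously
 * consumed position (md->prev) and the current kernel head to the output
 * file, splitting the copy in two when the data wraps around the end of
 * the ring, then publish the new tail so the kernel can reuse the space.
 */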
128 static int perf_record__mmap_read(struct perf_record *rec,
129                                    struct perf_mmap *md)
130 {
131         unsigned int head = perf_mmap__read_head(md);
132         unsigned int old = md->prev;
133         unsigned char *data = md->base + rec->page_size;
134         unsigned long size;
135         void *buf;
136         int rc = 0;
137
138         if (old == head)
139                 return 0;
140
141         rec->samples++;
142
143         size = head - old;
144
145         if ((old & md->mask) + size != (head & md->mask)) {
146                 buf = &data[old & md->mask];
147                 size = md->mask + 1 - (old & md->mask);
148                 old += size;
149
150                 if (write_output(rec, buf, size) < 0) {
151                         rc = -1;
152                         goto out;
153                 }
154         }
155
156         buf = &data[old & md->mask];
157         size = head - old;
158         old += size;
159
160         if (write_output(rec, buf, size) < 0) {
161                 rc = -1;
162                 goto out;
163         }
164
165         md->prev = old;
166         perf_mmap__write_tail(md, old);
167
168 out:
169         return rc;
170 }
171
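/*
 * Signal handling: the handlers only set flags; the main loop in
 * __cmd_record() and the exit handlers check them and shut down cleanly.
 */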
172 static volatile int done = 0;
173 static volatile int signr = -1;
174 static volatile int child_finished = 0;
175
176 static void sig_handler(int sig)
177 {
178         if (sig == SIGCHLD)
179                 child_finished = 1;
180
181         done = 1;
182         signr = sig;
183 }
184
185 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
186 {
187         struct perf_record *rec = arg;
188         int status;
189
190         if (rec->evlist->workload.pid > 0) {
191                 if (!child_finished)
192                         kill(rec->evlist->workload.pid, SIGTERM);
193
194                 wait(&status);
195                 if (WIFSIGNALED(status))
196                         psignal(WTERMSIG(status), rec->progname);
197         }
198
199         if (signr == -1 || signr == SIGUSR1)
200                 return;
201
202         signal(signr, SIG_DFL);
203         kill(getpid(), signr);
204 }
205
206 static bool perf_evlist__equal(struct perf_evlist *evlist,
207                                struct perf_evlist *other)
208 {
209         struct perf_evsel *pos, *pair;
210
211         if (evlist->nr_entries != other->nr_entries)
212                 return false;
213
214         pair = perf_evlist__first(other);
215
216         list_for_each_entry(pos, &evlist->entries, node) {
217                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
218                         return false;
219                 pair = perf_evsel__next(pair);
220         }
221
222         return true;
223 }
224
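/*
 * Open all configured events on the target CPUs/threads, falling back to
 * alternative event encodings where possible, then apply event filters
 * and mmap the ring buffers.  When appending, the event list must match
 * the one already present in the output file.
 */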
225 static int perf_record__open(struct perf_record *rec)
226 {
227         char msg[512];
228         struct perf_evsel *pos;
229         struct perf_evlist *evlist = rec->evlist;
230         struct perf_session *session = rec->session;
231         struct perf_record_opts *opts = &rec->opts;
232         int rc = 0;
233
234         perf_evlist__config(evlist, opts);
235
236         list_for_each_entry(pos, &evlist->entries, node) {
237 try_again:
238                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
239                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
240                                 if (verbose)
241                                         ui__warning("%s\n", msg);
242                                 goto try_again;
243                         }
244
245                         rc = -errno;
246                         perf_evsel__open_strerror(pos, &opts->target,
247                                                   errno, msg, sizeof(msg));
248                         ui__error("%s\n", msg);
249                         goto out;
250                 }
251         }
252
253         if (perf_evlist__apply_filters(evlist)) {
254                 error("failed to set filter with %d (%s)\n", errno,
255                         strerror(errno));
256                 rc = -1;
257                 goto out;
258         }
259
260         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
261                 if (errno == EPERM) {
262                         pr_err("Permission error mapping pages.\n"
263                                "Consider increasing "
264                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
265                                "or try again with a smaller value of -m/--mmap_pages.\n"
266                                "(current value: %u)\n", opts->mmap_pages);
267                         rc = -errno;
268                 } else if (!is_power_of_2(opts->mmap_pages) &&
269                            (opts->mmap_pages != UINT_MAX)) {
270                         pr_err("--mmap_pages/-m value must be a power of two.\n");
271                         rc = -EINVAL;
272                 } else {
273                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
274                         rc = -errno;
275                 }
276                 goto out;
277         }
278
279         if (rec->file_new)
280                 session->evlist = evlist;
281         else {
282                 if (!perf_evlist__equal(session->evlist, evlist)) {
283                         fprintf(stderr, "incompatible append\n");
284                         rc = -1;
285                         goto out;
286                 }
287         }
288
289         perf_session__set_id_hdr_size(session);
290 out:
291         return rc;
292 }
293
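/*
 * Post-process the recorded data (from post_processing_offset to the
 * current end of file) so that DSOs hit by samples are marked and their
 * build-ids can later be written into the header.
 */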
294 static int process_buildids(struct perf_record *rec)
295 {
296         u64 size = lseek(rec->output, 0, SEEK_CUR);
297
298         if (size == 0)
299                 return 0;
300
301         rec->session->fd = rec->output;
302         return __perf_session__process_events(rec->session, rec->post_processing_offset,
303                                               size - rec->post_processing_offset,
304                                               size, &build_id__mark_dso_hit_ops);
305 }
306
307 static void perf_record__exit(int status, void *arg)
308 {
309         struct perf_record *rec = arg;
310
311         if (status != 0)
312                 return;
313
314         if (!rec->opts.pipe_output) {
315                 rec->session->header.data_size += rec->bytes_written;
316
317                 if (!rec->no_buildid)
318                         process_buildids(rec);
319                 perf_session__write_header(rec->session, rec->evlist,
320                                            rec->output, true);
321                 perf_session__delete(rec->session);
322                 perf_evlist__delete(rec->evlist);
323                 symbol__exit();
324         }
325 }
326
327 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
328 {
329         int err;
330         struct perf_tool *tool = data;
331         /*
332          * For the guest kernel, when processing the record & report
333          * subcommands, we synthesize module mmaps before the guest kernel
334          * mmap and trigger a DSO preload, because guest module symbols
335          * are by default loaded from guest kallsyms rather than from
336          * /lib/modules/XXX/XXX.  This avoids missing symbols when the
337          * first sampled address is in a module, not in the guest kernel.
338          */
339         err = perf_event__synthesize_modules(tool, process_synthesized_event,
340                                              machine);
341         if (err < 0)
342                 pr_err("Couldn't record guest kernel [%d]'s reference"
343                        " relocation symbol.\n", machine->pid);
344
345         /*
346          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
347          * have no _text sometimes.
348          */
349         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
350                                                  machine, "_text");
351         if (err < 0)
352                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
353                                                          machine, "_stext");
354         if (err < 0)
355                 pr_err("Couldn't record guest kernel [%d]'s reference"
356                        " relocation symbol.\n", machine->pid);
357 }
358
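/*
 * PERF_RECORD_FINISHED_ROUND is emitted after each pass over all mmap
 * buffers (when tracing data is present), giving the report side an
 * ordering boundary for flushing queued events.
 */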
359 static struct perf_event_header finished_round_event = {
360         .size = sizeof(struct perf_event_header),
361         .type = PERF_RECORD_FINISHED_ROUND,
362 };
363
364 static int perf_record__mmap_read_all(struct perf_record *rec)
365 {
366         int i;
367         int rc = 0;
368
369         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
370                 if (rec->evlist->mmap[i].base) {
371                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
372                                 rc = -1;
373                                 goto out;
374                         }
375                 }
376         }
377
378         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
379                 rc = write_output(rec, &finished_round_event,
380                                   sizeof(finished_round_event));
381
382 out:
383         return rc;
384 }
385
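/*
 * The core of 'perf record': set up the output file and session, write
 * the header, synthesize the metadata events (kernel/module mmaps,
 * existing threads, guest machines), start the workload if one was given
 * and then drain the mmap ring buffers until we are told to stop.
 */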
386 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
387 {
388         struct stat st;
389         int flags;
390         int err, output, feat;
391         unsigned long waking = 0;
392         const bool forks = argc > 0;
393         struct machine *machine;
394         struct perf_tool *tool = &rec->tool;
395         struct perf_record_opts *opts = &rec->opts;
396         struct perf_evlist *evsel_list = rec->evlist;
397         const char *output_name = rec->output_name;
398         struct perf_session *session;
399         bool disabled = false;
400
401         rec->progname = argv[0];
402
403         rec->page_size = sysconf(_SC_PAGE_SIZE);
404
405         on_exit(perf_record__sig_exit, rec);
406         signal(SIGCHLD, sig_handler);
407         signal(SIGINT, sig_handler);
408         signal(SIGUSR1, sig_handler);
409
410         if (!output_name) {
411                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
412                         opts->pipe_output = true;
413                 else
414                         rec->output_name = output_name = "perf.data";
415         }
416         if (output_name) {
417                 if (!strcmp(output_name, "-"))
418                         opts->pipe_output = true;
419                 else if (!stat(output_name, &st) && st.st_size) {
420                         if (rec->write_mode == WRITE_FORCE) {
421                                 char oldname[PATH_MAX];
422                                 snprintf(oldname, sizeof(oldname), "%s.old",
423                                          output_name);
424                                 unlink(oldname);
425                                 rename(output_name, oldname);
426                         }
427                 } else if (rec->write_mode == WRITE_APPEND) {
428                         rec->write_mode = WRITE_FORCE;
429                 }
430         }
431
432         flags = O_CREAT|O_RDWR;
433         if (rec->write_mode == WRITE_APPEND)
434                 rec->file_new = 0;
435         else
436                 flags |= O_TRUNC;
437
438         if (opts->pipe_output)
439                 output = STDOUT_FILENO;
440         else
441                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
442         if (output < 0) {
443                 perror("failed to create output file");
444                 return -1;
445         }
446
447         rec->output = output;
448
449         session = perf_session__new(output_name, O_WRONLY,
450                                     rec->write_mode == WRITE_FORCE, false, NULL);
451         if (session == NULL) {
452                 pr_err("Not enough memory for reading perf file header\n");
453                 return -1;
454         }
455
456         rec->session = session;
457
458         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
459                 perf_header__set_feat(&session->header, feat);
460
461         if (rec->no_buildid)
462                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
463
464         if (!have_tracepoints(&evsel_list->entries))
465                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
466
467         if (!rec->opts.branch_stack)
468                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
469
470         if (!rec->file_new) {
471                 err = perf_session__read_header(session, output);
472                 if (err < 0)
473                         goto out_delete_session;
474         }
475
476         if (forks) {
477                 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
478                 if (err < 0) {
479                         pr_err("Couldn't run the workload!\n");
480                         goto out_delete_session;
481                 }
482         }
483
484         if (perf_record__open(rec) != 0) {
485                 err = -1;
486                 goto out_delete_session;
487         }
488
489         if (!evsel_list->nr_groups)
490                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
491
492         /*
493          * perf_session__delete(session) will be called at perf_record__exit()
494          */
495         on_exit(perf_record__exit, rec);
496
497         if (opts->pipe_output) {
498                 err = perf_header__write_pipe(output);
499                 if (err < 0)
500                         goto out_delete_session;
501         } else if (rec->file_new) {
502                 err = perf_session__write_header(session, evsel_list,
503                                                  output, false);
504                 if (err < 0)
505                         goto out_delete_session;
506         }
507
508         if (!rec->no_buildid
509             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
510                 pr_err("Couldn't generate buildids. "
511                        "Use --no-buildid to profile anyway.\n");
512                 err = -1;
513                 goto out_delete_session;
514         }
515
516         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
517
518         machine = &session->machines.host;
519
520         if (opts->pipe_output) {
521                 err = perf_event__synthesize_attrs(tool, session,
522                                                    process_synthesized_event);
523                 if (err < 0) {
524                         pr_err("Couldn't synthesize attrs.\n");
525                         goto out_delete_session;
526                 }
527
528                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
529                                                          machine);
530                 if (err < 0) {
531                         pr_err("Couldn't synthesize event_types.\n");
532                         goto out_delete_session;
533                 }
534
535                 if (have_tracepoints(&evsel_list->entries)) {
536                         /*
537                          * FIXME: err <= 0 here actually means that
538                          * there were no tracepoints, so it's not really
539                          * an error, just that we don't need to
540                          * synthesize anything.  We really should
541                          * return this more properly and also propagate
542                          * the errors that currently end up calling die().
543                          */
544                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
545                                                                   process_synthesized_event);
546                         if (err <= 0) {
547                                 pr_err("Couldn't record tracing data.\n");
548                                 goto out_delete_session;
549                         }
550                         advance_output(rec, err);
551                 }
552         }
553
554         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
555                                                  machine, "_text");
556         if (err < 0)
557                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
558                                                          machine, "_stext");
559         if (err < 0)
560                 pr_err("Couldn't record kernel reference relocation symbol\n"
561                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
562                        "Check /proc/kallsyms permission or run as root.\n");
563
564         err = perf_event__synthesize_modules(tool, process_synthesized_event,
565                                              machine);
566         if (err < 0)
567                 pr_err("Couldn't record kernel module information.\n"
568                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
569                        "Check /proc/modules permission or run as root.\n");
570
571         if (perf_guest) {
572                 machines__process_guests(&session->machines,
573                                          perf_event__synthesize_guest_os, tool);
574         }
575
576         if (perf_target__has_task(&opts->target))
577                 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
578                                                   process_synthesized_event,
579                                                   machine);
580         else if (perf_target__has_cpu(&opts->target))
581                 err = perf_event__synthesize_threads(tool, process_synthesized_event,
582                                                machine);
583         else /* command specified */
584                 err = 0;
585
586         if (err != 0)
587                 goto out_delete_session;
588
589         if (rec->realtime_prio) {
590                 struct sched_param param;
591
592                 param.sched_priority = rec->realtime_prio;
593                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
594                         pr_err("Could not set realtime priority.\n");
595                         err = -1;
596                         goto out_delete_session;
597                 }
598         }
599
600         /*
601          * When perf is starting the traced process, all the events
602          * (apart from group members) have enable_on_exec=1 set,
603          * so don't spoil it by prematurely enabling them.
604          */
605         if (!perf_target__none(&opts->target))
606                 perf_evlist__enable(evsel_list);
607
608         /*
609          * Let the child rip
610          */
611         if (forks)
612                 perf_evlist__start_workload(evsel_list);
613
614         for (;;) {
615                 int hits = rec->samples;
616
617                 if (perf_record__mmap_read_all(rec) < 0) {
618                         err = -1;
619                         goto out_delete_session;
620                 }
621
622                 if (hits == rec->samples) {
623                         if (done)
624                                 break;
625                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
626                         waking++;
627                 }
628
629                 /*
630                  * When perf is starting the traced process, at the end events
631                  * die with the process and we wait for that. Thus no need to
632                  * disable events in this case.
633                  */
634                 if (done && !disabled && !perf_target__none(&opts->target)) {
635                         perf_evlist__disable(evsel_list);
636                         disabled = true;
637                 }
638         }
639
640         if (quiet || signr == SIGUSR1)
641                 return 0;
642
643         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
644
645         /*
646          * Approximate RIP event size: 24 bytes.
647          */
648         fprintf(stderr,
649                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
650                 (double)rec->bytes_written / 1024.0 / 1024.0,
651                 output_name,
652                 rec->bytes_written / 24);
653
654         return 0;
655
656 out_delete_session:
657         perf_session__delete(session);
658         return err;
659 }
660
661 #define BRANCH_OPT(n, m) \
662         { .name = n, .mode = (m) }
663
664 #define BRANCH_END { .name = NULL }
665
666 struct branch_mode {
667         const char *name;
668         int mode;
669 };
670
671 static const struct branch_mode branch_modes[] = {
672         BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
673         BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
674         BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
675         BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
676         BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
677         BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
678         BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
679         BRANCH_END
680 };
681
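/*
 * Parse the comma-separated branch filter list (-b/-j) into a
 * PERF_SAMPLE_BRANCH_* mask.  If only privilege-level bits were given,
 * default to sampling any taken branch.
 */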
682 static int
683 parse_branch_stack(const struct option *opt, const char *str, int unset)
684 {
685 #define ONLY_PLM \
686         (PERF_SAMPLE_BRANCH_USER        |\
687          PERF_SAMPLE_BRANCH_KERNEL      |\
688          PERF_SAMPLE_BRANCH_HV)
689
690         uint64_t *mode = (uint64_t *)opt->value;
691         const struct branch_mode *br;
692         char *s, *os = NULL, *p;
693         int ret = -1;
694
695         if (unset)
696                 return 0;
697
698         /*
699          * cannot set it twice, -b + --branch-filter for instance
700          */
701         if (*mode)
702                 return -1;
703
704         /* str may be NULL in case no arg is passed to -b */
705         if (str) {
706                 /* because str is read-only */
707                 s = os = strdup(str);
708                 if (!s)
709                         return -1;
710
711                 for (;;) {
712                         p = strchr(s, ',');
713                         if (p)
714                                 *p = '\0';
715
716                         for (br = branch_modes; br->name; br++) {
717                                 if (!strcasecmp(s, br->name))
718                                         break;
719                         }
720                         if (!br->name) {
721                                 ui__warning("unknown branch filter %s,"
722                                             " check man page\n", s);
723                                 goto error;
724                         }
725
726                         *mode |= br->mode;
727
728                         if (!p)
729                                 break;
730
731                         s = p + 1;
732                 }
733         }
734         ret = 0;
735
736         /* default to any branch */
737         if ((*mode & ~ONLY_PLM) == 0) {
738                 *mode = PERF_SAMPLE_BRANCH_ANY;
739         }
740 error:
741         free(os);
742         return ret;
743 }
744
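/*
 * Parse the user-supplied stack dump size for dwarf call-graph recording:
 * round it up to a multiple of u64 and reject zero or anything above
 * USHRT_MAX rounded down to a u64 multiple.
 */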
745 #ifdef LIBUNWIND_SUPPORT
746 static int get_stack_size(char *str, unsigned long *_size)
747 {
748         char *endptr;
749         unsigned long size;
750         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
751
752         size = strtoul(str, &endptr, 0);
753
754         do {
755                 if (*endptr)
756                         break;
757
758                 size = round_up(size, sizeof(u64));
759                 if (!size || size > max_size)
760                         break;
761
762                 *_size = size;
763                 return 0;
764
765         } while (0);
766
767         pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
768                max_size, str);
769         return -1;
770 }
771 #endif /* LIBUNWIND_SUPPORT */
772
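/*
 * Parse the -g/--call-graph argument: "fp" selects frame-pointer based
 * unwinding; "dwarf[,dump_size]" is accepted when built with libunwind
 * support and records a user stack dump of the given size.
 */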
773 int record_parse_callchain_opt(const struct option *opt,
774                                const char *arg, int unset)
775 {
776         struct perf_record_opts *opts = opt->value;
777         char *tok, *name, *saveptr = NULL;
778         char *buf;
779         int ret = -1;
780
781         /* --no-call-graph */
782         if (unset)
783                 return 0;
784
785         /* We specified default option if none is provided. */
786         BUG_ON(!arg);
787
788         /* We need buffer that we know we can write to. */
789         buf = malloc(strlen(arg) + 1);
790         if (!buf)
791                 return -ENOMEM;
792
793         strcpy(buf, arg);
794
795         tok = strtok_r((char *)buf, ",", &saveptr);
796         name = tok ? : (char *)buf;
797
798         do {
799                 /* Framepointer style */
800                 if (!strncmp(name, "fp", sizeof("fp"))) {
801                         if (!strtok_r(NULL, ",", &saveptr)) {
802                                 opts->call_graph = CALLCHAIN_FP;
803                                 ret = 0;
804                         } else
805                                 pr_err("callchain: No more arguments "
806                                        "needed for -g fp\n");
807                         break;
808
809 #ifdef LIBUNWIND_SUPPORT
810                 /* Dwarf style */
811                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
812                         const unsigned long default_stack_dump_size = 8192;
813
814                         ret = 0;
815                         opts->call_graph = CALLCHAIN_DWARF;
816                         opts->stack_dump_size = default_stack_dump_size;
817
818                         tok = strtok_r(NULL, ",", &saveptr);
819                         if (tok) {
820                                 unsigned long size = 0;
821
822                                 ret = get_stack_size(tok, &size);
823                                 opts->stack_dump_size = size;
824                         }
825
826                         if (!ret)
827                                 pr_debug("callchain: stack dump size %d\n",
828                                          opts->stack_dump_size);
829 #endif /* LIBUNWIND_SUPPORT */
830                 } else {
831                         pr_err("callchain: Unknown -g option "
832                                "value: %s\n", arg);
833                         break;
834                 }
835
836         } while (0);
837
838         free(buf);
839
840         if (!ret)
841                 pr_debug("callchain: type %d\n", opts->call_graph);
842
843         return ret;
844 }
845
846 static const char * const record_usage[] = {
847         "perf record [<options>] [<command>]",
848         "perf record [<options>] -- <command> [<options>]",
849         NULL
850 };
851
852 /*
853  * XXX Ideally this would be local to cmd_record() and passed to a
854  * perf_record__new(), because we need access to it in perf_record__exit(),
855  * which is called after cmd_record() exits.  But since record_options
856  * needs to be accessible to builtin-script, leave it here.
857  *
858  * At least we don't touch it in all the other functions here directly.
859  *
860  * Just say no to tons of global variables, sigh.
861  */
862 static struct perf_record record = {
863         .opts = {
864                 .mmap_pages          = UINT_MAX,
865                 .user_freq           = UINT_MAX,
866                 .user_interval       = ULLONG_MAX,
867                 .freq                = 4000,
868                 .target              = {
869                         .uses_mmap   = true,
870                 },
871         },
872         .write_mode = WRITE_FORCE,
873         .file_new   = true,
874 };
875
876 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
877
878 #ifdef LIBUNWIND_SUPPORT
879 const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
880 #else
881 const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
882 #endif
883
884 /*
885  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
886  * with it and switch to use the library functions in perf_evlist that came
887  * from builtin-record.c, i.e. use perf_record_opts,
888  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
889  * using pipes, etc.
890  */
891 const struct option record_options[] = {
892         OPT_CALLBACK('e', "event", &record.evlist, "event",
893                      "event selector. use 'perf list' to list available events",
894                      parse_events_option),
895         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
896                      "event filter", parse_filter),
897         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
898                     "record events on existing process id"),
899         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
900                     "record events on existing thread id"),
901         OPT_INTEGER('r', "realtime", &record.realtime_prio,
902                     "collect data with this RT SCHED_FIFO priority"),
903         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
904                     "collect data without buffering"),
905         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
906                     "collect raw sample records from all opened counters"),
907         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
908                             "system-wide collection from all CPUs"),
909         OPT_BOOLEAN('A', "append", &record.append_file,
910                             "append to the output file to do incremental profiling"),
911         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
912                     "list of cpus to monitor"),
913         OPT_BOOLEAN('f', "force", &record.force,
914                         "overwrite existing data file (deprecated)"),
915         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
916         OPT_STRING('o', "output", &record.output_name, "file",
917                     "output file name"),
918         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
919                     "child tasks do not inherit counters"),
920         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
921         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
922                      "number of mmap data pages"),
923         OPT_BOOLEAN(0, "group", &record.opts.group,
924                     "put the counters into a counter group"),
925         OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
926                              "mode[,dump_size]", record_callchain_help,
927                              &record_parse_callchain_opt, "fp"),
928         OPT_INCR('v', "verbose", &verbose,
929                     "be more verbose (show counter open errors, etc)"),
930         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
931         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
932                     "per thread counts"),
933         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
934                     "Sample addresses"),
935         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
936         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
937         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
938                     "don't sample"),
939         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
940                     "do not update the buildid cache"),
941         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
942                     "do not collect buildids in perf.data"),
943         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
944                      "monitor event in cgroup name only",
945                      parse_cgroups),
946         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
947                    "user to profile"),
948
949         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
950                      "branch any", "sample any taken branches",
951                      parse_branch_stack),
952
953         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
954                      "branch filter mask", "branch stack filter modes",
955                      parse_branch_stack),
956         OPT_END()
957 };
958
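/*
 * Entry point: parse the command line options, validate the target
 * (pid/tid/cpu/uid), create the cpu and thread maps, resolve the
 * sampling period/frequency and finally hand off to __cmd_record().
 */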
959 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
960 {
961         int err = -ENOMEM;
962         struct perf_evsel *pos;
963         struct perf_evlist *evsel_list;
964         struct perf_record *rec = &record;
965         char errbuf[BUFSIZ];
966
967         evsel_list = perf_evlist__new(NULL, NULL);
968         if (evsel_list == NULL)
969                 return -ENOMEM;
970
971         rec->evlist = evsel_list;
972
973         argc = parse_options(argc, argv, record_options, record_usage,
974                             PARSE_OPT_STOP_AT_NON_OPTION);
975         if (!argc && perf_target__none(&rec->opts.target))
976                 usage_with_options(record_usage, record_options);
977
978         if (rec->force && rec->append_file) {
979                 ui__error("Can't overwrite and append at the same time."
980                           " You need to choose between -f and -A\n");
981                 usage_with_options(record_usage, record_options);
982         } else if (rec->append_file) {
983                 rec->write_mode = WRITE_APPEND;
984         } else {
985                 rec->write_mode = WRITE_FORCE;
986         }
987
988         if (nr_cgroups && !rec->opts.target.system_wide) {
989                 ui__error("cgroup monitoring only available in"
990                           " system-wide mode\n");
991                 usage_with_options(record_usage, record_options);
992         }
993
994         symbol__init();
995
996         if (symbol_conf.kptr_restrict)
997                 pr_warning(
998 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
999 "check /proc/sys/kernel/kptr_restrict.\n\n"
1000 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1001 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1002 "Samples in kernel modules won't be resolved at all.\n\n"
1003 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1004 "even with a suitable vmlinux or kallsyms file.\n\n");
1005
1006         if (rec->no_buildid_cache || rec->no_buildid)
1007                 disable_buildid_cache();
1008
1009         if (evsel_list->nr_entries == 0 &&
1010             perf_evlist__add_default(evsel_list) < 0) {
1011                 pr_err("Not enough memory for event selector list\n");
1012                 goto out_symbol_exit;
1013         }
1014
1015         err = perf_target__validate(&rec->opts.target);
1016         if (err) {
1017                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1018                 ui__warning("%s", errbuf);
1019         }
1020
1021         err = perf_target__parse_uid(&rec->opts.target);
1022         if (err) {
1023                 int saved_errno = errno;
1024
1025                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1026                 ui__error("%s", errbuf);
1027
1028                 err = -saved_errno;
1029                 goto out_free_fd;
1030         }
1031
1032         err = -ENOMEM;
1033         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1034                 usage_with_options(record_usage, record_options);
1035
1036         list_for_each_entry(pos, &evsel_list->entries, node) {
1037                 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1038                         goto out_free_fd;
1039         }
1040
1041         if (rec->opts.user_interval != ULLONG_MAX)
1042                 rec->opts.default_interval = rec->opts.user_interval;
1043         if (rec->opts.user_freq != UINT_MAX)
1044                 rec->opts.freq = rec->opts.user_freq;
1045
1046         /*
1047          * User specified count overrides default frequency.
1048          */
1049         if (rec->opts.default_interval)
1050                 rec->opts.freq = 0;
1051         else if (rec->opts.freq) {
1052                 rec->opts.default_interval = rec->opts.freq;
1053         } else {
1054                 ui__error("frequency and count are zero, aborting\n");
1055                 err = -EINVAL;
1056                 goto out_free_fd;
1057         }
1058
1059         err = __cmd_record(&record, argc, argv);
1060 out_free_fd:
1061         perf_evlist__delete_maps(evsel_list);
1062 out_symbol_exit:
1063         symbol__exit();
1064         return err;
1065 }