tools/perf/builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
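/*
 * Fallback on_exit() implementation for C libraries that do not provide it
 * (i.e. when HAVE_ON_EXIT is not defined): up to ATEXIT_MAX handlers plus
 * their arguments are stashed here and run from a single atexit() hook,
 * while the exit() wrapper macro captures the exit status so it can be
 * passed on to each handler.
 */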
34 #ifndef HAVE_ON_EXIT
35 #ifndef ATEXIT_MAX
36 #define ATEXIT_MAX 32
37 #endif
38 static int __on_exit_count = 0;
39 typedef void (*on_exit_func_t) (int, void *);
40 static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
41 static void *__on_exit_args[ATEXIT_MAX];
42 static int __exitcode = 0;
43 static void __handle_on_exit_funcs(void);
44 static int on_exit(on_exit_func_t function, void *arg);
45 #define exit(x) (exit)(__exitcode = (x))
46
47 static int on_exit(on_exit_func_t function, void *arg)
48 {
49         if (__on_exit_count == ATEXIT_MAX)
50                 return -ENOMEM;
51         else if (__on_exit_count == 0)
52                 atexit(__handle_on_exit_funcs);
53         __on_exit_funcs[__on_exit_count] = function;
54         __on_exit_args[__on_exit_count++] = arg;
55         return 0;
56 }
57
58 static void __handle_on_exit_funcs(void)
59 {
60         int i;
61         for (i = 0; i < __on_exit_count; i++)
62                 __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
63 }
64 #endif
65
66 enum write_mode_t {
67         WRITE_FORCE,
68         WRITE_APPEND
69 };
70
71 struct perf_record {
72         struct perf_tool        tool;
73         struct perf_record_opts opts;
74         u64                     bytes_written;
75         const char              *output_name;
76         struct perf_evlist      *evlist;
77         struct perf_session     *session;
78         const char              *progname;
79         int                     output;
80         unsigned int            page_size;
81         int                     realtime_prio;
82         enum write_mode_t       write_mode;
83         bool                    no_buildid;
84         bool                    no_buildid_cache;
85         bool                    force;
86         bool                    file_new;
87         bool                    append_file;
88         long                    samples;
89         off_t                   post_processing_offset;
90 };
91
92 static void advance_output(struct perf_record *rec, size_t size)
93 {
94         rec->bytes_written += size;
95 }
96
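/*
 * Write the whole buffer to the output file, retrying on short writes, and
 * account the bytes in rec->bytes_written.  Returns 0 on success or -1 on a
 * write error.
 */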
97 static int write_output(struct perf_record *rec, void *buf, size_t size)
98 {
99         while (size) {
100                 int ret = write(rec->output, buf, size);
101
102                 if (ret < 0) {
103                         pr_err("failed to write\n");
104                         return -1;
105                 }
106
107                 size -= ret;
108                 buf += ret;
109
110                 rec->bytes_written += ret;
111         }
112
113         return 0;
114 }
115
116 static int process_synthesized_event(struct perf_tool *tool,
117                                      union perf_event *event,
118                                      struct perf_sample *sample __maybe_unused,
119                                      struct machine *machine __maybe_unused)
120 {
121         struct perf_record *rec = container_of(tool, struct perf_record, tool);
122         if (write_output(rec, event, event->header.size) < 0)
123                 return -1;
124
125         return 0;
126 }
127
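/*
 * Drain one mmap ring buffer into the output file.  If the new data wraps
 * around the end of the buffer, the segment up to the end is written first,
 * then the rest from the start; finally the tail is advanced so the kernel
 * can reuse the space.
 */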
128 static int perf_record__mmap_read(struct perf_record *rec,
129                                    struct perf_mmap *md)
130 {
131         unsigned int head = perf_mmap__read_head(md);
132         unsigned int old = md->prev;
133         unsigned char *data = md->base + rec->page_size;
134         unsigned long size;
135         void *buf;
136         int rc = 0;
137
138         if (old == head)
139                 return 0;
140
141         rec->samples++;
142
143         size = head - old;
144
145         if ((old & md->mask) + size != (head & md->mask)) {
146                 buf = &data[old & md->mask];
147                 size = md->mask + 1 - (old & md->mask);
148                 old += size;
149
150                 if (write_output(rec, buf, size) < 0) {
151                         rc = -1;
152                         goto out;
153                 }
154         }
155
156         buf = &data[old & md->mask];
157         size = head - old;
158         old += size;
159
160         if (write_output(rec, buf, size) < 0) {
161                 rc = -1;
162                 goto out;
163         }
164
165         md->prev = old;
166         perf_mmap__write_tail(md, old);
167
168 out:
169         return rc;
170 }
171
172 static volatile int done = 0;
173 static volatile int signr = -1;
174 static volatile int child_finished = 0;
175
176 static void sig_handler(int sig)
177 {
178         if (sig == SIGCHLD)
179                 child_finished = 1;
180
181         done = 1;
182         signr = sig;
183 }
184
185 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
186 {
187         struct perf_record *rec = arg;
188         int status;
189
190         if (rec->evlist->workload.pid > 0) {
191                 if (!child_finished)
192                         kill(rec->evlist->workload.pid, SIGTERM);
193
194                 wait(&status);
195                 if (WIFSIGNALED(status))
196                         psignal(WTERMSIG(status), rec->progname);
197         }
198
199         if (signr == -1 || signr == SIGUSR1)
200                 return;
201
202         signal(signr, SIG_DFL);
203         kill(getpid(), signr);
204 }
205
206 static bool perf_evlist__equal(struct perf_evlist *evlist,
207                                struct perf_evlist *other)
208 {
209         struct perf_evsel *pos, *pair;
210
211         if (evlist->nr_entries != other->nr_entries)
212                 return false;
213
214         pair = perf_evlist__first(other);
215
216         list_for_each_entry(pos, &evlist->entries, node) {
217                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
218                         return false;
219                 pair = perf_evsel__next(pair);
220         }
221
222         return true;
223 }
224
225 static int perf_record__open(struct perf_record *rec)
226 {
227         char msg[512];
228         struct perf_evsel *pos;
229         struct perf_evlist *evlist = rec->evlist;
230         struct perf_session *session = rec->session;
231         struct perf_record_opts *opts = &rec->opts;
232         int rc = 0;
233
234         perf_evlist__config(evlist, opts);
235
236         list_for_each_entry(pos, &evlist->entries, node) {
237 try_again:
238                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
239                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
240                                 if (verbose)
241                                         ui__warning("%s\n", msg);
242                                 goto try_again;
243                         }
244
245                         rc = -errno;
246                         perf_evsel__open_strerror(pos, &opts->target,
247                                                   errno, msg, sizeof(msg));
248                         ui__error("%s\n", msg);
249                         goto out;
250                 }
251         }
252
253         if (perf_evlist__apply_filters(evlist)) {
254                 error("failed to set filter with %d (%s)\n", errno,
255                         strerror(errno));
256                 rc = -1;
257                 goto out;
258         }
259
260         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
261                 if (errno == EPERM) {
262                         pr_err("Permission error mapping pages.\n"
263                                "Consider increasing "
264                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
265                                "or try again with a smaller value of -m/--mmap_pages.\n"
266                                "(current value: %d)\n", opts->mmap_pages);
267                         rc = -errno;
268                 } else if (!is_power_of_2(opts->mmap_pages) &&
269                            (opts->mmap_pages != UINT_MAX)) {
270                         pr_err("--mmap_pages/-m value must be a power of two.\n");
271                         rc = -EINVAL;
272                 } else {
273                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
274                         rc = -errno;
275                 }
276                 goto out;
277         }
278
279         if (rec->file_new)
280                 session->evlist = evlist;
281         else {
282                 if (!perf_evlist__equal(session->evlist, evlist)) {
283                         fprintf(stderr, "incompatible append\n");
284                         rc = -1;
285                         goto out;
286                 }
287         }
288
289         perf_session__set_id_hdr_size(session);
290 out:
291         return rc;
292 }
293
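/*
 * Re-process the events already written to the output file (everything past
 * post_processing_offset) with build_id__mark_dso_hit_ops, so that only the
 * DSOs actually hit by samples are marked for build-id inclusion when the
 * header is written.
 */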
294 static int process_buildids(struct perf_record *rec)
295 {
296         u64 size = lseek(rec->output, 0, SEEK_CUR);
297
298         if (size == 0)
299                 return 0;
300
301         rec->session->fd = rec->output;
302         return __perf_session__process_events(rec->session, rec->post_processing_offset,
303                                               size - rec->post_processing_offset,
304                                               size, &build_id__mark_dso_hit_ops);
305 }
306
307 static void perf_record__exit(int status, void *arg)
308 {
309         struct perf_record *rec = arg;
310
311         if (status != 0)
312                 return;
313
314         if (!rec->opts.pipe_output) {
315                 rec->session->header.data_size += rec->bytes_written;
316
317                 if (!rec->no_buildid)
318                         process_buildids(rec);
319                 perf_session__write_header(rec->session, rec->evlist,
320                                            rec->output, true);
321                 perf_session__delete(rec->session);
322                 perf_evlist__delete(rec->evlist);
323                 symbol__exit();
324         }
325 }
326
327 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
328 {
329         int err;
330         struct perf_tool *tool = data;
331         /*
332          * For a guest kernel, when processing the record and report
333          * subcommands, we synthesize the module mmap events before the
334          * guest kernel mmap and trigger a DSO preload, because by default
335          * guest module symbols are loaded from guest kallsyms instead of
336          * /lib/modules/XXX/XXX. This avoids missing symbols when the
337          * first address is in a module instead of in the guest kernel.
338          */
339         err = perf_event__synthesize_modules(tool, process_synthesized_event,
340                                              machine);
341         if (err < 0)
342                 pr_err("Couldn't record guest kernel [%d]'s reference"
343                        " relocation symbol.\n", machine->pid);
344
345         /*
346          * Use _stext for the guest kernel because the guest kernel's
347          * /proc/kallsyms sometimes has no _text.
348          */
349         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
350                                                  machine, "_text");
351         if (err < 0)
352                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
353                                                          machine, "_stext");
354         if (err < 0)
355                 pr_err("Couldn't record guest kernel [%d]'s reference"
356                        " relocation symbol.\n", machine->pid);
357 }
358
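/*
 * Pseudo event written after each full pass over the mmap buffers (when
 * tracing data is present); it marks a point at which report-time
 * processing can safely flush and reorder the events buffered so far.
 */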
359 static struct perf_event_header finished_round_event = {
360         .size = sizeof(struct perf_event_header),
361         .type = PERF_RECORD_FINISHED_ROUND,
362 };
363
364 static int perf_record__mmap_read_all(struct perf_record *rec)
365 {
366         int i;
367         int rc = 0;
368
369         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
370                 if (rec->evlist->mmap[i].base) {
371                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
372                                 rc = -1;
373                                 goto out;
374                         }
375                 }
376         }
377
378         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
379                 rc = write_output(rec, &finished_round_event,
380                                   sizeof(finished_round_event));
381
382 out:
383         return rc;
384 }
385
386 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
387 {
388         struct stat st;
389         int flags;
390         int err, output, feat;
391         unsigned long waking = 0;
392         const bool forks = argc > 0;
393         struct machine *machine;
394         struct perf_tool *tool = &rec->tool;
395         struct perf_record_opts *opts = &rec->opts;
396         struct perf_evlist *evsel_list = rec->evlist;
397         const char *output_name = rec->output_name;
398         struct perf_session *session;
399         bool disabled = false;
400
401         rec->progname = argv[0];
402
403         rec->page_size = sysconf(_SC_PAGE_SIZE);
404
405         on_exit(perf_record__sig_exit, rec);
406         signal(SIGCHLD, sig_handler);
407         signal(SIGINT, sig_handler);
408         signal(SIGUSR1, sig_handler);
409
410         if (!output_name) {
411                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
412                         opts->pipe_output = true;
413                 else
414                         rec->output_name = output_name = "perf.data";
415         }
416         if (output_name) {
417                 if (!strcmp(output_name, "-"))
418                         opts->pipe_output = true;
419                 else if (!stat(output_name, &st) && st.st_size) {
420                         if (rec->write_mode == WRITE_FORCE) {
421                                 char oldname[PATH_MAX];
422                                 snprintf(oldname, sizeof(oldname), "%s.old",
423                                          output_name);
424                                 unlink(oldname);
425                                 rename(output_name, oldname);
426                         }
427                 } else if (rec->write_mode == WRITE_APPEND) {
428                         rec->write_mode = WRITE_FORCE;
429                 }
430         }
431
432         flags = O_CREAT|O_RDWR;
433         if (rec->write_mode == WRITE_APPEND)
434                 rec->file_new = 0;
435         else
436                 flags |= O_TRUNC;
437
438         if (opts->pipe_output)
439                 output = STDOUT_FILENO;
440         else
441                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
442         if (output < 0) {
443                 perror("failed to create output file");
444                 return -1;
445         }
446
447         rec->output = output;
448
449         session = perf_session__new(output_name, O_WRONLY,
450                                     rec->write_mode == WRITE_FORCE, false, NULL);
451         if (session == NULL) {
452                 pr_err("Not enough memory for reading perf file header\n");
453                 return -1;
454         }
455
456         rec->session = session;
457
458         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
459                 perf_header__set_feat(&session->header, feat);
460
461         if (rec->no_buildid)
462                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
463
464         if (!have_tracepoints(&evsel_list->entries))
465                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
466
467         if (!rec->opts.branch_stack)
468                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
469
470         if (!rec->file_new) {
471                 err = perf_session__read_header(session, output);
472                 if (err < 0)
473                         goto out_delete_session;
474         }
475
476         if (forks) {
477                 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
478                 if (err < 0) {
479                         pr_err("Couldn't run the workload!\n");
480                         goto out_delete_session;
481                 }
482         }
483
484         if (perf_record__open(rec) != 0) {
485                 err = -1;
486                 goto out_delete_session;
487         }
488
489         if (!evsel_list->nr_groups)
490                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
491
492         /*
493          * perf_session__delete(session) will be called at perf_record__exit()
494          */
495         on_exit(perf_record__exit, rec);
496
497         if (opts->pipe_output) {
498                 err = perf_header__write_pipe(output);
499                 if (err < 0)
500                         goto out_delete_session;
501         } else if (rec->file_new) {
502                 err = perf_session__write_header(session, evsel_list,
503                                                  output, false);
504                 if (err < 0)
505                         goto out_delete_session;
506         }
507
508         if (!rec->no_buildid
509             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
510                 pr_err("Couldn't generate buildids. "
511                        "Use --no-buildid to profile anyway.\n");
512                 err = -1;
513                 goto out_delete_session;
514         }
515
516         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
517
518         machine = &session->machines.host;
519
520         if (opts->pipe_output) {
521                 err = perf_event__synthesize_attrs(tool, session,
522                                                    process_synthesized_event);
523                 if (err < 0) {
524                         pr_err("Couldn't synthesize attrs.\n");
525                         goto out_delete_session;
526                 }
527
528                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
529                                                          machine);
530                 if (err < 0) {
531                         pr_err("Couldn't synthesize event_types.\n");
532                         goto out_delete_session;
533                 }
534
535                 if (have_tracepoints(&evsel_list->entries)) {
536                         /*
537                          * FIXME: err <= 0 here actually means that
538                          * there were no tracepoints, so it's not really
539                          * an error, just that we don't need to
540                          * synthesize anything.  We should report this
541                          * more properly and also propagate the errors
542                          * that currently end up calling die().
543                          */
544                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
545                                                                   process_synthesized_event);
546                         if (err <= 0) {
547                                 pr_err("Couldn't record tracing data.\n");
548                                 goto out_delete_session;
549                         }
550                         advance_output(rec, err);
551                 }
552         }
553
554         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
555                                                  machine, "_text");
556         if (err < 0)
557                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
558                                                          machine, "_stext");
559         if (err < 0)
560                 pr_err("Couldn't record kernel reference relocation symbol\n"
561                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
562                        "Check /proc/kallsyms permission or run as root.\n");
563
564         err = perf_event__synthesize_modules(tool, process_synthesized_event,
565                                              machine);
566         if (err < 0)
567                 pr_err("Couldn't record kernel module information.\n"
568                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
569                        "Check /proc/modules permission or run as root.\n");
570
571         if (perf_guest) {
572                 machines__process_guests(&session->machines,
573                                          perf_event__synthesize_guest_os, tool);
574         }
575
576         if (!opts->target.system_wide)
577                 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
578                                                   process_synthesized_event,
579                                                   machine);
580         else
581                 err = perf_event__synthesize_threads(tool, process_synthesized_event,
582                                                machine);
583
584         if (err != 0)
585                 goto out_delete_session;
586
587         if (rec->realtime_prio) {
588                 struct sched_param param;
589
590                 param.sched_priority = rec->realtime_prio;
591                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
592                         pr_err("Could not set realtime priority.\n");
593                         err = -1;
594                         goto out_delete_session;
595                 }
596         }
597
598         /*
599          * When perf is starting the traced process, all the events
600          * (apart from group members) have enable_on_exec=1 set,
601          * so don't spoil it by prematurely enabling them.
602          */
603         if (!perf_target__none(&opts->target))
604                 perf_evlist__enable(evsel_list);
605
606         /*
607          * Let the child rip
608          */
609         if (forks)
610                 perf_evlist__start_workload(evsel_list);
611
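        /*
         * Main capture loop: drain every mmap ring buffer, and when no new
         * samples arrived either stop (if we were signalled to finish) or
         * poll the event fds until more data shows up.
         */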
612         for (;;) {
613                 int hits = rec->samples;
614
615                 if (perf_record__mmap_read_all(rec) < 0) {
616                         err = -1;
617                         goto out_delete_session;
618                 }
619
620                 if (hits == rec->samples) {
621                         if (done)
622                                 break;
623                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
624                         waking++;
625                 }
626
627                 /*
628                  * When perf is starting the traced process, the events die
629                  * with the process at the end and we wait for that, so there
630                  * is no need to disable them in this case.
631                  */
632                 if (done && !disabled && !perf_target__none(&opts->target)) {
633                         perf_evlist__disable(evsel_list);
634                         disabled = true;
635                 }
636         }
637
638         if (quiet || signr == SIGUSR1)
639                 return 0;
640
641         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
642
643         /*
644          * Approximate RIP event size: 24 bytes.
645          */
646         fprintf(stderr,
647                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
648                 (double)rec->bytes_written / 1024.0 / 1024.0,
649                 output_name,
650                 rec->bytes_written / 24);
651
652         return 0;
653
654 out_delete_session:
655         perf_session__delete(session);
656         return err;
657 }
658
659 #define BRANCH_OPT(n, m) \
660         { .name = n, .mode = (m) }
661
662 #define BRANCH_END { .name = NULL }
663
664 struct branch_mode {
665         const char *name;
666         int mode;
667 };
668
669 static const struct branch_mode branch_modes[] = {
670         BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
671         BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
672         BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
673         BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
674         BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
675         BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
676         BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
677         BRANCH_END
678 };
679
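/*
 * Parse the comma-separated list given to -b/--branch-any or
 * -j/--branch-filter into a PERF_SAMPLE_BRANCH_* mask.  If only
 * privilege-level bits (u, k, hv) were selected, default to sampling
 * any branch type.
 */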
680 static int
681 parse_branch_stack(const struct option *opt, const char *str, int unset)
682 {
683 #define ONLY_PLM \
684         (PERF_SAMPLE_BRANCH_USER        |\
685          PERF_SAMPLE_BRANCH_KERNEL      |\
686          PERF_SAMPLE_BRANCH_HV)
687
688         uint64_t *mode = (uint64_t *)opt->value;
689         const struct branch_mode *br;
690         char *s, *os = NULL, *p;
691         int ret = -1;
692
693         if (unset)
694                 return 0;
695
696         /*
697          * cannot set it twice, -b + --branch-filter for instance
698          */
699         if (*mode)
700                 return -1;
701
702         /* str may be NULL in case no arg is passed to -b */
703         if (str) {
704                 /* because str is read-only */
705                 s = os = strdup(str);
706                 if (!s)
707                         return -1;
708
709                 for (;;) {
710                         p = strchr(s, ',');
711                         if (p)
712                                 *p = '\0';
713
714                         for (br = branch_modes; br->name; br++) {
715                                 if (!strcasecmp(s, br->name))
716                                         break;
717                         }
718                         if (!br->name) {
719                                 ui__warning("unknown branch filter %s,"
720                                             " check man page\n", s);
721                                 goto error;
722                         }
723
724                         *mode |= br->mode;
725
726                         if (!p)
727                                 break;
728
729                         s = p + 1;
730                 }
731         }
732         ret = 0;
733
734         /* default to any branch */
735         if ((*mode & ~ONLY_PLM) == 0) {
736                 *mode = PERF_SAMPLE_BRANCH_ANY;
737         }
738 error:
739         free(os);
740         return ret;
741 }
742
743 #ifdef LIBUNWIND_SUPPORT
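/*
 * Parse the user stack dump size used for dwarf unwinding: the value is
 * rounded up to a multiple of sizeof(u64) and must not exceed USHRT_MAX
 * rounded down to that alignment.
 */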
744 static int get_stack_size(char *str, unsigned long *_size)
745 {
746         char *endptr;
747         unsigned long size;
748         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
749
750         size = strtoul(str, &endptr, 0);
751
752         do {
753                 if (*endptr)
754                         break;
755
756                 size = round_up(size, sizeof(u64));
757                 if (!size || size > max_size)
758                         break;
759
760                 *_size = size;
761                 return 0;
762
763         } while (0);
764
765         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
766                max_size, str);
767         return -1;
768 }
769 #endif /* LIBUNWIND_SUPPORT */
770
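/*
 * Parse the -g/--call-graph option: "fp" selects frame pointer based
 * callchains, while "dwarf[,size]" (only with libunwind support) selects
 * dwarf unwinding with an optional user stack dump size.
 */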
771 int record_parse_callchain_opt(const struct option *opt,
772                                const char *arg, int unset)
773 {
774         struct perf_record_opts *opts = opt->value;
775         char *tok, *name, *saveptr = NULL;
776         char *buf;
777         int ret = -1;
778
779         /* --no-call-graph */
780         if (unset)
781                 return 0;
782
783         /* A default option value is specified if none is provided. */
784         BUG_ON(!arg);
785
786         /* We need a buffer that we know we can write to. */
787         buf = malloc(strlen(arg) + 1);
788         if (!buf)
789                 return -ENOMEM;
790
791         strcpy(buf, arg);
792
793         tok = strtok_r((char *)buf, ",", &saveptr);
794         name = tok ? : (char *)buf;
795
796         do {
797                 /* Framepointer style */
798                 if (!strncmp(name, "fp", sizeof("fp"))) {
799                         if (!strtok_r(NULL, ",", &saveptr)) {
800                                 opts->call_graph = CALLCHAIN_FP;
801                                 ret = 0;
802                         } else
803                                 pr_err("callchain: No more arguments "
804                                        "needed for -g fp\n");
805                         break;
806
807 #ifdef LIBUNWIND_SUPPORT
808                 /* Dwarf style */
809                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
810                         const unsigned long default_stack_dump_size = 8192;
811
812                         ret = 0;
813                         opts->call_graph = CALLCHAIN_DWARF;
814                         opts->stack_dump_size = default_stack_dump_size;
815
816                         tok = strtok_r(NULL, ",", &saveptr);
817                         if (tok) {
818                                 unsigned long size = 0;
819
820                                 ret = get_stack_size(tok, &size);
821                                 opts->stack_dump_size = size;
822                         }
823
824                         if (!ret)
825                                 pr_debug("callchain: stack dump size %d\n",
826                                          opts->stack_dump_size);
827 #endif /* LIBUNWIND_SUPPORT */
828                 } else {
829                         pr_err("callchain: Unknown -g option "
830                                "value: %s\n", arg);
831                         break;
832                 }
833
834         } while (0);
835
836         free(buf);
837
838         if (!ret)
839                 pr_debug("callchain: type %d\n", opts->call_graph);
840
841         return ret;
842 }
843
844 static const char * const record_usage[] = {
845         "perf record [<options>] [<command>]",
846         "perf record [<options>] -- <command> [<options>]",
847         NULL
848 };
849
850 /*
851  * XXX Ideally this would be local to cmd_record() and passed to a
852  * perf_record__new(), because we need access to it in perf_record__exit(),
853  * which is called after cmd_record() exits; but since record_options needs
854  * to be accessible to builtin-script, leave it here.
855  *
856  * At least we don't touch it directly in all the other functions here.
857  *
858  * Just say no to tons of global variables, sigh.
859  */
860 static struct perf_record record = {
861         .opts = {
862                 .mmap_pages          = UINT_MAX,
863                 .user_freq           = UINT_MAX,
864                 .user_interval       = ULLONG_MAX,
865                 .freq                = 4000,
866                 .target              = {
867                         .uses_mmap   = true,
868                 },
869         },
870         .write_mode = WRITE_FORCE,
871         .file_new   = true,
872 };
873
874 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
875
876 #ifdef LIBUNWIND_SUPPORT
877 const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
878 #else
879 const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
880 #endif
881
882 /*
883  * XXX Will stay a global variable until we fix builtin-script.c to stop
884  * messing with it and switch to using the library functions in perf_evlist
885  * that came from builtin-record.c, i.e. use perf_record_opts,
886  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
887  * and using pipes, etc.
888  */
889 const struct option record_options[] = {
890         OPT_CALLBACK('e', "event", &record.evlist, "event",
891                      "event selector. use 'perf list' to list available events",
892                      parse_events_option),
893         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
894                      "event filter", parse_filter),
895         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
896                     "record events on existing process id"),
897         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
898                     "record events on existing thread id"),
899         OPT_INTEGER('r', "realtime", &record.realtime_prio,
900                     "collect data with this RT SCHED_FIFO priority"),
901         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
902                     "collect data without buffering"),
903         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
904                     "collect raw sample records from all opened counters"),
905         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
906                             "system-wide collection from all CPUs"),
907         OPT_BOOLEAN('A', "append", &record.append_file,
908                             "append to the output file to do incremental profiling"),
909         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
910                     "list of cpus to monitor"),
911         OPT_BOOLEAN('f', "force", &record.force,
912                         "overwrite existing data file (deprecated)"),
913         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
914         OPT_STRING('o', "output", &record.output_name, "file",
915                     "output file name"),
916         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
917                     "child tasks do not inherit counters"),
918         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
919         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
920                      "number of mmap data pages"),
921         OPT_BOOLEAN(0, "group", &record.opts.group,
922                     "put the counters into a counter group"),
923         OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
924                              "mode[,dump_size]", record_callchain_help,
925                              &record_parse_callchain_opt, "fp"),
926         OPT_INCR('v', "verbose", &verbose,
927                     "be more verbose (show counter open errors, etc)"),
928         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
929         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
930                     "per thread counts"),
931         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
932                     "Sample addresses"),
933         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
934         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
935         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
936                     "don't sample"),
937         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
938                     "do not update the buildid cache"),
939         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
940                     "do not collect buildids in perf.data"),
941         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
942                      "monitor event in cgroup name only",
943                      parse_cgroups),
944         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
945                    "user to profile"),
946
947         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
948                      "branch any", "sample any taken branches",
949                      parse_branch_stack),
950
951         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
952                      "branch filter mask", "branch stack filter modes",
953                      parse_branch_stack),
954         OPT_END()
955 };
956
957 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
958 {
959         int err = -ENOMEM;
960         struct perf_evsel *pos;
961         struct perf_evlist *evsel_list;
962         struct perf_record *rec = &record;
963         char errbuf[BUFSIZ];
964
965         evsel_list = perf_evlist__new(NULL, NULL);
966         if (evsel_list == NULL)
967                 return -ENOMEM;
968
969         rec->evlist = evsel_list;
970
971         argc = parse_options(argc, argv, record_options, record_usage,
972                             PARSE_OPT_STOP_AT_NON_OPTION);
973         if (!argc && perf_target__none(&rec->opts.target))
974                 usage_with_options(record_usage, record_options);
975
976         if (rec->force && rec->append_file) {
977                 ui__error("Can't overwrite and append at the same time."
978                           " You need to choose between -f and -A");
979                 usage_with_options(record_usage, record_options);
980         } else if (rec->append_file) {
981                 rec->write_mode = WRITE_APPEND;
982         } else {
983                 rec->write_mode = WRITE_FORCE;
984         }
985
986         if (nr_cgroups && !rec->opts.target.system_wide) {
987                 ui__error("cgroup monitoring only available in"
988                           " system-wide mode\n");
989                 usage_with_options(record_usage, record_options);
990         }
991
992         symbol__init();
993
994         if (symbol_conf.kptr_restrict)
995                 pr_warning(
996 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
997 "check /proc/sys/kernel/kptr_restrict.\n\n"
998 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
999 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1000 "Samples in kernel modules won't be resolved at all.\n\n"
1001 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1002 "even with a suitable vmlinux or kallsyms file.\n\n");
1003
1004         if (rec->no_buildid_cache || rec->no_buildid)
1005                 disable_buildid_cache();
1006
1007         if (evsel_list->nr_entries == 0 &&
1008             perf_evlist__add_default(evsel_list) < 0) {
1009                 pr_err("Not enough memory for event selector list\n");
1010                 goto out_symbol_exit;
1011         }
1012
1013         err = perf_target__validate(&rec->opts.target);
1014         if (err) {
1015                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1016                 ui__warning("%s", errbuf);
1017         }
1018
1019         err = perf_target__parse_uid(&rec->opts.target);
1020         if (err) {
1021                 int saved_errno = errno;
1022
1023                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1024                 ui__error("%s", errbuf);
1025
1026                 err = -saved_errno;
1027                 goto out_free_fd;
1028         }
1029
1030         err = -ENOMEM;
1031         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1032                 usage_with_options(record_usage, record_options);
1033
1034         list_for_each_entry(pos, &evsel_list->entries, node) {
1035                 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1036                         goto out_free_fd;
1037         }
1038
1039         if (rec->opts.user_interval != ULLONG_MAX)
1040                 rec->opts.default_interval = rec->opts.user_interval;
1041         if (rec->opts.user_freq != UINT_MAX)
1042                 rec->opts.freq = rec->opts.user_freq;
1043
1044         /*
1045          * User specified count overrides default frequency.
1046          */
1047         if (rec->opts.default_interval)
1048                 rec->opts.freq = 0;
1049         else if (rec->opts.freq) {
1050                 rec->opts.default_interval = rec->opts.freq;
1051         } else {
1052                 ui__error("frequency and count are zero, aborting\n");
1053                 err = -EINVAL;
1054                 goto out_free_fd;
1055         }
1056
1057         err = __cmd_record(&record, argc, argv);
1058 out_free_fd:
1059         perf_evlist__delete_maps(evsel_list);
1060 out_symbol_exit:
1061         symbol__exit();
1062         return err;
1063 }