]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/perf/builtin-record.c
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[karo-tx-linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/parse-branch-options.h"
33 #include "util/parse-regs-options.h"
34 #include "util/llvm-utils.h"
35
36 #include <unistd.h>
37 #include <sched.h>
38 #include <sys/mman.h>
39
40
41 struct record {
42         struct perf_tool        tool;
43         struct record_opts      opts;
44         u64                     bytes_written;
45         struct perf_data_file   file;
46         struct auxtrace_record  *itr;
47         struct perf_evlist      *evlist;
48         struct perf_session     *session;
49         const char              *progname;
50         int                     realtime_prio;
51         bool                    no_buildid;
52         bool                    no_buildid_cache;
53         unsigned long long      samples;
54 };
55
56 static int record__write(struct record *rec, void *bf, size_t size)
57 {
58         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
59                 pr_err("failed to write perf data, error: %m\n");
60                 return -1;
61         }
62
63         rec->bytes_written += size;
64         return 0;
65 }
66
67 static int process_synthesized_event(struct perf_tool *tool,
68                                      union perf_event *event,
69                                      struct perf_sample *sample __maybe_unused,
70                                      struct machine *machine __maybe_unused)
71 {
72         struct record *rec = container_of(tool, struct record, tool);
73         return record__write(rec, event, event->header.size);
74 }
75
76 static int record__mmap_read(struct record *rec, int idx)
77 {
78         struct perf_mmap *md = &rec->evlist->mmap[idx];
79         u64 head = perf_mmap__read_head(md);
80         u64 old = md->prev;
81         unsigned char *data = md->base + page_size;
82         unsigned long size;
83         void *buf;
84         int rc = 0;
85
86         if (old == head)
87                 return 0;
88
89         rec->samples++;
90
91         size = head - old;
92
93         if ((old & md->mask) + size != (head & md->mask)) {
94                 buf = &data[old & md->mask];
95                 size = md->mask + 1 - (old & md->mask);
96                 old += size;
97
98                 if (record__write(rec, buf, size) < 0) {
99                         rc = -1;
100                         goto out;
101                 }
102         }
103
104         buf = &data[old & md->mask];
105         size = head - old;
106         old += size;
107
108         if (record__write(rec, buf, size) < 0) {
109                 rc = -1;
110                 goto out;
111         }
112
113         md->prev = old;
114         perf_evlist__mmap_consume(rec->evlist, idx);
115 out:
116         return rc;
117 }
118
/*
 * Flags shared between the signal handlers and the recording loop.
 */

/* set by the signal handlers; makes the recording loop wind down */
static volatile int done;
/* signal to re-raise at exit, or -1 when there is none */
static volatile int signr = -1;
/* set once SIGCHLD arrived or the workload failed to start */
static volatile int child_finished;

/* 1 while the recording loop is running and no snapshot read is pending */
static volatile int auxtrace_snapshot_enabled;
/* result of the last AUX area snapshot attempt, 0 when it succeeded */
static volatile int auxtrace_snapshot_err;
/* polled and cleared by the recording loop; set outside the code shown here */
static volatile int auxtrace_record__snapshot_started;
125
126 static void sig_handler(int sig)
127 {
128         if (sig == SIGCHLD)
129                 child_finished = 1;
130         else
131                 signr = sig;
132
133         done = 1;
134 }
135
136 static void record__sig_exit(void)
137 {
138         if (signr == -1)
139                 return;
140
141         signal(signr, SIG_DFL);
142         raise(signr);
143 }
144
145 #ifdef HAVE_AUXTRACE_SUPPORT
146
147 static int record__process_auxtrace(struct perf_tool *tool,
148                                     union perf_event *event, void *data1,
149                                     size_t len1, void *data2, size_t len2)
150 {
151         struct record *rec = container_of(tool, struct record, tool);
152         struct perf_data_file *file = &rec->file;
153         size_t padding;
154         u8 pad[8] = {0};
155
156         if (!perf_data_file__is_pipe(file)) {
157                 off_t file_offset;
158                 int fd = perf_data_file__fd(file);
159                 int err;
160
161                 file_offset = lseek(fd, 0, SEEK_CUR);
162                 if (file_offset == -1)
163                         return -1;
164                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
165                                                      event, file_offset);
166                 if (err)
167                         return err;
168         }
169
170         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
171         padding = (len1 + len2) & 7;
172         if (padding)
173                 padding = 8 - padding;
174
175         record__write(rec, event, event->header.size);
176         record__write(rec, data1, len1);
177         if (len2)
178                 record__write(rec, data2, len2);
179         record__write(rec, &pad, padding);
180
181         return 0;
182 }
183
184 static int record__auxtrace_mmap_read(struct record *rec,
185                                       struct auxtrace_mmap *mm)
186 {
187         int ret;
188
189         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
190                                   record__process_auxtrace);
191         if (ret < 0)
192                 return ret;
193
194         if (ret)
195                 rec->samples++;
196
197         return 0;
198 }
199
200 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
201                                                struct auxtrace_mmap *mm)
202 {
203         int ret;
204
205         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
206                                            record__process_auxtrace,
207                                            rec->opts.auxtrace_snapshot_size);
208         if (ret < 0)
209                 return ret;
210
211         if (ret)
212                 rec->samples++;
213
214         return 0;
215 }
216
217 static int record__auxtrace_read_snapshot_all(struct record *rec)
218 {
219         int i;
220         int rc = 0;
221
222         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
223                 struct auxtrace_mmap *mm =
224                                 &rec->evlist->mmap[i].auxtrace_mmap;
225
226                 if (!mm->base)
227                         continue;
228
229                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
230                         rc = -1;
231                         goto out;
232                 }
233         }
234 out:
235         return rc;
236 }
237
238 static void record__read_auxtrace_snapshot(struct record *rec)
239 {
240         pr_debug("Recording AUX area tracing snapshot\n");
241         if (record__auxtrace_read_snapshot_all(rec) < 0) {
242                 auxtrace_snapshot_err = -1;
243         } else {
244                 auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
245                 if (!auxtrace_snapshot_err)
246                         auxtrace_snapshot_enabled = 1;
247         }
248 }
249
250 #else
251
252 static inline
253 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
254                                struct auxtrace_mmap *mm __maybe_unused)
255 {
256         return 0;
257 }
258
259 static inline
260 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
261 {
262 }
263
264 static inline
265 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
266 {
267         return 0;
268 }
269
270 #endif
271
272 static int record__open(struct record *rec)
273 {
274         char msg[512];
275         struct perf_evsel *pos;
276         struct perf_evlist *evlist = rec->evlist;
277         struct perf_session *session = rec->session;
278         struct record_opts *opts = &rec->opts;
279         int rc = 0;
280
281         perf_evlist__config(evlist, opts);
282
283         evlist__for_each(evlist, pos) {
284 try_again:
285                 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
286                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
287                                 if (verbose)
288                                         ui__warning("%s\n", msg);
289                                 goto try_again;
290                         }
291
292                         rc = -errno;
293                         perf_evsel__open_strerror(pos, &opts->target,
294                                                   errno, msg, sizeof(msg));
295                         ui__error("%s\n", msg);
296                         goto out;
297                 }
298         }
299
300         if (perf_evlist__apply_filters(evlist, &pos)) {
301                 error("failed to set filter \"%s\" on event %s with %d (%s)\n",
302                         pos->filter, perf_evsel__name(pos), errno,
303                         strerror_r(errno, msg, sizeof(msg)));
304                 rc = -1;
305                 goto out;
306         }
307
308         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
309                                  opts->auxtrace_mmap_pages,
310                                  opts->auxtrace_snapshot_mode) < 0) {
311                 if (errno == EPERM) {
312                         pr_err("Permission error mapping pages.\n"
313                                "Consider increasing "
314                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
315                                "or try again with a smaller value of -m/--mmap_pages.\n"
316                                "(current value: %u,%u)\n",
317                                opts->mmap_pages, opts->auxtrace_mmap_pages);
318                         rc = -errno;
319                 } else {
320                         pr_err("failed to mmap with %d (%s)\n", errno,
321                                 strerror_r(errno, msg, sizeof(msg)));
322                         rc = -errno;
323                 }
324                 goto out;
325         }
326
327         session->evlist = evlist;
328         perf_session__set_id_hdr_size(session);
329 out:
330         return rc;
331 }
332
333 static int process_sample_event(struct perf_tool *tool,
334                                 union perf_event *event,
335                                 struct perf_sample *sample,
336                                 struct perf_evsel *evsel,
337                                 struct machine *machine)
338 {
339         struct record *rec = container_of(tool, struct record, tool);
340
341         rec->samples++;
342
343         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
344 }
345
346 static int process_buildids(struct record *rec)
347 {
348         struct perf_data_file *file  = &rec->file;
349         struct perf_session *session = rec->session;
350
351         if (file->size == 0)
352                 return 0;
353
354         /*
355          * During this process, it'll load kernel map and replace the
356          * dso->long_name to a real pathname it found.  In this case
357          * we prefer the vmlinux path like
358          *   /lib/modules/3.16.4/build/vmlinux
359          *
360          * rather than build-id path (in debug directory).
361          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
362          */
363         symbol_conf.ignore_vmlinux_buildid = true;
364
365         return perf_session__process_events(session);
366 }
367
368 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
369 {
370         int err;
371         struct perf_tool *tool = data;
372         /*
373          *As for guest kernel when processing subcommand record&report,
374          *we arrange module mmap prior to guest kernel mmap and trigger
375          *a preload dso because default guest module symbols are loaded
376          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
377          *method is used to avoid symbol missing when the first addr is
378          *in module instead of in guest kernel.
379          */
380         err = perf_event__synthesize_modules(tool, process_synthesized_event,
381                                              machine);
382         if (err < 0)
383                 pr_err("Couldn't record guest kernel [%d]'s reference"
384                        " relocation symbol.\n", machine->pid);
385
386         /*
387          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
388          * have no _text sometimes.
389          */
390         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
391                                                  machine);
392         if (err < 0)
393                 pr_err("Couldn't record guest kernel [%d]'s reference"
394                        " relocation symbol.\n", machine->pid);
395 }
396
397 static struct perf_event_header finished_round_event = {
398         .size = sizeof(struct perf_event_header),
399         .type = PERF_RECORD_FINISHED_ROUND,
400 };
401
402 static int record__mmap_read_all(struct record *rec)
403 {
404         u64 bytes_written = rec->bytes_written;
405         int i;
406         int rc = 0;
407
408         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
409                 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
410
411                 if (rec->evlist->mmap[i].base) {
412                         if (record__mmap_read(rec, i) != 0) {
413                                 rc = -1;
414                                 goto out;
415                         }
416                 }
417
418                 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
419                     record__auxtrace_mmap_read(rec, mm) != 0) {
420                         rc = -1;
421                         goto out;
422                 }
423         }
424
425         /*
426          * Mark the round finished in case we wrote
427          * at least one event.
428          */
429         if (bytes_written != rec->bytes_written)
430                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
431
432 out:
433         return rc;
434 }
435
436 static void record__init_features(struct record *rec)
437 {
438         struct perf_session *session = rec->session;
439         int feat;
440
441         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
442                 perf_header__set_feat(&session->header, feat);
443
444         if (rec->no_buildid)
445                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
446
447         if (!have_tracepoints(&rec->evlist->entries))
448                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
449
450         if (!rec->opts.branch_stack)
451                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
452
453         if (!rec->opts.full_auxtrace)
454                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
455 }
456
457 static volatile int workload_exec_errno;
458
459 /*
460  * perf_evlist__prepare_workload will send a SIGUSR1
461  * if the fork fails, since we asked by setting its
462  * want_signal to true.
463  */
464 static void workload_exec_failed_signal(int signo __maybe_unused,
465                                         siginfo_t *info,
466                                         void *ucontext __maybe_unused)
467 {
468         workload_exec_errno = info->si_value.sival_int;
469         done = 1;
470         child_finished = 1;
471 }
472
473 static void snapshot_sig_handler(int sig);
474
475 static int __cmd_record(struct record *rec, int argc, const char **argv)
476 {
477         int err;
478         int status = 0;
479         unsigned long waking = 0;
480         const bool forks = argc > 0;
481         struct machine *machine;
482         struct perf_tool *tool = &rec->tool;
483         struct record_opts *opts = &rec->opts;
484         struct perf_data_file *file = &rec->file;
485         struct perf_session *session;
486         bool disabled = false, draining = false;
487         int fd;
488
489         rec->progname = argv[0];
490
491         atexit(record__sig_exit);
492         signal(SIGCHLD, sig_handler);
493         signal(SIGINT, sig_handler);
494         signal(SIGTERM, sig_handler);
495         if (rec->opts.auxtrace_snapshot_mode)
496                 signal(SIGUSR2, snapshot_sig_handler);
497         else
498                 signal(SIGUSR2, SIG_IGN);
499
500         session = perf_session__new(file, false, tool);
501         if (session == NULL) {
502                 pr_err("Perf session creation failed.\n");
503                 return -1;
504         }
505
506         fd = perf_data_file__fd(file);
507         rec->session = session;
508
509         record__init_features(rec);
510
511         if (forks) {
512                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
513                                                     argv, file->is_pipe,
514                                                     workload_exec_failed_signal);
515                 if (err < 0) {
516                         pr_err("Couldn't run the workload!\n");
517                         status = err;
518                         goto out_delete_session;
519                 }
520         }
521
522         if (record__open(rec) != 0) {
523                 err = -1;
524                 goto out_child;
525         }
526
527         /*
528          * Normally perf_session__new would do this, but it doesn't have the
529          * evlist.
530          */
531         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
532                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
533                 rec->tool.ordered_events = false;
534         }
535
536         if (!rec->evlist->nr_groups)
537                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
538
539         if (file->is_pipe) {
540                 err = perf_header__write_pipe(fd);
541                 if (err < 0)
542                         goto out_child;
543         } else {
544                 err = perf_session__write_header(session, rec->evlist, fd, false);
545                 if (err < 0)
546                         goto out_child;
547         }
548
549         if (!rec->no_buildid
550             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
551                 pr_err("Couldn't generate buildids. "
552                        "Use --no-buildid to profile anyway.\n");
553                 err = -1;
554                 goto out_child;
555         }
556
557         machine = &session->machines.host;
558
559         if (file->is_pipe) {
560                 err = perf_event__synthesize_attrs(tool, session,
561                                                    process_synthesized_event);
562                 if (err < 0) {
563                         pr_err("Couldn't synthesize attrs.\n");
564                         goto out_child;
565                 }
566
567                 if (have_tracepoints(&rec->evlist->entries)) {
568                         /*
569                          * FIXME err <= 0 here actually means that
570                          * there were no tracepoints so its not really
571                          * an error, just that we don't need to
572                          * synthesize anything.  We really have to
573                          * return this more properly and also
574                          * propagate errors that now are calling die()
575                          */
576                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
577                                                                   process_synthesized_event);
578                         if (err <= 0) {
579                                 pr_err("Couldn't record tracing data.\n");
580                                 goto out_child;
581                         }
582                         rec->bytes_written += err;
583                 }
584         }
585
586         if (rec->opts.full_auxtrace) {
587                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
588                                         session, process_synthesized_event);
589                 if (err)
590                         goto out_delete_session;
591         }
592
593         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
594                                                  machine);
595         if (err < 0)
596                 pr_err("Couldn't record kernel reference relocation symbol\n"
597                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
598                        "Check /proc/kallsyms permission or run as root.\n");
599
600         err = perf_event__synthesize_modules(tool, process_synthesized_event,
601                                              machine);
602         if (err < 0)
603                 pr_err("Couldn't record kernel module information.\n"
604                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
605                        "Check /proc/modules permission or run as root.\n");
606
607         if (perf_guest) {
608                 machines__process_guests(&session->machines,
609                                          perf_event__synthesize_guest_os, tool);
610         }
611
612         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
613                                             process_synthesized_event, opts->sample_address,
614                                             opts->proc_map_timeout);
615         if (err != 0)
616                 goto out_child;
617
618         if (rec->realtime_prio) {
619                 struct sched_param param;
620
621                 param.sched_priority = rec->realtime_prio;
622                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
623                         pr_err("Could not set realtime priority.\n");
624                         err = -1;
625                         goto out_child;
626                 }
627         }
628
629         /*
630          * When perf is starting the traced process, all the events
631          * (apart from group members) have enable_on_exec=1 set,
632          * so don't spoil it by prematurely enabling them.
633          */
634         if (!target__none(&opts->target) && !opts->initial_delay)
635                 perf_evlist__enable(rec->evlist);
636
637         /*
638          * Let the child rip
639          */
640         if (forks) {
641                 union perf_event *event;
642
643                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
644                 if (event == NULL) {
645                         err = -ENOMEM;
646                         goto out_child;
647                 }
648
649                 /*
650                  * Some H/W events are generated before COMM event
651                  * which is emitted during exec(), so perf script
652                  * cannot see a correct process name for those events.
653                  * Synthesize COMM event to prevent it.
654                  */
655                 perf_event__synthesize_comm(tool, event,
656                                             rec->evlist->workload.pid,
657                                             process_synthesized_event,
658                                             machine);
659                 free(event);
660
661                 perf_evlist__start_workload(rec->evlist);
662         }
663
664         if (opts->initial_delay) {
665                 usleep(opts->initial_delay * 1000);
666                 perf_evlist__enable(rec->evlist);
667         }
668
669         auxtrace_snapshot_enabled = 1;
670         for (;;) {
671                 unsigned long long hits = rec->samples;
672
673                 if (record__mmap_read_all(rec) < 0) {
674                         auxtrace_snapshot_enabled = 0;
675                         err = -1;
676                         goto out_child;
677                 }
678
679                 if (auxtrace_record__snapshot_started) {
680                         auxtrace_record__snapshot_started = 0;
681                         if (!auxtrace_snapshot_err)
682                                 record__read_auxtrace_snapshot(rec);
683                         if (auxtrace_snapshot_err) {
684                                 pr_err("AUX area tracing snapshot failed\n");
685                                 err = -1;
686                                 goto out_child;
687                         }
688                 }
689
690                 if (hits == rec->samples) {
691                         if (done || draining)
692                                 break;
693                         err = perf_evlist__poll(rec->evlist, -1);
694                         /*
695                          * Propagate error, only if there's any. Ignore positive
696                          * number of returned events and interrupt error.
697                          */
698                         if (err > 0 || (err < 0 && errno == EINTR))
699                                 err = 0;
700                         waking++;
701
702                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
703                                 draining = true;
704                 }
705
706                 /*
707                  * When perf is starting the traced process, at the end events
708                  * die with the process and we wait for that. Thus no need to
709                  * disable events in this case.
710                  */
711                 if (done && !disabled && !target__none(&opts->target)) {
712                         auxtrace_snapshot_enabled = 0;
713                         perf_evlist__disable(rec->evlist);
714                         disabled = true;
715                 }
716         }
717         auxtrace_snapshot_enabled = 0;
718
719         if (forks && workload_exec_errno) {
720                 char msg[STRERR_BUFSIZE];
721                 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
722                 pr_err("Workload failed: %s\n", emsg);
723                 err = -1;
724                 goto out_child;
725         }
726
727         if (!quiet)
728                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
729
730 out_child:
731         if (forks) {
732                 int exit_status;
733
734                 if (!child_finished)
735                         kill(rec->evlist->workload.pid, SIGTERM);
736
737                 wait(&exit_status);
738
739                 if (err < 0)
740                         status = err;
741                 else if (WIFEXITED(exit_status))
742                         status = WEXITSTATUS(exit_status);
743                 else if (WIFSIGNALED(exit_status))
744                         signr = WTERMSIG(exit_status);
745         } else
746                 status = err;
747
748         /* this will be recalculated during process_buildids() */
749         rec->samples = 0;
750
751         if (!err && !file->is_pipe) {
752                 rec->session->header.data_size += rec->bytes_written;
753                 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
754
755                 if (!rec->no_buildid) {
756                         process_buildids(rec);
757                         /*
758                          * We take all buildids when the file contains
759                          * AUX area tracing data because we do not decode the
760                          * trace because it would take too long.
761                          */
762                         if (rec->opts.full_auxtrace)
763                                 dsos__hit_all(rec->session);
764                 }
765                 perf_session__write_header(rec->session, rec->evlist, fd, true);
766         }
767
768         if (!err && !quiet) {
769                 char samples[128];
770
771                 if (rec->samples && !rec->opts.full_auxtrace)
772                         scnprintf(samples, sizeof(samples),
773                                   " (%" PRIu64 " samples)", rec->samples);
774                 else
775                         samples[0] = '\0';
776
777                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
778                         perf_data_file__size(file) / 1024.0 / 1024.0,
779                         file->path, samples);
780         }
781
782 out_delete_session:
783         perf_session__delete(session);
784         return status;
785 }
786
787 static void callchain_debug(void)
788 {
789         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
790
791         pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
792
793         if (callchain_param.record_mode == CALLCHAIN_DWARF)
794                 pr_debug("callchain: stack dump size %d\n",
795                          callchain_param.dump_size);
796 }
797
798 int record_parse_callchain_opt(const struct option *opt,
799                                const char *arg,
800                                int unset)
801 {
802         int ret;
803         struct record_opts *record = (struct record_opts *)opt->value;
804
805         record->callgraph_set = true;
806         callchain_param.enabled = !unset;
807
808         /* --no-call-graph */
809         if (unset) {
810                 callchain_param.record_mode = CALLCHAIN_NONE;
811                 pr_debug("callchain: disabled\n");
812                 return 0;
813         }
814
815         ret = parse_callchain_record_opt(arg, &callchain_param);
816         if (!ret)
817                 callchain_debug();
818
819         return ret;
820 }
821
822 int record_callchain_opt(const struct option *opt,
823                          const char *arg __maybe_unused,
824                          int unset __maybe_unused)
825 {
826         struct record_opts *record = (struct record_opts *)opt->value;
827
828         record->callgraph_set = true;
829         callchain_param.enabled = true;
830
831         if (callchain_param.record_mode == CALLCHAIN_NONE)
832                 callchain_param.record_mode = CALLCHAIN_FP;
833
834         callchain_debug();
835         return 0;
836 }
837
/*
 * Config callback: the key "record.call-graph" is treated as an alias
 * of "call-graph.record-mode"; every key, renamed or not, is then
 * handed to perf_default_config().
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	const char *key = var;

	if (strcmp(var, "record.call-graph") == 0)
		key = "call-graph.record-mode";

	return perf_default_config(key, value, cb);
}
845
846 struct clockid_map {
847         const char *name;
848         int clockid;
849 };
850
851 #define CLOCKID_MAP(n, c)       \
852         { .name = n, .clockid = (c), }
853
854 #define CLOCKID_END     { .name = NULL, }
855
856
/*
 * Fallback definitions for clock ids that the system headers may not
 * provide; we need to build on many distros.
 */
#if !defined(CLOCK_MONOTONIC_RAW)
# define CLOCK_MONOTONIC_RAW	4
#endif

#if !defined(CLOCK_BOOTTIME)
# define CLOCK_BOOTTIME		7
#endif

#if !defined(CLOCK_TAI)
# define CLOCK_TAI		11
#endif
869
870 static const struct clockid_map clockids[] = {
871         /* available for all events, NMI safe */
872         CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
873         CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
874
875         /* available for some events */
876         CLOCKID_MAP("realtime", CLOCK_REALTIME),
877         CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
878         CLOCKID_MAP("tai", CLOCK_TAI),
879
880         /* available for the lazy */
881         CLOCKID_MAP("mono", CLOCK_MONOTONIC),
882         CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
883         CLOCKID_MAP("real", CLOCK_REALTIME),
884         CLOCKID_MAP("boot", CLOCK_BOOTTIME),
885
886         CLOCKID_END,
887 };
888
889 static int parse_clockid(const struct option *opt, const char *str, int unset)
890 {
891         struct record_opts *opts = (struct record_opts *)opt->value;
892         const struct clockid_map *cm;
893         const char *ostr = str;
894
895         if (unset) {
896                 opts->use_clockid = 0;
897                 return 0;
898         }
899
900         /* no arg passed */
901         if (!str)
902                 return 0;
903
904         /* no setting it twice */
905         if (opts->use_clockid)
906                 return -1;
907
908         opts->use_clockid = true;
909
910         /* if its a number, we're done */
911         if (sscanf(str, "%d", &opts->clockid) == 1)
912                 return 0;
913
914         /* allow a "CLOCK_" prefix to the name */
915         if (!strncasecmp(str, "CLOCK_", 6))
916                 str += 6;
917
918         for (cm = clockids; cm->name; cm++) {
919                 if (!strcasecmp(str, cm->name)) {
920                         opts->clockid = cm->clockid;
921                         return 0;
922                 }
923         }
924
925         opts->use_clockid = false;
926         ui__warning("unknown clockid %s, check man page\n", ostr);
927         return -1;
928 }
929
930 static int record__parse_mmap_pages(const struct option *opt,
931                                     const char *str,
932                                     int unset __maybe_unused)
933 {
934         struct record_opts *opts = opt->value;
935         char *s, *p;
936         unsigned int mmap_pages;
937         int ret;
938
939         if (!str)
940                 return -EINVAL;
941
942         s = strdup(str);
943         if (!s)
944                 return -ENOMEM;
945
946         p = strchr(s, ',');
947         if (p)
948                 *p = '\0';
949
950         if (*s) {
951                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
952                 if (ret)
953                         goto out_free;
954                 opts->mmap_pages = mmap_pages;
955         }
956
957         if (!p) {
958                 ret = 0;
959                 goto out_free;
960         }
961
962         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
963         if (ret)
964                 goto out_free;
965
966         opts->auxtrace_mmap_pages = mmap_pages;
967
968 out_free:
969         free(s);
970         return ret;
971 }
972
/* Usage lines for 'perf record'; the list is NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Externally visible handle on the usage lines above. */
const char * const *record_usage = __record_usage;
979
980 /*
981  * XXX Ideally would be local to cmd_record() and passed to a record__new
982  * because we need to have access to it in record__exit, that is called
983  * after cmd_record() exits, but since record_options need to be accessible to
984  * builtin-script, leave it here.
985  *
986  * At least we don't ouch it in all the other functions here directly.
987  *
988  * Just say no to tons of global variables, sigh.
989  */
990 static struct record record = {
991         .opts = {
992                 .sample_time         = true,
993                 .mmap_pages          = UINT_MAX,
994                 .user_freq           = UINT_MAX,
995                 .user_interval       = ULLONG_MAX,
996                 .freq                = 4000,
997                 .target              = {
998                         .uses_mmap   = true,
999                         .default_per_cpu = true,
1000                 },
1001                 .proc_map_timeout     = 500,
1002         },
1003         .tool = {
1004                 .sample         = process_sample_event,
1005                 .fork           = perf_event__process_fork,
1006                 .exit           = perf_event__process_exit,
1007                 .comm           = perf_event__process_comm,
1008                 .mmap           = perf_event__process_mmap,
1009                 .mmap2          = perf_event__process_mmap2,
1010                 .ordered_events = true,
1011         },
1012 };
1013
1014 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1015         "\n\t\t\t\tDefault: fp";
1016
1017 /*
1018  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1019  * with it and switch to use the library functions in perf_evlist that came
1020  * from builtin-record.c, i.e. use record_opts,
1021  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1022  * using pipes, etc.
1023  */
1024 struct option __record_options[] = {
1025         OPT_CALLBACK('e', "event", &record.evlist, "event",
1026                      "event selector. use 'perf list' to list available events",
1027                      parse_events_option),
1028         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1029                      "event filter", parse_filter),
1030         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1031                            NULL, "don't record events from perf itself",
1032                            exclude_perf),
1033         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1034                     "record events on existing process id"),
1035         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1036                     "record events on existing thread id"),
1037         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1038                     "collect data with this RT SCHED_FIFO priority"),
1039         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1040                     "collect data without buffering"),
1041         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1042                     "collect raw sample records from all opened counters"),
1043         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1044                             "system-wide collection from all CPUs"),
1045         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1046                     "list of cpus to monitor"),
1047         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1048         OPT_STRING('o', "output", &record.file.path, "file",
1049                     "output file name"),
1050         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1051                         &record.opts.no_inherit_set,
1052                         "child tasks do not inherit counters"),
1053         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1054         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1055                      "number of mmap data pages and AUX area tracing mmap pages",
1056                      record__parse_mmap_pages),
1057         OPT_BOOLEAN(0, "group", &record.opts.group,
1058                     "put the counters into a counter group"),
1059         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
1060                            NULL, "enables call-graph recording" ,
1061                            &record_callchain_opt),
1062         OPT_CALLBACK(0, "call-graph", &record.opts,
1063                      "record_mode[,record_size]", record_callchain_help,
1064                      &record_parse_callchain_opt),
1065         OPT_INCR('v', "verbose", &verbose,
1066                     "be more verbose (show counter open errors, etc)"),
1067         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1068         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1069                     "per thread counts"),
1070         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1071         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1072                         &record.opts.sample_time_set,
1073                         "Record the sample timestamps"),
1074         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1075         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1076                     "don't sample"),
1077         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
1078                     "do not update the buildid cache"),
1079         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
1080                     "do not collect buildids in perf.data"),
1081         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1082                      "monitor event in cgroup name only",
1083                      parse_cgroups),
1084         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1085                   "ms to wait before starting measurement after program start"),
1086         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1087                    "user to profile"),
1088
1089         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1090                      "branch any", "sample any taken branches",
1091                      parse_branch_stack),
1092
1093         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1094                      "branch filter mask", "branch stack filter modes",
1095                      parse_branch_stack),
1096         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1097                     "sample by weight (on special events only)"),
1098         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1099                     "sample transaction flags (special events only)"),
1100         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1101                     "use per-thread mmaps"),
1102         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1103                     "sample selected machine registers on interrupt,"
1104                     " use -I ? to list register names", parse_regs),
1105         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1106                     "Record running/enabled time of read (:S) events"),
1107         OPT_CALLBACK('k', "clockid", &record.opts,
1108         "clockid", "clockid to use for events, see clock_gettime()",
1109         parse_clockid),
1110         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1111                           "opts", "AUX area tracing Snapshot Mode", ""),
1112         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1113                         "per thread proc mmap processing timeout in ms"),
1114         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1115                     "Record context switch events"),
1116 #ifdef HAVE_LIBBPF_SUPPORT
1117         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1118                    "clang binary to use for compiling BPF scriptlets"),
1119         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1120                    "options passed to clang when compiling BPF scriptlets"),
1121 #endif
1122         OPT_END()
1123 };
1124
1125 struct option *record_options = __record_options;
1126
1127 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1128 {
1129         int err;
1130         struct record *rec = &record;
1131         char errbuf[BUFSIZ];
1132
1133         rec->evlist = perf_evlist__new();
1134         if (rec->evlist == NULL)
1135                 return -ENOMEM;
1136
1137         perf_config(perf_record_config, rec);
1138
1139         argc = parse_options(argc, argv, record_options, record_usage,
1140                             PARSE_OPT_STOP_AT_NON_OPTION);
1141         if (!argc && target__none(&rec->opts.target))
1142                 usage_with_options(record_usage, record_options);
1143
1144         if (nr_cgroups && !rec->opts.target.system_wide) {
1145                 usage_with_options_msg(record_usage, record_options,
1146                         "cgroup monitoring only available in system-wide mode");
1147
1148         }
1149         if (rec->opts.record_switch_events &&
1150             !perf_can_record_switch_events()) {
1151                 ui__error("kernel does not support recording context switch events\n");
1152                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1153                 return -EINVAL;
1154         }
1155
1156         if (!rec->itr) {
1157                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1158                 if (err)
1159                         return err;
1160         }
1161
1162         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1163                                               rec->opts.auxtrace_snapshot_opts);
1164         if (err)
1165                 return err;
1166
1167         err = -ENOMEM;
1168
1169         symbol__init(NULL);
1170
1171         if (symbol_conf.kptr_restrict)
1172                 pr_warning(
1173 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1174 "check /proc/sys/kernel/kptr_restrict.\n\n"
1175 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1176 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1177 "Samples in kernel modules won't be resolved at all.\n\n"
1178 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1179 "even with a suitable vmlinux or kallsyms file.\n\n");
1180
1181         if (rec->no_buildid_cache || rec->no_buildid)
1182                 disable_buildid_cache();
1183
1184         if (rec->evlist->nr_entries == 0 &&
1185             perf_evlist__add_default(rec->evlist) < 0) {
1186                 pr_err("Not enough memory for event selector list\n");
1187                 goto out_symbol_exit;
1188         }
1189
1190         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1191                 rec->opts.no_inherit = true;
1192
1193         err = target__validate(&rec->opts.target);
1194         if (err) {
1195                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1196                 ui__warning("%s", errbuf);
1197         }
1198
1199         err = target__parse_uid(&rec->opts.target);
1200         if (err) {
1201                 int saved_errno = errno;
1202
1203                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1204                 ui__error("%s", errbuf);
1205
1206                 err = -saved_errno;
1207                 goto out_symbol_exit;
1208         }
1209
1210         err = -ENOMEM;
1211         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1212                 usage_with_options(record_usage, record_options);
1213
1214         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1215         if (err)
1216                 goto out_symbol_exit;
1217
1218         if (record_opts__config(&rec->opts)) {
1219                 err = -EINVAL;
1220                 goto out_symbol_exit;
1221         }
1222
1223         err = __cmd_record(&record, argc, argv);
1224 out_symbol_exit:
1225         perf_evlist__delete(rec->evlist);
1226         symbol__exit();
1227         auxtrace_record__free(rec->itr);
1228         return err;
1229 }
1230
1231 static void snapshot_sig_handler(int sig __maybe_unused)
1232 {
1233         if (!auxtrace_snapshot_enabled)
1234                 return;
1235         auxtrace_snapshot_enabled = 0;
1236         auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1237         auxtrace_record__snapshot_started = 1;
1238 }