git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/perf/builtin-trace.c
perf record: Synthesize non-exec MMAP records when --data used
[karo-tx-linux.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20
/*
 * Fallback values for constants that the headers of older distros may not
 * provide; each one is only defined when the system headers did not.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif
37
38 struct tp_field {
39         int offset;
40         union {
41                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43         };
44 };
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49         return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61         return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69                                struct format_field *format_field,
70                                bool needs_swap)
71 {
72         field->offset = format_field->offset;
73
74         switch (format_field->size) {
75         case 1:
76                 field->integer = tp_field__u8;
77                 break;
78         case 2:
79                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80                 break;
81         case 4:
82                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83                 break;
84         case 8:
85                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86                 break;
87         default:
88                 return -1;
89         }
90
91         return 0;
92 }
93
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96         return sample->raw_data + field->offset;
97 }
98
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101         field->offset = format_field->offset;
102         field->pointer = tp_field__ptr;
103         return 0;
104 }
105
106 struct syscall_tp {
107         struct tp_field id;
108         union {
109                 struct tp_field args, ret;
110         };
111 };
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114                                           struct tp_field *field,
115                                           const char *name)
116 {
117         struct format_field *format_field = perf_evsel__field(evsel, name);
118
119         if (format_field == NULL)
120                 return -1;
121
122         return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
/*
 * Initialise the integer accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv; the tracepoint field looked up
 * has the same name as the member. Evaluates to the init function's result.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ \
                struct syscall_tp *sc = evsel->priv; \
                perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); \
        })
128
/*
 * Look up tracepoint field @name in @evsel and initialise @field as a
 * pointer accessor for it.
 *
 * Returns 0 on success, -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
140
/*
 * Initialise the pointer accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv; the tracepoint field looked up
 * has the same name as the member. Evaluates to the init function's result.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ \
                struct syscall_tp *sc = evsel->priv; \
                perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
        })
144
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147         free(evsel->priv);
148         evsel->priv = NULL;
149         perf_evsel__delete(evsel);
150 }
151
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
153 {
154         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
155
156         if (evsel) {
157                 evsel->priv = malloc(sizeof(struct syscall_tp));
158
159                 if (evsel->priv == NULL)
160                         goto out_delete;
161
162                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
163                         goto out_delete;
164
165                 evsel->handler = handler;
166         }
167
168         return evsel;
169
170 out_delete:
171         perf_evsel__delete_priv(evsel);
172         return NULL;
173 }
174
/*
 * Read integer tracepoint field @name (a struct syscall_tp member) from
 * @sample, through the accessor stored in evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ \
                struct syscall_tp *fields = evsel->priv; \
                fields->name.integer(&fields->name, sample); \
        })
178
/*
 * Get a pointer to tracepoint field @name (a struct syscall_tp member)
 * inside @sample, through the accessor stored in evsel->priv.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ \
                struct syscall_tp *fields = evsel->priv; \
                fields->name.pointer(&fields->name, sample); \
        })
182
183 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
184                                           void *sys_enter_handler,
185                                           void *sys_exit_handler)
186 {
187         int ret = -1;
188         struct perf_evsel *sys_enter, *sys_exit;
189
190         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
191         if (sys_enter == NULL)
192                 goto out;
193
194         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
195                 goto out_delete_sys_enter;
196
197         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
198         if (sys_exit == NULL)
199                 goto out_delete_sys_enter;
200
201         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
202                 goto out_delete_sys_exit;
203
204         perf_evlist__add(evlist, sys_enter);
205         perf_evlist__add(evlist, sys_exit);
206
207         ret = 0;
208 out:
209         return ret;
210
211 out_delete_sys_exit:
212         perf_evsel__delete_priv(sys_exit);
213 out_delete_sys_enter:
214         perf_evsel__delete_priv(sys_enter);
215         goto out;
216 }
217
218
219 struct syscall_arg {
220         unsigned long val;
221         struct thread *thread;
222         struct trace  *trace;
223         void          *parm;
224         u8            idx;
225         u8            mask;
226 };
227
/*
 * A table of names for consecutive integer values.
 *
 * offset     - value that maps to entries[0]; subtracted before indexing.
 * nr_entries - number of slots in entries[].
 * entries    - the names themselves.
 */
struct strarray {
        int offset;
        int nr_entries;
        const char **entries;
};
233
/*
 * Define 'strarray__<array>', a struct strarray describing the string table
 * @array (which must be a real array so that ARRAY_SIZE() works). The offset
 * is left at zero.
 */
#define DEFINE_STRARRAY(array) \
        struct strarray strarray__##array = { \
                .entries    = array, \
                .nr_entries = ARRAY_SIZE(array), \
        }
238
/*
 * Like DEFINE_STRARRAY() but for tables whose first entry names the value
 * @off rather than 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) \
        struct strarray strarray__##array = { \
                .entries    = array, \
                .nr_entries = ARRAY_SIZE(array), \
                .offset     = off, \
        }
244
245 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
246                                                 const char *intfmt,
247                                                 struct syscall_arg *arg)
248 {
249         struct strarray *sa = arg->parm;
250         int idx = arg->val - sa->offset;
251
252         if (idx < 0 || idx >= sa->nr_entries)
253                 return scnprintf(bf, size, intfmt, arg->val);
254
255         return scnprintf(bf, size, "%s", sa->entries[idx]);
256 }
257
/*
 * String table formatter that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
265
/*
 * String table formatter that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
273
/*
 * File descriptor formatter. Only declared here, so that the formatters
 * and the syscall table below can refer to it before its definition.
 */
static size_t syscall_arg__scnprintf_fd(char *bf,
                                        size_t size,
                                        struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
278
279 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
280                                            struct syscall_arg *arg)
281 {
282         int fd = arg->val;
283
284         if (fd == AT_FDCWD)
285                 return scnprintf(bf, size, "CWD");
286
287         return syscall_arg__scnprintf_fd(bf, size, arg);
288 }
289
290 #define SCA_FDAT syscall_arg__scnprintf_fd_at
291
/*
 * Formatter for the fd argument of close(). Only declared here; the
 * definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf,
                                              size_t size,
                                              struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
296
297 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
298                                          struct syscall_arg *arg)
299 {
300         return scnprintf(bf, size, "%#lx", arg->val);
301 }
302
303 #define SCA_HEX syscall_arg__scnprintf_hex
304
305 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
306                                                struct syscall_arg *arg)
307 {
308         int printed = 0, prot = arg->val;
309
310         if (prot == PROT_NONE)
311                 return scnprintf(bf, size, "NONE");
312 #define P_MMAP_PROT(n) \
313         if (prot & PROT_##n) { \
314                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
315                 prot &= ~PROT_##n; \
316         }
317
318         P_MMAP_PROT(EXEC);
319         P_MMAP_PROT(READ);
320         P_MMAP_PROT(WRITE);
321 #ifdef PROT_SEM
322         P_MMAP_PROT(SEM);
323 #endif
324         P_MMAP_PROT(GROWSDOWN);
325         P_MMAP_PROT(GROWSUP);
326 #undef P_MMAP_PROT
327
328         if (prot)
329                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
330
331         return printed;
332 }
333
334 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
335
336 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
337                                                 struct syscall_arg *arg)
338 {
339         int printed = 0, flags = arg->val;
340
341 #define P_MMAP_FLAG(n) \
342         if (flags & MAP_##n) { \
343                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344                 flags &= ~MAP_##n; \
345         }
346
347         P_MMAP_FLAG(SHARED);
348         P_MMAP_FLAG(PRIVATE);
349 #ifdef MAP_32BIT
350         P_MMAP_FLAG(32BIT);
351 #endif
352         P_MMAP_FLAG(ANONYMOUS);
353         P_MMAP_FLAG(DENYWRITE);
354         P_MMAP_FLAG(EXECUTABLE);
355         P_MMAP_FLAG(FILE);
356         P_MMAP_FLAG(FIXED);
357         P_MMAP_FLAG(GROWSDOWN);
358 #ifdef MAP_HUGETLB
359         P_MMAP_FLAG(HUGETLB);
360 #endif
361         P_MMAP_FLAG(LOCKED);
362         P_MMAP_FLAG(NONBLOCK);
363         P_MMAP_FLAG(NORESERVE);
364         P_MMAP_FLAG(POPULATE);
365         P_MMAP_FLAG(STACK);
366 #ifdef MAP_UNINITIALIZED
367         P_MMAP_FLAG(UNINITIALIZED);
368 #endif
369 #undef P_MMAP_FLAG
370
371         if (flags)
372                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
373
374         return printed;
375 }
376
377 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
378
379 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
380                                                       struct syscall_arg *arg)
381 {
382         int behavior = arg->val;
383
384         switch (behavior) {
385 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
386         P_MADV_BHV(NORMAL);
387         P_MADV_BHV(RANDOM);
388         P_MADV_BHV(SEQUENTIAL);
389         P_MADV_BHV(WILLNEED);
390         P_MADV_BHV(DONTNEED);
391         P_MADV_BHV(REMOVE);
392         P_MADV_BHV(DONTFORK);
393         P_MADV_BHV(DOFORK);
394         P_MADV_BHV(HWPOISON);
395 #ifdef MADV_SOFT_OFFLINE
396         P_MADV_BHV(SOFT_OFFLINE);
397 #endif
398         P_MADV_BHV(MERGEABLE);
399         P_MADV_BHV(UNMERGEABLE);
400 #ifdef MADV_HUGEPAGE
401         P_MADV_BHV(HUGEPAGE);
402 #endif
403 #ifdef MADV_NOHUGEPAGE
404         P_MADV_BHV(NOHUGEPAGE);
405 #endif
406 #ifdef MADV_DONTDUMP
407         P_MADV_BHV(DONTDUMP);
408 #endif
409 #ifdef MADV_DODUMP
410         P_MADV_BHV(DODUMP);
411 #endif
412 #undef P_MADV_PHV
413         default: break;
414         }
415
416         return scnprintf(bf, size, "%#x", behavior);
417 }
418
419 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
420
421 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
422                                            struct syscall_arg *arg)
423 {
424         int printed = 0, op = arg->val;
425
426         if (op == 0)
427                 return scnprintf(bf, size, "NONE");
428 #define P_CMD(cmd) \
429         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
430                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
431                 op &= ~LOCK_##cmd; \
432         }
433
434         P_CMD(SH);
435         P_CMD(EX);
436         P_CMD(NB);
437         P_CMD(UN);
438         P_CMD(MAND);
439         P_CMD(RW);
440         P_CMD(READ);
441         P_CMD(WRITE);
442 #undef P_OP
443
444         if (op)
445                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
446
447         return printed;
448 }
449
450 #define SCA_FLOCK syscall_arg__scnprintf_flock
451
452 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
453 {
454         enum syscall_futex_args {
455                 SCF_UADDR   = (1 << 0),
456                 SCF_OP      = (1 << 1),
457                 SCF_VAL     = (1 << 2),
458                 SCF_TIMEOUT = (1 << 3),
459                 SCF_UADDR2  = (1 << 4),
460                 SCF_VAL3    = (1 << 5),
461         };
462         int op = arg->val;
463         int cmd = op & FUTEX_CMD_MASK;
464         size_t printed = 0;
465
466         switch (cmd) {
467 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
468         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
469         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
470         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
471         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
472         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
473         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
474         P_FUTEX_OP(WAKE_OP);                                                      break;
475         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
476         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
477         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
478         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
479         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
480         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
481         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
482         }
483
484         if (op & FUTEX_PRIVATE_FLAG)
485                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
486
487         if (op & FUTEX_CLOCK_REALTIME)
488                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
489
490         return printed;
491 }
492
493 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
494
495 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
496 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
497
/* Names for the getitimer 'which' argument, indexed by value. */
static const char *itimers[] = {
        "REAL",
        "VIRTUAL",
        "PROF",
};
static DEFINE_STRARRAY(itimers);
500
/*
 * Names for a seek 'whence' value, indexed by value. DATA and HOLE are only
 * listed when the system headers know about them.
 */
static const char *whences[] = {
        "SET",
        "CUR",
        "END",
#ifdef SEEK_DATA
        "DATA",
#endif
#ifdef SEEK_HOLE
        "HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
510
/* Names for the fcntl 'cmd' argument, indexed by value. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD",
        "GETFL", "SETFL",
        "GETLK", "SETLK", "SETLKW",
        "SETOWN", "GETOWN",
        "SETSIG", "GETSIG",
        "F_GETLK64", "F_SETLK64", "F_SETLKW64",
        "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
518
/* Names for the getrlimit 'resource' argument, indexed by value. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK",
        "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING",
        "MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
525
/* Names for a signal mask 'how' value, indexed by value. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);
528
/* Names for the clock_gettime 'clk_id' argument, indexed by value. */
static const char *clockid[] = {
        "REALTIME",
        "MONOTONIC",
        "PROCESS_CPUTIME_ID",
        "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW",
        "REALTIME_COARSE",
        "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
534
/*
 * Names for socket address families, indexed by the AF_* value.
 *
 * The table is positional, so every family number needs a slot: family 28
 * (AF_MPLS) sits between IB (27) and CAN (29). Without that slot CAN and
 * every later name would be reported for the value one below its own.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
544
545 #ifndef SOCK_TYPE_MASK
546 #define SOCK_TYPE_MASK 0xf
547 #endif
548
549 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
550                                                       struct syscall_arg *arg)
551 {
552         size_t printed;
553         int type = arg->val,
554             flags = type & ~SOCK_TYPE_MASK;
555
556         type &= SOCK_TYPE_MASK;
557         /*
558          * Can't use a strarray, MIPS may override for ABI reasons.
559          */
560         switch (type) {
561 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
562         P_SK_TYPE(STREAM);
563         P_SK_TYPE(DGRAM);
564         P_SK_TYPE(RAW);
565         P_SK_TYPE(RDM);
566         P_SK_TYPE(SEQPACKET);
567         P_SK_TYPE(DCCP);
568         P_SK_TYPE(PACKET);
569 #undef P_SK_TYPE
570         default:
571                 printed = scnprintf(bf, size, "%#x", type);
572         }
573
574 #define P_SK_FLAG(n) \
575         if (flags & SOCK_##n) { \
576                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
577                 flags &= ~SOCK_##n; \
578         }
579
580         P_SK_FLAG(CLOEXEC);
581         P_SK_FLAG(NONBLOCK);
582 #undef P_SK_FLAG
583
584         if (flags)
585                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
586
587         return printed;
588 }
589
590 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
591
592 #ifndef MSG_PROBE
593 #define MSG_PROBE            0x10
594 #endif
595 #ifndef MSG_WAITFORONE
596 #define MSG_WAITFORONE  0x10000
597 #endif
598 #ifndef MSG_SENDPAGE_NOTLAST
599 #define MSG_SENDPAGE_NOTLAST 0x20000
600 #endif
601 #ifndef MSG_FASTOPEN
602 #define MSG_FASTOPEN         0x20000000
603 #endif
604
605 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
606                                                struct syscall_arg *arg)
607 {
608         int printed = 0, flags = arg->val;
609
610         if (flags == 0)
611                 return scnprintf(bf, size, "NONE");
612 #define P_MSG_FLAG(n) \
613         if (flags & MSG_##n) { \
614                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
615                 flags &= ~MSG_##n; \
616         }
617
618         P_MSG_FLAG(OOB);
619         P_MSG_FLAG(PEEK);
620         P_MSG_FLAG(DONTROUTE);
621         P_MSG_FLAG(TRYHARD);
622         P_MSG_FLAG(CTRUNC);
623         P_MSG_FLAG(PROBE);
624         P_MSG_FLAG(TRUNC);
625         P_MSG_FLAG(DONTWAIT);
626         P_MSG_FLAG(EOR);
627         P_MSG_FLAG(WAITALL);
628         P_MSG_FLAG(FIN);
629         P_MSG_FLAG(SYN);
630         P_MSG_FLAG(CONFIRM);
631         P_MSG_FLAG(RST);
632         P_MSG_FLAG(ERRQUEUE);
633         P_MSG_FLAG(NOSIGNAL);
634         P_MSG_FLAG(MORE);
635         P_MSG_FLAG(WAITFORONE);
636         P_MSG_FLAG(SENDPAGE_NOTLAST);
637         P_MSG_FLAG(FASTOPEN);
638         P_MSG_FLAG(CMSG_CLOEXEC);
639 #undef P_MSG_FLAG
640
641         if (flags)
642                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
643
644         return printed;
645 }
646
647 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
648
649 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
650                                                  struct syscall_arg *arg)
651 {
652         size_t printed = 0;
653         int mode = arg->val;
654
655         if (mode == F_OK) /* 0 */
656                 return scnprintf(bf, size, "F");
657 #define P_MODE(n) \
658         if (mode & n##_OK) { \
659                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
660                 mode &= ~n##_OK; \
661         }
662
663         P_MODE(R);
664         P_MODE(W);
665         P_MODE(X);
666 #undef P_MODE
667
668         if (mode)
669                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
670
671         return printed;
672 }
673
674 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
675
676 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
677                                                struct syscall_arg *arg)
678 {
679         int printed = 0, flags = arg->val;
680
681         if (!(flags & O_CREAT))
682                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
683
684         if (flags == 0)
685                 return scnprintf(bf, size, "RDONLY");
686 #define P_FLAG(n) \
687         if (flags & O_##n) { \
688                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
689                 flags &= ~O_##n; \
690         }
691
692         P_FLAG(APPEND);
693         P_FLAG(ASYNC);
694         P_FLAG(CLOEXEC);
695         P_FLAG(CREAT);
696         P_FLAG(DIRECT);
697         P_FLAG(DIRECTORY);
698         P_FLAG(EXCL);
699         P_FLAG(LARGEFILE);
700         P_FLAG(NOATIME);
701         P_FLAG(NOCTTY);
702 #ifdef O_NONBLOCK
703         P_FLAG(NONBLOCK);
704 #elif O_NDELAY
705         P_FLAG(NDELAY);
706 #endif
707 #ifdef O_PATH
708         P_FLAG(PATH);
709 #endif
710         P_FLAG(RDWR);
711 #ifdef O_DSYNC
712         if ((flags & O_SYNC) == O_SYNC)
713                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
714         else {
715                 P_FLAG(DSYNC);
716         }
717 #else
718         P_FLAG(SYNC);
719 #endif
720         P_FLAG(TRUNC);
721         P_FLAG(WRONLY);
722 #undef P_FLAG
723
724         if (flags)
725                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
726
727         return printed;
728 }
729
730 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
731
732 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
733                                                    struct syscall_arg *arg)
734 {
735         int printed = 0, flags = arg->val;
736
737         if (flags == 0)
738                 return scnprintf(bf, size, "NONE");
739 #define P_FLAG(n) \
740         if (flags & EFD_##n) { \
741                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
742                 flags &= ~EFD_##n; \
743         }
744
745         P_FLAG(SEMAPHORE);
746         P_FLAG(CLOEXEC);
747         P_FLAG(NONBLOCK);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
757
758 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
759                                                 struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763 #define P_FLAG(n) \
764         if (flags & O_##n) { \
765                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766                 flags &= ~O_##n; \
767         }
768
769         P_FLAG(CLOEXEC);
770         P_FLAG(NONBLOCK);
771 #undef P_FLAG
772
773         if (flags)
774                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
775
776         return printed;
777 }
778
779 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
780
781 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
782 {
783         int sig = arg->val;
784
785         switch (sig) {
786 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
787         P_SIGNUM(HUP);
788         P_SIGNUM(INT);
789         P_SIGNUM(QUIT);
790         P_SIGNUM(ILL);
791         P_SIGNUM(TRAP);
792         P_SIGNUM(ABRT);
793         P_SIGNUM(BUS);
794         P_SIGNUM(FPE);
795         P_SIGNUM(KILL);
796         P_SIGNUM(USR1);
797         P_SIGNUM(SEGV);
798         P_SIGNUM(USR2);
799         P_SIGNUM(PIPE);
800         P_SIGNUM(ALRM);
801         P_SIGNUM(TERM);
802         P_SIGNUM(STKFLT);
803         P_SIGNUM(CHLD);
804         P_SIGNUM(CONT);
805         P_SIGNUM(STOP);
806         P_SIGNUM(TSTP);
807         P_SIGNUM(TTIN);
808         P_SIGNUM(TTOU);
809         P_SIGNUM(URG);
810         P_SIGNUM(XCPU);
811         P_SIGNUM(XFSZ);
812         P_SIGNUM(VTALRM);
813         P_SIGNUM(PROF);
814         P_SIGNUM(WINCH);
815         P_SIGNUM(IO);
816         P_SIGNUM(PWR);
817         P_SIGNUM(SYS);
818         default: break;
819         }
820
821         return scnprintf(bf, size, "%#x", sig);
822 }
823
824 #define SCA_SIGNUM syscall_arg__scnprintf_signum
825
826 #define TCGETS          0x5401
827
828 static const char *tioctls[] = {
829         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
830         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
831         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
832         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
833         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
834         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
835         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
836         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
837         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
838         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
839         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
840         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
841         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
842         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
843         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
844 };
845
846 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
847
/*
 * Initialiser fragment for a struct syscall_fmt entry: print argument
 * number @arg through SCA_STRARRAY using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { \
                  [arg] = SCA_STRARRAY, \
          }, \
          .arg_parm      = { \
                  [arg] = &strarray__##array, \
          }
851
852 static struct syscall_fmt {
853         const char *name;
854         const char *alias;
855         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
856         void       *arg_parm[6];
857         bool       errmsg;
858         bool       timeout;
859         bool       hexret;
860 } syscall_fmts[] = {
861         { .name     = "access",     .errmsg = true,
862           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
863         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
864         { .name     = "brk",        .hexret = true,
865           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
866         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
867         { .name     = "close",      .errmsg = true,
868           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
869         { .name     = "connect",    .errmsg = true, },
870         { .name     = "dup",        .errmsg = true,
871           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
872         { .name     = "dup2",       .errmsg = true,
873           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
874         { .name     = "dup3",       .errmsg = true,
875           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
876         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
877         { .name     = "eventfd2",   .errmsg = true,
878           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
879         { .name     = "faccessat",  .errmsg = true,
880           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
881         { .name     = "fadvise64",  .errmsg = true,
882           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
883         { .name     = "fallocate",  .errmsg = true,
884           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
885         { .name     = "fchdir",     .errmsg = true,
886           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
887         { .name     = "fchmod",     .errmsg = true,
888           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
889         { .name     = "fchmodat",   .errmsg = true,
890           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
891         { .name     = "fchown",     .errmsg = true,
892           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
893         { .name     = "fchownat",   .errmsg = true,
894           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
895         { .name     = "fcntl",      .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FD, /* fd */
897                              [1] = SCA_STRARRAY, /* cmd */ },
898           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
899         { .name     = "fdatasync",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "flock",      .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */
903                              [1] = SCA_FLOCK, /* cmd */ }, },
904         { .name     = "fsetxattr",  .errmsg = true,
905           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
906         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
907           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
908         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
909           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
910         { .name     = "fstatfs",    .errmsg = true,
911           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
912         { .name     = "fsync",    .errmsg = true,
913           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
914         { .name     = "ftruncate", .errmsg = true,
915           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
916         { .name     = "futex",      .errmsg = true,
917           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
918         { .name     = "futimesat", .errmsg = true,
919           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
920         { .name     = "getdents",   .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "getdents64", .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
925         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
926         { .name     = "ioctl",      .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
928                              [1] = SCA_STRHEXARRAY, /* cmd */
929                              [2] = SCA_HEX, /* arg */ },
930           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
931         { .name     = "kill",       .errmsg = true,
932           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
933         { .name     = "linkat",     .errmsg = true,
934           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
935         { .name     = "lseek",      .errmsg = true,
936           .arg_scnprintf = { [0] = SCA_FD, /* fd */
937                              [2] = SCA_STRARRAY, /* whence */ },
938           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
939         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
940         { .name     = "madvise",    .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
942                              [2] = SCA_MADV_BHV, /* behavior */ }, },
943         { .name     = "mkdirat",    .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
945         { .name     = "mknodat",    .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
947         { .name     = "mlock",      .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
949         { .name     = "mlockall",   .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951         { .name     = "mmap",       .hexret = true,
952           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
953                              [2] = SCA_MMAP_PROT, /* prot */
954                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
955         { .name     = "mprotect",   .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_HEX, /* start */
957                              [2] = SCA_MMAP_PROT, /* prot */ }, },
958         { .name     = "mremap",     .hexret = true,
959           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
960                              [4] = SCA_HEX, /* new_addr */ }, },
961         { .name     = "munlock",    .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963         { .name     = "munmap",     .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965         { .name     = "name_to_handle_at", .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
967         { .name     = "newfstatat", .errmsg = true,
968           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
969         { .name     = "open",       .errmsg = true,
970           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
971         { .name     = "open_by_handle_at", .errmsg = true,
972           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
973                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
974         { .name     = "openat",     .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
976                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
977         { .name     = "pipe2",      .errmsg = true,
978           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
979         { .name     = "poll",       .errmsg = true, .timeout = true, },
980         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
981         { .name     = "pread",      .errmsg = true, .alias = "pread64",
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
983         { .name     = "preadv",     .errmsg = true, .alias = "pread",
984           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
985         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
986         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
987           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
988         { .name     = "pwritev",    .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
990         { .name     = "read",       .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
992         { .name     = "readlinkat", .errmsg = true,
993           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
994         { .name     = "readv",      .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
996         { .name     = "recvfrom",   .errmsg = true,
997           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
998         { .name     = "recvmmsg",   .errmsg = true,
999           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000         { .name     = "recvmsg",    .errmsg = true,
1001           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1002         { .name     = "renameat",   .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1004         { .name     = "rt_sigaction", .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1006         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1007         { .name     = "rt_sigqueueinfo", .errmsg = true,
1008           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1009         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1010           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1011         { .name     = "select",     .errmsg = true, .timeout = true, },
1012         { .name     = "sendmmsg",    .errmsg = true,
1013           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1014         { .name     = "sendmsg",    .errmsg = true,
1015           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1016         { .name     = "sendto",     .errmsg = true,
1017           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1018         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1019         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1020         { .name     = "shutdown",   .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1022         { .name     = "socket",     .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1024                              [1] = SCA_SK_TYPE, /* type */ },
1025           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1026         { .name     = "socketpair", .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1028                              [1] = SCA_SK_TYPE, /* type */ },
1029           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1030         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1031         { .name     = "symlinkat",  .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1033         { .name     = "tgkill",     .errmsg = true,
1034           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1035         { .name     = "tkill",      .errmsg = true,
1036           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1037         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1038         { .name     = "unlinkat",   .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040         { .name     = "utimensat",  .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1042         { .name     = "write",      .errmsg = true,
1043           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1044         { .name     = "writev",     .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1046 };
1047
1048 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1049 {
1050         const struct syscall_fmt *fmt = fmtp;
1051         return strcmp(name, fmt->name);
1052 }
1053
1054 static struct syscall_fmt *syscall_fmt__find(const char *name)
1055 {
1056         const int nmemb = ARRAY_SIZE(syscall_fmts);
1057         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1058 }
1059
/*
 * Per-syscall state, stored in trace->syscalls.table and indexed by syscall
 * id. A slot is filled in lazily by trace__read_syscall_info().
 */
struct syscall {
        /* "syscalls:sys_enter_<name>" (or alias) format from event_format__new() */
        struct event_format *tp_format;
        /* From audit_syscall_to_name(); NULL means the slot was not read yet */
        const char          *name;
        /* Set when trace->ev_qualifier excludes this syscall */
        bool                filtered;
        /* Entry in syscall_fmts for this name, NULL if there is none */
        struct syscall_fmt  *fmt;
        /* Per-argument formatters, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per-argument formatter parameters, taken from fmt->arg_parm when fmt is set */
        void                **arg_parm;
};
1068
1069 static size_t fprintf_duration(unsigned long t, FILE *fp)
1070 {
1071         double duration = (double)t / NSEC_PER_MSEC;
1072         size_t printed = fprintf(fp, "(");
1073
1074         if (duration >= 1.0)
1075                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1076         else if (duration >= 0.01)
1077                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1078         else
1079                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1080         return printed + fprintf(fp, "): ");
1081 }
1082
/*
 * Per-thread tracing state, hung off thread->priv by thread__trace().
 */
struct thread_trace {
        /* sample->time of the most recent sys_enter seen for this thread */
        u64               entry_time;
        /* sample->time of the most recent sys_exit seen for this thread */
        u64               exit_time;
        /* true once sys_enter has formatted entry_str and it was not printed yet */
        bool              entry_pending;
        /* bumped on every thread__trace() call for this thread */
        unsigned long     nr_events;
        /* 1024 byte buffer holding the formatted "name(args" text of the last sys_enter */
        char              *entry_str;
        /* NOTE(review): not touched in this part of the file; presumably run time in ms - confirm */
        double            runtime_ms;
        struct {
                /* highest fd that is a valid index into table, -1 while table is empty */
                int       max;
                /* fd -> strdup()ed pathname, NULL for fds with no known path */
                char      **table;
        } paths;

        /* keyed by syscall id; each node's ->priv points to a struct stats */
        struct intlist *syscall_stats;
};
1097
1098 static struct thread_trace *thread_trace__new(void)
1099 {
1100         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1101
1102         if (ttrace)
1103                 ttrace->paths.max = -1;
1104
1105         ttrace->syscall_stats = intlist__new(NULL);
1106
1107         return ttrace;
1108 }
1109
1110 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1111 {
1112         struct thread_trace *ttrace;
1113
1114         if (thread == NULL)
1115                 goto fail;
1116
1117         if (thread->priv == NULL)
1118                 thread->priv = thread_trace__new();
1119                 
1120         if (thread->priv == NULL)
1121                 goto fail;
1122
1123         ttrace = thread->priv;
1124         ++ttrace->nr_events;
1125
1126         return ttrace;
1127 fail:
1128         color_fprintf(fp, PERF_COLOR_RED,
1129                       "WARNING: not enough memory, dropping samples!\n");
1130         return NULL;
1131 }
1132
/*
 * Global state of one 'perf trace' run.
 */
struct trace {
        /* embedded so that callbacks can recover the trace via container_of() */
        struct perf_tool        tool;
        struct {
                /* machine id handed to audit_syscall_to_name() */
                int             machine;
                /* syscall id compared against in sys_exit to record the returned fd's pathname */
                int             open_id;
        }                       audit;
        struct {
                /* highest syscall id that is a valid index into table, -1 while empty */
                int             max;
                /* indexed by syscall id, grown on demand by trace__read_syscall_info() */
                struct syscall  *table;
        } syscalls;
        struct perf_record_opts opts;
        /* host machine, created by machine__new_host() in trace__symbols_init() */
        struct machine          *host;
        /* subtracted from sample timestamps before they are printed */
        u64                     base_time;
        bool                    full_time;
        /* stream all trace output and warnings are written to */
        FILE                    *output;
        unsigned long           nr_events;
        /* syscall name list used to filter; see not_ev_qualifier */
        struct strlist          *ev_qualifier;
        /* when true, names found in ev_qualifier are the ones filtered out */
        bool                    not_ev_qualifier;
        /* when false, thread__fd_path() does not fall back to reading /proc */
        bool                    live;
        /* pathname consumed by sys_exit when the open syscall returns an fd */
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        bool                    sched;
        /* print the tid (and optionally comm) in front of each line */
        bool                    multiple_threads;
        /* collect per-syscall statistics in sys_exit */
        bool                    summary;
        /* with multiple_threads, prefix the tid with the thread's comm */
        bool                    show_comm;
        bool                    show_tool_stats;
        /* in milliseconds; syscalls shorter than this are not printed */
        double                  duration_filter;
        double                  runtime_ms;
        struct {
                /* fd pathnames resolved via last_vfs_getname resp. via /proc */
                u64             vfs_getname, proc_getname;
        } stats;
};
1166
1167 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1168 {
1169         struct thread_trace *ttrace = thread->priv;
1170
1171         if (fd > ttrace->paths.max) {
1172                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1173
1174                 if (npath == NULL)
1175                         return -1;
1176
1177                 if (ttrace->paths.max != -1) {
1178                         memset(npath + ttrace->paths.max + 1, 0,
1179                                (fd - ttrace->paths.max) * sizeof(char *));
1180                 } else {
1181                         memset(npath, 0, (fd + 1) * sizeof(char *));
1182                 }
1183
1184                 ttrace->paths.table = npath;
1185                 ttrace->paths.max   = fd;
1186         }
1187
1188         ttrace->paths.table[fd] = strdup(pathname);
1189
1190         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1191 }
1192
1193 static int thread__read_fd_path(struct thread *thread, int fd)
1194 {
1195         char linkname[PATH_MAX], pathname[PATH_MAX];
1196         struct stat st;
1197         int ret;
1198
1199         if (thread->pid_ == thread->tid) {
1200                 scnprintf(linkname, sizeof(linkname),
1201                           "/proc/%d/fd/%d", thread->pid_, fd);
1202         } else {
1203                 scnprintf(linkname, sizeof(linkname),
1204                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1205         }
1206
1207         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1208                 return -1;
1209
1210         ret = readlink(linkname, pathname, sizeof(pathname));
1211
1212         if (ret < 0 || ret > st.st_size)
1213                 return -1;
1214
1215         pathname[ret] = '\0';
1216         return trace__set_fd_pathname(thread, fd, pathname);
1217 }
1218
1219 static const char *thread__fd_path(struct thread *thread, int fd,
1220                                    struct trace *trace)
1221 {
1222         struct thread_trace *ttrace = thread->priv;
1223
1224         if (ttrace == NULL)
1225                 return NULL;
1226
1227         if (fd < 0)
1228                 return NULL;
1229
1230         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1231                 if (!trace->live)
1232                         return NULL;
1233                 ++trace->stats.proc_getname;
1234                 if (thread__read_fd_path(thread, fd)) {
1235                         return NULL;
1236         }
1237
1238         return ttrace->paths.table[fd];
1239 }
1240
1241 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1242                                         struct syscall_arg *arg)
1243 {
1244         int fd = arg->val;
1245         size_t printed = scnprintf(bf, size, "%d", fd);
1246         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1247
1248         if (path)
1249                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1250
1251         return printed;
1252 }
1253
1254 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1255                                               struct syscall_arg *arg)
1256 {
1257         int fd = arg->val;
1258         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1259         struct thread_trace *ttrace = arg->thread->priv;
1260
1261         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1262                 free(ttrace->paths.table[fd]);
1263                 ttrace->paths.table[fd] = NULL;
1264         }
1265
1266         return printed;
1267 }
1268
1269 static bool trace__filter_duration(struct trace *trace, double t)
1270 {
1271         return t < (trace->duration_filter * NSEC_PER_MSEC);
1272 }
1273
1274 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1275 {
1276         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1277
1278         return fprintf(fp, "%10.3f ", ts);
1279 }
1280
/*
 * Flags set from sig_handler() and read by code outside the handler.
 *
 * They are volatile sig_atomic_t because that is the only object type the
 * C standard guarantees can be safely written from an asynchronous signal
 * handler and read elsewhere; a plain bool gives no such guarantee and
 * lets the compiler cache the value across a polling loop.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and note whether the request came
 * from SIGINT (as opposed to any other signal routed here).
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1289
1290 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1291                                         u64 duration, u64 tstamp, FILE *fp)
1292 {
1293         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1294         printed += fprintf_duration(duration, fp);
1295
1296         if (trace->multiple_threads) {
1297                 if (trace->show_comm)
1298                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1299                 printed += fprintf(fp, "%d ", thread->tid);
1300         }
1301
1302         return printed;
1303 }
1304
1305 static int trace__process_event(struct trace *trace, struct machine *machine,
1306                                 union perf_event *event, struct perf_sample *sample)
1307 {
1308         int ret = 0;
1309
1310         switch (event->header.type) {
1311         case PERF_RECORD_LOST:
1312                 color_fprintf(trace->output, PERF_COLOR_RED,
1313                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1314                 ret = machine__process_lost_event(machine, event, sample);
1315         default:
1316                 ret = machine__process_event(machine, event, sample);
1317                 break;
1318         }
1319
1320         return ret;
1321 }
1322
1323 static int trace__tool_process(struct perf_tool *tool,
1324                                union perf_event *event,
1325                                struct perf_sample *sample,
1326                                struct machine *machine)
1327 {
1328         struct trace *trace = container_of(tool, struct trace, tool);
1329         return trace__process_event(trace, machine, event, sample);
1330 }
1331
1332 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1333 {
1334         int err = symbol__init();
1335
1336         if (err)
1337                 return err;
1338
1339         trace->host = machine__new_host();
1340         if (trace->host == NULL)
1341                 return -ENOMEM;
1342
1343         if (perf_target__has_task(&trace->opts.target)) {
1344                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1345                                                         trace__tool_process,
1346                                                         trace->host, false);
1347         } else {
1348                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1349                                                      trace->host, false);
1350         }
1351
1352         if (err)
1353                 symbol__exit();
1354
1355         return err;
1356 }
1357
1358 static int syscall__set_arg_fmts(struct syscall *sc)
1359 {
1360         struct format_field *field;
1361         int idx = 0;
1362
1363         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1364         if (sc->arg_scnprintf == NULL)
1365                 return -1;
1366
1367         if (sc->fmt)
1368                 sc->arg_parm = sc->fmt->arg_parm;
1369
1370         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1371                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1372                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1373                 else if (field->flags & FIELD_IS_POINTER)
1374                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1375                 ++idx;
1376         }
1377
1378         return 0;
1379 }
1380
1381 static int trace__read_syscall_info(struct trace *trace, int id)
1382 {
1383         char tp_name[128];
1384         struct syscall *sc;
1385         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1386
1387         if (name == NULL)
1388                 return -1;
1389
1390         if (id > trace->syscalls.max) {
1391                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1392
1393                 if (nsyscalls == NULL)
1394                         return -1;
1395
1396                 if (trace->syscalls.max != -1) {
1397                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1398                                (id - trace->syscalls.max) * sizeof(*sc));
1399                 } else {
1400                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1401                 }
1402
1403                 trace->syscalls.table = nsyscalls;
1404                 trace->syscalls.max   = id;
1405         }
1406
1407         sc = trace->syscalls.table + id;
1408         sc->name = name;
1409
1410         if (trace->ev_qualifier) {
1411                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1412
1413                 if (!(in ^ trace->not_ev_qualifier)) {
1414                         sc->filtered = true;
1415                         /*
1416                          * No need to do read tracepoint information since this will be
1417                          * filtered out.
1418                          */
1419                         return 0;
1420                 }
1421         }
1422
1423         sc->fmt  = syscall_fmt__find(sc->name);
1424
1425         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1426         sc->tp_format = event_format__new("syscalls", tp_name);
1427
1428         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1429                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1430                 sc->tp_format = event_format__new("syscalls", tp_name);
1431         }
1432
1433         if (sc->tp_format == NULL)
1434                 return -1;
1435
1436         return syscall__set_arg_fmts(sc);
1437 }
1438
/*
 * Format the arguments of one syscall invocation into @bf.
 *
 * @sc:     syscall description; tp_format may be NULL.
 * @bf:     output buffer.
 * @size:   size of @bf in bytes.
 * @args:   raw argument values, indexed by argument position.
 * @trace:  passed on to the per-argument formatters.
 * @thread: passed on to the per-argument formatters.
 *
 * With a tracepoint format the arguments are printed as a comma separated
 * list of "name: value" pairs, otherwise as six raw "argN: value" pairs.
 *
 * Returns the number of characters written to @bf.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                      unsigned long *args, struct trace *trace,
                                      struct thread *thread)
{
        size_t printed = 0;

        if (sc->tp_format != NULL) {
                struct format_field *field;
                /* bit position matching arg.idx, tested against arg.mask */
                u8 bit = 1;
                struct syscall_arg arg = {
                        .idx    = 0,
                        .mask   = 0,
                        .trace  = trace,
                        .thread = thread,
                };

                /*
                 * The walk starts at the second field; arg.idx and bit
                 * advance together with it.
                 */
                for (field = sc->tp_format->format.fields->next; field;
                     field = field->next, ++arg.idx, bit <<= 1) {
                        /*
                         * NOTE(review): arg.mask starts at 0 and is not
                         * written here; presumably a formatter sets bits
                         * through &arg to hide later arguments - confirm.
                         */
                        if (arg.mask & bit)
                                continue;
                        /*
                         * Suppress this argument if its value is zero and
                         * we don't have a string associated with it in a
                         * strarray.
                         */
                        if (args[arg.idx] == 0 &&
                            !(sc->arg_scnprintf &&
                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
                              sc->arg_parm[arg.idx]))
                                continue;

                        /* ", " separates arguments once something was printed */
                        printed += scnprintf(bf + printed, size - printed,
                                             "%s%s: ", printed ? ", " : "", field->name);
                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
                                arg.val = args[arg.idx];
                                if (sc->arg_parm)
                                        arg.parm = sc->arg_parm[arg.idx];
                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
                                                                      size - printed, &arg);
                        } else {
                                /* No formatter for this argument: print it as a plain number */
                                printed += scnprintf(bf + printed, size - printed,
                                                     "%ld", args[arg.idx]);
                        }
                }
        } else {
                int i = 0;

                /* No format information: dump six raw argument values */
                while (i < 6) {
                        printed += scnprintf(bf + printed, size - printed,
                                             "%sarg%d: %ld",
                                             printed ? ", " : "", i, args[i]);
                        ++i;
                }
        }

        return printed;
}
1496
/*
 * Signature shared by the per-tracepoint sample handlers such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1499
1500 static struct syscall *trace__syscall_info(struct trace *trace,
1501                                            struct perf_evsel *evsel, int id)
1502 {
1503
1504         if (id < 0) {
1505
1506                 /*
1507                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1508                  * before that, leaving at a higher verbosity level till that is
1509                  * explained. Reproduced with plain ftrace with:
1510                  *
1511                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1512                  * grep "NR -1 " /t/trace_pipe
1513                  *
1514                  * After generating some load on the machine.
1515                  */
1516                 if (verbose > 1) {
1517                         static u64 n;
1518                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1519                                 id, perf_evsel__name(evsel), ++n);
1520                 }
1521                 return NULL;
1522         }
1523
1524         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1525             trace__read_syscall_info(trace, id))
1526                 goto out_cant_read;
1527
1528         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1529                 goto out_cant_read;
1530
1531         return &trace->syscalls.table[id];
1532
1533 out_cant_read:
1534         if (verbose) {
1535                 fprintf(trace->output, "Problems reading syscall %d", id);
1536                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1537                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1538                 fputs(" information\n", trace->output);
1539         }
1540         return NULL;
1541 }
1542
1543 static void thread__update_stats(struct thread_trace *ttrace,
1544                                  int id, struct perf_sample *sample)
1545 {
1546         struct int_node *inode;
1547         struct stats *stats;
1548         u64 duration = 0;
1549
1550         inode = intlist__findnew(ttrace->syscall_stats, id);
1551         if (inode == NULL)
1552                 return;
1553
1554         stats = inode->priv;
1555         if (stats == NULL) {
1556                 stats = malloc(sizeof(struct stats));
1557                 if (stats == NULL)
1558                         return;
1559                 init_stats(stats);
1560                 inode->priv = stats;
1561         }
1562
1563         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1564                 duration = sample->time - ttrace->entry_time;
1565
1566         update_stats(stats, duration);
1567 }
1568
1569 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1570                             struct perf_sample *sample)
1571 {
1572         char *msg;
1573         void *args;
1574         size_t printed = 0;
1575         struct thread *thread;
1576         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1577         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1578         struct thread_trace *ttrace;
1579
1580         if (sc == NULL)
1581                 return -1;
1582
1583         if (sc->filtered)
1584                 return 0;
1585
1586         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1587         ttrace = thread__trace(thread, trace->output);
1588         if (ttrace == NULL)
1589                 return -1;
1590
1591         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1592         ttrace = thread->priv;
1593
1594         if (ttrace->entry_str == NULL) {
1595                 ttrace->entry_str = malloc(1024);
1596                 if (!ttrace->entry_str)
1597                         return -1;
1598         }
1599
1600         ttrace->entry_time = sample->time;
1601         msg = ttrace->entry_str;
1602         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1603
1604         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1605                                            args, trace, thread);
1606
1607         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1608                 if (!trace->duration_filter) {
1609                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1610                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1611                 }
1612         } else
1613                 ttrace->entry_pending = true;
1614
1615         return 0;
1616 }
1617
1618 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1619                            struct perf_sample *sample)
1620 {
1621         int ret;
1622         u64 duration = 0;
1623         struct thread *thread;
1624         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1625         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1626         struct thread_trace *ttrace;
1627
1628         if (sc == NULL)
1629                 return -1;
1630
1631         if (sc->filtered)
1632                 return 0;
1633
1634         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1635         ttrace = thread__trace(thread, trace->output);
1636         if (ttrace == NULL)
1637                 return -1;
1638
1639         if (trace->summary)
1640                 thread__update_stats(ttrace, id, sample);
1641
1642         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1643
1644         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1645                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1646                 trace->last_vfs_getname = NULL;
1647                 ++trace->stats.vfs_getname;
1648         }
1649
1650         ttrace = thread->priv;
1651
1652         ttrace->exit_time = sample->time;
1653
1654         if (ttrace->entry_time) {
1655                 duration = sample->time - ttrace->entry_time;
1656                 if (trace__filter_duration(trace, duration))
1657                         goto out;
1658         } else if (trace->duration_filter)
1659                 goto out;
1660
1661         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1662
1663         if (ttrace->entry_pending) {
1664                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1665         } else {
1666                 fprintf(trace->output, " ... [");
1667                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1668                 fprintf(trace->output, "]: %s()", sc->name);
1669         }
1670
1671         if (sc->fmt == NULL) {
1672 signed_print:
1673                 fprintf(trace->output, ") = %d", ret);
1674         } else if (ret < 0 && sc->fmt->errmsg) {
1675                 char bf[256];
1676                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1677                            *e = audit_errno_to_name(-ret);
1678
1679                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1680         } else if (ret == 0 && sc->fmt->timeout)
1681                 fprintf(trace->output, ") = 0 Timeout");
1682         else if (sc->fmt->hexret)
1683                 fprintf(trace->output, ") = %#x", ret);
1684         else
1685                 goto signed_print;
1686
1687         fputc('\n', trace->output);
1688 out:
1689         ttrace->entry_pending = false;
1690
1691         return 0;
1692 }
1693
1694 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1695                               struct perf_sample *sample)
1696 {
1697         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1698         return 0;
1699 }
1700
1701 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1702                                      struct perf_sample *sample)
1703 {
1704         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1705         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1706         struct thread *thread = machine__findnew_thread(trace->host,
1707                                                         sample->pid,
1708                                                         sample->tid);
1709         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1710
1711         if (ttrace == NULL)
1712                 goto out_dump;
1713
1714         ttrace->runtime_ms += runtime_ms;
1715         trace->runtime_ms += runtime_ms;
1716         return 0;
1717
1718 out_dump:
1719         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1720                evsel->name,
1721                perf_evsel__strval(evsel, sample, "comm"),
1722                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1723                runtime,
1724                perf_evsel__intval(evsel, sample, "vruntime"));
1725         return 0;
1726 }
1727
1728 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1729 {
1730         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1731             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1732                 return false;
1733
1734         if (trace->pid_list || trace->tid_list)
1735                 return true;
1736
1737         return false;
1738 }
1739
1740 static int trace__process_sample(struct perf_tool *tool,
1741                                  union perf_event *event __maybe_unused,
1742                                  struct perf_sample *sample,
1743                                  struct perf_evsel *evsel,
1744                                  struct machine *machine __maybe_unused)
1745 {
1746         struct trace *trace = container_of(tool, struct trace, tool);
1747         int err = 0;
1748
1749         tracepoint_handler handler = evsel->handler;
1750
1751         if (skip_sample(trace, sample))
1752                 return 0;
1753
1754         if (!trace->full_time && trace->base_time == 0)
1755                 trace->base_time = sample->time;
1756
1757         if (handler)
1758                 handler(trace, evsel, sample);
1759
1760         return err;
1761 }
1762
1763 static bool
1764 perf_session__has_tp(struct perf_session *session, const char *name)
1765 {
1766         struct perf_evsel *evsel;
1767
1768         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1769
1770         return evsel != NULL;
1771 }
1772
1773 static int parse_target_str(struct trace *trace)
1774 {
1775         if (trace->opts.target.pid) {
1776                 trace->pid_list = intlist__new(trace->opts.target.pid);
1777                 if (trace->pid_list == NULL) {
1778                         pr_err("Error parsing process id string\n");
1779                         return -EINVAL;
1780                 }
1781         }
1782
1783         if (trace->opts.target.tid) {
1784                 trace->tid_list = intlist__new(trace->opts.target.tid);
1785                 if (trace->tid_list == NULL) {
1786                         pr_err("Error parsing thread id string\n");
1787                         return -EINVAL;
1788                 }
1789         }
1790
1791         return 0;
1792 }
1793
/*
 * Implements "perf trace record": build an argument vector made of a fixed
 * set of "record" options followed by the caller's own arguments and hand
 * it over to cmd_record().
 *
 * Returns -ENOMEM when the vector can't be allocated, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int nr_total = nr_fixed + argc;
	const char **rec_argv;
	unsigned int n;

	/* One extra, zeroed slot so that the vector is NULL terminated. */
	rec_argv = calloc(nr_total + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (n = 0; n < nr_fixed; n++)
		rec_argv[n] = record_args[n];

	for (; n < nr_total; n++)
		rec_argv[n] = argv[n - nr_fixed];

	return cmd_record(n, rec_argv, NULL);
}
1820
1821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1822
1823 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1824 {
1825         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1826         if (evsel == NULL)
1827                 return;
1828
1829         if (perf_evsel__field(evsel, "pathname") == NULL) {
1830                 perf_evsel__delete(evsel);
1831                 return;
1832         }
1833
1834         evsel->handler = trace__vfs_getname;
1835         perf_evlist__add(evlist, evsel);
1836 }
1837
1838 static int trace__run(struct trace *trace, int argc, const char **argv)
1839 {
1840         struct perf_evlist *evlist = perf_evlist__new();
1841         struct perf_evsel *evsel;
1842         int err = -1, i;
1843         unsigned long before;
1844         const bool forks = argc > 0;
1845
1846         trace->live = true;
1847
1848         if (evlist == NULL) {
1849                 fprintf(trace->output, "Not enough memory to run!\n");
1850                 goto out;
1851         }
1852
1853         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1854                 goto out_error_tp;
1855
1856         perf_evlist__add_vfs_getname(evlist);
1857
1858         if (trace->sched &&
1859                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1860                                 trace__sched_stat_runtime))
1861                 goto out_error_tp;
1862
1863         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1864         if (err < 0) {
1865                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1866                 goto out_delete_evlist;
1867         }
1868
1869         err = trace__symbols_init(trace, evlist);
1870         if (err < 0) {
1871                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1872                 goto out_delete_maps;
1873         }
1874
1875         perf_evlist__config(evlist, &trace->opts);
1876
1877         signal(SIGCHLD, sig_handler);
1878         signal(SIGINT, sig_handler);
1879
1880         if (forks) {
1881                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1882                                                     argv, false, false);
1883                 if (err < 0) {
1884                         fprintf(trace->output, "Couldn't run the workload!\n");
1885                         goto out_delete_maps;
1886                 }
1887         }
1888
1889         err = perf_evlist__open(evlist);
1890         if (err < 0)
1891                 goto out_error_open;
1892
1893         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1894         if (err < 0) {
1895                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1896                 goto out_close_evlist;
1897         }
1898
1899         perf_evlist__enable(evlist);
1900
1901         if (forks)
1902                 perf_evlist__start_workload(evlist);
1903
1904         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1905 again:
1906         before = trace->nr_events;
1907
1908         for (i = 0; i < evlist->nr_mmaps; i++) {
1909                 union perf_event *event;
1910
1911                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1912                         const u32 type = event->header.type;
1913                         tracepoint_handler handler;
1914                         struct perf_sample sample;
1915
1916                         ++trace->nr_events;
1917
1918                         err = perf_evlist__parse_sample(evlist, event, &sample);
1919                         if (err) {
1920                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1921                                 goto next_event;
1922                         }
1923
1924                         if (!trace->full_time && trace->base_time == 0)
1925                                 trace->base_time = sample.time;
1926
1927                         if (type != PERF_RECORD_SAMPLE) {
1928                                 trace__process_event(trace, trace->host, event, &sample);
1929                                 continue;
1930                         }
1931
1932                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1933                         if (evsel == NULL) {
1934                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1935                                 goto next_event;
1936                         }
1937
1938                         if (sample.raw_data == NULL) {
1939                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1940                                        perf_evsel__name(evsel), sample.tid,
1941                                        sample.cpu, sample.raw_size);
1942                                 goto next_event;
1943                         }
1944
1945                         handler = evsel->handler;
1946                         handler(trace, evsel, &sample);
1947 next_event:
1948                         perf_evlist__mmap_consume(evlist, i);
1949
1950                         if (interrupted)
1951                                 goto out_disable;
1952                 }
1953         }
1954
1955         if (trace->nr_events == before) {
1956                 int timeout = done ? 100 : -1;
1957
1958                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1959                         goto again;
1960         } else {
1961                 goto again;
1962         }
1963
1964 out_disable:
1965         perf_evlist__disable(evlist);
1966
1967         if (!err) {
1968                 if (trace->summary)
1969                         trace__fprintf_thread_summary(trace, trace->output);
1970
1971                 if (trace->show_tool_stats) {
1972                         fprintf(trace->output, "Stats:\n "
1973                                                " vfs_getname : %" PRIu64 "\n"
1974                                                " proc_getname: %" PRIu64 "\n",
1975                                 trace->stats.vfs_getname,
1976                                 trace->stats.proc_getname);
1977                 }
1978         }
1979
1980         perf_evlist__munmap(evlist);
1981 out_close_evlist:
1982         perf_evlist__close(evlist);
1983 out_delete_maps:
1984         perf_evlist__delete_maps(evlist);
1985 out_delete_evlist:
1986         perf_evlist__delete(evlist);
1987 out:
1988         trace->live = false;
1989         return err;
1990 {
1991         char errbuf[BUFSIZ];
1992
1993 out_error_tp:
1994         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1995         goto out_error;
1996
1997 out_error_open:
1998         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1999
2000 out_error:
2001         fprintf(trace->output, "%s\n", errbuf);
2002         goto out_delete_evlist;
2003 }
2004 }
2005
2006 static int trace__replay(struct trace *trace)
2007 {
2008         const struct perf_evsel_str_handler handlers[] = {
2009                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
2010                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
2011                 { "probe:vfs_getname",       trace__vfs_getname, },
2012         };
2013         struct perf_data_file file = {
2014                 .path  = input_name,
2015                 .mode  = PERF_DATA_MODE_READ,
2016         };
2017         struct perf_session *session;
2018         int err = -1;
2019
2020         trace->tool.sample        = trace__process_sample;
2021         trace->tool.mmap          = perf_event__process_mmap;
2022         trace->tool.mmap2         = perf_event__process_mmap2;
2023         trace->tool.comm          = perf_event__process_comm;
2024         trace->tool.exit          = perf_event__process_exit;
2025         trace->tool.fork          = perf_event__process_fork;
2026         trace->tool.attr          = perf_event__process_attr;
2027         trace->tool.tracing_data = perf_event__process_tracing_data;
2028         trace->tool.build_id      = perf_event__process_build_id;
2029
2030         trace->tool.ordered_samples = true;
2031         trace->tool.ordering_requires_timestamps = true;
2032
2033         /* add tid to output */
2034         trace->multiple_threads = true;
2035
2036         if (symbol__init() < 0)
2037                 return -1;
2038
2039         session = perf_session__new(&file, false, &trace->tool);
2040         if (session == NULL)
2041                 return -ENOMEM;
2042
2043         trace->host = &session->machines.host;
2044
2045         err = perf_session__set_tracepoints_handlers(session, handlers);
2046         if (err)
2047                 goto out;
2048
2049         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2050                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2051                 goto out;
2052         }
2053
2054         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2055                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2056                 goto out;
2057         }
2058
2059         err = parse_target_str(trace);
2060         if (err != 0)
2061                 goto out;
2062
2063         setup_pager();
2064
2065         err = perf_session__process_events(session, &trace->tool);
2066         if (err)
2067                 pr_err("Failed to process events, error %d", err);
2068
2069         else if (trace->summary)
2070                 trace__fprintf_thread_summary(trace, trace->output);
2071
2072 out:
2073         perf_session__delete(session);
2074
2075         return err;
2076 }
2077
/*
 * Print the banner and the column headings of the per-thread summary to
 * @fp.  Returns the sum of the values returned by the fprintf() calls.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t total = 0;

	/* Banner. */
	total += fprintf(fp, "\n _____________________________________________________________________________\n");
	total += fprintf(fp, " __)    Summary of events    (__\n\n");

	/* Column headings: per-thread line first, then the per-syscall ones. */
	total += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
	total += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
	total += fprintf(fp, "                                                   msec    msec   msec     %%\n");

	total += fprintf(fp, " _____________________________________________________________________________\n\n");

	return total;
}
2091
2092 static size_t thread__dump_stats(struct thread_trace *ttrace,
2093                                  struct trace *trace, FILE *fp)
2094 {
2095         struct stats *stats;
2096         size_t printed = 0;
2097         struct syscall *sc;
2098         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2099
2100         if (inode == NULL)
2101                 return 0;
2102
2103         printed += fprintf(fp, "\n");
2104
2105         /* each int_node is a syscall */
2106         while (inode) {
2107                 stats = inode->priv;
2108                 if (stats) {
2109                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2110                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2111                         double avg = avg_stats(stats);
2112                         double pct;
2113                         u64 n = (u64) stats->n;
2114
2115                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2116                         avg /= NSEC_PER_MSEC;
2117
2118                         sc = &trace->syscalls.table[inode->i];
2119                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
2120                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
2121                                            n, min, max);
2122                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
2123                 }
2124
2125                 inode = intlist__next(inode);
2126         }
2127
2128         printed += fprintf(fp, "\n\n");
2129
2130         return printed;
2131 }
2132
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running count of characters printed */
};
2139
2140 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2141 {
2142         struct summary_data *data = priv;
2143         FILE *fp = data->fp;
2144         size_t printed = data->printed;
2145         struct trace *trace = data->trace;
2146         struct thread_trace *ttrace = thread->priv;
2147         const char *color;
2148         double ratio;
2149
2150         if (ttrace == NULL)
2151                 return 0;
2152
2153         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2154
2155         color = PERF_COLOR_NORMAL;
2156         if (ratio > 50.0)
2157                 color = PERF_COLOR_RED;
2158         else if (ratio > 25.0)
2159                 color = PERF_COLOR_GREEN;
2160         else if (ratio > 5.0)
2161                 color = PERF_COLOR_YELLOW;
2162
2163         printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2164         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
2165         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2166         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2167         printed += thread__dump_stats(ttrace, trace, fp);
2168
2169         data->printed += printed;
2170
2171         return 0;
2172 }
2173
2174 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2175 {
2176         struct summary_data data = {
2177                 .fp = fp,
2178                 .trace = trace
2179         };
2180         data.printed = trace__fprintf_threads_header(fp);
2181
2182         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2183
2184         return data.printed;
2185 }
2186
2187 static int trace__set_duration(const struct option *opt, const char *str,
2188                                int unset __maybe_unused)
2189 {
2190         struct trace *trace = opt->value;
2191
2192         trace->duration_filter = atof(str);
2193         return 0;
2194 }
2195
2196 static int trace__open_output(struct trace *trace, const char *filename)
2197 {
2198         struct stat st;
2199
2200         if (!stat(filename, &st) && st.st_size) {
2201                 char oldname[PATH_MAX];
2202
2203                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2204                 unlink(oldname);
2205                 rename(filename, oldname);
2206         }
2207
2208         trace->output = fopen(filename, "w");
2209
2210         return trace->output == NULL ? -errno : 0;
2211 }
2212
2213 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2214 {
2215         const char * const trace_usage[] = {
2216                 "perf trace [<options>] [<command>]",
2217                 "perf trace [<options>] -- <command> [<options>]",
2218                 "perf trace record [<options>] [<command>]",
2219                 "perf trace record [<options>] -- <command> [<options>]",
2220                 NULL
2221         };
2222         struct trace trace = {
2223                 .audit = {
2224                         .machine = audit_detect_machine(),
2225                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2226                 },
2227                 .syscalls = {
2228                         . max = -1,
2229                 },
2230                 .opts = {
2231                         .target = {
2232                                 .uid       = UINT_MAX,
2233                                 .uses_mmap = true,
2234                         },
2235                         .user_freq     = UINT_MAX,
2236                         .user_interval = ULLONG_MAX,
2237                         .no_delay      = true,
2238                         .mmap_pages    = 1024,
2239                 },
2240                 .output = stdout,
2241                 .show_comm = true,
2242         };
2243         const char *output_name = NULL;
2244         const char *ev_qualifier_str = NULL;
2245         const struct option trace_options[] = {
2246         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2247                     "show the thread COMM next to its id"),
2248         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2249         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2250                     "list of events to trace"),
2251         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2252         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2253         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2254                     "trace events on existing process id"),
2255         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2256                     "trace events on existing thread id"),
2257         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2258                     "system-wide collection from all CPUs"),
2259         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2260                     "list of cpus to monitor"),
2261         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2262                     "child tasks do not inherit counters"),
2263         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2264                      "number of mmap data pages",
2265                      perf_evlist__parse_mmap_pages),
2266         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2267                    "user to profile"),
2268         OPT_CALLBACK(0, "duration", &trace, "float",
2269                      "show only events with duration > N.M ms",
2270                      trace__set_duration),
2271         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2272         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2273         OPT_BOOLEAN('T', "time", &trace.full_time,
2274                     "Show full timestamp, not time relative to first start"),
2275         OPT_BOOLEAN(0, "summary", &trace.summary,
2276                     "Show syscall summary with statistics"),
2277         OPT_END()
2278         };
2279         int err;
2280         char bf[BUFSIZ];
2281
2282         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2283                 return trace__record(argc-2, &argv[2]);
2284
2285         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2286
2287         if (output_name != NULL) {
2288                 err = trace__open_output(&trace, output_name);
2289                 if (err < 0) {
2290                         perror("failed to create output file");
2291                         goto out;
2292                 }
2293         }
2294
2295         if (ev_qualifier_str != NULL) {
2296                 const char *s = ev_qualifier_str;
2297
2298                 trace.not_ev_qualifier = *s == '!';
2299                 if (trace.not_ev_qualifier)
2300                         ++s;
2301                 trace.ev_qualifier = strlist__new(true, s);
2302                 if (trace.ev_qualifier == NULL) {
2303                         fputs("Not enough memory to parse event qualifier",
2304                               trace.output);
2305                         err = -ENOMEM;
2306                         goto out_close;
2307                 }
2308         }
2309
2310         err = perf_target__validate(&trace.opts.target);
2311         if (err) {
2312                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2313                 fprintf(trace.output, "%s", bf);
2314                 goto out_close;
2315         }
2316
2317         err = perf_target__parse_uid(&trace.opts.target);
2318         if (err) {
2319                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2320                 fprintf(trace.output, "%s", bf);
2321                 goto out_close;
2322         }
2323
2324         if (!argc && perf_target__none(&trace.opts.target))
2325                 trace.opts.target.system_wide = true;
2326
2327         if (input_name)
2328                 err = trace__replay(&trace);
2329         else
2330                 err = trace__run(&trace, argc, argv);
2331
2332 out_close:
2333         if (output_name != NULL)
2334                 fclose(trace.output);
2335 out:
2336         return err;
2337 }