]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/perf/builtin-trace.c
arm: imx6: defconfig: update tx6 defconfigs
[karo-tx-linux.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
15 #include <libaudit.h>
16 #include <stdlib.h>
17 #include <sys/eventfd.h>
18 #include <sys/mman.h>
19 #include <linux/futex.h>
20
/*
 * Fallback values for libc headers that are too old to provide these
 * mmap()/madvise() constants.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif
37
38 struct syscall_arg {
39         unsigned long val;
40         struct thread *thread;
41         struct trace  *trace;
42         void          *parm;
43         u8            idx;
44         u8            mask;
45 };
46
/*
 * Names for a contiguous range of integer values: entries[i] is the name of
 * the value (offset + i), for 0 <= i < nr_entries.
 */
struct strarray {
        int        offset;
        int        nr_entries;
        const char **entries;
};
52
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * <array>.  The offset member is left at zero.
 */
#define DEFINE_STRARRAY(array)                          \
        struct strarray strarray__##array = {           \
                .nr_entries = ARRAY_SIZE(array),        \
                .entries    = array,                    \
        }
57
/*
 * Same as DEFINE_STRARRAY(), for tables whose first entry names the value
 * <off> instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off)              \
        struct strarray strarray__##array = {           \
                .offset     = off,                      \
                .nr_entries = ARRAY_SIZE(array),        \
                .entries    = array,                    \
        }
63
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
65                                                 const char *intfmt,
66                                                 struct syscall_arg *arg)
67 {
68         struct strarray *sa = arg->parm;
69         int idx = arg->val - sa->offset;
70
71         if (idx < 0 || idx >= sa->nr_entries)
72                 return scnprintf(bf, size, intfmt, arg->val);
73
74         return scnprintf(bf, size, "%s", sa->entries[idx]);
75 }
76
/*
 * strarray formatter that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
84
/*
 * strarray formatter that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
92
/* File descriptor formatter; only declared here, the definition is not in this part of the file. */
static size_t
syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
97
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99                                            struct syscall_arg *arg)
100 {
101         int fd = arg->val;
102
103         if (fd == AT_FDCWD)
104                 return scnprintf(bf, size, "CWD");
105
106         return syscall_arg__scnprintf_fd(bf, size, arg);
107 }
108
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
110
/* Formatter for the fd argument of close(); only declared here, the definition is not in this part of the file. */
static size_t
syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
115
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117                                          struct syscall_arg *arg)
118 {
119         return scnprintf(bf, size, "%#lx", arg->val);
120 }
121
122 #define SCA_HEX syscall_arg__scnprintf_hex
123
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125                                                struct syscall_arg *arg)
126 {
127         int printed = 0, prot = arg->val;
128
129         if (prot == PROT_NONE)
130                 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132         if (prot & PROT_##n) { \
133                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
134                 prot &= ~PROT_##n; \
135         }
136
137         P_MMAP_PROT(EXEC);
138         P_MMAP_PROT(READ);
139         P_MMAP_PROT(WRITE);
140 #ifdef PROT_SEM
141         P_MMAP_PROT(SEM);
142 #endif
143         P_MMAP_PROT(GROWSDOWN);
144         P_MMAP_PROT(GROWSUP);
145 #undef P_MMAP_PROT
146
147         if (prot)
148                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
149
150         return printed;
151 }
152
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
154
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156                                                 struct syscall_arg *arg)
157 {
158         int printed = 0, flags = arg->val;
159
160 #define P_MMAP_FLAG(n) \
161         if (flags & MAP_##n) { \
162                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
163                 flags &= ~MAP_##n; \
164         }
165
166         P_MMAP_FLAG(SHARED);
167         P_MMAP_FLAG(PRIVATE);
168 #ifdef MAP_32BIT
169         P_MMAP_FLAG(32BIT);
170 #endif
171         P_MMAP_FLAG(ANONYMOUS);
172         P_MMAP_FLAG(DENYWRITE);
173         P_MMAP_FLAG(EXECUTABLE);
174         P_MMAP_FLAG(FILE);
175         P_MMAP_FLAG(FIXED);
176         P_MMAP_FLAG(GROWSDOWN);
177 #ifdef MAP_HUGETLB
178         P_MMAP_FLAG(HUGETLB);
179 #endif
180         P_MMAP_FLAG(LOCKED);
181         P_MMAP_FLAG(NONBLOCK);
182         P_MMAP_FLAG(NORESERVE);
183         P_MMAP_FLAG(POPULATE);
184         P_MMAP_FLAG(STACK);
185 #ifdef MAP_UNINITIALIZED
186         P_MMAP_FLAG(UNINITIALIZED);
187 #endif
188 #undef P_MMAP_FLAG
189
190         if (flags)
191                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
192
193         return printed;
194 }
195
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
197
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199                                                       struct syscall_arg *arg)
200 {
201         int behavior = arg->val;
202
203         switch (behavior) {
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
205         P_MADV_BHV(NORMAL);
206         P_MADV_BHV(RANDOM);
207         P_MADV_BHV(SEQUENTIAL);
208         P_MADV_BHV(WILLNEED);
209         P_MADV_BHV(DONTNEED);
210         P_MADV_BHV(REMOVE);
211         P_MADV_BHV(DONTFORK);
212         P_MADV_BHV(DOFORK);
213         P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215         P_MADV_BHV(SOFT_OFFLINE);
216 #endif
217         P_MADV_BHV(MERGEABLE);
218         P_MADV_BHV(UNMERGEABLE);
219 #ifdef MADV_HUGEPAGE
220         P_MADV_BHV(HUGEPAGE);
221 #endif
222 #ifdef MADV_NOHUGEPAGE
223         P_MADV_BHV(NOHUGEPAGE);
224 #endif
225 #ifdef MADV_DONTDUMP
226         P_MADV_BHV(DONTDUMP);
227 #endif
228 #ifdef MADV_DODUMP
229         P_MADV_BHV(DODUMP);
230 #endif
231 #undef P_MADV_PHV
232         default: break;
233         }
234
235         return scnprintf(bf, size, "%#x", behavior);
236 }
237
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
239
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241                                            struct syscall_arg *arg)
242 {
243         int printed = 0, op = arg->val;
244
245         if (op == 0)
246                 return scnprintf(bf, size, "NONE");
247 #define P_CMD(cmd) \
248         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
250                 op &= ~LOCK_##cmd; \
251         }
252
253         P_CMD(SH);
254         P_CMD(EX);
255         P_CMD(NB);
256         P_CMD(UN);
257         P_CMD(MAND);
258         P_CMD(RW);
259         P_CMD(READ);
260         P_CMD(WRITE);
261 #undef P_OP
262
263         if (op)
264                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
265
266         return printed;
267 }
268
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
270
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
272 {
273         enum syscall_futex_args {
274                 SCF_UADDR   = (1 << 0),
275                 SCF_OP      = (1 << 1),
276                 SCF_VAL     = (1 << 2),
277                 SCF_TIMEOUT = (1 << 3),
278                 SCF_UADDR2  = (1 << 4),
279                 SCF_VAL3    = (1 << 5),
280         };
281         int op = arg->val;
282         int cmd = op & FUTEX_CMD_MASK;
283         size_t printed = 0;
284
285         switch (cmd) {
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
288         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
291         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
292         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
293         P_FUTEX_OP(WAKE_OP);                                                      break;
294         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
297         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
298         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
299         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
300         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
301         }
302
303         if (op & FUTEX_PRIVATE_FLAG)
304                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
305
306         if (op & FUTEX_CLOCK_REALTIME)
307                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
308
309         return printed;
310 }
311
312 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
313
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
316
/* Interval timer names, indexed by the 'which' argument of {get,set}itimer(). */
static const char *itimers[] = {
        "REAL",
        "VIRTUAL",
        "PROF",
};
static DEFINE_STRARRAY(itimers);
319
/* lseek() whence names, indexed by value. */
static const char *whences[] = {
        "SET",
        "CUR",
        "END",
#ifdef SEEK_DATA
        "DATA",
#endif
#ifdef SEEK_HOLE
        "HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
329
/* fcntl() command names, indexed by command number. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL",
        "GETLK", "SETLK", "SETLKW",
        "SETOWN", "GETOWN", "SETSIG", "GETSIG",
        "F_GETLK64", "F_SETLK64", "F_SETLKW64",
        "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
337
/* Resource names for {get,set}rlimit() and prlimit64(), indexed by resource. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK",
        "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING",
        "MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
344
/* Names for the 'how' argument of rt_sigprocmask(), indexed by value. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);
347
/* Clock names, indexed by clock id. */
static const char *clockid[] = {
        "REALTIME",
        "MONOTONIC",
        "PROCESS_CPUTIME_ID",
        "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW",
        "REALTIME_COARSE",
        "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
353
/* Socket address family names, indexed by family number. */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX",
        "APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25",
        "INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY",
        "KEY", "NETLINK", "PACKET", "ASH", "ECONET",
        "ATMSVC", "RDS", "SNA", "IRDA", "PPPOX",
        "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET",
        "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
363
/* Low bits of the socket() type argument that hold the socket type proper. */
#if !defined(SOCK_TYPE_MASK)
# define SOCK_TYPE_MASK 0xf
#endif
367
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369                                                       struct syscall_arg *arg)
370 {
371         size_t printed;
372         int type = arg->val,
373             flags = type & ~SOCK_TYPE_MASK;
374
375         type &= SOCK_TYPE_MASK;
376         /*
377          * Can't use a strarray, MIPS may override for ABI reasons.
378          */
379         switch (type) {
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
381         P_SK_TYPE(STREAM);
382         P_SK_TYPE(DGRAM);
383         P_SK_TYPE(RAW);
384         P_SK_TYPE(RDM);
385         P_SK_TYPE(SEQPACKET);
386         P_SK_TYPE(DCCP);
387         P_SK_TYPE(PACKET);
388 #undef P_SK_TYPE
389         default:
390                 printed = scnprintf(bf, size, "%#x", type);
391         }
392
393 #define P_SK_FLAG(n) \
394         if (flags & SOCK_##n) { \
395                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396                 flags &= ~SOCK_##n; \
397         }
398
399         P_SK_FLAG(CLOEXEC);
400         P_SK_FLAG(NONBLOCK);
401 #undef P_SK_FLAG
402
403         if (flags)
404                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
405
406         return printed;
407 }
408
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
410
/* Fallback values for MSG_* flags the libc headers may not provide. */
#if !defined(MSG_PROBE)
# define MSG_PROBE            0x10
#endif
#if !defined(MSG_WAITFORONE)
# define MSG_WAITFORONE  0x10000
#endif
#if !defined(MSG_SENDPAGE_NOTLAST)
# define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#if !defined(MSG_FASTOPEN)
# define MSG_FASTOPEN         0x20000000
#endif
423
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425                                                struct syscall_arg *arg)
426 {
427         int printed = 0, flags = arg->val;
428
429         if (flags == 0)
430                 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432         if (flags & MSG_##n) { \
433                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
434                 flags &= ~MSG_##n; \
435         }
436
437         P_MSG_FLAG(OOB);
438         P_MSG_FLAG(PEEK);
439         P_MSG_FLAG(DONTROUTE);
440         P_MSG_FLAG(TRYHARD);
441         P_MSG_FLAG(CTRUNC);
442         P_MSG_FLAG(PROBE);
443         P_MSG_FLAG(TRUNC);
444         P_MSG_FLAG(DONTWAIT);
445         P_MSG_FLAG(EOR);
446         P_MSG_FLAG(WAITALL);
447         P_MSG_FLAG(FIN);
448         P_MSG_FLAG(SYN);
449         P_MSG_FLAG(CONFIRM);
450         P_MSG_FLAG(RST);
451         P_MSG_FLAG(ERRQUEUE);
452         P_MSG_FLAG(NOSIGNAL);
453         P_MSG_FLAG(MORE);
454         P_MSG_FLAG(WAITFORONE);
455         P_MSG_FLAG(SENDPAGE_NOTLAST);
456         P_MSG_FLAG(FASTOPEN);
457         P_MSG_FLAG(CMSG_CLOEXEC);
458 #undef P_MSG_FLAG
459
460         if (flags)
461                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
462
463         return printed;
464 }
465
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
467
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469                                                  struct syscall_arg *arg)
470 {
471         size_t printed = 0;
472         int mode = arg->val;
473
474         if (mode == F_OK) /* 0 */
475                 return scnprintf(bf, size, "F");
476 #define P_MODE(n) \
477         if (mode & n##_OK) { \
478                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
479                 mode &= ~n##_OK; \
480         }
481
482         P_MODE(R);
483         P_MODE(W);
484         P_MODE(X);
485 #undef P_MODE
486
487         if (mode)
488                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
489
490         return printed;
491 }
492
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
494
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496                                                struct syscall_arg *arg)
497 {
498         int printed = 0, flags = arg->val;
499
500         if (!(flags & O_CREAT))
501                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
502
503         if (flags == 0)
504                 return scnprintf(bf, size, "RDONLY");
505 #define P_FLAG(n) \
506         if (flags & O_##n) { \
507                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
508                 flags &= ~O_##n; \
509         }
510
511         P_FLAG(APPEND);
512         P_FLAG(ASYNC);
513         P_FLAG(CLOEXEC);
514         P_FLAG(CREAT);
515         P_FLAG(DIRECT);
516         P_FLAG(DIRECTORY);
517         P_FLAG(EXCL);
518         P_FLAG(LARGEFILE);
519         P_FLAG(NOATIME);
520         P_FLAG(NOCTTY);
521 #ifdef O_NONBLOCK
522         P_FLAG(NONBLOCK);
523 #elif O_NDELAY
524         P_FLAG(NDELAY);
525 #endif
526 #ifdef O_PATH
527         P_FLAG(PATH);
528 #endif
529         P_FLAG(RDWR);
530 #ifdef O_DSYNC
531         if ((flags & O_SYNC) == O_SYNC)
532                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
533         else {
534                 P_FLAG(DSYNC);
535         }
536 #else
537         P_FLAG(SYNC);
538 #endif
539         P_FLAG(TRUNC);
540         P_FLAG(WRONLY);
541 #undef P_FLAG
542
543         if (flags)
544                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
545
546         return printed;
547 }
548
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
550
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552                                                    struct syscall_arg *arg)
553 {
554         int printed = 0, flags = arg->val;
555
556         if (flags == 0)
557                 return scnprintf(bf, size, "NONE");
558 #define P_FLAG(n) \
559         if (flags & EFD_##n) { \
560                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
561                 flags &= ~EFD_##n; \
562         }
563
564         P_FLAG(SEMAPHORE);
565         P_FLAG(CLOEXEC);
566         P_FLAG(NONBLOCK);
567 #undef P_FLAG
568
569         if (flags)
570                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
571
572         return printed;
573 }
574
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
576
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578                                                 struct syscall_arg *arg)
579 {
580         int printed = 0, flags = arg->val;
581
582 #define P_FLAG(n) \
583         if (flags & O_##n) { \
584                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
585                 flags &= ~O_##n; \
586         }
587
588         P_FLAG(CLOEXEC);
589         P_FLAG(NONBLOCK);
590 #undef P_FLAG
591
592         if (flags)
593                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
594
595         return printed;
596 }
597
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
599
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
601 {
602         int sig = arg->val;
603
604         switch (sig) {
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
606         P_SIGNUM(HUP);
607         P_SIGNUM(INT);
608         P_SIGNUM(QUIT);
609         P_SIGNUM(ILL);
610         P_SIGNUM(TRAP);
611         P_SIGNUM(ABRT);
612         P_SIGNUM(BUS);
613         P_SIGNUM(FPE);
614         P_SIGNUM(KILL);
615         P_SIGNUM(USR1);
616         P_SIGNUM(SEGV);
617         P_SIGNUM(USR2);
618         P_SIGNUM(PIPE);
619         P_SIGNUM(ALRM);
620         P_SIGNUM(TERM);
621         P_SIGNUM(STKFLT);
622         P_SIGNUM(CHLD);
623         P_SIGNUM(CONT);
624         P_SIGNUM(STOP);
625         P_SIGNUM(TSTP);
626         P_SIGNUM(TTIN);
627         P_SIGNUM(TTOU);
628         P_SIGNUM(URG);
629         P_SIGNUM(XCPU);
630         P_SIGNUM(XFSZ);
631         P_SIGNUM(VTALRM);
632         P_SIGNUM(PROF);
633         P_SIGNUM(WINCH);
634         P_SIGNUM(IO);
635         P_SIGNUM(PWR);
636         P_SIGNUM(SYS);
637         default: break;
638         }
639
640         return scnprintf(bf, size, "%#x", sig);
641 }
642
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
644
645 #define TCGETS          0x5401
646
647 static const char *tioctls[] = {
648         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
663 };
664
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
666
/*
 * syscall_fmt initializer fragment: format argument number <arg> with
 * SCA_STRARRAY using the table strarray__<array>.  <name> is not used by the
 * expansion.
 */
#define STRARRAY(arg, name, array)                              \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, },           \
          .arg_parm      = { [arg] = &strarray__##array, }
670
671 static struct syscall_fmt {
672         const char *name;
673         const char *alias;
674         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
675         void       *arg_parm[6];
676         bool       errmsg;
677         bool       timeout;
678         bool       hexret;
679 } syscall_fmts[] = {
680         { .name     = "access",     .errmsg = true,
681           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
683         { .name     = "brk",        .hexret = true,
684           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
686         { .name     = "close",      .errmsg = true,
687           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
688         { .name     = "connect",    .errmsg = true, },
689         { .name     = "dup",        .errmsg = true,
690           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
691         { .name     = "dup2",       .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
693         { .name     = "dup3",       .errmsg = true,
694           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
695         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696         { .name     = "eventfd2",   .errmsg = true,
697           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698         { .name     = "faccessat",  .errmsg = true,
699           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700         { .name     = "fadvise64",  .errmsg = true,
701           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
702         { .name     = "fallocate",  .errmsg = true,
703           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
704         { .name     = "fchdir",     .errmsg = true,
705           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
706         { .name     = "fchmod",     .errmsg = true,
707           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
708         { .name     = "fchmodat",   .errmsg = true,
709           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
710         { .name     = "fchown",     .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
712         { .name     = "fchownat",   .errmsg = true,
713           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
714         { .name     = "fcntl",      .errmsg = true,
715           .arg_scnprintf = { [0] = SCA_FD, /* fd */
716                              [1] = SCA_STRARRAY, /* cmd */ },
717           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718         { .name     = "fdatasync",  .errmsg = true,
719           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
720         { .name     = "flock",      .errmsg = true,
721           .arg_scnprintf = { [0] = SCA_FD, /* fd */
722                              [1] = SCA_FLOCK, /* cmd */ }, },
723         { .name     = "fsetxattr",  .errmsg = true,
724           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
725         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
726           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
727         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
728           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
729         { .name     = "fstatfs",    .errmsg = true,
730           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
731         { .name     = "fsync",    .errmsg = true,
732           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
733         { .name     = "ftruncate", .errmsg = true,
734           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
735         { .name     = "futex",      .errmsg = true,
736           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737         { .name     = "futimesat", .errmsg = true,
738           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
739         { .name     = "getdents",   .errmsg = true,
740           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
741         { .name     = "getdents64", .errmsg = true,
742           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
743         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
744         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745         { .name     = "ioctl",      .errmsg = true,
746           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
747                              [1] = SCA_STRHEXARRAY, /* cmd */
748                              [2] = SCA_HEX, /* arg */ },
749           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
750         { .name     = "kill",       .errmsg = true,
751           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752         { .name     = "linkat",     .errmsg = true,
753           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
754         { .name     = "lseek",      .errmsg = true,
755           .arg_scnprintf = { [0] = SCA_FD, /* fd */
756                              [2] = SCA_STRARRAY, /* whence */ },
757           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
758         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
759         { .name     = "madvise",    .errmsg = true,
760           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
761                              [2] = SCA_MADV_BHV, /* behavior */ }, },
762         { .name     = "mkdirat",    .errmsg = true,
763           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
764         { .name     = "mknodat",    .errmsg = true,
765           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
766         { .name     = "mlock",      .errmsg = true,
767           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768         { .name     = "mlockall",   .errmsg = true,
769           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770         { .name     = "mmap",       .hexret = true,
771           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
772                              [2] = SCA_MMAP_PROT, /* prot */
773                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774         { .name     = "mprotect",   .errmsg = true,
775           .arg_scnprintf = { [0] = SCA_HEX, /* start */
776                              [2] = SCA_MMAP_PROT, /* prot */ }, },
777         { .name     = "mremap",     .hexret = true,
778           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779                              [4] = SCA_HEX, /* new_addr */ }, },
780         { .name     = "munlock",    .errmsg = true,
781           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782         { .name     = "munmap",     .errmsg = true,
783           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784         { .name     = "name_to_handle_at", .errmsg = true,
785           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
786         { .name     = "newfstatat", .errmsg = true,
787           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
788         { .name     = "open",       .errmsg = true,
789           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790         { .name     = "open_by_handle_at", .errmsg = true,
791           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793         { .name     = "openat",     .errmsg = true,
794           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796         { .name     = "pipe2",      .errmsg = true,
797           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798         { .name     = "poll",       .errmsg = true, .timeout = true, },
799         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
800         { .name     = "pread",      .errmsg = true, .alias = "pread64",
801           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
802         { .name     = "preadv",     .errmsg = true, .alias = "pread",
803           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
804         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
806           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
807         { .name     = "pwritev",    .errmsg = true,
808           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
809         { .name     = "read",       .errmsg = true,
810           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
811         { .name     = "readlinkat", .errmsg = true,
812           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
813         { .name     = "readv",      .errmsg = true,
814           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
815         { .name     = "recvfrom",   .errmsg = true,
816           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817         { .name     = "recvmmsg",   .errmsg = true,
818           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819         { .name     = "recvmsg",    .errmsg = true,
820           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821         { .name     = "renameat",   .errmsg = true,
822           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
823         { .name     = "rt_sigaction", .errmsg = true,
824           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
826         { .name     = "rt_sigqueueinfo", .errmsg = true,
827           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
829           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830         { .name     = "select",     .errmsg = true, .timeout = true, },
831         { .name     = "sendmmsg",    .errmsg = true,
832           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833         { .name     = "sendmsg",    .errmsg = true,
834           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835         { .name     = "sendto",     .errmsg = true,
836           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
838         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839         { .name     = "shutdown",   .errmsg = true,
840           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
841         { .name     = "socket",     .errmsg = true,
842           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843                              [1] = SCA_SK_TYPE, /* type */ },
844           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
845         { .name     = "socketpair", .errmsg = true,
846           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847                              [1] = SCA_SK_TYPE, /* type */ },
848           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
849         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
850         { .name     = "symlinkat",  .errmsg = true,
851           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
852         { .name     = "tgkill",     .errmsg = true,
853           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854         { .name     = "tkill",      .errmsg = true,
855           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
857         { .name     = "unlinkat",   .errmsg = true,
858           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859         { .name     = "utimensat",  .errmsg = true,
860           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861         { .name     = "write",      .errmsg = true,
862           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
863         { .name     = "writev",     .errmsg = true,
864           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
865 };
866
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
868 {
869         const struct syscall_fmt *fmt = fmtp;
870         return strcmp(name, fmt->name);
871 }
872
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
874 {
875         const int nmemb = ARRAY_SIZE(syscall_fmts);
876         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
877 }
878
/*
 * One entry of trace->syscalls.table, indexed by syscall id and filled in
 * by trace__read_syscall_info() the first time that id is looked up.
 */
879 struct syscall {
880         struct event_format *tp_format; /* "syscalls:sys_enter_<name>" (or alias) format; left NULL for filtered syscalls */
881         const char          *name; /* as returned by audit_syscall_to_name() */
882         bool                filtered; /* excluded by trace->ev_qualifier, samples are ignored */
883         struct syscall_fmt  *fmt; /* matching syscall_fmts[] entry, NULL if there is none */
884         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); /* per-argument formatters; a NULL slot means plain "%ld" */
885         void                **arg_parm; /* fmt->arg_parm when fmt is set, handed to the formatter via arg->parm */
886 };
887
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
889 {
890         double duration = (double)t / NSEC_PER_MSEC;
891         size_t printed = fprintf(fp, "(");
892
893         if (duration >= 1.0)
894                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895         else if (duration >= 0.01)
896                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
897         else
898                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899         return printed + fprintf(fp, "): ");
900 }
901
/* Per-thread tracing state, stored in thread->priv by thread__trace(). */
902 struct thread_trace {
903         u64               entry_time; /* sample->time of the last sys_enter seen */
904         u64               exit_time; /* sample->time of the last sys_exit seen */
905         bool              entry_pending; /* entry_str holds a sys_enter not yet printed */
906         unsigned long     nr_events; /* incremented on every thread__trace() call */
907         char              *entry_str; /* 1024-byte buffer with the formatted "name(args" */
908         double            runtime_ms; /* accumulated by trace__sched_stat_runtime() */
909         struct {
910                 int       max; /* highest valid index in table, -1 while table is empty */
911                 char      **table; /* fd -> strdup()ed pathname, NULL when unknown */
912         } paths;
913
914         struct intlist *syscall_stats; /* syscall id -> struct stats of call durations */
915 };
916
917 static struct thread_trace *thread_trace__new(void)
918 {
919         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
920
921         if (ttrace)
922                 ttrace->paths.max = -1;
923
924         ttrace->syscall_stats = intlist__new(NULL);
925
926         return ttrace;
927 }
928
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
930 {
931         struct thread_trace *ttrace;
932
933         if (thread == NULL)
934                 goto fail;
935
936         if (thread->priv == NULL)
937                 thread->priv = thread_trace__new();
938                 
939         if (thread->priv == NULL)
940                 goto fail;
941
942         ttrace = thread->priv;
943         ++ttrace->nr_events;
944
945         return ttrace;
946 fail:
947         color_fprintf(fp, PERF_COLOR_RED,
948                       "WARNING: not enough memory, dropping samples!\n");
949         return NULL;
950 }
951
/* Global state of one 'perf trace' session. */
952 struct trace {
953         struct perf_tool        tool; /* embedded so callbacks can container_of() back to the trace */
954         struct {
955                 int             machine; /* passed to audit_syscall_to_name() */
956                 int             open_id; /* syscall id whose return value is paired with last_vfs_getname */
957         }                       audit;
958         struct {
959                 int             max; /* highest id covered by table, -1 while empty */
960                 struct syscall  *table; /* indexed by syscall id, grown on demand */
961         } syscalls;
962         struct perf_record_opts opts;
963         struct machine          *host; /* created in trace__symbols_init(), used for thread lookup */
964         u64                     base_time; /* time of the first sample; timestamps are printed relative to it */
965         bool                    full_time; /* when set, base_time is not latched from the first sample */
966         FILE                    *output; /* destination of all trace output */
967         unsigned long           nr_events;
968         struct strlist          *ev_qualifier; /* syscall names to select, NULL means no filtering */
969         bool                    not_ev_qualifier; /* ev_qualifier lists syscalls to exclude instead */
970         bool                    live; /* set by trace__run(); allows reading fd paths from /proc */
971         const char              *last_vfs_getname; /* pathname from the latest probe:vfs_getname sample */
972         struct intlist          *tid_list; /* parsed from opts.target.tid */
973         struct intlist          *pid_list; /* parsed from opts.target.pid */
974         bool                    sched;
975         bool                    multiple_threads; /* prefix each line with the thread id */
976         bool                    summary; /* collect per-syscall duration stats on sys_exit */
977         bool                    show_comm; /* also print thread->comm when multiple_threads */
978         bool                    show_tool_stats;
979         double                  duration_filter; /* in ms; shorter syscalls are not printed */
980         double                  runtime_ms; /* total of all sched_stat_runtime samples */
981         struct {
982                 u64             vfs_getname, proc_getname; /* how often fd paths came from the probe vs. /proc */
983         } stats;
984 };
985
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
987 {
988         struct thread_trace *ttrace = thread->priv;
989
990         if (fd > ttrace->paths.max) {
991                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
992
993                 if (npath == NULL)
994                         return -1;
995
996                 if (ttrace->paths.max != -1) {
997                         memset(npath + ttrace->paths.max + 1, 0,
998                                (fd - ttrace->paths.max) * sizeof(char *));
999                 } else {
1000                         memset(npath, 0, (fd + 1) * sizeof(char *));
1001                 }
1002
1003                 ttrace->paths.table = npath;
1004                 ttrace->paths.max   = fd;
1005         }
1006
1007         ttrace->paths.table[fd] = strdup(pathname);
1008
1009         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1010 }
1011
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1013 {
1014         char linkname[PATH_MAX], pathname[PATH_MAX];
1015         struct stat st;
1016         int ret;
1017
1018         if (thread->pid_ == thread->tid) {
1019                 scnprintf(linkname, sizeof(linkname),
1020                           "/proc/%d/fd/%d", thread->pid_, fd);
1021         } else {
1022                 scnprintf(linkname, sizeof(linkname),
1023                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1024         }
1025
1026         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1027                 return -1;
1028
1029         ret = readlink(linkname, pathname, sizeof(pathname));
1030
1031         if (ret < 0 || ret > st.st_size)
1032                 return -1;
1033
1034         pathname[ret] = '\0';
1035         return trace__set_fd_pathname(thread, fd, pathname);
1036 }
1037
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039                                    struct trace *trace)
1040 {
1041         struct thread_trace *ttrace = thread->priv;
1042
1043         if (ttrace == NULL)
1044                 return NULL;
1045
1046         if (fd < 0)
1047                 return NULL;
1048
1049         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1050                 if (!trace->live)
1051                         return NULL;
1052                 ++trace->stats.proc_getname;
1053                 if (thread__read_fd_path(thread, fd)) {
1054                         return NULL;
1055         }
1056
1057         return ttrace->paths.table[fd];
1058 }
1059
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061                                         struct syscall_arg *arg)
1062 {
1063         int fd = arg->val;
1064         size_t printed = scnprintf(bf, size, "%d", fd);
1065         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1066
1067         if (path)
1068                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1069
1070         return printed;
1071 }
1072
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074                                               struct syscall_arg *arg)
1075 {
1076         int fd = arg->val;
1077         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078         struct thread_trace *ttrace = arg->thread->priv;
1079
1080         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081                 free(ttrace->paths.table[fd]);
1082                 ttrace->paths.table[fd] = NULL;
1083         }
1084
1085         return printed;
1086 }
1087
1088 static bool trace__filter_duration(struct trace *trace, double t)
1089 {
1090         return t < (trace->duration_filter * NSEC_PER_MSEC);
1091 }
1092
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1094 {
1095         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1096
1097         return fprintf(fp, "%10.3f ", ts);
1098 }
1099
/*
 * Flags written from sig_handler().  They are volatile sig_atomic_t
 * because that is the only object type the C standard lets a signal
 * handler store to with defined behaviour; a plain bool may also be
 * cached in a register by code polling it.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask the main loop to stop and record whether the
 * request came from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1108
1109 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1110                                         u64 duration, u64 tstamp, FILE *fp)
1111 {
1112         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1113         printed += fprintf_duration(duration, fp);
1114
1115         if (trace->multiple_threads) {
1116                 if (trace->show_comm)
1117                         printed += fprintf(fp, "%.14s/", thread->comm);
1118                 printed += fprintf(fp, "%d ", thread->tid);
1119         }
1120
1121         return printed;
1122 }
1123
1124 static int trace__process_event(struct trace *trace, struct machine *machine,
1125                                 union perf_event *event)
1126 {
1127         int ret = 0;
1128
1129         switch (event->header.type) {
1130         case PERF_RECORD_LOST:
1131                 color_fprintf(trace->output, PERF_COLOR_RED,
1132                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1133                 ret = machine__process_lost_event(machine, event);
1134         default:
1135                 ret = machine__process_event(machine, event);
1136                 break;
1137         }
1138
1139         return ret;
1140 }
1141
1142 static int trace__tool_process(struct perf_tool *tool,
1143                                union perf_event *event,
1144                                struct perf_sample *sample __maybe_unused,
1145                                struct machine *machine)
1146 {
1147         struct trace *trace = container_of(tool, struct trace, tool);
1148         return trace__process_event(trace, machine, event);
1149 }
1150
1151 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1152 {
1153         int err = symbol__init();
1154
1155         if (err)
1156                 return err;
1157
1158         trace->host = machine__new_host();
1159         if (trace->host == NULL)
1160                 return -ENOMEM;
1161
1162         if (perf_target__has_task(&trace->opts.target)) {
1163                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1164                                                         trace__tool_process,
1165                                                         trace->host);
1166         } else {
1167                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1168                                                      trace->host);
1169         }
1170
1171         if (err)
1172                 symbol__exit();
1173
1174         return err;
1175 }
1176
1177 static int syscall__set_arg_fmts(struct syscall *sc)
1178 {
1179         struct format_field *field;
1180         int idx = 0;
1181
1182         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1183         if (sc->arg_scnprintf == NULL)
1184                 return -1;
1185
1186         if (sc->fmt)
1187                 sc->arg_parm = sc->fmt->arg_parm;
1188
1189         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1190                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1191                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1192                 else if (field->flags & FIELD_IS_POINTER)
1193                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1194                 ++idx;
1195         }
1196
1197         return 0;
1198 }
1199
1200 static int trace__read_syscall_info(struct trace *trace, int id)
1201 {
1202         char tp_name[128];
1203         struct syscall *sc;
1204         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1205
1206         if (name == NULL)
1207                 return -1;
1208
1209         if (id > trace->syscalls.max) {
1210                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1211
1212                 if (nsyscalls == NULL)
1213                         return -1;
1214
1215                 if (trace->syscalls.max != -1) {
1216                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1217                                (id - trace->syscalls.max) * sizeof(*sc));
1218                 } else {
1219                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1220                 }
1221
1222                 trace->syscalls.table = nsyscalls;
1223                 trace->syscalls.max   = id;
1224         }
1225
1226         sc = trace->syscalls.table + id;
1227         sc->name = name;
1228
1229         if (trace->ev_qualifier) {
1230                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1231
1232                 if (!(in ^ trace->not_ev_qualifier)) {
1233                         sc->filtered = true;
1234                         /*
1235                          * No need to do read tracepoint information since this will be
1236                          * filtered out.
1237                          */
1238                         return 0;
1239                 }
1240         }
1241
1242         sc->fmt  = syscall_fmt__find(sc->name);
1243
1244         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1245         sc->tp_format = event_format__new("syscalls", tp_name);
1246
1247         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1248                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1249                 sc->tp_format = event_format__new("syscalls", tp_name);
1250         }
1251
1252         if (sc->tp_format == NULL)
1253                 return -1;
1254
1255         return syscall__set_arg_fmts(sc);
1256 }
1257
1258 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1259                                       unsigned long *args, struct trace *trace,
1260                                       struct thread *thread)
1261 {
1262         size_t printed = 0;
1263
1264         if (sc->tp_format != NULL) {
1265                 struct format_field *field;
1266                 u8 bit = 1;
1267                 struct syscall_arg arg = {
1268                         .idx    = 0,
1269                         .mask   = 0,
1270                         .trace  = trace,
1271                         .thread = thread,
1272                 };
1273
1274                 for (field = sc->tp_format->format.fields->next; field;
1275                      field = field->next, ++arg.idx, bit <<= 1) {
1276                         if (arg.mask & bit)
1277                                 continue;
1278                         /*
1279                          * Suppress this argument if its value is zero and
1280                          * and we don't have a string associated in an
1281                          * strarray for it.
1282                          */
1283                         if (args[arg.idx] == 0 &&
1284                             !(sc->arg_scnprintf &&
1285                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1286                               sc->arg_parm[arg.idx]))
1287                                 continue;
1288
1289                         printed += scnprintf(bf + printed, size - printed,
1290                                              "%s%s: ", printed ? ", " : "", field->name);
1291                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1292                                 arg.val = args[arg.idx];
1293                                 if (sc->arg_parm)
1294                                         arg.parm = sc->arg_parm[arg.idx];
1295                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1296                                                                       size - printed, &arg);
1297                         } else {
1298                                 printed += scnprintf(bf + printed, size - printed,
1299                                                      "%ld", args[arg.idx]);
1300                         }
1301                 }
1302         } else {
1303                 int i = 0;
1304
1305                 while (i < 6) {
1306                         printed += scnprintf(bf + printed, size - printed,
1307                                              "%sarg%d: %ld",
1308                                              printed ? ", " : "", i, args[i]);
1309                         ++i;
1310                 }
1311         }
1312
1313         return printed;
1314 }
1315
/*
 * Signature of the per-tracepoint sample handlers, which are stored in
 * evsel->handler.func and invoked from trace__process_sample().
 */
1316 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1317                                   struct perf_sample *sample);
1318
1319 static struct syscall *trace__syscall_info(struct trace *trace,
1320                                            struct perf_evsel *evsel, int id)
1321 {
1322
1323         if (id < 0) {
1324
1325                 /*
1326                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1327                  * before that, leaving at a higher verbosity level till that is
1328                  * explained. Reproduced with plain ftrace with:
1329                  *
1330                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1331                  * grep "NR -1 " /t/trace_pipe
1332                  *
1333                  * After generating some load on the machine.
1334                  */
1335                 if (verbose > 1) {
1336                         static u64 n;
1337                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1338                                 id, perf_evsel__name(evsel), ++n);
1339                 }
1340                 return NULL;
1341         }
1342
1343         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1344             trace__read_syscall_info(trace, id))
1345                 goto out_cant_read;
1346
1347         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1348                 goto out_cant_read;
1349
1350         return &trace->syscalls.table[id];
1351
1352 out_cant_read:
1353         if (verbose) {
1354                 fprintf(trace->output, "Problems reading syscall %d", id);
1355                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1356                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1357                 fputs(" information\n", trace->output);
1358         }
1359         return NULL;
1360 }
1361
1362 static void thread__update_stats(struct thread_trace *ttrace,
1363                                  int id, struct perf_sample *sample)
1364 {
1365         struct int_node *inode;
1366         struct stats *stats;
1367         u64 duration = 0;
1368
1369         inode = intlist__findnew(ttrace->syscall_stats, id);
1370         if (inode == NULL)
1371                 return;
1372
1373         stats = inode->priv;
1374         if (stats == NULL) {
1375                 stats = malloc(sizeof(struct stats));
1376                 if (stats == NULL)
1377                         return;
1378                 init_stats(stats);
1379                 inode->priv = stats;
1380         }
1381
1382         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1383                 duration = sample->time - ttrace->entry_time;
1384
1385         update_stats(stats, duration);
1386 }
1387
1388 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1389                             struct perf_sample *sample)
1390 {
1391         char *msg;
1392         void *args;
1393         size_t printed = 0;
1394         struct thread *thread;
1395         int id = perf_evsel__intval(evsel, sample, "id");
1396         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1397         struct thread_trace *ttrace;
1398
1399         if (sc == NULL)
1400                 return -1;
1401
1402         if (sc->filtered)
1403                 return 0;
1404
1405         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1406         ttrace = thread__trace(thread, trace->output);
1407         if (ttrace == NULL)
1408                 return -1;
1409
1410         args = perf_evsel__rawptr(evsel, sample, "args");
1411         if (args == NULL) {
1412                 fprintf(trace->output, "Problems reading syscall arguments\n");
1413                 return -1;
1414         }
1415
1416         ttrace = thread->priv;
1417
1418         if (ttrace->entry_str == NULL) {
1419                 ttrace->entry_str = malloc(1024);
1420                 if (!ttrace->entry_str)
1421                         return -1;
1422         }
1423
1424         ttrace->entry_time = sample->time;
1425         msg = ttrace->entry_str;
1426         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1427
1428         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1429                                            args, trace, thread);
1430
1431         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1432                 if (!trace->duration_filter) {
1433                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1434                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1435                 }
1436         } else
1437                 ttrace->entry_pending = true;
1438
1439         return 0;
1440 }
1441
1442 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1443                            struct perf_sample *sample)
1444 {
1445         int ret;
1446         u64 duration = 0;
1447         struct thread *thread;
1448         int id = perf_evsel__intval(evsel, sample, "id");
1449         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1450         struct thread_trace *ttrace;
1451
1452         if (sc == NULL)
1453                 return -1;
1454
1455         if (sc->filtered)
1456                 return 0;
1457
1458         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1459         ttrace = thread__trace(thread, trace->output);
1460         if (ttrace == NULL)
1461                 return -1;
1462
1463         if (trace->summary)
1464                 thread__update_stats(ttrace, id, sample);
1465
1466         ret = perf_evsel__intval(evsel, sample, "ret");
1467
1468         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1469                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1470                 trace->last_vfs_getname = NULL;
1471                 ++trace->stats.vfs_getname;
1472         }
1473
1474         ttrace = thread->priv;
1475
1476         ttrace->exit_time = sample->time;
1477
1478         if (ttrace->entry_time) {
1479                 duration = sample->time - ttrace->entry_time;
1480                 if (trace__filter_duration(trace, duration))
1481                         goto out;
1482         } else if (trace->duration_filter)
1483                 goto out;
1484
1485         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1486
1487         if (ttrace->entry_pending) {
1488                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1489         } else {
1490                 fprintf(trace->output, " ... [");
1491                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1492                 fprintf(trace->output, "]: %s()", sc->name);
1493         }
1494
1495         if (sc->fmt == NULL) {
1496 signed_print:
1497                 fprintf(trace->output, ") = %d", ret);
1498         } else if (ret < 0 && sc->fmt->errmsg) {
1499                 char bf[256];
1500                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1501                            *e = audit_errno_to_name(-ret);
1502
1503                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1504         } else if (ret == 0 && sc->fmt->timeout)
1505                 fprintf(trace->output, ") = 0 Timeout");
1506         else if (sc->fmt->hexret)
1507                 fprintf(trace->output, ") = %#x", ret);
1508         else
1509                 goto signed_print;
1510
1511         fputc('\n', trace->output);
1512 out:
1513         ttrace->entry_pending = false;
1514
1515         return 0;
1516 }
1517
1518 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1519                               struct perf_sample *sample)
1520 {
1521         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1522         return 0;
1523 }
1524
1525 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1526                                      struct perf_sample *sample)
1527 {
1528         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1529         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1530         struct thread *thread = machine__findnew_thread(trace->host,
1531                                                         sample->pid,
1532                                                         sample->tid);
1533         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1534
1535         if (ttrace == NULL)
1536                 goto out_dump;
1537
1538         ttrace->runtime_ms += runtime_ms;
1539         trace->runtime_ms += runtime_ms;
1540         return 0;
1541
1542 out_dump:
1543         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1544                evsel->name,
1545                perf_evsel__strval(evsel, sample, "comm"),
1546                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1547                runtime,
1548                perf_evsel__intval(evsel, sample, "vruntime"));
1549         return 0;
1550 }
1551
1552 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1553 {
1554         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1555             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1556                 return false;
1557
1558         if (trace->pid_list || trace->tid_list)
1559                 return true;
1560
1561         return false;
1562 }
1563
1564 static int trace__process_sample(struct perf_tool *tool,
1565                                  union perf_event *event __maybe_unused,
1566                                  struct perf_sample *sample,
1567                                  struct perf_evsel *evsel,
1568                                  struct machine *machine __maybe_unused)
1569 {
1570         struct trace *trace = container_of(tool, struct trace, tool);
1571         int err = 0;
1572
1573         tracepoint_handler handler = evsel->handler.func;
1574
1575         if (skip_sample(trace, sample))
1576                 return 0;
1577
1578         if (!trace->full_time && trace->base_time == 0)
1579                 trace->base_time = sample->time;
1580
1581         if (handler)
1582                 handler(trace, evsel, sample);
1583
1584         return err;
1585 }
1586
1587 static bool
1588 perf_session__has_tp(struct perf_session *session, const char *name)
1589 {
1590         struct perf_evsel *evsel;
1591
1592         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1593
1594         return evsel != NULL;
1595 }
1596
1597 static int parse_target_str(struct trace *trace)
1598 {
1599         if (trace->opts.target.pid) {
1600                 trace->pid_list = intlist__new(trace->opts.target.pid);
1601                 if (trace->pid_list == NULL) {
1602                         pr_err("Error parsing process id string\n");
1603                         return -EINVAL;
1604                 }
1605         }
1606
1607         if (trace->opts.target.tid) {
1608                 trace->tid_list = intlist__new(trace->opts.target.tid);
1609                 if (trace->tid_list == NULL) {
1610                         pr_err("Error parsing thread id string\n");
1611                         return -EINVAL;
1612                 }
1613         }
1614
1615         return 0;
1616 }
1617
/*
 * Implement 'perf trace record': run cmd_record() with a fixed set of
 * options selecting the raw_syscalls tracepoints, followed by the
 * caller's own @argv.
 * Returns -ENOMEM when the argument vector cannot be allocated, the
 * result of cmd_record() otherwise.
 */
static int trace__record(int argc, const char **argv)
{
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
        };
        const unsigned int nr_fixed = ARRAY_SIZE(record_args);
        unsigned int rec_argc = nr_fixed + argc;
        unsigned int n = 0, k;
        const char **rec_argv;

        /* one extra, zeroed slot keeps the vector NULL terminated */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (!rec_argv)
                return -ENOMEM;

        for (k = 0; k < nr_fixed; k++)
                rec_argv[n++] = record_args[k];

        for (k = 0; k < (unsigned int)argc; k++)
                rec_argv[n++] = argv[k];

        return cmd_record(n, rec_argv, NULL);
}
1644
1645 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1646
1647 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1648 {
1649         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1650                                                      evlist->nr_entries);
1651         if (evsel == NULL)
1652                 return;
1653
1654         if (perf_evsel__field(evsel, "pathname") == NULL) {
1655                 perf_evsel__delete(evsel);
1656                 return;
1657         }
1658
1659         evsel->handler.func = trace__vfs_getname;
1660         perf_evlist__add(evlist, evsel);
1661 }
1662
/*
 * Live tracing entry point.
 *
 * Creates an event list holding the raw_syscalls:sys_enter and
 * raw_syscalls:sys_exit tracepoints (plus probe:vfs_getname when available
 * and sched:sched_stat_runtime when trace->sched is set), creates the
 * thread/cpu maps for the configured target, optionally prepares and starts
 * a workload (when argc > 0), then reads events from the mmap'ed buffers and
 * dispatches every sample to the handler stored in its evsel.
 *
 * trace->live is true while this function runs and is reset on exit.
 *
 * Returns the value left in err: -1 (the initial value) when the event list
 * or tracepoints cannot be set up, the failing helper's return value for the
 * later setup steps, otherwise the result of the last
 * perf_evlist__parse_sample() call.
 */
1663 static int trace__run(struct trace *trace, int argc, const char **argv)
1664 {
1665         struct perf_evlist *evlist = perf_evlist__new();
1666         struct perf_evsel *evsel;
1667         int err = -1, i;
1668         unsigned long before;
             /* a command was given on the command line: fork it as the workload */
1669         const bool forks = argc > 0;
1670
1671         trace->live = true;
1672
1673         if (evlist == NULL) {
1674                 fprintf(trace->output, "Not enough memory to run!\n");
1675                 goto out;
1676         }
1677
1678         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1679                 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
1680                 goto out_error_tp;
1681
             /* optional: the helper silently does nothing if the probe is unusable */
1682         perf_evlist__add_vfs_getname(evlist);
1683
1684         if (trace->sched &&
1685                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1686                                 trace__sched_stat_runtime))
1687                 goto out_error_tp;
1688
1689         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1690         if (err < 0) {
1691                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1692                 goto out_delete_evlist;
1693         }
1694
1695         err = trace__symbols_init(trace, evlist);
1696         if (err < 0) {
1697                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1698                 goto out_delete_maps;
1699         }
1700
1701         perf_evlist__config(evlist, &trace->opts);
1702
1703         signal(SIGCHLD, sig_handler);
1704         signal(SIGINT, sig_handler);
1705
1706         if (forks) {
1707                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1708                                                     argv, false, false);
1709                 if (err < 0) {
1710                         fprintf(trace->output, "Couldn't run the workload!\n");
1711                         goto out_delete_maps;
1712                 }
1713         }
1714
1715         err = perf_evlist__open(evlist);
1716         if (err < 0)
1717                 goto out_error_open;
1718
1719         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1720         if (err < 0) {
1721                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1722                 goto out_close_evlist;
1723         }
1724
1725         perf_evlist__enable(evlist);
1726
             /* the workload is only started once the events are enabled */
1727         if (forks)
1728                 perf_evlist__start_workload(evlist);
1729
1730         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1731 again:
             /* remember the event count so we can tell whether this pass read anything */
1732         before = trace->nr_events;
1733
1734         for (i = 0; i < evlist->nr_mmaps; i++) {
1735                 union perf_event *event;
1736
1737                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1738                         const u32 type = event->header.type;
1739                         tracepoint_handler handler;
1740                         struct perf_sample sample;
1741
1742                         ++trace->nr_events;
1743
1744                         err = perf_evlist__parse_sample(evlist, event, &sample);
1745                         if (err) {
1746                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1747                                 continue;
1748                         }
1749
                             /* first timestamp seen becomes the base for relative times */
1750                         if (!trace->full_time && trace->base_time == 0)
1751                                 trace->base_time = sample.time;
1752
                             /* non-sample records go to the generic event processing */
1753                         if (type != PERF_RECORD_SAMPLE) {
1754                                 trace__process_event(trace, trace->host, event);
1755                                 continue;
1756                         }
1757
1758                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1759                         if (evsel == NULL) {
1760                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1761                                 continue;
1762                         }
1763
1764                         if (sample.raw_data == NULL) {
1765                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1766                                        perf_evsel__name(evsel), sample.tid,
1767                                        sample.cpu, sample.raw_size);
1768                                 continue;
1769                         }
1770
1771                         handler = evsel->handler.func;
1772                         handler(trace, evsel, &sample);
1773
1774                         if (interrupted)
1775                                 goto out_disable;
1776                 }
1777         }
1778
             /*
              * Nothing new was read in this pass: wait in poll() (forever, or
              * 100ms once 'done' is set) and loop only if it reports activity.
              * If events were read, loop again right away.
              */
1779         if (trace->nr_events == before) {
1780                 int timeout = done ? 100 : -1;
1781
1782                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1783                         goto again;
1784         } else {
1785                 goto again;
1786         }
1787
1788 out_disable:
1789         perf_evlist__disable(evlist);
1790
1791         if (!err) {
1792                 if (trace->summary)
1793                         trace__fprintf_thread_summary(trace, trace->output);
1794
1795                 if (trace->show_tool_stats) {
1796                         fprintf(trace->output, "Stats:\n "
1797                                                " vfs_getname : %" PRIu64 "\n"
1798                                                " proc_getname: %" PRIu64 "\n",
1799                                 trace->stats.vfs_getname,
1800                                 trace->stats.proc_getname);
1801                 }
1802         }
1803
             /* teardown in reverse order of setup; labels let failures join midway */
1804         perf_evlist__munmap(evlist);
1805 out_close_evlist:
1806         perf_evlist__close(evlist);
1807 out_delete_maps:
1808         perf_evlist__delete_maps(evlist);
1809 out_delete_evlist:
1810         perf_evlist__delete(evlist);
1811 out:
1812         trace->live = false;
1813         return err;
     /*
      * Error reporting block: it follows the return above, so it is entered
      * only through the goto labels inside it.  The braces scope errbuf.
      */
1814 {
1815         char errbuf[BUFSIZ];
1816
1817 out_error_tp:
1818         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1819         goto out_error;
1820
1821 out_error_open:
1822         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1823
1824 out_error:
1825         fprintf(trace->output, "%s\n", errbuf);
1826         goto out_delete_evlist;
1827 }
1828 }
1829
1830 static int trace__replay(struct trace *trace)
1831 {
1832         const struct perf_evsel_str_handler handlers[] = {
1833                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1834                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1835                 { "probe:vfs_getname",       trace__vfs_getname, },
1836         };
1837         struct perf_data_file file = {
1838                 .path  = input_name,
1839                 .mode  = PERF_DATA_MODE_READ,
1840         };
1841         struct perf_session *session;
1842         int err = -1;
1843
1844         trace->tool.sample        = trace__process_sample;
1845         trace->tool.mmap          = perf_event__process_mmap;
1846         trace->tool.mmap2         = perf_event__process_mmap2;
1847         trace->tool.comm          = perf_event__process_comm;
1848         trace->tool.exit          = perf_event__process_exit;
1849         trace->tool.fork          = perf_event__process_fork;
1850         trace->tool.attr          = perf_event__process_attr;
1851         trace->tool.tracing_data = perf_event__process_tracing_data;
1852         trace->tool.build_id      = perf_event__process_build_id;
1853
1854         trace->tool.ordered_samples = true;
1855         trace->tool.ordering_requires_timestamps = true;
1856
1857         /* add tid to output */
1858         trace->multiple_threads = true;
1859
1860         if (symbol__init() < 0)
1861                 return -1;
1862
1863         session = perf_session__new(&file, false, &trace->tool);
1864         if (session == NULL)
1865                 return -ENOMEM;
1866
1867         trace->host = &session->machines.host;
1868
1869         err = perf_session__set_tracepoints_handlers(session, handlers);
1870         if (err)
1871                 goto out;
1872
1873         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1874                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1875                 goto out;
1876         }
1877
1878         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1879                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1880                 goto out;
1881         }
1882
1883         err = parse_target_str(trace);
1884         if (err != 0)
1885                 goto out;
1886
1887         setup_pager();
1888
1889         err = perf_session__process_events(session, &trace->tool);
1890         if (err)
1891                 pr_err("Failed to process events, error %d", err);
1892
1893         else if (trace->summary)
1894                 trace__fprintf_thread_summary(trace, trace->output);
1895
1896 out:
1897         perf_session__delete(session);
1898
1899         return err;
1900 }
1901
/*
 * Write the banner and column headings of the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() return values, i.e. the number of
 * characters written when every call succeeds.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        size_t nchars = 0;

        /* banner */
        nchars += fprintf(fp, "\n _____________________________________________________________________________\n");
        nchars += fprintf(fp, " __)    Summary of events    (__\n\n");

        /* per-thread line, then the per-syscall columns and their units */
        nchars += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
        nchars += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
        nchars += fprintf(fp, "                                                   msec    msec   msec     %%\n");

        nchars += fprintf(fp, " _____________________________________________________________________________\n\n");

        return nchars;
}
1915
1916 static size_t thread__dump_stats(struct thread_trace *ttrace,
1917                                  struct trace *trace, FILE *fp)
1918 {
1919         struct stats *stats;
1920         size_t printed = 0;
1921         struct syscall *sc;
1922         struct int_node *inode = intlist__first(ttrace->syscall_stats);
1923
1924         if (inode == NULL)
1925                 return 0;
1926
1927         printed += fprintf(fp, "\n");
1928
1929         /* each int_node is a syscall */
1930         while (inode) {
1931                 stats = inode->priv;
1932                 if (stats) {
1933                         double min = (double)(stats->min) / NSEC_PER_MSEC;
1934                         double max = (double)(stats->max) / NSEC_PER_MSEC;
1935                         double avg = avg_stats(stats);
1936                         double pct;
1937                         u64 n = (u64) stats->n;
1938
1939                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1940                         avg /= NSEC_PER_MSEC;
1941
1942                         sc = &trace->syscalls.table[inode->i];
1943                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
1944                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
1945                                            n, min, max);
1946                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
1947                 }
1948
1949                 inode = intlist__next(inode);
1950         }
1951
1952         printed += fprintf(fp, "\n\n");
1953
1954         return printed;
1955 }
1956
/*
 * Context handed to the per-thread summary callback through its void *priv
 * argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* tracer state the callback reads from */
        size_t printed;         /* running total of fprintf() results */
};
1963
1964 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1965 {
1966         struct summary_data *data = priv;
1967         FILE *fp = data->fp;
1968         size_t printed = data->printed;
1969         struct trace *trace = data->trace;
1970         struct thread_trace *ttrace = thread->priv;
1971         const char *color;
1972         double ratio;
1973
1974         if (ttrace == NULL)
1975                 return 0;
1976
1977         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1978
1979         color = PERF_COLOR_NORMAL;
1980         if (ratio > 50.0)
1981                 color = PERF_COLOR_RED;
1982         else if (ratio > 25.0)
1983                 color = PERF_COLOR_GREEN;
1984         else if (ratio > 5.0)
1985                 color = PERF_COLOR_YELLOW;
1986
1987         printed += color_fprintf(fp, color, "%20s", thread->comm);
1988         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1989         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1990         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1991         printed += thread__dump_stats(ttrace, trace, fp);
1992
1993         data->printed += printed;
1994
1995         return 0;
1996 }
1997
1998 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1999 {
2000         struct summary_data data = {
2001                 .fp = fp,
2002                 .trace = trace
2003         };
2004         data.printed = trace__fprintf_threads_header(fp);
2005
2006         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2007
2008         return data.printed;
2009 }
2010
2011 static int trace__set_duration(const struct option *opt, const char *str,
2012                                int unset __maybe_unused)
2013 {
2014         struct trace *trace = opt->value;
2015
2016         trace->duration_filter = atof(str);
2017         return 0;
2018 }
2019
2020 static int trace__open_output(struct trace *trace, const char *filename)
2021 {
2022         struct stat st;
2023
2024         if (!stat(filename, &st) && st.st_size) {
2025                 char oldname[PATH_MAX];
2026
2027                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2028                 unlink(oldname);
2029                 rename(filename, oldname);
2030         }
2031
2032         trace->output = fopen(filename, "w");
2033
2034         return trace->output == NULL ? -errno : 0;
2035 }
2036
2037 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2038 {
2039         const char * const trace_usage[] = {
2040                 "perf trace [<options>] [<command>]",
2041                 "perf trace [<options>] -- <command> [<options>]",
2042                 "perf trace record [<options>] [<command>]",
2043                 "perf trace record [<options>] -- <command> [<options>]",
2044                 NULL
2045         };
2046         struct trace trace = {
2047                 .audit = {
2048                         .machine = audit_detect_machine(),
2049                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2050                 },
2051                 .syscalls = {
2052                         . max = -1,
2053                 },
2054                 .opts = {
2055                         .target = {
2056                                 .uid       = UINT_MAX,
2057                                 .uses_mmap = true,
2058                         },
2059                         .user_freq     = UINT_MAX,
2060                         .user_interval = ULLONG_MAX,
2061                         .no_delay      = true,
2062                         .mmap_pages    = 1024,
2063                 },
2064                 .output = stdout,
2065                 .show_comm = true,
2066         };
2067         const char *output_name = NULL;
2068         const char *ev_qualifier_str = NULL;
2069         const struct option trace_options[] = {
2070         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2071                     "show the thread COMM next to its id"),
2072         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2073         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2074                     "list of events to trace"),
2075         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2076         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2077         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2078                     "trace events on existing process id"),
2079         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2080                     "trace events on existing thread id"),
2081         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2082                     "system-wide collection from all CPUs"),
2083         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2084                     "list of cpus to monitor"),
2085         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2086                     "child tasks do not inherit counters"),
2087         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2088                      "number of mmap data pages",
2089                      perf_evlist__parse_mmap_pages),
2090         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2091                    "user to profile"),
2092         OPT_CALLBACK(0, "duration", &trace, "float",
2093                      "show only events with duration > N.M ms",
2094                      trace__set_duration),
2095         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2096         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2097         OPT_BOOLEAN('T', "time", &trace.full_time,
2098                     "Show full timestamp, not time relative to first start"),
2099         OPT_BOOLEAN(0, "summary", &trace.summary,
2100                     "Show syscall summary with statistics"),
2101         OPT_END()
2102         };
2103         int err;
2104         char bf[BUFSIZ];
2105
2106         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2107                 return trace__record(argc-2, &argv[2]);
2108
2109         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2110
2111         if (output_name != NULL) {
2112                 err = trace__open_output(&trace, output_name);
2113                 if (err < 0) {
2114                         perror("failed to create output file");
2115                         goto out;
2116                 }
2117         }
2118
2119         if (ev_qualifier_str != NULL) {
2120                 const char *s = ev_qualifier_str;
2121
2122                 trace.not_ev_qualifier = *s == '!';
2123                 if (trace.not_ev_qualifier)
2124                         ++s;
2125                 trace.ev_qualifier = strlist__new(true, s);
2126                 if (trace.ev_qualifier == NULL) {
2127                         fputs("Not enough memory to parse event qualifier",
2128                               trace.output);
2129                         err = -ENOMEM;
2130                         goto out_close;
2131                 }
2132         }
2133
2134         err = perf_target__validate(&trace.opts.target);
2135         if (err) {
2136                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2137                 fprintf(trace.output, "%s", bf);
2138                 goto out_close;
2139         }
2140
2141         err = perf_target__parse_uid(&trace.opts.target);
2142         if (err) {
2143                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2144                 fprintf(trace.output, "%s", bf);
2145                 goto out_close;
2146         }
2147
2148         if (!argc && perf_target__none(&trace.opts.target))
2149                 trace.opts.target.system_wide = true;
2150
2151         if (input_name)
2152                 err = trace__replay(&trace);
2153         else
2154                 err = trace__run(&trace, argc, argv);
2155
2156 out_close:
2157         if (output_name != NULL)
2158                 fclose(trace.output);
2159 out:
2160         return err;
2161 }