1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
49 return *(u##bits *)(sample->raw_data + field->offset); \
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 return bswap_##bits(value);\
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
68 static int tp_field__init_uint(struct tp_field *field,
69 struct format_field *format_field,
72 field->offset = format_field->offset;
74 switch (format_field->size) {
76 field->integer = tp_field__u8;
79 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
96 return sample->raw_data + field->offset;
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
101 field->offset = format_field->offset;
102 field->pointer = tp_field__ptr;
109 struct tp_field args, ret;
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 struct tp_field *field,
117 struct format_field *format_field = perf_evsel__field(evsel, name);
119 if (format_field == NULL)
122 return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
126 ({ struct syscall_tp *sc = evsel->priv;\
127 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *found = perf_evsel__field(evsel, name);

	if (!found)
		return -1;

	return tp_field__init_ptr(field, found);
}
141 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
142 ({ struct syscall_tp *sc = evsel->priv;\
143 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
149 perf_evsel__delete(evsel);
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
154 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
157 evsel->priv = malloc(sizeof(struct syscall_tp));
159 if (evsel->priv == NULL)
162 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165 evsel->handler = handler;
171 perf_evsel__delete_priv(evsel);
175 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
176 ({ struct syscall_tp *fields = evsel->priv; \
177 fields->name.integer(&fields->name, sample); })
179 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
180 ({ struct syscall_tp *fields = evsel->priv; \
181 fields->name.pointer(&fields->name, sample); })
183 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
184 void *sys_enter_handler,
185 void *sys_exit_handler)
188 struct perf_evsel *sys_enter, *sys_exit;
190 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
191 if (sys_enter == NULL)
194 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
195 goto out_delete_sys_enter;
197 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
198 if (sys_exit == NULL)
199 goto out_delete_sys_enter;
201 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
202 goto out_delete_sys_exit;
204 perf_evlist__add(evlist, sys_enter);
205 perf_evlist__add(evlist, sys_exit);
212 perf_evsel__delete_priv(sys_exit);
213 out_delete_sys_enter:
214 perf_evsel__delete_priv(sys_enter);
221 struct thread *thread;
231 const char **entries;
234 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
235 .nr_entries = ARRAY_SIZE(array), \
239 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
241 .nr_entries = ARRAY_SIZE(array), \
245 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
247 struct syscall_arg *arg)
249 struct strarray *sa = arg->parm;
250 int idx = arg->val - sa->offset;
252 if (idx < 0 || idx >= sa->nr_entries)
253 return scnprintf(bf, size, intfmt, arg->val);
255 return scnprintf(bf, size, "%s", sa->entries[idx]);
258 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
259 struct syscall_arg *arg)
261 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
264 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
266 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
272 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
274 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
275 struct syscall_arg *arg);
277 #define SCA_FD syscall_arg__scnprintf_fd
279 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
280 struct syscall_arg *arg)
285 return scnprintf(bf, size, "CWD");
287 return syscall_arg__scnprintf_fd(bf, size, arg);
290 #define SCA_FDAT syscall_arg__scnprintf_fd_at
292 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
293 struct syscall_arg *arg);
295 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
297 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
298 struct syscall_arg *arg)
300 return scnprintf(bf, size, "%#lx", arg->val);
303 #define SCA_HEX syscall_arg__scnprintf_hex
305 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
306 struct syscall_arg *arg)
308 int printed = 0, prot = arg->val;
310 if (prot == PROT_NONE)
311 return scnprintf(bf, size, "NONE");
312 #define P_MMAP_PROT(n) \
313 if (prot & PROT_##n) { \
314 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
324 P_MMAP_PROT(GROWSDOWN);
325 P_MMAP_PROT(GROWSUP);
329 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
334 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
336 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
337 struct syscall_arg *arg)
339 int printed = 0, flags = arg->val;
341 #define P_MMAP_FLAG(n) \
342 if (flags & MAP_##n) { \
343 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
348 P_MMAP_FLAG(PRIVATE);
352 P_MMAP_FLAG(ANONYMOUS);
353 P_MMAP_FLAG(DENYWRITE);
354 P_MMAP_FLAG(EXECUTABLE);
357 P_MMAP_FLAG(GROWSDOWN);
359 P_MMAP_FLAG(HUGETLB);
362 P_MMAP_FLAG(NONBLOCK);
363 P_MMAP_FLAG(NORESERVE);
364 P_MMAP_FLAG(POPULATE);
366 #ifdef MAP_UNINITIALIZED
367 P_MMAP_FLAG(UNINITIALIZED);
372 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
377 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
379 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
380 struct syscall_arg *arg)
382 int behavior = arg->val;
385 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
388 P_MADV_BHV(SEQUENTIAL);
389 P_MADV_BHV(WILLNEED);
390 P_MADV_BHV(DONTNEED);
392 P_MADV_BHV(DONTFORK);
394 P_MADV_BHV(HWPOISON);
395 #ifdef MADV_SOFT_OFFLINE
396 P_MADV_BHV(SOFT_OFFLINE);
398 P_MADV_BHV(MERGEABLE);
399 P_MADV_BHV(UNMERGEABLE);
401 P_MADV_BHV(HUGEPAGE);
403 #ifdef MADV_NOHUGEPAGE
404 P_MADV_BHV(NOHUGEPAGE);
407 P_MADV_BHV(DONTDUMP);
416 return scnprintf(bf, size, "%#x", behavior);
419 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
421 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
422 struct syscall_arg *arg)
424 int printed = 0, op = arg->val;
427 return scnprintf(bf, size, "NONE");
429 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
430 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
445 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
450 #define SCA_FLOCK syscall_arg__scnprintf_flock
452 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
454 enum syscall_futex_args {
455 SCF_UADDR = (1 << 0),
458 SCF_TIMEOUT = (1 << 3),
459 SCF_UADDR2 = (1 << 4),
463 int cmd = op & FUTEX_CMD_MASK;
467 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
468 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
469 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
470 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
471 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
472 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
473 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
474 P_FUTEX_OP(WAKE_OP); break;
475 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
476 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
477 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
478 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
479 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
480 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
481 default: printed = scnprintf(bf, size, "%#x", cmd); break;
484 if (op & FUTEX_PRIVATE_FLAG)
485 printed += scnprintf(bf + printed, size - printed, "|PRIV");
487 if (op & FUTEX_CLOCK_REALTIME)
488 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
493 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
495 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
496 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
498 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
499 static DEFINE_STRARRAY(itimers);
501 static const char *whences[] = { "SET", "CUR", "END",
509 static DEFINE_STRARRAY(whences);
511 static const char *fcntl_cmds[] = {
512 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
513 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
514 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
517 static DEFINE_STRARRAY(fcntl_cmds);
519 static const char *rlimit_resources[] = {
520 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
521 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
524 static DEFINE_STRARRAY(rlimit_resources);
526 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
527 static DEFINE_STRARRAY(sighow);
529 static const char *clockid[] = {
530 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
531 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
533 static DEFINE_STRARRAY(clockid);
535 static const char *socket_families[] = {
536 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
537 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
538 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
539 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
540 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
541 "ALG", "NFC", "VSOCK",
543 static DEFINE_STRARRAY(socket_families);
545 #ifndef SOCK_TYPE_MASK
546 #define SOCK_TYPE_MASK 0xf
549 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
550 struct syscall_arg *arg)
554 flags = type & ~SOCK_TYPE_MASK;
556 type &= SOCK_TYPE_MASK;
558 * Can't use a strarray, MIPS may override for ABI reasons.
561 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
566 P_SK_TYPE(SEQPACKET);
571 printed = scnprintf(bf, size, "%#x", type);
574 #define P_SK_FLAG(n) \
575 if (flags & SOCK_##n) { \
576 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
577 flags &= ~SOCK_##n; \
585 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
590 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
593 #define MSG_PROBE 0x10
595 #ifndef MSG_WAITFORONE
596 #define MSG_WAITFORONE 0x10000
598 #ifndef MSG_SENDPAGE_NOTLAST
599 #define MSG_SENDPAGE_NOTLAST 0x20000
602 #define MSG_FASTOPEN 0x20000000
605 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
606 struct syscall_arg *arg)
608 int printed = 0, flags = arg->val;
611 return scnprintf(bf, size, "NONE");
612 #define P_MSG_FLAG(n) \
613 if (flags & MSG_##n) { \
614 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
620 P_MSG_FLAG(DONTROUTE);
625 P_MSG_FLAG(DONTWAIT);
632 P_MSG_FLAG(ERRQUEUE);
633 P_MSG_FLAG(NOSIGNAL);
635 P_MSG_FLAG(WAITFORONE);
636 P_MSG_FLAG(SENDPAGE_NOTLAST);
637 P_MSG_FLAG(FASTOPEN);
638 P_MSG_FLAG(CMSG_CLOEXEC);
642 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
647 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
649 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
650 struct syscall_arg *arg)
655 if (mode == F_OK) /* 0 */
656 return scnprintf(bf, size, "F");
658 if (mode & n##_OK) { \
659 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
669 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
674 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
676 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
681 if (!(flags & O_CREAT))
682 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
685 return scnprintf(bf, size, "RDONLY");
687 if (flags & O_##n) { \
688 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
712 if ((flags & O_SYNC) == O_SYNC)
713 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
725 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
730 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
732 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
733 struct syscall_arg *arg)
735 int printed = 0, flags = arg->val;
738 return scnprintf(bf, size, "NONE");
740 if (flags & EFD_##n) { \
741 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
756 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
758 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
759 struct syscall_arg *arg)
761 int printed = 0, flags = arg->val;
764 if (flags & O_##n) { \
765 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
774 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
779 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
781 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
786 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
821 return scnprintf(bf, size, "%#x", sig);
824 #define SCA_SIGNUM syscall_arg__scnprintf_signum
826 #define TCGETS 0x5401
828 static const char *tioctls[] = {
829 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
830 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
831 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
832 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
833 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
834 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
835 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
836 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
837 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
838 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
839 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
840 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
841 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
842 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
843 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
846 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
848 #define STRARRAY(arg, name, array) \
849 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
850 .arg_parm = { [arg] = &strarray__##array, }
852 static struct syscall_fmt {
855 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
861 { .name = "access", .errmsg = true,
862 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
863 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
864 { .name = "brk", .hexret = true,
865 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
866 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
867 { .name = "close", .errmsg = true,
868 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
869 { .name = "connect", .errmsg = true, },
870 { .name = "dup", .errmsg = true,
871 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
872 { .name = "dup2", .errmsg = true,
873 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
874 { .name = "dup3", .errmsg = true,
875 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
876 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
877 { .name = "eventfd2", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
879 { .name = "faccessat", .errmsg = true,
880 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
881 { .name = "fadvise64", .errmsg = true,
882 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
883 { .name = "fallocate", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
885 { .name = "fchdir", .errmsg = true,
886 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
887 { .name = "fchmod", .errmsg = true,
888 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
889 { .name = "fchmodat", .errmsg = true,
890 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
891 { .name = "fchown", .errmsg = true,
892 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
893 { .name = "fchownat", .errmsg = true,
894 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
895 { .name = "fcntl", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FD, /* fd */
897 [1] = SCA_STRARRAY, /* cmd */ },
898 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
899 { .name = "fdatasync", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "flock", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */
903 [1] = SCA_FLOCK, /* cmd */ }, },
904 { .name = "fsetxattr", .errmsg = true,
905 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
906 { .name = "fstat", .errmsg = true, .alias = "newfstat",
907 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
908 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
909 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
910 { .name = "fstatfs", .errmsg = true,
911 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
912 { .name = "fsync", .errmsg = true,
913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 { .name = "ftruncate", .errmsg = true,
915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
916 { .name = "futex", .errmsg = true,
917 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
918 { .name = "futimesat", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
920 { .name = "getdents", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "getdents64", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
925 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
926 { .name = "ioctl", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */
928 [1] = SCA_STRHEXARRAY, /* cmd */
929 [2] = SCA_HEX, /* arg */ },
930 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
931 { .name = "kill", .errmsg = true,
932 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
933 { .name = "linkat", .errmsg = true,
934 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
935 { .name = "lseek", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FD, /* fd */
937 [2] = SCA_STRARRAY, /* whence */ },
938 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
939 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
940 { .name = "madvise", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_HEX, /* start */
942 [2] = SCA_MADV_BHV, /* behavior */ }, },
943 { .name = "mkdirat", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
945 { .name = "mknodat", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 { .name = "mlock", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
949 { .name = "mlockall", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951 { .name = "mmap", .hexret = true,
952 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
953 [2] = SCA_MMAP_PROT, /* prot */
954 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
955 { .name = "mprotect", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_HEX, /* start */
957 [2] = SCA_MMAP_PROT, /* prot */ }, },
958 { .name = "mremap", .hexret = true,
959 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
960 [4] = SCA_HEX, /* new_addr */ }, },
961 { .name = "munlock", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963 { .name = "munmap", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 { .name = "name_to_handle_at", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
967 { .name = "newfstatat", .errmsg = true,
968 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
969 { .name = "open", .errmsg = true,
970 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
971 { .name = "open_by_handle_at", .errmsg = true,
972 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
973 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
974 { .name = "openat", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
976 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
977 { .name = "pipe2", .errmsg = true,
978 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
979 { .name = "poll", .errmsg = true, .timeout = true, },
980 { .name = "ppoll", .errmsg = true, .timeout = true, },
981 { .name = "pread", .errmsg = true, .alias = "pread64",
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983 { .name = "preadv", .errmsg = true, .alias = "pread",
984 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
985 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
986 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "pwritev", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
990 { .name = "read", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
992 { .name = "readlinkat", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
994 { .name = "readv", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 { .name = "recvfrom", .errmsg = true,
997 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
998 { .name = "recvmmsg", .errmsg = true,
999 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000 { .name = "recvmsg", .errmsg = true,
1001 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1002 { .name = "renameat", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1004 { .name = "rt_sigaction", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1006 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1007 { .name = "rt_sigqueueinfo", .errmsg = true,
1008 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1009 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1010 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1011 { .name = "select", .errmsg = true, .timeout = true, },
1012 { .name = "sendmmsg", .errmsg = true,
1013 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1014 { .name = "sendmsg", .errmsg = true,
1015 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1016 { .name = "sendto", .errmsg = true,
1017 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1018 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1019 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1020 { .name = "shutdown", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022 { .name = "socket", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1024 [1] = SCA_SK_TYPE, /* type */ },
1025 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1026 { .name = "socketpair", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1028 [1] = SCA_SK_TYPE, /* type */ },
1029 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1030 { .name = "stat", .errmsg = true, .alias = "newstat", },
1031 { .name = "symlinkat", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1033 { .name = "tgkill", .errmsg = true,
1034 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1035 { .name = "tkill", .errmsg = true,
1036 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1037 { .name = "uname", .errmsg = true, .alias = "newuname", },
1038 { .name = "unlinkat", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040 { .name = "utimensat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1042 { .name = "write", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044 { .name = "writev", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1050 const struct syscall_fmt *fmt = fmtp;
1051 return strcmp(name, fmt->name);
1054 static struct syscall_fmt *syscall_fmt__find(const char *name)
1056 const int nmemb = ARRAY_SIZE(syscall_fmts);
1057 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1061 struct event_format *tp_format;
1064 struct syscall_fmt *fmt;
1065 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1069 static size_t fprintf_duration(unsigned long t, FILE *fp)
1071 double duration = (double)t / NSEC_PER_MSEC;
1072 size_t printed = fprintf(fp, "(");
1074 if (duration >= 1.0)
1075 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1076 else if (duration >= 0.01)
1077 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1079 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1080 return printed + fprintf(fp, "): ");
1083 struct thread_trace {
1087 unsigned long nr_events;
1095 struct intlist *syscall_stats;
1098 static struct thread_trace *thread_trace__new(void)
1100 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1103 ttrace->paths.max = -1;
1105 ttrace->syscall_stats = intlist__new(NULL);
1110 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1112 struct thread_trace *ttrace;
1117 if (thread->priv == NULL)
1118 thread->priv = thread_trace__new();
1120 if (thread->priv == NULL)
1123 ttrace = thread->priv;
1124 ++ttrace->nr_events;
1128 color_fprintf(fp, PERF_COLOR_RED,
1129 "WARNING: not enough memory, dropping samples!\n");
1134 struct perf_tool tool;
1141 struct syscall *table;
1143 struct perf_record_opts opts;
1144 struct machine *host;
1148 unsigned long nr_events;
1149 struct strlist *ev_qualifier;
1150 bool not_ev_qualifier;
1152 const char *last_vfs_getname;
1153 struct intlist *tid_list;
1154 struct intlist *pid_list;
1156 bool multiple_threads;
1159 bool show_tool_stats;
1160 double duration_filter;
1163 u64 vfs_getname, proc_getname;
1167 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1169 struct thread_trace *ttrace = thread->priv;
1171 if (fd > ttrace->paths.max) {
1172 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1177 if (ttrace->paths.max != -1) {
1178 memset(npath + ttrace->paths.max + 1, 0,
1179 (fd - ttrace->paths.max) * sizeof(char *));
1181 memset(npath, 0, (fd + 1) * sizeof(char *));
1184 ttrace->paths.table = npath;
1185 ttrace->paths.max = fd;
1188 ttrace->paths.table[fd] = strdup(pathname);
1190 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1193 static int thread__read_fd_path(struct thread *thread, int fd)
1195 char linkname[PATH_MAX], pathname[PATH_MAX];
1199 if (thread->pid_ == thread->tid) {
1200 scnprintf(linkname, sizeof(linkname),
1201 "/proc/%d/fd/%d", thread->pid_, fd);
1203 scnprintf(linkname, sizeof(linkname),
1204 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1207 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1210 ret = readlink(linkname, pathname, sizeof(pathname));
1212 if (ret < 0 || ret > st.st_size)
1215 pathname[ret] = '\0';
1216 return trace__set_fd_pathname(thread, fd, pathname);
1219 static const char *thread__fd_path(struct thread *thread, int fd,
1220 struct trace *trace)
1222 struct thread_trace *ttrace = thread->priv;
1230 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1233 ++trace->stats.proc_getname;
1234 if (thread__read_fd_path(thread, fd)) {
1238 return ttrace->paths.table[fd];
1241 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1242 struct syscall_arg *arg)
1245 size_t printed = scnprintf(bf, size, "%d", fd);
1246 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1249 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1254 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1255 struct syscall_arg *arg)
1258 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1259 struct thread_trace *ttrace = arg->thread->priv;
1261 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1262 free(ttrace->paths.table[fd]);
1263 ttrace->paths.table[fd] = NULL;
1269 static bool trace__filter_duration(struct trace *trace, double t)
1271 return t < (trace->duration_filter * NSEC_PER_MSEC);
1274 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1276 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1278 return fprintf(fp, "%10.3f ", ts);
1281 static bool done = false;
1282 static bool interrupted = false;
1284 static void sig_handler(int sig)
1287 interrupted = sig == SIGINT;
1290 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1291 u64 duration, u64 tstamp, FILE *fp)
1293 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1294 printed += fprintf_duration(duration, fp);
1296 if (trace->multiple_threads) {
1297 if (trace->show_comm)
1298 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1299 printed += fprintf(fp, "%d ", thread->tid);
1305 static int trace__process_event(struct trace *trace, struct machine *machine,
1306 union perf_event *event, struct perf_sample *sample)
1310 switch (event->header.type) {
1311 case PERF_RECORD_LOST:
1312 color_fprintf(trace->output, PERF_COLOR_RED,
1313 "LOST %" PRIu64 " events!\n", event->lost.lost);
1314 ret = machine__process_lost_event(machine, event, sample);
1316 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback adapter: recovers the struct trace that embeds
 * `tool` and forwards the event to trace__process_event().
 * Returns whatever trace__process_event() returns.
 */
1323 static int trace__tool_process(struct perf_tool *tool,
1324 union perf_event *event,
1325 struct perf_sample *sample,
1326 struct machine *machine)
/* `tool` is the `tool` member of a struct trace; step back to the container. */
1328 struct trace *trace = container_of(tool, struct trace, tool);
1329 return trace__process_event(trace, machine, event, sample);
/*
 * Initializes the symbol library and creates the host machine object, then
 * synthesizes thread events through trace__tool_process(): for the threads in
 * evlist->threads when the target names a task, otherwise through
 * perf_event__synthesize_threads().
 * NOTE(review): the error checks after symbol__init(), the trailing call
 * arguments and the return statement are not visible in this listing.
 */
1332 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1334 int err = symbol__init();
1339 trace->host = machine__new_host();
1340 if (trace->host == NULL)
/* Task target: only the threads in the evlist's thread map are synthesized. */
1343 if (perf_target__has_task(&trace->opts.target)) {
1344 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1345 trace__tool_process,
1348 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Builds the per-argument formatter table (sc->arg_scnprintf) for a syscall.
 * The table has one slot per tracepoint field except the first, and the loop
 * likewise starts at the second field (fields->next).
 * For each argument the formatter from the syscall's static description
 * (sc->fmt) wins when present; otherwise pointer-typed fields are printed
 * with syscall_arg__scnprintf_hex. Slots left NULL by calloc() get no
 * dedicated formatter.
 * NOTE(review): the `idx` declaration/increment, the allocation-failure
 * return and the guard around the sc->fmt->arg_parm read are not visible in
 * this listing.
 */
1358 static int syscall__set_arg_fmts(struct syscall *sc)
1360 struct format_field *field;
/* nr_fields - 1 slots: the first tracepoint field is not treated as an argument. */
1363 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1364 if (sc->arg_scnprintf == NULL)
1368 sc->arg_parm = sc->fmt->arg_parm;
1370 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1371 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1372 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1373 else if (field->flags & FIELD_IS_POINTER)
1374 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Lazily fills in trace->syscalls.table[id]:
 *  - resolves the syscall name through libaudit,
 *  - grows the table (zeroing the new entries) when `id` is beyond the
 *    current maximum,
 *  - marks the syscall as filtered according to the -e qualifier list,
 *  - looks up the static formatting description and the "sys_enter_<name>"
 *    tracepoint format (retrying with the description's alias),
 *  - finally sets up the per-argument formatters.
 * Returns the result of syscall__set_arg_fmts() on the success path.
 * NOTE(review): the declarations of `sc`/`tp_name`, the assignment of
 * sc->name and the early error returns are not visible in this listing.
 */
1381 static int trace__read_syscall_info(struct trace *trace, int id)
1385 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table so that index `id` becomes valid. */
1390 if (id > trace->syscalls.max) {
1391 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1393 if (nsyscalls == NULL)
/* max == -1 means the table was empty: zero everything; otherwise zero only the new tail. */
1396 if (trace->syscalls.max != -1) {
1397 memset(nsyscalls + trace->syscalls.max + 1, 0,
1398 (id - trace->syscalls.max) * sizeof(*sc));
1400 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1403 trace->syscalls.table = nsyscalls;
1404 trace->syscalls.max = id;
1407 sc = trace->syscalls.table + id;
/*
 * Qualifier handling: the syscall is filtered when "name is in the list"
 * and "list is negated" have the same truth value.
 */
1410 if (trace->ev_qualifier) {
1411 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1413 if (!(in ^ trace->not_ev_qualifier)) {
1414 sc->filtered = true;
1416 * No need to do read tracepoint information since this will be
1423 sc->fmt = syscall_fmt__find(sc->name);
/* First try the tracepoint named after the syscall itself... */
1425 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1426 sc->tp_format = event_format__new("syscalls", tp_name);
/* ...and fall back to the alias from the static description, if there is one. */
1428 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1429 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1430 sc->tp_format = event_format__new("syscalls", tp_name);
1433 if (sc->tp_format == NULL)
1436 return syscall__set_arg_fmts(sc);
/*
 * Formats the arguments of one syscall invocation into `bf` (at most `size`
 * bytes) as a ", "-separated list and accumulates the scnprintf() results in
 * `printed`.
 * When the tracepoint format is known, each argument is printed as
 * "<field name>: <value>", using the per-argument formatter when one is set
 * and "%ld" otherwise. Without a tracepoint format the arguments are printed
 * by index.
 * NOTE(review): the `printed`/`bit`/`i` declarations, the initializer of
 * `arg`, the format string of the index-based fallback and the return
 * statement are not visible in this listing.
 */
1439 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1440 unsigned long *args, struct trace *trace,
1441 struct thread *thread)
1445 if (sc->tp_format != NULL) {
1446 struct format_field *field;
1448 struct syscall_arg arg = {
/* As in syscall__set_arg_fmts(), the walk starts at the second tracepoint field. */
1455 for (field = sc->tp_format->format.fields->next; field;
1456 field = field->next, ++arg.idx, bit <<= 1) {
1460 * Suppress this argument if its value is zero and
1461 * and we don't have a string associated in an
1464 if (args[arg.idx] == 0 &&
1465 !(sc->arg_scnprintf &&
1466 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1467 sc->arg_parm[arg.idx]))
/* The separator is only emitted once something has already been printed. */
1470 printed += scnprintf(bf + printed, size - printed,
1471 "%s%s: ", printed ? ", " : "", field->name);
1472 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1473 arg.val = args[arg.idx];
1475 arg.parm = sc->arg_parm[arg.idx];
1476 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1477 size - printed, &arg);
1479 printed += scnprintf(bf + printed, size - printed,
1480 "%ld", args[arg.idx]);
1487 printed += scnprintf(bf + printed, size - printed,
1489 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint sample handlers stored in evsel->handler
 * (e.g. trace__sys_enter, trace__sys_exit, trace__vfs_getname).
 */
1497 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1498 struct perf_sample *sample);
/*
 * Returns the table entry describing syscall `id`, reading the syscall
 * information on first use. Invalid ids and read failures are reported on
 * trace->output.
 * NOTE(review): the conditions guarding the two diagnostics, the failure
 * returns and the declaration of the counter `n` are not visible in this
 * listing.
 */
1500 static struct syscall *trace__syscall_info(struct trace *trace,
1501 struct perf_evsel *evsel, int id)
1507 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1508 * before that, leaving at a higher verbosity level till that is
1509 * explained. Reproduced with plain ftrace with:
1511 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1512 * grep "NR -1 " /t/trace_pipe
1514 * After generating some load on the machine.
1518 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1519 id, perf_evsel__name(evsel), ++n);
/* Read the description lazily: only when the slot is missing or still unnamed. */
1524 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1525 trace__read_syscall_info(trace, id))
/* Re-check: the entry may still lack a name after the read attempt. */
1528 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1531 return &trace->syscalls.table[id];
/* Failure report; the syscall name is appended only when it is known. */
1535 fprintf(trace->output, "Problems reading syscall %d", id);
1536 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1537 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1538 fputs(" information\n", trace->output);
/*
 * Adds the duration of the syscall that just returned to the per-thread,
 * per-syscall statistics kept in ttrace->syscall_stats (keyed by syscall id).
 * The struct stats for an id is allocated the first time that id is seen.
 * NOTE(review): the `duration` declaration, the NULL checks after
 * intlist__findnew()/malloc() and the initialization of a freshly allocated
 * struct stats are not visible in this listing.
 */
1543 static void thread__update_stats(struct thread_trace *ttrace,
1544 int id, struct perf_sample *sample)
1546 struct int_node *inode;
1547 struct stats *stats;
1550 inode = intlist__findnew(ttrace->syscall_stats, id);
1554 stats = inode->priv;
1555 if (stats == NULL) {
1556 stats = malloc(sizeof(struct stats));
1560 inode->priv = stats;
/* Only compute a duration when an entry time was recorded and time moved forward. */
1563 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1564 duration = sample->time - ttrace->entry_time;
1566 update_stats(stats, duration);
/*
 * Handler for syscall-entry samples (registered for raw_syscalls:sys_enter in
 * trace__replay()). Formats "name(args" into the thread's entry_str buffer
 * and records the entry time, so that trace__sys_exit() can later print the
 * line together with the return value.
 * exit and exit_group are printed right away (unless a duration filter is
 * set); every other syscall is left pending for the matching exit event.
 * NOTE(review): the declarations of `msg`/`args`/`printed`, the NULL checks
 * on `sc`/`ttrace`, the `else` in front of the entry_pending assignment and
 * the return statements are not visible in this listing.
 */
1569 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1570 struct perf_sample *sample)
1575 struct thread *thread;
1576 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1577 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1578 struct thread_trace *ttrace;
1586 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1587 ttrace = thread__trace(thread, trace->output);
1591 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1592 ttrace = thread->priv;
/* The 1024-byte entry buffer is allocated once per thread and then reused. */
1594 if (ttrace->entry_str == NULL) {
1595 ttrace->entry_str = malloc(1024);
1596 if (!ttrace->entry_str)
1600 ttrace->entry_time = sample->time;
1601 msg = ttrace->entry_str;
1602 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1604 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1605 args, trace, thread);
/* NOTE(review): presumably exit/exit_group are special-cased because no exit event follows them. */
1607 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1608 if (!trace->duration_filter) {
1609 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1610 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1613 ttrace->entry_pending = true;
/*
 * Handler for syscall-exit samples (registered for raw_syscalls:sys_exit in
 * trace__replay()). It:
 *  - updates the per-thread syscall statistics,
 *  - on a successful open, associates the returned fd with the last pathname
 *    captured by trace__vfs_getname(),
 *  - computes the syscall duration and applies the duration filter,
 *  - prints the line head, the pending entry text (or a "continued" marker
 *    when no entry is pending) and the return value.
 * Return value formatting: plain "%d" when the syscall has no static
 * description; "-1 <errno name> <message>" for negative returns of syscalls
 * flagged errmsg; "0 Timeout" for zero returns of syscalls flagged timeout;
 * "%#x" for syscalls flagged hexret.
 * NOTE(review): the declarations of `ret`/`duration`/`bf`, the NULL checks,
 * the guard around thread__update_stats(), the `else`/label lines and the
 * goto/return statements are not visible in this listing.
 */
1618 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1619 struct perf_sample *sample)
1623 struct thread *thread;
1624 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1625 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1626 struct thread_trace *ttrace;
1634 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1635 ttrace = thread__trace(thread, trace->output);
1640 thread__update_stats(ttrace, id, sample);
1642 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Successful open with a captured pathname: remember fd -> path, then consume the pathname. */
1644 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1645 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1646 trace->last_vfs_getname = NULL;
1647 ++trace->stats.vfs_getname;
1650 ttrace = thread->priv;
1652 ttrace->exit_time = sample->time;
/* A duration can only be computed when the matching entry was seen. */
1654 if (ttrace->entry_time) {
1655 duration = sample->time - ttrace->entry_time;
1656 if (trace__filter_duration(trace, duration))
1658 } else if (trace->duration_filter)
1661 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1663 if (ttrace->entry_pending) {
1664 fprintf(trace->output, "%-70s", ttrace->entry_str);
/* No pending entry text for this thread: print a "continued" marker with the syscall name. */
1666 fprintf(trace->output, " ... [");
1667 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1668 fprintf(trace->output, "]: %s()", sc->name);
1671 if (sc->fmt == NULL) {
1673 fprintf(trace->output, ") = %d", ret);
1674 } else if (ret < 0 && sc->fmt->errmsg) {
/* The result of strerror_r() is used as a string pointer, i.e. the GNU variant is assumed. */
1676 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1677 *e = audit_errno_to_name(-ret);
1679 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1680 } else if (ret == 0 && sc->fmt->timeout)
1681 fprintf(trace->output, ") = 0 Timeout");
1682 else if (sc->fmt->hexret)
1683 fprintf(trace->output, ") = %#x", ret);
1687 fputc('\n', trace->output);
/* The entry text has been consumed (or was never there). */
1689 ttrace->entry_pending = false;
/*
 * Handler for probe:vfs_getname samples: remembers a pointer to the sample's
 * "pathname" field in trace->last_vfs_getname, where trace__sys_exit() picks
 * it up after a successful open.
 * NOTE(review): the return statement is not visible in this listing.
 */
1694 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1695 struct perf_sample *sample)
1697 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime samples: converts the event's
 * "runtime" field with NSEC_PER_MSEC and adds it to both the per-thread and
 * the global runtime_ms totals.
 * NOTE(review): the trailing fprintf() dumps the raw event fields; it looks
 * like a diagnostic path taken when no thread state is available, but the
 * NULL check, the label, two of the printed arguments and the return
 * statements are not visible in this listing.
 */
1701 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1702 struct perf_sample *sample)
1704 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1705 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1706 struct thread *thread = machine__findnew_thread(trace->host,
1709 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1714 ttrace->runtime_ms += runtime_ms;
1715 trace->runtime_ms += runtime_ms;
1719 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1721 perf_evsel__strval(evsel, sample, "comm"),
1722 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1724 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decides whether a sample should be ignored, based on the optional pid and
 * tid lists. The first test matches when the sample's pid is in pid_list or
 * its tid is in tid_list; the second test is reached when at least one list
 * is configured but neither matched.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a match returns false (keep) and a configured-but-unmatched
 * list returns true (skip) - confirm against the full source.
 */
1728 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1730 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1731 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1734 if (trace->pid_list || trace->tid_list)
/*
 * Sample callback used when replaying a perf.data file (installed as
 * trace->tool.sample in trace__replay()). Drops samples rejected by
 * skip_sample(), latches the time base from the first sample unless full
 * timestamps were requested, and dispatches to the handler attached to the
 * evsel.
 * NOTE(review): the `err` declaration, any NULL check on `handler` and the
 * return statements are not visible in this listing.
 */
1740 static int trace__process_sample(struct perf_tool *tool,
1741 union perf_event *event __maybe_unused,
1742 struct perf_sample *sample,
1743 struct perf_evsel *evsel,
1744 struct machine *machine __maybe_unused)
1746 struct trace *trace = container_of(tool, struct trace, tool);
1749 tracepoint_handler handler = evsel->handler;
1751 if (skip_sample(trace, sample))
/* base_time == 0 means "not set yet": the first sample defines time zero. */
1754 if (!trace->full_time && trace->base_time == 0)
1755 trace->base_time = sample->time;
1758 handler(trace, evsel, sample);
/*
 * Tells whether the session's event list contains a tracepoint called `name`
 * (e.g. "raw_syscalls:sys_enter"); non-zero/true when it does.
 * NOTE(review): the line carrying the storage class and return type is not
 * visible in this listing.
 */
1764 perf_session__has_tp(struct perf_session *session, const char *name)
1766 struct perf_evsel *evsel;
1768 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1770 return evsel != NULL;
/*
 * Converts the target pid/tid option strings into the integer lists
 * trace->pid_list and trace->tid_list, which skip_sample() consults.
 * An error is logged when intlist__new() returns NULL for either string.
 * NOTE(review): the return statements are not visible in this listing.
 */
1773 static int parse_target_str(struct trace *trace)
1775 if (trace->opts.target.pid) {
1776 trace->pid_list = intlist__new(trace->opts.target.pid);
1777 if (trace->pid_list == NULL) {
1778 pr_err("Error parsing process id string\n");
1783 if (trace->opts.target.tid) {
1784 trace->tid_list = intlist__new(trace->opts.target.tid);
1785 if (trace->tid_list == NULL) {
1786 pr_err("Error parsing thread id string\n");
/*
 * Implements "perf trace record": builds an argument vector made of the fixed
 * record_args followed by the user's arguments and hands it to cmd_record().
 * Returns the result of cmd_record().
 * NOTE(review): only one entry of record_args and no free() of rec_argv are
 * visible in this listing; the allocation-failure return is not visible
 * either.
 */
1794 static int trace__record(int argc, const char **argv)
1796 unsigned int rec_argc, i, j;
1797 const char **rec_argv;
1798 const char * const record_args[] = {
1803 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1806 rec_argc = ARRAY_SIZE(record_args) + argc;
/* One extra zeroed slot keeps the vector NULL-terminated. */
1807 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1809 if (rec_argv == NULL)
/* Fixed arguments first... */
1812 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1813 rec_argv[i] = record_args[i];
/* ...then the caller's arguments; `i` keeps counting and ends up as the new argc. */
1815 for (j = 0; j < (unsigned int)argc; j++, i++)
1816 rec_argv[i] = argv[j];
1818 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: trace__run() calls this before its definition further down. */
1821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Adds the "probe:vfs_getname" tracepoint to `evlist`, with
 * trace__vfs_getname() as its handler. An evsel that lacks a "pathname"
 * field is deleted instead of being added.
 * NOTE(review): the NULL check on `evsel` and the early returns are not
 * visible in this listing.
 */
1823 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1825 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
/* The handler reads the "pathname" field, so a probe without it is useless. */
1829 if (perf_evsel__field(evsel, "pathname") == NULL) {
1830 perf_evsel__delete(evsel);
1834 evsel->handler = trace__vfs_getname;
1835 perf_evlist__add(evlist, evsel);
/*
 * Live tracing mode. In order, the visible code:
 *  1. creates the event list and adds the syscall tracepoints, the optional
 *     vfs_getname probe and the sched_stat_runtime tracepoint,
 *  2. creates the cpu/thread maps for the target and initializes symbols,
 *  3. installs sig_handler() for SIGCHLD and SIGINT and prepares the
 *     workload,
 *  4. opens, mmaps and enables the events, then starts the workload,
 *  5. drains every mmap ring, dispatching samples to their evsel handler and
 *     other records to trace__process_event(),
 *  6. polls when no new events arrived,
 *  7. disables the events, prints the summary/tool statistics and tears
 *     everything down in reverse order of setup.
 * `argc`/`argv` describe the workload to run; `forks` is true when a workload
 * was given.
 * NOTE(review): the `err`/`i` declarations, the error checks, the loop/retry
 * structure, the cleanup labels and the return statement are not visible in
 * this listing.
 */
1838 static int trace__run(struct trace *trace, int argc, const char **argv)
1840 struct perf_evlist *evlist = perf_evlist__new();
1841 struct perf_evsel *evsel;
1843 unsigned long before;
1844 const bool forks = argc > 0;
1848 if (evlist == NULL) {
1849 fprintf(trace->output, "Not enough memory to run!\n");
1853 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1856 perf_evlist__add_vfs_getname(evlist);
1859 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1860 trace__sched_stat_runtime))
1863 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1865 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1866 goto out_delete_evlist;
1869 err = trace__symbols_init(trace, evlist);
1871 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1872 goto out_delete_maps;
1875 perf_evlist__config(evlist, &trace->opts);
/* Both signals share one handler; it records whether the signal was SIGINT. */
1877 signal(SIGCHLD, sig_handler);
1878 signal(SIGINT, sig_handler);
1881 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1882 argv, false, false);
1884 fprintf(trace->output, "Couldn't run the workload!\n");
1885 goto out_delete_maps;
1889 err = perf_evlist__open(evlist);
1891 goto out_error_open;
1893 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1895 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1896 goto out_close_evlist;
1899 perf_evlist__enable(evlist);
1902 perf_evlist__start_workload(evlist);
/* Thread ids are shown when the thread map's first entry is -1 or it holds more than one thread. */
1904 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event count, compared after draining to detect an idle pass. */
1906 before = trace->nr_events;
1908 for (i = 0; i < evlist->nr_mmaps; i++) {
1909 union perf_event *event;
1911 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1912 const u32 type = event->header.type;
1913 tracepoint_handler handler;
1914 struct perf_sample sample;
1918 err = perf_evlist__parse_sample(evlist, event, &sample);
1920 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* The first event defines time zero unless full timestamps were requested. */
1924 if (!trace->full_time && trace->base_time == 0)
1925 trace->base_time = sample.time;
/* Records other than samples only update the machine state. */
1927 if (type != PERF_RECORD_SAMPLE) {
1928 trace__process_event(trace, trace->host, event, &sample);
1932 evsel = perf_evlist__id2evsel(evlist, sample.id);
1933 if (evsel == NULL) {
1934 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1938 if (sample.raw_data == NULL) {
1939 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1940 perf_evsel__name(evsel), sample.tid,
1941 sample.cpu, sample.raw_size);
1945 handler = evsel->handler;
1946 handler(trace, evsel, &sample);
1948 perf_evlist__mmap_consume(evlist, i);
/* Nothing new was read in this pass: wait for more data. */
1955 if (trace->nr_events == before) {
/* Once `done` is set, wait at most 100 ms; before that, block indefinitely. */
1956 int timeout = done ? 100 : -1;
1958 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1965 perf_evlist__disable(evlist);
1969 trace__fprintf_thread_summary(trace, trace->output);
1971 if (trace->show_tool_stats) {
1972 fprintf(trace->output, "Stats:\n "
1973 " vfs_getname : %" PRIu64 "\n"
1974 " proc_getname: %" PRIu64 "\n",
1975 trace->stats.vfs_getname,
1976 trace->stats.proc_getname);
/* Teardown mirrors setup in reverse: munmap, close, delete maps, delete evlist. */
1980 perf_evlist__munmap(evlist);
1982 perf_evlist__close(evlist);
1984 perf_evlist__delete_maps(evlist);
1986 perf_evlist__delete(evlist);
1988 trace->live = false;
/* Error reporting: build a message from errno, print it, then join the normal evlist cleanup. */
1991 char errbuf[BUFSIZ];
1994 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1998 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2001 fprintf(trace->output, "%s\n", errbuf);
2002 goto out_delete_evlist;
/*
 * Replay mode: processes a previously recorded perf data file instead of live
 * events. Sets up the perf_tool callbacks, opens the session, attaches the
 * tracepoint handlers, verifies that both raw_syscalls tracepoints are
 * present in the file, parses the pid/tid filters and then processes all
 * events. The thread summary is printed only when processing succeeded and
 * trace->summary is set.
 * NOTE(review): the `err` declaration, the `.path` initializer of `file`, the
 * error gotos, the cleanup label and the return statement are not visible in
 * this listing.
 */
2006 static int trace__replay(struct trace *trace)
/* Maps tracepoint names found in the file to this file's handlers. */
2008 const struct perf_evsel_str_handler handlers[] = {
2009 { "raw_syscalls:sys_enter", trace__sys_enter, },
2010 { "raw_syscalls:sys_exit", trace__sys_exit, },
2011 { "probe:vfs_getname", trace__vfs_getname, },
2013 struct perf_data_file file = {
2015 .mode = PERF_DATA_MODE_READ,
2017 struct perf_session *session;
/* Samples go to trace__process_sample(); the other record types use the generic perf_event__process_* handlers. */
2020 trace->tool.sample = trace__process_sample;
2021 trace->tool.mmap = perf_event__process_mmap;
2022 trace->tool.mmap2 = perf_event__process_mmap2;
2023 trace->tool.comm = perf_event__process_comm;
2024 trace->tool.exit = perf_event__process_exit;
2025 trace->tool.fork = perf_event__process_fork;
2026 trace->tool.attr = perf_event__process_attr;
2027 trace->tool.tracing_data = perf_event__process_tracing_data;
2028 trace->tool.build_id = perf_event__process_build_id;
2030 trace->tool.ordered_samples = true;
2031 trace->tool.ordering_requires_timestamps = true;
2033 /* add tid to output */
2034 trace->multiple_threads = true;
2036 if (symbol__init() < 0)
2039 session = perf_session__new(&file, false, &trace->tool);
2040 if (session == NULL)
/* In replay mode the host machine is the one owned by the session. */
2043 trace->host = &session->machines.host;
2045 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Both syscall tracepoints must be present in the file. */
2049 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2050 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2054 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2055 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2059 err = parse_target_str(trace);
2065 err = perf_session__process_events(session, &trace->tool);
2067 pr_err("Failed to process events, error %d", err);
2069 else if (trace->summary)
2070 trace__fprintf_thread_summary(trace, trace->output);
2073 perf_session__delete(session);
/*
 * Prints the fixed banner and column headings of the per-thread summary on
 * `fp`, accumulating the fprintf() results in `printed`.
 * NOTE(review): the `printed` declaration and the return statement are not
 * visible in this listing.
 */
2078 static size_t trace__fprintf_threads_header(FILE *fp)
2082 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
2083 printed += fprintf(fp, " __) Summary of events (__\n\n");
2084 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
2085 printed += fprintf(fp, " syscall count min max avg stddev\n");
/* "%%" prints a literal percent sign. */
2086 printed += fprintf(fp, " msec msec msec %%\n");
2087 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Prints one row per syscall recorded in ttrace->syscall_stats: the syscall
 * name, the call count, min/max/avg durations (divided by NSEC_PER_MSEC) and
 * the standard deviation as a percentage of the average.
 * NOTE(review): the declarations of `printed`/`sc`/`pct`, the loop construct,
 * the NULL checks, the arguments of the count/min/max fprintf() and the
 * return statement are not visible in this listing.
 */
2092 static size_t thread__dump_stats(struct thread_trace *ttrace,
2093 struct trace *trace, FILE *fp)
2095 struct stats *stats;
2098 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2103 printed += fprintf(fp, "\n");
2105 /* each int_node is a syscall */
2107 stats = inode->priv;
2109 double min = (double)(stats->min) / NSEC_PER_MSEC;
2110 double max = (double)(stats->max) / NSEC_PER_MSEC;
2111 double avg = avg_stats(stats);
2113 u64 n = (u64) stats->n;
/* Relative stddev is computed before `avg` is rescaled; a zero average yields 0.0. */
2115 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2116 avg /= NSEC_PER_MSEC;
/* The int_node key is the syscall id, used to index the syscall table for the name. */
2118 sc = &trace->syscalls.table[inode->i];
2119 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
2120 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
2122 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
2125 inode = intlist__next(inode);
2128 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the `priv` pointer.
 * That function also reads `fp` and reads/updates `printed`, so those members
 * exist although their declarations are not visible in this listing.
 */
2133 /* struct used to pass data to per-thread function */
2134 struct summary_data {
2136 struct trace *trace;
/*
 * Per-thread callback for machine__for_each_thread(): prints one summary line
 * for `thread` (command name, tid, event count, share of all events, runtime
 * in ms) followed by its per-syscall statistics, and adds the printed amount
 * to data->printed.
 * The share is colored: green above 25%, yellow above 5%, normal otherwise;
 * red is used for a higher threshold whose condition is not visible here.
 * NOTE(review): the `color`/`ratio` declarations, the NULL check on `ttrace`
 * and the return statement are not visible in this listing.
 */
2140 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2142 struct summary_data *data = priv;
2143 FILE *fp = data->fp;
2144 size_t printed = data->printed;
2145 struct trace *trace = data->trace;
2146 struct thread_trace *ttrace = thread->priv;
/* Share of this thread in the total number of events, as a percentage. */
2153 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2155 color = PERF_COLOR_NORMAL;
2157 color = PERF_COLOR_RED;
2158 else if (ratio > 25.0)
2159 color = PERF_COLOR_GREEN;
2160 else if (ratio > 5.0)
2161 color = PERF_COLOR_YELLOW;
2163 printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2164 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
2165 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2166 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2167 printed += thread__dump_stats(ttrace, trace, fp);
/* `printed` started from data->printed, so this adds the old total once more on top of the new output. */
2169 data->printed += printed;
/*
 * Prints the summary header followed by one section per thread known to
 * trace->host, and returns the accumulated data.printed.
 * NOTE(review): the initializers of `data` are not visible in this listing.
 */
2174 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2176 struct summary_data data = {
2180 data.printed = trace__fprintf_threads_header(fp);
/* trace__fprintf_one_thread() is invoked for each thread, with &data as its context. */
2182 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2184 return data.printed;
/*
 * Option callback for --duration: stores the parsed threshold in
 * trace->duration_filter (opt->value points at the struct trace).
 * atof() offers no error reporting, so the input string is not validated
 * here.
 * NOTE(review): the return statement is not visible in this listing.
 */
2187 static int trace__set_duration(const struct option *opt, const char *str,
2188 int unset __maybe_unused)
2190 struct trace *trace = opt->value;
2192 trace->duration_filter = atof(str);
/*
 * Opens `filename` for writing as trace->output. A pre-existing, non-empty
 * file is first renamed to "<filename>.old"; the result of rename() is not
 * checked.
 * Returns 0 on success or -errno when fopen() fails.
 * NOTE(review): the declaration of `st` is not visible in this listing.
 */
2196 static int trace__open_output(struct trace *trace, const char *filename)
/* stat() returns 0 when the file exists; st_size != 0 means it has content. */
2200 if (!stat(filename, &st) && st.st_size) {
2201 char oldname[PATH_MAX];
2203 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2205 rename(filename, oldname);
2208 trace->output = fopen(filename, "w");
2210 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace". The visible code:
 *  - dispatches "perf trace record ..." to trace__record(),
 *  - parses the command line options,
 *  - opens the output file when -o was given,
 *  - builds the syscall qualifier list from -e (a leading '!' negates it),
 *  - validates the target and parses the uid,
 *  - defaults to system-wide tracing when neither a command nor a target was
 *    given,
 *  - runs either replay mode or live mode,
 *  - closes the output file if one was opened.
 * NOTE(review): several initializers of `trace`, the `err`/`bf`
 * declarations, the error checks, the condition selecting replay vs. live
 * mode and the end of the function are not visible in this listing.
 */
2213 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2215 const char * const trace_usage[] = {
2216 "perf trace [<options>] [<command>]",
2217 "perf trace [<options>] -- <command> [<options>]",
2218 "perf trace record [<options>] [<command>]",
2219 "perf trace record [<options>] -- <command> [<options>]",
2222 struct trace trace = {
2224 .machine = audit_detect_machine(),
/*
 * NOTE(review): this initializer reads trace.audit.machine while `trace` is
 * still being initialized; C does not define the evaluation order of
 * initializer-list expressions - confirm this is safe.
 */
2225 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2235 .user_freq = UINT_MAX,
2236 .user_interval = ULLONG_MAX,
2243 const char *output_name = NULL;
2244 const char *ev_qualifier_str = NULL;
2245 const struct option trace_options[] = {
2246 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2247 "show the thread COMM next to its id"),
2248 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2249 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2250 "list of events to trace"),
2251 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2252 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2253 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2254 "trace events on existing process id"),
2255 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2256 "trace events on existing thread id"),
2257 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2258 "system-wide collection from all CPUs"),
2259 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2260 "list of cpus to monitor"),
2261 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2262 "child tasks do not inherit counters"),
2263 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2264 "number of mmap data pages",
2265 perf_evlist__parse_mmap_pages),
2266 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2268 OPT_CALLBACK(0, "duration", &trace, "float",
2269 "show only events with duration > N.M ms",
2270 trace__set_duration),
2271 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2272 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2273 OPT_BOOLEAN('T', "time", &trace.full_time,
2274 "Show full timestamp, not time relative to first start"),
2275 OPT_BOOLEAN(0, "summary", &trace.summary,
2276 "Show syscall summary with statistics"),
/* "perf trace record ...": hand everything after "record" to trace__record(). */
2282 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2283 return trace__record(argc-2, &argv[2]);
/* After option parsing, argc/argv describe the workload command (if any). */
2285 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2287 if (output_name != NULL) {
2288 err = trace__open_output(&trace, output_name);
2290 perror("failed to create output file");
2295 if (ev_qualifier_str != NULL) {
2296 const char *s = ev_qualifier_str;
/* A leading '!' turns the list into an exclusion list. */
2298 trace.not_ev_qualifier = *s == '!';
2299 if (trace.not_ev_qualifier)
2301 trace.ev_qualifier = strlist__new(true, s);
2302 if (trace.ev_qualifier == NULL) {
2303 fputs("Not enough memory to parse event qualifier",
2310 err = perf_target__validate(&trace.opts.target);
2312 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2313 fprintf(trace.output, "%s", bf);
2317 err = perf_target__parse_uid(&trace.opts.target);
2319 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2320 fprintf(trace.output, "%s", bf);
/* No workload and no explicit target: trace the whole system. */
2324 if (!argc && perf_target__none(&trace.opts.target))
2325 trace.opts.target.system_wide = true;
2328 err = trace__replay(&trace);
2330 err = trace__run(&trace, argc, argv);
/* Only close the stream when this function opened it through -o. */
2333 if (output_name != NULL)
2334 fclose(trace.output);