6b230af940e2ab0c136098ef49b21522b6c165b6
[cascardo/linux.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20
/*
 * Fallback values for mmap()/madvise() constants that the system headers
 * of older distros may lack.  Each constant is defined here only when the
 * headers included above did not already provide it.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
37
38 struct tp_field {
39         int offset;
40         union {
41                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43         };
44 };
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49         return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61         return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69                                struct format_field *format_field,
70                                bool needs_swap)
71 {
72         field->offset = format_field->offset;
73
74         switch (format_field->size) {
75         case 1:
76                 field->integer = tp_field__u8;
77                 break;
78         case 2:
79                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80                 break;
81         case 4:
82                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83                 break;
84         case 8:
85                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86                 break;
87         default:
88                 return -1;
89         }
90
91         return 0;
92 }
93
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96         return sample->raw_data + field->offset;
97 }
98
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101         field->offset = format_field->offset;
102         field->pointer = tp_field__ptr;
103         return 0;
104 }
105
106 struct syscall_tp {
107         struct tp_field id;
108         union {
109                 struct tp_field args, ret;
110         };
111 };
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114                                           struct tp_field *field,
115                                           const char *name)
116 {
117         struct format_field *format_field = perf_evsel__field(evsel, name);
118
119         if (format_field == NULL)
120                 return -1;
121
122         return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
126         ({ struct syscall_tp *sc = evsel->priv;\
127            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
128
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.  Returns -1 when the field does not exist,
 * otherwise whatever tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialize member @name of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp->name, #name); })
144
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147         free(evsel->priv);
148         evsel->priv = NULL;
149         perf_evsel__delete(evsel);
150 }
151
152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
153 {
154         evsel->priv = malloc(sizeof(struct syscall_tp));
155         if (evsel->priv != NULL) {
156                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
157                         goto out_delete;
158
159                 evsel->handler = handler;
160                 return 0;
161         }
162
163         return -ENOMEM;
164
165 out_delete:
166         free(evsel->priv);
167         evsel->priv = NULL;
168         return -ENOENT;
169 }
170
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint and set up
 * its syscall_tp private data and @handler.
 *
 * Returns the new evsel, or NULL when either the tracepoint evsel could not
 * be created or its initialization failed (the evsel is deleted then).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
186
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/*
 * Same as above, but through the pointer reader: yields the address of the
 * field inside @sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
194
195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
196                                           void *sys_enter_handler,
197                                           void *sys_exit_handler)
198 {
199         int ret = -1;
200         struct perf_evsel *sys_enter, *sys_exit;
201
202         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
203         if (sys_enter == NULL)
204                 goto out;
205
206         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
207                 goto out_delete_sys_enter;
208
209         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
210         if (sys_exit == NULL)
211                 goto out_delete_sys_enter;
212
213         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
214                 goto out_delete_sys_exit;
215
216         perf_evlist__add(evlist, sys_enter);
217         perf_evlist__add(evlist, sys_exit);
218
219         ret = 0;
220 out:
221         return ret;
222
223 out_delete_sys_exit:
224         perf_evsel__delete_priv(sys_exit);
225 out_delete_sys_enter:
226         perf_evsel__delete_priv(sys_enter);
227         goto out;
228 }
229
230
231 struct syscall_arg {
232         unsigned long val;
233         struct thread *thread;
234         struct trace  *trace;
235         void          *parm;
236         u8            idx;
237         u8            mask;
238 };
239
/* Table mapping a contiguous range of integer values to their names. */
struct strarray {
	/* Value that corresponds to entries[0]; subtracted before indexing. */
	int         offset;
	/* Number of slots in entries[]. */
	int         nr_entries;
	/* The names, indexed by (value - offset). */
	const char **entries;
};
245
/*
 * DEFINE_STRARRAY(array) - define 'strarray__<array>' describing the string
 * table @array, whose first entry names the value 0.
 */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.offset     = 0, \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}

/*
 * DEFINE_STRARRAY_OFFSET(array, off) - same, for a table whose first entry
 * names the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) \
	struct strarray strarray__##array = { \
		.offset     = off, \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}
256
257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
258                                                 const char *intfmt,
259                                                 struct syscall_arg *arg)
260 {
261         struct strarray *sa = arg->parm;
262         int idx = arg->val - sa->offset;
263
264         if (idx < 0 || idx >= sa->nr_entries)
265                 return scnprintf(bf, size, intfmt, arg->val);
266
267         return scnprintf(bf, size, "%s", sa->entries[idx]);
268 }
269
/* String-table formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
277
/* String-table formatter that prints values without a name in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
285
286 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
287                                         struct syscall_arg *arg);
288
289 #define SCA_FD syscall_arg__scnprintf_fd
290
291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
292                                            struct syscall_arg *arg)
293 {
294         int fd = arg->val;
295
296         if (fd == AT_FDCWD)
297                 return scnprintf(bf, size, "CWD");
298
299         return syscall_arg__scnprintf_fd(bf, size, arg);
300 }
301
302 #define SCA_FDAT syscall_arg__scnprintf_fd_at
303
/* Formatter for the fd argument of close(); only declared here, defined elsewhere in the file. */
static size_t
syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
308
309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
310                                          struct syscall_arg *arg)
311 {
312         return scnprintf(bf, size, "%#lx", arg->val);
313 }
314
315 #define SCA_HEX syscall_arg__scnprintf_hex
316
317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
318                                                struct syscall_arg *arg)
319 {
320         int printed = 0, prot = arg->val;
321
322         if (prot == PROT_NONE)
323                 return scnprintf(bf, size, "NONE");
324 #define P_MMAP_PROT(n) \
325         if (prot & PROT_##n) { \
326                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
327                 prot &= ~PROT_##n; \
328         }
329
330         P_MMAP_PROT(EXEC);
331         P_MMAP_PROT(READ);
332         P_MMAP_PROT(WRITE);
333 #ifdef PROT_SEM
334         P_MMAP_PROT(SEM);
335 #endif
336         P_MMAP_PROT(GROWSDOWN);
337         P_MMAP_PROT(GROWSUP);
338 #undef P_MMAP_PROT
339
340         if (prot)
341                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
342
343         return printed;
344 }
345
346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
347
348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
349                                                 struct syscall_arg *arg)
350 {
351         int printed = 0, flags = arg->val;
352
353 #define P_MMAP_FLAG(n) \
354         if (flags & MAP_##n) { \
355                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
356                 flags &= ~MAP_##n; \
357         }
358
359         P_MMAP_FLAG(SHARED);
360         P_MMAP_FLAG(PRIVATE);
361 #ifdef MAP_32BIT
362         P_MMAP_FLAG(32BIT);
363 #endif
364         P_MMAP_FLAG(ANONYMOUS);
365         P_MMAP_FLAG(DENYWRITE);
366         P_MMAP_FLAG(EXECUTABLE);
367         P_MMAP_FLAG(FILE);
368         P_MMAP_FLAG(FIXED);
369         P_MMAP_FLAG(GROWSDOWN);
370 #ifdef MAP_HUGETLB
371         P_MMAP_FLAG(HUGETLB);
372 #endif
373         P_MMAP_FLAG(LOCKED);
374         P_MMAP_FLAG(NONBLOCK);
375         P_MMAP_FLAG(NORESERVE);
376         P_MMAP_FLAG(POPULATE);
377         P_MMAP_FLAG(STACK);
378 #ifdef MAP_UNINITIALIZED
379         P_MMAP_FLAG(UNINITIALIZED);
380 #endif
381 #undef P_MMAP_FLAG
382
383         if (flags)
384                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
385
386         return printed;
387 }
388
389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
390
391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
392                                                       struct syscall_arg *arg)
393 {
394         int behavior = arg->val;
395
396         switch (behavior) {
397 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
398         P_MADV_BHV(NORMAL);
399         P_MADV_BHV(RANDOM);
400         P_MADV_BHV(SEQUENTIAL);
401         P_MADV_BHV(WILLNEED);
402         P_MADV_BHV(DONTNEED);
403         P_MADV_BHV(REMOVE);
404         P_MADV_BHV(DONTFORK);
405         P_MADV_BHV(DOFORK);
406         P_MADV_BHV(HWPOISON);
407 #ifdef MADV_SOFT_OFFLINE
408         P_MADV_BHV(SOFT_OFFLINE);
409 #endif
410         P_MADV_BHV(MERGEABLE);
411         P_MADV_BHV(UNMERGEABLE);
412 #ifdef MADV_HUGEPAGE
413         P_MADV_BHV(HUGEPAGE);
414 #endif
415 #ifdef MADV_NOHUGEPAGE
416         P_MADV_BHV(NOHUGEPAGE);
417 #endif
418 #ifdef MADV_DONTDUMP
419         P_MADV_BHV(DONTDUMP);
420 #endif
421 #ifdef MADV_DODUMP
422         P_MADV_BHV(DODUMP);
423 #endif
424 #undef P_MADV_PHV
425         default: break;
426         }
427
428         return scnprintf(bf, size, "%#x", behavior);
429 }
430
431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
432
433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
434                                            struct syscall_arg *arg)
435 {
436         int printed = 0, op = arg->val;
437
438         if (op == 0)
439                 return scnprintf(bf, size, "NONE");
440 #define P_CMD(cmd) \
441         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
442                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
443                 op &= ~LOCK_##cmd; \
444         }
445
446         P_CMD(SH);
447         P_CMD(EX);
448         P_CMD(NB);
449         P_CMD(UN);
450         P_CMD(MAND);
451         P_CMD(RW);
452         P_CMD(READ);
453         P_CMD(WRITE);
454 #undef P_OP
455
456         if (op)
457                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
458
459         return printed;
460 }
461
462 #define SCA_FLOCK syscall_arg__scnprintf_flock
463
464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
465 {
466         enum syscall_futex_args {
467                 SCF_UADDR   = (1 << 0),
468                 SCF_OP      = (1 << 1),
469                 SCF_VAL     = (1 << 2),
470                 SCF_TIMEOUT = (1 << 3),
471                 SCF_UADDR2  = (1 << 4),
472                 SCF_VAL3    = (1 << 5),
473         };
474         int op = arg->val;
475         int cmd = op & FUTEX_CMD_MASK;
476         size_t printed = 0;
477
478         switch (cmd) {
479 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
480         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
481         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
482         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
484         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
485         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
486         P_FUTEX_OP(WAKE_OP);                                                      break;
487         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
490         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
491         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
492         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
493         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
494         }
495
496         if (op & FUTEX_PRIVATE_FLAG)
497                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
498
499         if (op & FUTEX_CLOCK_REALTIME)
500                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
501
502         return printed;
503 }
504
505 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
506
507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
509
510 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
511 static DEFINE_STRARRAY(itimers);
512
513 static const char *whences[] = { "SET", "CUR", "END",
514 #ifdef SEEK_DATA
515 "DATA",
516 #endif
517 #ifdef SEEK_HOLE
518 "HOLE",
519 #endif
520 };
521 static DEFINE_STRARRAY(whences);
522
523 static const char *fcntl_cmds[] = {
524         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
525         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
526         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
527         "F_GETOWNER_UIDS",
528 };
529 static DEFINE_STRARRAY(fcntl_cmds);
530
531 static const char *rlimit_resources[] = {
532         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
533         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
534         "RTTIME",
535 };
536 static DEFINE_STRARRAY(rlimit_resources);
537
538 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
539 static DEFINE_STRARRAY(sighow);
540
541 static const char *clockid[] = {
542         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
543         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
544 };
545 static DEFINE_STRARRAY(clockid);
546
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table has to stay dense up to its last entry because it is indexed
 * directly by the family number.  Slot 28 (AF_MPLS) sits between AF_IB (27)
 * and AF_CAN (29); leaving it out shifts every name from "CAN" onwards one
 * slot down, so that e.g. AF_CAN would be reported as "TIPC".
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
556
557 #ifndef SOCK_TYPE_MASK
558 #define SOCK_TYPE_MASK 0xf
559 #endif
560
561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
562                                                       struct syscall_arg *arg)
563 {
564         size_t printed;
565         int type = arg->val,
566             flags = type & ~SOCK_TYPE_MASK;
567
568         type &= SOCK_TYPE_MASK;
569         /*
570          * Can't use a strarray, MIPS may override for ABI reasons.
571          */
572         switch (type) {
573 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
574         P_SK_TYPE(STREAM);
575         P_SK_TYPE(DGRAM);
576         P_SK_TYPE(RAW);
577         P_SK_TYPE(RDM);
578         P_SK_TYPE(SEQPACKET);
579         P_SK_TYPE(DCCP);
580         P_SK_TYPE(PACKET);
581 #undef P_SK_TYPE
582         default:
583                 printed = scnprintf(bf, size, "%#x", type);
584         }
585
586 #define P_SK_FLAG(n) \
587         if (flags & SOCK_##n) { \
588                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
589                 flags &= ~SOCK_##n; \
590         }
591
592         P_SK_FLAG(CLOEXEC);
593         P_SK_FLAG(NONBLOCK);
594 #undef P_SK_FLAG
595
596         if (flags)
597                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
598
599         return printed;
600 }
601
602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
603
604 #ifndef MSG_PROBE
605 #define MSG_PROBE            0x10
606 #endif
607 #ifndef MSG_WAITFORONE
608 #define MSG_WAITFORONE  0x10000
609 #endif
610 #ifndef MSG_SENDPAGE_NOTLAST
611 #define MSG_SENDPAGE_NOTLAST 0x20000
612 #endif
613 #ifndef MSG_FASTOPEN
614 #define MSG_FASTOPEN         0x20000000
615 #endif
616
617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
618                                                struct syscall_arg *arg)
619 {
620         int printed = 0, flags = arg->val;
621
622         if (flags == 0)
623                 return scnprintf(bf, size, "NONE");
624 #define P_MSG_FLAG(n) \
625         if (flags & MSG_##n) { \
626                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
627                 flags &= ~MSG_##n; \
628         }
629
630         P_MSG_FLAG(OOB);
631         P_MSG_FLAG(PEEK);
632         P_MSG_FLAG(DONTROUTE);
633         P_MSG_FLAG(TRYHARD);
634         P_MSG_FLAG(CTRUNC);
635         P_MSG_FLAG(PROBE);
636         P_MSG_FLAG(TRUNC);
637         P_MSG_FLAG(DONTWAIT);
638         P_MSG_FLAG(EOR);
639         P_MSG_FLAG(WAITALL);
640         P_MSG_FLAG(FIN);
641         P_MSG_FLAG(SYN);
642         P_MSG_FLAG(CONFIRM);
643         P_MSG_FLAG(RST);
644         P_MSG_FLAG(ERRQUEUE);
645         P_MSG_FLAG(NOSIGNAL);
646         P_MSG_FLAG(MORE);
647         P_MSG_FLAG(WAITFORONE);
648         P_MSG_FLAG(SENDPAGE_NOTLAST);
649         P_MSG_FLAG(FASTOPEN);
650         P_MSG_FLAG(CMSG_CLOEXEC);
651 #undef P_MSG_FLAG
652
653         if (flags)
654                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
655
656         return printed;
657 }
658
659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
660
661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
662                                                  struct syscall_arg *arg)
663 {
664         size_t printed = 0;
665         int mode = arg->val;
666
667         if (mode == F_OK) /* 0 */
668                 return scnprintf(bf, size, "F");
669 #define P_MODE(n) \
670         if (mode & n##_OK) { \
671                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
672                 mode &= ~n##_OK; \
673         }
674
675         P_MODE(R);
676         P_MODE(W);
677         P_MODE(X);
678 #undef P_MODE
679
680         if (mode)
681                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
682
683         return printed;
684 }
685
686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
687
688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
689                                                struct syscall_arg *arg)
690 {
691         int printed = 0, flags = arg->val;
692
693         if (!(flags & O_CREAT))
694                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
695
696         if (flags == 0)
697                 return scnprintf(bf, size, "RDONLY");
698 #define P_FLAG(n) \
699         if (flags & O_##n) { \
700                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
701                 flags &= ~O_##n; \
702         }
703
704         P_FLAG(APPEND);
705         P_FLAG(ASYNC);
706         P_FLAG(CLOEXEC);
707         P_FLAG(CREAT);
708         P_FLAG(DIRECT);
709         P_FLAG(DIRECTORY);
710         P_FLAG(EXCL);
711         P_FLAG(LARGEFILE);
712         P_FLAG(NOATIME);
713         P_FLAG(NOCTTY);
714 #ifdef O_NONBLOCK
715         P_FLAG(NONBLOCK);
716 #elif O_NDELAY
717         P_FLAG(NDELAY);
718 #endif
719 #ifdef O_PATH
720         P_FLAG(PATH);
721 #endif
722         P_FLAG(RDWR);
723 #ifdef O_DSYNC
724         if ((flags & O_SYNC) == O_SYNC)
725                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
726         else {
727                 P_FLAG(DSYNC);
728         }
729 #else
730         P_FLAG(SYNC);
731 #endif
732         P_FLAG(TRUNC);
733         P_FLAG(WRONLY);
734 #undef P_FLAG
735
736         if (flags)
737                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
738
739         return printed;
740 }
741
742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
743
744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
745                                                    struct syscall_arg *arg)
746 {
747         int printed = 0, flags = arg->val;
748
749         if (flags == 0)
750                 return scnprintf(bf, size, "NONE");
751 #define P_FLAG(n) \
752         if (flags & EFD_##n) { \
753                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
754                 flags &= ~EFD_##n; \
755         }
756
757         P_FLAG(SEMAPHORE);
758         P_FLAG(CLOEXEC);
759         P_FLAG(NONBLOCK);
760 #undef P_FLAG
761
762         if (flags)
763                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
764
765         return printed;
766 }
767
768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
769
770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
771                                                 struct syscall_arg *arg)
772 {
773         int printed = 0, flags = arg->val;
774
775 #define P_FLAG(n) \
776         if (flags & O_##n) { \
777                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
778                 flags &= ~O_##n; \
779         }
780
781         P_FLAG(CLOEXEC);
782         P_FLAG(NONBLOCK);
783 #undef P_FLAG
784
785         if (flags)
786                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
787
788         return printed;
789 }
790
791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
792
793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
794 {
795         int sig = arg->val;
796
797         switch (sig) {
798 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
799         P_SIGNUM(HUP);
800         P_SIGNUM(INT);
801         P_SIGNUM(QUIT);
802         P_SIGNUM(ILL);
803         P_SIGNUM(TRAP);
804         P_SIGNUM(ABRT);
805         P_SIGNUM(BUS);
806         P_SIGNUM(FPE);
807         P_SIGNUM(KILL);
808         P_SIGNUM(USR1);
809         P_SIGNUM(SEGV);
810         P_SIGNUM(USR2);
811         P_SIGNUM(PIPE);
812         P_SIGNUM(ALRM);
813         P_SIGNUM(TERM);
814         P_SIGNUM(STKFLT);
815         P_SIGNUM(CHLD);
816         P_SIGNUM(CONT);
817         P_SIGNUM(STOP);
818         P_SIGNUM(TSTP);
819         P_SIGNUM(TTIN);
820         P_SIGNUM(TTOU);
821         P_SIGNUM(URG);
822         P_SIGNUM(XCPU);
823         P_SIGNUM(XFSZ);
824         P_SIGNUM(VTALRM);
825         P_SIGNUM(PROF);
826         P_SIGNUM(WINCH);
827         P_SIGNUM(IO);
828         P_SIGNUM(PWR);
829         P_SIGNUM(SYS);
830         default: break;
831         }
832
833         return scnprintf(bf, size, "%#x", sig);
834 }
835
836 #define SCA_SIGNUM syscall_arg__scnprintf_signum
837
838 #define TCGETS          0x5401
839
840 static const char *tioctls[] = {
841         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
842         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
843         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
844         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
845         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
846         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
847         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
848         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
849         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
850         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
851         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
852         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
853         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
854         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
855         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
856 };
857
858 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
859
/*
 * STRARRAY(arg, name, array) - syscall_fmt initializer fragment that formats
 * argument number @arg through the string table 'strarray__<array>'.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
863
864 static struct syscall_fmt {
865         const char *name;
866         const char *alias;
867         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
868         void       *arg_parm[6];
869         bool       errmsg;
870         bool       timeout;
871         bool       hexret;
872 } syscall_fmts[] = {
873         { .name     = "access",     .errmsg = true,
874           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
875         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
876         { .name     = "brk",        .hexret = true,
877           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
878         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
879         { .name     = "close",      .errmsg = true,
880           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
881         { .name     = "connect",    .errmsg = true, },
882         { .name     = "dup",        .errmsg = true,
883           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
884         { .name     = "dup2",       .errmsg = true,
885           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
886         { .name     = "dup3",       .errmsg = true,
887           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
888         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
889         { .name     = "eventfd2",   .errmsg = true,
890           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
891         { .name     = "faccessat",  .errmsg = true,
892           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
893         { .name     = "fadvise64",  .errmsg = true,
894           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
895         { .name     = "fallocate",  .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
897         { .name     = "fchdir",     .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
899         { .name     = "fchmod",     .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fchmodat",   .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
903         { .name     = "fchown",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchownat",   .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
907         { .name     = "fcntl",      .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FD, /* fd */
909                              [1] = SCA_STRARRAY, /* cmd */ },
910           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
911         { .name     = "fdatasync",  .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "flock",      .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */
915                              [1] = SCA_FLOCK, /* cmd */ }, },
916         { .name     = "fsetxattr",  .errmsg = true,
917           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
918         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
919           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
920         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
921           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
922         { .name     = "fstatfs",    .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fsync",    .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "ftruncate", .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "futex",      .errmsg = true,
929           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
930         { .name     = "futimesat", .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
932         { .name     = "getdents",   .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
934         { .name     = "getdents64", .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
936         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
937         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
938         { .name     = "ioctl",      .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
940                              [1] = SCA_STRHEXARRAY, /* cmd */
941                              [2] = SCA_HEX, /* arg */ },
942           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
943         { .name     = "kill",       .errmsg = true,
944           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
945         { .name     = "linkat",     .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
947         { .name     = "lseek",      .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_FD, /* fd */
949                              [2] = SCA_STRARRAY, /* whence */ },
950           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
951         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
952         { .name     = "madvise",    .errmsg = true,
953           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
954                              [2] = SCA_MADV_BHV, /* behavior */ }, },
955         { .name     = "mkdirat",    .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
957         { .name     = "mknodat",    .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
959         { .name     = "mlock",      .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
961         { .name     = "mlockall",   .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963         { .name     = "mmap",       .hexret = true,
964           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
965                              [2] = SCA_MMAP_PROT, /* prot */
966                              [3] = SCA_MMAP_FLAGS, /* flags */
967                              [4] = SCA_FD,        /* fd */ }, },
968         { .name     = "mprotect",   .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_HEX, /* start */
970                              [2] = SCA_MMAP_PROT, /* prot */ }, },
971         { .name     = "mremap",     .hexret = true,
972           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973                              [4] = SCA_HEX, /* new_addr */ }, },
974         { .name     = "munlock",    .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
976         { .name     = "munmap",     .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
978         { .name     = "name_to_handle_at", .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
980         { .name     = "newfstatat", .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
982         { .name     = "open",       .errmsg = true,
983           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
984         { .name     = "open_by_handle_at", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
986                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
987         { .name     = "openat",     .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
989                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
990         { .name     = "pipe2",      .errmsg = true,
991           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
992         { .name     = "poll",       .errmsg = true, .timeout = true, },
993         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
994         { .name     = "pread",      .errmsg = true, .alias = "pread64",
995           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
996         { .name     = "preadv",     .errmsg = true, .alias = "pread",
997           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
998         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
999         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1001         { .name     = "pwritev",    .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1003         { .name     = "read",       .errmsg = true,
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1005         { .name     = "readlinkat", .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1007         { .name     = "readv",      .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "recvfrom",   .errmsg = true,
1010           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1011         { .name     = "recvmmsg",   .errmsg = true,
1012           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1013         { .name     = "recvmsg",    .errmsg = true,
1014           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1015         { .name     = "renameat",   .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1017         { .name     = "rt_sigaction", .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1019         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1020         { .name     = "rt_sigqueueinfo", .errmsg = true,
1021           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1022         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1023           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1024         { .name     = "select",     .errmsg = true, .timeout = true, },
1025         { .name     = "sendmmsg",    .errmsg = true,
1026           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1027         { .name     = "sendmsg",    .errmsg = true,
1028           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1029         { .name     = "sendto",     .errmsg = true,
1030           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1032         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1033         { .name     = "shutdown",   .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "socket",     .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1037                              [1] = SCA_SK_TYPE, /* type */ },
1038           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1039         { .name     = "socketpair", .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041                              [1] = SCA_SK_TYPE, /* type */ },
1042           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1043         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1044         { .name     = "symlinkat",  .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1046         { .name     = "tgkill",     .errmsg = true,
1047           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1048         { .name     = "tkill",      .errmsg = true,
1049           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1050         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1051         { .name     = "unlinkat",   .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1053         { .name     = "utimensat",  .errmsg = true,
1054           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1055         { .name     = "write",      .errmsg = true,
1056           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1057         { .name     = "writev",     .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1059 };
1060
1061 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1062 {
1063         const struct syscall_fmt *fmt = fmtp;
1064         return strcmp(name, fmt->name);
1065 }
1066
1067 static struct syscall_fmt *syscall_fmt__find(const char *name)
1068 {
1069         const int nmemb = ARRAY_SIZE(syscall_fmts);
1070         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1071 }
1072
/*
 * Per syscall state, kept in trace->syscalls.table and indexed by syscall id.
 * Entries are filled in lazily by trace__read_syscall_info().
 */
struct syscall {
        /* Format of the syscalls:sys_enter_<name> tracepoint, NULL if not read */
        struct event_format *tp_format;
        /* Name as returned by audit_syscall_to_name() */
        const char          *name;
        /* True when the event qualifier excludes this syscall */
        bool                filtered;
        /* Matching syscall_fmts entry, NULL when there is none */
        struct syscall_fmt  *fmt;
        /* One formatter per argument, a NULL slot means plain "%ld" */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per argument formatter parameter, taken from fmt->arg_parm */
        void                **arg_parm;
};
1081
1082 static size_t fprintf_duration(unsigned long t, FILE *fp)
1083 {
1084         double duration = (double)t / NSEC_PER_MSEC;
1085         size_t printed = fprintf(fp, "(");
1086
1087         if (duration >= 1.0)
1088                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1089         else if (duration >= 0.01)
1090                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1091         else
1092                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1093         return printed + fprintf(fp, "): ");
1094 }
1095
1096 struct thread_trace {
1097         u64               entry_time;
1098         u64               exit_time;
1099         bool              entry_pending;
1100         unsigned long     nr_events;
1101         char              *entry_str;
1102         double            runtime_ms;
1103         struct {
1104                 int       max;
1105                 char      **table;
1106         } paths;
1107
1108         struct intlist *syscall_stats;
1109 };
1110
1111 static struct thread_trace *thread_trace__new(void)
1112 {
1113         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1114
1115         if (ttrace)
1116                 ttrace->paths.max = -1;
1117
1118         ttrace->syscall_stats = intlist__new(NULL);
1119
1120         return ttrace;
1121 }
1122
1123 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1124 {
1125         struct thread_trace *ttrace;
1126
1127         if (thread == NULL)
1128                 goto fail;
1129
1130         if (thread->priv == NULL)
1131                 thread->priv = thread_trace__new();
1132                 
1133         if (thread->priv == NULL)
1134                 goto fail;
1135
1136         ttrace = thread->priv;
1137         ++ttrace->nr_events;
1138
1139         return ttrace;
1140 fail:
1141         color_fprintf(fp, PERF_COLOR_RED,
1142                       "WARNING: not enough memory, dropping samples!\n");
1143         return NULL;
1144 }
1145
1146 struct trace {
1147         struct perf_tool        tool;
1148         struct {
1149                 int             machine;
1150                 int             open_id;
1151         }                       audit;
1152         struct {
1153                 int             max;
1154                 struct syscall  *table;
1155         } syscalls;
1156         struct perf_record_opts opts;
1157         struct machine          *host;
1158         u64                     base_time;
1159         bool                    full_time;
1160         FILE                    *output;
1161         unsigned long           nr_events;
1162         struct strlist          *ev_qualifier;
1163         bool                    not_ev_qualifier;
1164         bool                    live;
1165         const char              *last_vfs_getname;
1166         struct intlist          *tid_list;
1167         struct intlist          *pid_list;
1168         bool                    sched;
1169         bool                    multiple_threads;
1170         bool                    summary;
1171         bool                    summary_only;
1172         bool                    show_comm;
1173         bool                    show_tool_stats;
1174         double                  duration_filter;
1175         double                  runtime_ms;
1176         struct {
1177                 u64             vfs_getname, proc_getname;
1178         } stats;
1179 };
1180
1181 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1182 {
1183         struct thread_trace *ttrace = thread->priv;
1184
1185         if (fd > ttrace->paths.max) {
1186                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1187
1188                 if (npath == NULL)
1189                         return -1;
1190
1191                 if (ttrace->paths.max != -1) {
1192                         memset(npath + ttrace->paths.max + 1, 0,
1193                                (fd - ttrace->paths.max) * sizeof(char *));
1194                 } else {
1195                         memset(npath, 0, (fd + 1) * sizeof(char *));
1196                 }
1197
1198                 ttrace->paths.table = npath;
1199                 ttrace->paths.max   = fd;
1200         }
1201
1202         ttrace->paths.table[fd] = strdup(pathname);
1203
1204         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1205 }
1206
1207 static int thread__read_fd_path(struct thread *thread, int fd)
1208 {
1209         char linkname[PATH_MAX], pathname[PATH_MAX];
1210         struct stat st;
1211         int ret;
1212
1213         if (thread->pid_ == thread->tid) {
1214                 scnprintf(linkname, sizeof(linkname),
1215                           "/proc/%d/fd/%d", thread->pid_, fd);
1216         } else {
1217                 scnprintf(linkname, sizeof(linkname),
1218                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1219         }
1220
1221         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1222                 return -1;
1223
1224         ret = readlink(linkname, pathname, sizeof(pathname));
1225
1226         if (ret < 0 || ret > st.st_size)
1227                 return -1;
1228
1229         pathname[ret] = '\0';
1230         return trace__set_fd_pathname(thread, fd, pathname);
1231 }
1232
1233 static const char *thread__fd_path(struct thread *thread, int fd,
1234                                    struct trace *trace)
1235 {
1236         struct thread_trace *ttrace = thread->priv;
1237
1238         if (ttrace == NULL)
1239                 return NULL;
1240
1241         if (fd < 0)
1242                 return NULL;
1243
1244         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1245                 if (!trace->live)
1246                         return NULL;
1247                 ++trace->stats.proc_getname;
1248                 if (thread__read_fd_path(thread, fd)) {
1249                         return NULL;
1250         }
1251
1252         return ttrace->paths.table[fd];
1253 }
1254
1255 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1256                                         struct syscall_arg *arg)
1257 {
1258         int fd = arg->val;
1259         size_t printed = scnprintf(bf, size, "%d", fd);
1260         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1261
1262         if (path)
1263                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1264
1265         return printed;
1266 }
1267
1268 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1269                                               struct syscall_arg *arg)
1270 {
1271         int fd = arg->val;
1272         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1273         struct thread_trace *ttrace = arg->thread->priv;
1274
1275         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1276                 free(ttrace->paths.table[fd]);
1277                 ttrace->paths.table[fd] = NULL;
1278         }
1279
1280         return printed;
1281 }
1282
1283 static bool trace__filter_duration(struct trace *trace, double t)
1284 {
1285         return t < (trace->duration_filter * NSEC_PER_MSEC);
1286 }
1287
1288 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1289 {
1290         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1291
1292         return fprintf(fp, "%10.3f ", ts);
1293 }
1294
/*
 * Flags set asynchronously by sig_handler().
 *
 * They are volatile sig_atomic_t because that is the only kind of object the
 * C standard lets a signal handler store to, and because without volatile
 * the compiler may keep a stale copy in a register in code polling them
 * (presumably the event loop, which is outside this part of the file).
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1303
1304 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1305                                         u64 duration, u64 tstamp, FILE *fp)
1306 {
1307         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1308         printed += fprintf_duration(duration, fp);
1309
1310         if (trace->multiple_threads) {
1311                 if (trace->show_comm)
1312                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1313                 printed += fprintf(fp, "%d ", thread->tid);
1314         }
1315
1316         return printed;
1317 }
1318
1319 static int trace__process_event(struct trace *trace, struct machine *machine,
1320                                 union perf_event *event, struct perf_sample *sample)
1321 {
1322         int ret = 0;
1323
1324         switch (event->header.type) {
1325         case PERF_RECORD_LOST:
1326                 color_fprintf(trace->output, PERF_COLOR_RED,
1327                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1328                 ret = machine__process_lost_event(machine, event, sample);
1329         default:
1330                 ret = machine__process_event(machine, event, sample);
1331                 break;
1332         }
1333
1334         return ret;
1335 }
1336
1337 static int trace__tool_process(struct perf_tool *tool,
1338                                union perf_event *event,
1339                                struct perf_sample *sample,
1340                                struct machine *machine)
1341 {
1342         struct trace *trace = container_of(tool, struct trace, tool);
1343         return trace__process_event(trace, machine, event, sample);
1344 }
1345
1346 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1347 {
1348         int err = symbol__init();
1349
1350         if (err)
1351                 return err;
1352
1353         trace->host = machine__new_host();
1354         if (trace->host == NULL)
1355                 return -ENOMEM;
1356
1357         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1358                                             evlist->threads, trace__tool_process, false);
1359         if (err)
1360                 symbol__exit();
1361
1362         return err;
1363 }
1364
1365 static int syscall__set_arg_fmts(struct syscall *sc)
1366 {
1367         struct format_field *field;
1368         int idx = 0;
1369
1370         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1371         if (sc->arg_scnprintf == NULL)
1372                 return -1;
1373
1374         if (sc->fmt)
1375                 sc->arg_parm = sc->fmt->arg_parm;
1376
1377         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1378                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1379                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1380                 else if (field->flags & FIELD_IS_POINTER)
1381                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1382                 ++idx;
1383         }
1384
1385         return 0;
1386 }
1387
1388 static int trace__read_syscall_info(struct trace *trace, int id)
1389 {
1390         char tp_name[128];
1391         struct syscall *sc;
1392         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1393
1394         if (name == NULL)
1395                 return -1;
1396
1397         if (id > trace->syscalls.max) {
1398                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1399
1400                 if (nsyscalls == NULL)
1401                         return -1;
1402
1403                 if (trace->syscalls.max != -1) {
1404                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1405                                (id - trace->syscalls.max) * sizeof(*sc));
1406                 } else {
1407                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1408                 }
1409
1410                 trace->syscalls.table = nsyscalls;
1411                 trace->syscalls.max   = id;
1412         }
1413
1414         sc = trace->syscalls.table + id;
1415         sc->name = name;
1416
1417         if (trace->ev_qualifier) {
1418                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1419
1420                 if (!(in ^ trace->not_ev_qualifier)) {
1421                         sc->filtered = true;
1422                         /*
1423                          * No need to do read tracepoint information since this will be
1424                          * filtered out.
1425                          */
1426                         return 0;
1427                 }
1428         }
1429
1430         sc->fmt  = syscall_fmt__find(sc->name);
1431
1432         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1433         sc->tp_format = event_format__new("syscalls", tp_name);
1434
1435         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1436                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1437                 sc->tp_format = event_format__new("syscalls", tp_name);
1438         }
1439
1440         if (sc->tp_format == NULL)
1441                 return -1;
1442
1443         return syscall__set_arg_fmts(sc);
1444 }
1445
1446 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1447                                       unsigned long *args, struct trace *trace,
1448                                       struct thread *thread)
1449 {
1450         size_t printed = 0;
1451
1452         if (sc->tp_format != NULL) {
1453                 struct format_field *field;
1454                 u8 bit = 1;
1455                 struct syscall_arg arg = {
1456                         .idx    = 0,
1457                         .mask   = 0,
1458                         .trace  = trace,
1459                         .thread = thread,
1460                 };
1461
1462                 for (field = sc->tp_format->format.fields->next; field;
1463                      field = field->next, ++arg.idx, bit <<= 1) {
1464                         if (arg.mask & bit)
1465                                 continue;
1466                         /*
1467                          * Suppress this argument if its value is zero and
1468                          * and we don't have a string associated in an
1469                          * strarray for it.
1470                          */
1471                         if (args[arg.idx] == 0 &&
1472                             !(sc->arg_scnprintf &&
1473                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1474                               sc->arg_parm[arg.idx]))
1475                                 continue;
1476
1477                         printed += scnprintf(bf + printed, size - printed,
1478                                              "%s%s: ", printed ? ", " : "", field->name);
1479                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1480                                 arg.val = args[arg.idx];
1481                                 if (sc->arg_parm)
1482                                         arg.parm = sc->arg_parm[arg.idx];
1483                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1484                                                                       size - printed, &arg);
1485                         } else {
1486                                 printed += scnprintf(bf + printed, size - printed,
1487                                                      "%ld", args[arg.idx]);
1488                         }
1489                 }
1490         } else {
1491                 int i = 0;
1492
1493                 while (i < 6) {
1494                         printed += scnprintf(bf + printed, size - printed,
1495                                              "%sarg%d: %ld",
1496                                              printed ? ", " : "", i, args[i]);
1497                         ++i;
1498                 }
1499         }
1500
1501         return printed;
1502 }
1503
/*
 * Signature of a per tracepoint sample handler; trace__sys_enter() is one
 * function with this shape.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1506
1507 static struct syscall *trace__syscall_info(struct trace *trace,
1508                                            struct perf_evsel *evsel, int id)
1509 {
1510
1511         if (id < 0) {
1512
1513                 /*
1514                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1515                  * before that, leaving at a higher verbosity level till that is
1516                  * explained. Reproduced with plain ftrace with:
1517                  *
1518                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1519                  * grep "NR -1 " /t/trace_pipe
1520                  *
1521                  * After generating some load on the machine.
1522                  */
1523                 if (verbose > 1) {
1524                         static u64 n;
1525                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1526                                 id, perf_evsel__name(evsel), ++n);
1527                 }
1528                 return NULL;
1529         }
1530
1531         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1532             trace__read_syscall_info(trace, id))
1533                 goto out_cant_read;
1534
1535         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1536                 goto out_cant_read;
1537
1538         return &trace->syscalls.table[id];
1539
1540 out_cant_read:
1541         if (verbose) {
1542                 fprintf(trace->output, "Problems reading syscall %d", id);
1543                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1544                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1545                 fputs(" information\n", trace->output);
1546         }
1547         return NULL;
1548 }
1549
1550 static void thread__update_stats(struct thread_trace *ttrace,
1551                                  int id, struct perf_sample *sample)
1552 {
1553         struct int_node *inode;
1554         struct stats *stats;
1555         u64 duration = 0;
1556
1557         inode = intlist__findnew(ttrace->syscall_stats, id);
1558         if (inode == NULL)
1559                 return;
1560
1561         stats = inode->priv;
1562         if (stats == NULL) {
1563                 stats = malloc(sizeof(struct stats));
1564                 if (stats == NULL)
1565                         return;
1566                 init_stats(stats);
1567                 inode->priv = stats;
1568         }
1569
1570         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1571                 duration = sample->time - ttrace->entry_time;
1572
1573         update_stats(stats, duration);
1574 }
1575
1576 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1577                             struct perf_sample *sample)
1578 {
1579         char *msg;
1580         void *args;
1581         size_t printed = 0;
1582         struct thread *thread;
1583         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1584         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1585         struct thread_trace *ttrace;
1586
1587         if (sc == NULL)
1588                 return -1;
1589
1590         if (sc->filtered)
1591                 return 0;
1592
1593         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1594         ttrace = thread__trace(thread, trace->output);
1595         if (ttrace == NULL)
1596                 return -1;
1597
1598         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1599         ttrace = thread->priv;
1600
1601         if (ttrace->entry_str == NULL) {
1602                 ttrace->entry_str = malloc(1024);
1603                 if (!ttrace->entry_str)
1604                         return -1;
1605         }
1606
1607         ttrace->entry_time = sample->time;
1608         msg = ttrace->entry_str;
1609         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1610
1611         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1612                                            args, trace, thread);
1613
1614         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1615                 if (!trace->duration_filter && !trace->summary_only) {
1616                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1617                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1618                 }
1619         } else
1620                 ttrace->entry_pending = true;
1621
1622         return 0;
1623 }
1624
1625 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1626                            struct perf_sample *sample)
1627 {
1628         int ret;
1629         u64 duration = 0;
1630         struct thread *thread;
1631         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1632         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1633         struct thread_trace *ttrace;
1634
1635         if (sc == NULL)
1636                 return -1;
1637
1638         if (sc->filtered)
1639                 return 0;
1640
1641         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1642         ttrace = thread__trace(thread, trace->output);
1643         if (ttrace == NULL)
1644                 return -1;
1645
1646         if (trace->summary)
1647                 thread__update_stats(ttrace, id, sample);
1648
1649         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1650
1651         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1652                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1653                 trace->last_vfs_getname = NULL;
1654                 ++trace->stats.vfs_getname;
1655         }
1656
1657         ttrace = thread->priv;
1658
1659         ttrace->exit_time = sample->time;
1660
1661         if (ttrace->entry_time) {
1662                 duration = sample->time - ttrace->entry_time;
1663                 if (trace__filter_duration(trace, duration))
1664                         goto out;
1665         } else if (trace->duration_filter)
1666                 goto out;
1667
1668         if (trace->summary_only)
1669                 goto out;
1670
1671         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1672
1673         if (ttrace->entry_pending) {
1674                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1675         } else {
1676                 fprintf(trace->output, " ... [");
1677                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1678                 fprintf(trace->output, "]: %s()", sc->name);
1679         }
1680
1681         if (sc->fmt == NULL) {
1682 signed_print:
1683                 fprintf(trace->output, ") = %d", ret);
1684         } else if (ret < 0 && sc->fmt->errmsg) {
1685                 char bf[256];
1686                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1687                            *e = audit_errno_to_name(-ret);
1688
1689                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1690         } else if (ret == 0 && sc->fmt->timeout)
1691                 fprintf(trace->output, ") = 0 Timeout");
1692         else if (sc->fmt->hexret)
1693                 fprintf(trace->output, ") = %#x", ret);
1694         else
1695                 goto signed_print;
1696
1697         fputc('\n', trace->output);
1698 out:
1699         ttrace->entry_pending = false;
1700
1701         return 0;
1702 }
1703
1704 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1705                               struct perf_sample *sample)
1706 {
1707         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1708         return 0;
1709 }
1710
1711 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1712                                      struct perf_sample *sample)
1713 {
1714         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1715         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1716         struct thread *thread = machine__findnew_thread(trace->host,
1717                                                         sample->pid,
1718                                                         sample->tid);
1719         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1720
1721         if (ttrace == NULL)
1722                 goto out_dump;
1723
1724         ttrace->runtime_ms += runtime_ms;
1725         trace->runtime_ms += runtime_ms;
1726         return 0;
1727
1728 out_dump:
1729         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1730                evsel->name,
1731                perf_evsel__strval(evsel, sample, "comm"),
1732                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1733                runtime,
1734                perf_evsel__intval(evsel, sample, "vruntime"));
1735         return 0;
1736 }
1737
1738 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1739 {
1740         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1741             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1742                 return false;
1743
1744         if (trace->pid_list || trace->tid_list)
1745                 return true;
1746
1747         return false;
1748 }
1749
1750 static int trace__process_sample(struct perf_tool *tool,
1751                                  union perf_event *event __maybe_unused,
1752                                  struct perf_sample *sample,
1753                                  struct perf_evsel *evsel,
1754                                  struct machine *machine __maybe_unused)
1755 {
1756         struct trace *trace = container_of(tool, struct trace, tool);
1757         int err = 0;
1758
1759         tracepoint_handler handler = evsel->handler;
1760
1761         if (skip_sample(trace, sample))
1762                 return 0;
1763
1764         if (!trace->full_time && trace->base_time == 0)
1765                 trace->base_time = sample->time;
1766
1767         if (handler)
1768                 handler(trace, evsel, sample);
1769
1770         return err;
1771 }
1772
1773 static int parse_target_str(struct trace *trace)
1774 {
1775         if (trace->opts.target.pid) {
1776                 trace->pid_list = intlist__new(trace->opts.target.pid);
1777                 if (trace->pid_list == NULL) {
1778                         pr_err("Error parsing process id string\n");
1779                         return -EINVAL;
1780                 }
1781         }
1782
1783         if (trace->opts.target.tid) {
1784                 trace->tid_list = intlist__new(trace->opts.target.tid);
1785                 if (trace->tid_list == NULL) {
1786                         pr_err("Error parsing thread id string\n");
1787                         return -EINVAL;
1788                 }
1789         }
1790
1791         return 0;
1792 }
1793
/*
 * Implements 'perf trace record': build an argument vector made of a fixed
 * set of record options followed by the caller's own arguments and pass it
 * to cmd_record().
 *
 * Returns -ENOMEM when the vector cannot be allocated, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	unsigned int rec_argc = nr_fixed + argc;
	unsigned int n = 0, k;
	/* One extra, zeroed slot keeps the vector NULL terminated. */
	const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (k = 0; k < nr_fixed; k++)
		rec_argv[n++] = record_args[k];

	for (k = 0; k < (unsigned int)argc; k++)
		rec_argv[n++] = argv[k];

	return cmd_record(n, rec_argv, NULL);
}
1820
1821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1822
1823 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1824 {
1825         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1826         if (evsel == NULL)
1827                 return;
1828
1829         if (perf_evsel__field(evsel, "pathname") == NULL) {
1830                 perf_evsel__delete(evsel);
1831                 return;
1832         }
1833
1834         evsel->handler = trace__vfs_getname;
1835         perf_evlist__add(evlist, evsel);
1836 }
1837
1838 static int trace__run(struct trace *trace, int argc, const char **argv)
1839 {
1840         struct perf_evlist *evlist = perf_evlist__new();
1841         struct perf_evsel *evsel;
1842         int err = -1, i;
1843         unsigned long before;
1844         const bool forks = argc > 0;
1845
1846         trace->live = true;
1847
1848         if (evlist == NULL) {
1849                 fprintf(trace->output, "Not enough memory to run!\n");
1850                 goto out;
1851         }
1852
1853         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1854                 goto out_error_tp;
1855
1856         perf_evlist__add_vfs_getname(evlist);
1857
1858         if (trace->sched &&
1859                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1860                                 trace__sched_stat_runtime))
1861                 goto out_error_tp;
1862
1863         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1864         if (err < 0) {
1865                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1866                 goto out_delete_evlist;
1867         }
1868
1869         err = trace__symbols_init(trace, evlist);
1870         if (err < 0) {
1871                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1872                 goto out_delete_maps;
1873         }
1874
1875         perf_evlist__config(evlist, &trace->opts);
1876
1877         signal(SIGCHLD, sig_handler);
1878         signal(SIGINT, sig_handler);
1879
1880         if (forks) {
1881                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1882                                                     argv, false, false);
1883                 if (err < 0) {
1884                         fprintf(trace->output, "Couldn't run the workload!\n");
1885                         goto out_delete_maps;
1886                 }
1887         }
1888
1889         err = perf_evlist__open(evlist);
1890         if (err < 0)
1891                 goto out_error_open;
1892
1893         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1894         if (err < 0) {
1895                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1896                 goto out_close_evlist;
1897         }
1898
1899         perf_evlist__enable(evlist);
1900
1901         if (forks)
1902                 perf_evlist__start_workload(evlist);
1903
1904         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1905 again:
1906         before = trace->nr_events;
1907
1908         for (i = 0; i < evlist->nr_mmaps; i++) {
1909                 union perf_event *event;
1910
1911                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1912                         const u32 type = event->header.type;
1913                         tracepoint_handler handler;
1914                         struct perf_sample sample;
1915
1916                         ++trace->nr_events;
1917
1918                         err = perf_evlist__parse_sample(evlist, event, &sample);
1919                         if (err) {
1920                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1921                                 goto next_event;
1922                         }
1923
1924                         if (!trace->full_time && trace->base_time == 0)
1925                                 trace->base_time = sample.time;
1926
1927                         if (type != PERF_RECORD_SAMPLE) {
1928                                 trace__process_event(trace, trace->host, event, &sample);
1929                                 continue;
1930                         }
1931
1932                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1933                         if (evsel == NULL) {
1934                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1935                                 goto next_event;
1936                         }
1937
1938                         if (sample.raw_data == NULL) {
1939                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1940                                        perf_evsel__name(evsel), sample.tid,
1941                                        sample.cpu, sample.raw_size);
1942                                 goto next_event;
1943                         }
1944
1945                         handler = evsel->handler;
1946                         handler(trace, evsel, &sample);
1947 next_event:
1948                         perf_evlist__mmap_consume(evlist, i);
1949
1950                         if (interrupted)
1951                                 goto out_disable;
1952                 }
1953         }
1954
1955         if (trace->nr_events == before) {
1956                 int timeout = done ? 100 : -1;
1957
1958                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1959                         goto again;
1960         } else {
1961                 goto again;
1962         }
1963
1964 out_disable:
1965         perf_evlist__disable(evlist);
1966
1967         if (!err) {
1968                 if (trace->summary)
1969                         trace__fprintf_thread_summary(trace, trace->output);
1970
1971                 if (trace->show_tool_stats) {
1972                         fprintf(trace->output, "Stats:\n "
1973                                                " vfs_getname : %" PRIu64 "\n"
1974                                                " proc_getname: %" PRIu64 "\n",
1975                                 trace->stats.vfs_getname,
1976                                 trace->stats.proc_getname);
1977                 }
1978         }
1979
1980         perf_evlist__munmap(evlist);
1981 out_close_evlist:
1982         perf_evlist__close(evlist);
1983 out_delete_maps:
1984         perf_evlist__delete_maps(evlist);
1985 out_delete_evlist:
1986         perf_evlist__delete(evlist);
1987 out:
1988         trace->live = false;
1989         return err;
1990 {
1991         char errbuf[BUFSIZ];
1992
1993 out_error_tp:
1994         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1995         goto out_error;
1996
1997 out_error_open:
1998         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1999
2000 out_error:
2001         fprintf(trace->output, "%s\n", errbuf);
2002         goto out_delete_evlist;
2003 }
2004 }
2005
2006 static int trace__replay(struct trace *trace)
2007 {
2008         const struct perf_evsel_str_handler handlers[] = {
2009                 { "probe:vfs_getname",       trace__vfs_getname, },
2010         };
2011         struct perf_data_file file = {
2012                 .path  = input_name,
2013                 .mode  = PERF_DATA_MODE_READ,
2014         };
2015         struct perf_session *session;
2016         struct perf_evsel *evsel;
2017         int err = -1;
2018
2019         trace->tool.sample        = trace__process_sample;
2020         trace->tool.mmap          = perf_event__process_mmap;
2021         trace->tool.mmap2         = perf_event__process_mmap2;
2022         trace->tool.comm          = perf_event__process_comm;
2023         trace->tool.exit          = perf_event__process_exit;
2024         trace->tool.fork          = perf_event__process_fork;
2025         trace->tool.attr          = perf_event__process_attr;
2026         trace->tool.tracing_data = perf_event__process_tracing_data;
2027         trace->tool.build_id      = perf_event__process_build_id;
2028
2029         trace->tool.ordered_samples = true;
2030         trace->tool.ordering_requires_timestamps = true;
2031
2032         /* add tid to output */
2033         trace->multiple_threads = true;
2034
2035         if (symbol__init() < 0)
2036                 return -1;
2037
2038         session = perf_session__new(&file, false, &trace->tool);
2039         if (session == NULL)
2040                 return -ENOMEM;
2041
2042         trace->host = &session->machines.host;
2043
2044         err = perf_session__set_tracepoints_handlers(session, handlers);
2045         if (err)
2046                 goto out;
2047
2048         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2049                                                      "raw_syscalls:sys_enter");
2050         if (evsel == NULL) {
2051                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2052                 goto out;
2053         }
2054
2055         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2056             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2057                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2058                 goto out;
2059         }
2060
2061         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2062                                                      "raw_syscalls:sys_exit");
2063         if (evsel == NULL) {
2064                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2065                 goto out;
2066         }
2067
2068         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2069             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2070                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2071                 goto out;
2072         }
2073
2074         err = parse_target_str(trace);
2075         if (err != 0)
2076                 goto out;
2077
2078         setup_pager();
2079
2080         err = perf_session__process_events(session, &trace->tool);
2081         if (err)
2082                 pr_err("Failed to process events, error %d", err);
2083
2084         else if (trace->summary)
2085                 trace__fprintf_thread_summary(trace, trace->output);
2086
2087 out:
2088         perf_session__delete(session);
2089
2090         return err;
2091 }
2092
/*
 * Print the heading that precedes the per-thread summary.
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2101
2102 static size_t thread__dump_stats(struct thread_trace *ttrace,
2103                                  struct trace *trace, FILE *fp)
2104 {
2105         struct stats *stats;
2106         size_t printed = 0;
2107         struct syscall *sc;
2108         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2109
2110         if (inode == NULL)
2111                 return 0;
2112
2113         printed += fprintf(fp, "\n");
2114
2115         printed += fprintf(fp, "                                                    msec/call\n");
2116         printed += fprintf(fp, "   syscall            calls      min      avg      max stddev\n");
2117         printed += fprintf(fp, "   --------------- -------- -------- -------- -------- ------\n");
2118
2119         /* each int_node is a syscall */
2120         while (inode) {
2121                 stats = inode->priv;
2122                 if (stats) {
2123                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2124                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2125                         double avg = avg_stats(stats);
2126                         double pct;
2127                         u64 n = (u64) stats->n;
2128
2129                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2130                         avg /= NSEC_PER_MSEC;
2131
2132                         sc = &trace->syscalls.table[inode->i];
2133                         printed += fprintf(fp, "   %-15s", sc->name);
2134                         printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f",
2135                                            n, min, avg);
2136                         printed += fprintf(fp, " %8.3f %6.2f\n", max, pct);
2137                 }
2138
2139                 inode = intlist__next(inode);
2140         }
2141
2142         printed += fprintf(fp, "\n\n");
2143
2144         return printed;
2145 }
2146
/*
 * Context handed to trace__fprintf_one_thread() through the opaque 'priv'
 * argument of machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer whose threads are summarized */
	size_t printed;		/* running total of the fprintf() results */
};
2153
2154 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2155 {
2156         struct summary_data *data = priv;
2157         FILE *fp = data->fp;
2158         size_t printed = data->printed;
2159         struct trace *trace = data->trace;
2160         struct thread_trace *ttrace = thread->priv;
2161         const char *color;
2162         double ratio;
2163
2164         if (ttrace == NULL)
2165                 return 0;
2166
2167         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2168
2169         color = PERF_COLOR_NORMAL;
2170         if (ratio > 50.0)
2171                 color = PERF_COLOR_RED;
2172         else if (ratio > 25.0)
2173                 color = PERF_COLOR_GREEN;
2174         else if (ratio > 5.0)
2175                 color = PERF_COLOR_YELLOW;
2176
2177         printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid);
2178         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2179         printed += color_fprintf(fp, color, "%.1f%%", ratio);
2180         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2181         printed += thread__dump_stats(ttrace, trace, fp);
2182
2183         data->printed += printed;
2184
2185         return 0;
2186 }
2187
2188 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2189 {
2190         struct summary_data data = {
2191                 .fp = fp,
2192                 .trace = trace
2193         };
2194         data.printed = trace__fprintf_threads_header(fp);
2195
2196         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2197
2198         return data.printed;
2199 }
2200
2201 static int trace__set_duration(const struct option *opt, const char *str,
2202                                int unset __maybe_unused)
2203 {
2204         struct trace *trace = opt->value;
2205
2206         trace->duration_filter = atof(str);
2207         return 0;
2208 }
2209
2210 static int trace__open_output(struct trace *trace, const char *filename)
2211 {
2212         struct stat st;
2213
2214         if (!stat(filename, &st) && st.st_size) {
2215                 char oldname[PATH_MAX];
2216
2217                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2218                 unlink(oldname);
2219                 rename(filename, oldname);
2220         }
2221
2222         trace->output = fopen(filename, "w");
2223
2224         return trace->output == NULL ? -errno : 0;
2225 }
2226
2227 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2228 {
2229         const char * const trace_usage[] = {
2230                 "perf trace [<options>] [<command>]",
2231                 "perf trace [<options>] -- <command> [<options>]",
2232                 "perf trace record [<options>] [<command>]",
2233                 "perf trace record [<options>] -- <command> [<options>]",
2234                 NULL
2235         };
2236         struct trace trace = {
2237                 .audit = {
2238                         .machine = audit_detect_machine(),
2239                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2240                 },
2241                 .syscalls = {
2242                         . max = -1,
2243                 },
2244                 .opts = {
2245                         .target = {
2246                                 .uid       = UINT_MAX,
2247                                 .uses_mmap = true,
2248                         },
2249                         .user_freq     = UINT_MAX,
2250                         .user_interval = ULLONG_MAX,
2251                         .no_delay      = true,
2252                         .mmap_pages    = 1024,
2253                 },
2254                 .output = stdout,
2255                 .show_comm = true,
2256         };
2257         const char *output_name = NULL;
2258         const char *ev_qualifier_str = NULL;
2259         const struct option trace_options[] = {
2260         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2261                     "show the thread COMM next to its id"),
2262         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2263         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2264                     "list of events to trace"),
2265         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2266         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2267         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2268                     "trace events on existing process id"),
2269         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2270                     "trace events on existing thread id"),
2271         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2272                     "system-wide collection from all CPUs"),
2273         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2274                     "list of cpus to monitor"),
2275         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2276                     "child tasks do not inherit counters"),
2277         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2278                      "number of mmap data pages",
2279                      perf_evlist__parse_mmap_pages),
2280         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2281                    "user to profile"),
2282         OPT_CALLBACK(0, "duration", &trace, "float",
2283                      "show only events with duration > N.M ms",
2284                      trace__set_duration),
2285         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2286         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2287         OPT_BOOLEAN('T', "time", &trace.full_time,
2288                     "Show full timestamp, not time relative to first start"),
2289         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2290                     "Show only syscall summary with statistics"),
2291         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2292                     "Show all syscalls and summary with statistics"),
2293         OPT_END()
2294         };
2295         int err;
2296         char bf[BUFSIZ];
2297
2298         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2299                 return trace__record(argc-2, &argv[2]);
2300
2301         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2302
2303         /* summary_only implies summary option, but don't overwrite summary if set */
2304         if (trace.summary_only)
2305                 trace.summary = trace.summary_only;
2306
2307         if (output_name != NULL) {
2308                 err = trace__open_output(&trace, output_name);
2309                 if (err < 0) {
2310                         perror("failed to create output file");
2311                         goto out;
2312                 }
2313         }
2314
2315         if (ev_qualifier_str != NULL) {
2316                 const char *s = ev_qualifier_str;
2317
2318                 trace.not_ev_qualifier = *s == '!';
2319                 if (trace.not_ev_qualifier)
2320                         ++s;
2321                 trace.ev_qualifier = strlist__new(true, s);
2322                 if (trace.ev_qualifier == NULL) {
2323                         fputs("Not enough memory to parse event qualifier",
2324                               trace.output);
2325                         err = -ENOMEM;
2326                         goto out_close;
2327                 }
2328         }
2329
2330         err = target__validate(&trace.opts.target);
2331         if (err) {
2332                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2333                 fprintf(trace.output, "%s", bf);
2334                 goto out_close;
2335         }
2336
2337         err = target__parse_uid(&trace.opts.target);
2338         if (err) {
2339                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2340                 fprintf(trace.output, "%s", bf);
2341                 goto out_close;
2342         }
2343
2344         if (!argc && target__none(&trace.opts.target))
2345                 trace.opts.target.system_wide = true;
2346
2347         if (input_name)
2348                 err = trace__replay(&trace);
2349         else
2350                 err = trace__run(&trace, argc, argv);
2351
2352 out_close:
2353         if (output_name != NULL)
2354                 fclose(trace.output);
2355 out:
2356         return err;
2357 }