~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/perf/builtin-trace.c

Version: ~ [ linux-5.12-rc7 ] ~ [ linux-5.11.13 ] ~ [ linux-5.10.29 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.111 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.186 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.230 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.266 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.266 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
 22 
/*
 * For older distros: provide fallback values for constants that the system
 * headers may not define.  Each one is only defined when the headers did not
 * already supply it.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE          1
#endif
 43 
/*
 * Accessor for one field of a tracepoint sample's raw payload: where the
 * field lives inside sample->raw_data and which reader fetches it.  Only one
 * of the two readers is set, depending on how the field was initialised.
 */
struct tp_field {
        int offset;     /* byte offset of the field inside sample->raw_data */
        union {
                /* returns the field's value widened to u64 */
                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                /* returns the address of the field inside the payload */
                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
        };
};
 51 
 52 #define TP_UINT_FIELD(bits) \
 53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 54 { \
 55         return *(u##bits *)(sample->raw_data + field->offset); \
 56 }
 57 
 58 TP_UINT_FIELD(8);
 59 TP_UINT_FIELD(16);
 60 TP_UINT_FIELD(32);
 61 TP_UINT_FIELD(64);
 62 
 63 #define TP_UINT_FIELD__SWAPPED(bits) \
 64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 65 { \
 66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
 67         return bswap_##bits(value);\
 68 }
 69 
 70 TP_UINT_FIELD__SWAPPED(16);
 71 TP_UINT_FIELD__SWAPPED(32);
 72 TP_UINT_FIELD__SWAPPED(64);
 73 
 74 static int tp_field__init_uint(struct tp_field *field,
 75                                struct format_field *format_field,
 76                                bool needs_swap)
 77 {
 78         field->offset = format_field->offset;
 79 
 80         switch (format_field->size) {
 81         case 1:
 82                 field->integer = tp_field__u8;
 83                 break;
 84         case 2:
 85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 86                 break;
 87         case 4:
 88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 89                 break;
 90         case 8:
 91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 92                 break;
 93         default:
 94                 return -1;
 95         }
 96 
 97         return 0;
 98 }
 99 
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104 
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111 
/*
 * Per-evsel accessors for the syscall tracepoints, stored in evsel->priv.
 * 'id' is initialised for every syscall evsel; 'args' and 'ret' share storage
 * because a given evsel only sets up one of them (args for sys_enter, ret for
 * sys_exit).
 */
struct syscall_tp {
        struct tp_field id;
        union {
                struct tp_field args, ret;
        };
};
118 
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124 
125         if (format_field == NULL)
126                 return -1;
127 
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130 
/*
 * Initialise the integer accessor called 'name' inside the struct syscall_tp
 * hanging off evsel->priv.  'name' is used both as the member name and,
 * stringified, as the tracepoint field to look up.  Evaluates to the return
 * value of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134 
/*
 * Look up the tracepoint field called @name on @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
146 
/*
 * Initialise the pointer accessor called 'name' inside the struct syscall_tp
 * hanging off evsel->priv.  'name' is used both as the member name and,
 * stringified, as the tracepoint field to look up.  Evaluates to the return
 * value of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
150 
/*
 * Release an evsel together with its private data: evsel->priv is released
 * first, then the evsel itself is deleted.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
        /* NOTE(review): zfree() presumably frees and NULLs the pointer — defined elsewhere */
        zfree(&evsel->priv);
        perf_evsel__delete(evsel);
}
156 
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163 
164                 evsel->handler = handler;
165                 return 0;
166         }
167 
168         return -ENOMEM;
169 
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174 
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first; older kernels (e.g., RHEL6) use
 * syscalls:{enter,exit}, so "syscalls" is the fallback.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

        if (tp == NULL)
                tp = perf_evsel__newtp("syscalls", direction);

        if (tp == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(tp, handler)) {
                perf_evsel__delete_priv(tp);
                return NULL;
        }

        return tp;
}
194 
/*
 * Read the integer field 'name' of the struct syscall_tp attached to
 * evsel->priv from 'sample', through the reader stored in that accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint() but for pointer accessors: evaluates to
 * whatever the accessor's 'pointer' reader returns for 'sample'.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.pointer(&fields->name, sample); })
202 
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209 
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213 
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216 
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220 
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223 
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226 
227         ret = 0;
228 out:
229         return ret;
230 
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237 
238 
/*
 * Context handed to every argument formatter (the syscall_arg__scnprintf_*
 * helpers) for the one argument being printed.
 */
struct syscall_arg {
        unsigned long val;      /* raw value of the argument */
        struct thread *thread;
        struct trace  *trace;
        void          *parm;    /* formatter-specific data, e.g. a struct strarray */
        u8            idx;      /* position of this argument in the syscall */
        u8            mask;     /* bit N set by a formatter: argument N is to be masked */
};
247 
/*
 * Table mapping a contiguous range of integer values to names: the value
 * 'offset + i' is named entries[i], for 0 <= i < nr_entries.
 */
struct strarray {
        int         offset;
        int         nr_entries;
        const char **entries;
};

/* Define strarray__<array> wrapping 'array', with value 0 mapping to array[0]. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* Define strarray__<array> wrapping 'array', with value 'off' mapping to array[0]. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
264 
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271 
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274 
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277 
/*
 * Print an argument through its strarray (arg->parm); values the table does
 * not cover are printed in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * Print an argument through its strarray (arg->parm); values the table does
 * not cover are printed in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299 
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301                                         struct syscall_arg *arg);
302 
303 #define SCA_FD syscall_arg__scnprintf_fd
304 
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309 
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312 
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315 
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317 
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319                                               struct syscall_arg *arg);
320 
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322 
/* Print the argument value in hexadecimal with a 0x prefix ("%#lx"). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
                                         struct syscall_arg *arg)
{
        return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
330 
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335 
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343 
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353 
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356 
357         return printed;
358 }
359 
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361 
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366 
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372 
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396 
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399 
400         return printed;
401 }
402 
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404 
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406                                                   struct syscall_arg *arg)
407 {
408         int printed = 0, flags = arg->val;
409 
410 #define P_MREMAP_FLAG(n) \
411         if (flags & MREMAP_##n) { \
412                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413                 flags &= ~MREMAP_##n; \
414         }
415 
416         P_MREMAP_FLAG(MAYMOVE);
417 #ifdef MREMAP_FIXED
418         P_MREMAP_FLAG(FIXED);
419 #endif
420 #undef P_MREMAP_FLAG
421 
422         if (flags)
423                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
424 
425         return printed;
426 }
427 
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
429 
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431                                                       struct syscall_arg *arg)
432 {
433         int behavior = arg->val;
434 
435         switch (behavior) {
436 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
437         P_MADV_BHV(NORMAL);
438         P_MADV_BHV(RANDOM);
439         P_MADV_BHV(SEQUENTIAL);
440         P_MADV_BHV(WILLNEED);
441         P_MADV_BHV(DONTNEED);
442         P_MADV_BHV(REMOVE);
443         P_MADV_BHV(DONTFORK);
444         P_MADV_BHV(DOFORK);
445         P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447         P_MADV_BHV(SOFT_OFFLINE);
448 #endif
449         P_MADV_BHV(MERGEABLE);
450         P_MADV_BHV(UNMERGEABLE);
451 #ifdef MADV_HUGEPAGE
452         P_MADV_BHV(HUGEPAGE);
453 #endif
454 #ifdef MADV_NOHUGEPAGE
455         P_MADV_BHV(NOHUGEPAGE);
456 #endif
457 #ifdef MADV_DONTDUMP
458         P_MADV_BHV(DONTDUMP);
459 #endif
460 #ifdef MADV_DODUMP
461         P_MADV_BHV(DODUMP);
462 #endif
463 #undef P_MADV_PHV
464         default: break;
465         }
466 
467         return scnprintf(bf, size, "%#x", behavior);
468 }
469 
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
471 
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473                                            struct syscall_arg *arg)
474 {
475         int printed = 0, op = arg->val;
476 
477         if (op == 0)
478                 return scnprintf(bf, size, "NONE");
479 #define P_CMD(cmd) \
480         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
482                 op &= ~LOCK_##cmd; \
483         }
484 
485         P_CMD(SH);
486         P_CMD(EX);
487         P_CMD(NB);
488         P_CMD(UN);
489         P_CMD(MAND);
490         P_CMD(RW);
491         P_CMD(READ);
492         P_CMD(WRITE);
493 #undef P_OP
494 
495         if (op)
496                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
497 
498         return printed;
499 }
500 
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
502 
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
504 {
505         enum syscall_futex_args {
506                 SCF_UADDR   = (1 << 0),
507                 SCF_OP      = (1 << 1),
508                 SCF_VAL     = (1 << 2),
509                 SCF_TIMEOUT = (1 << 3),
510                 SCF_UADDR2  = (1 << 4),
511                 SCF_VAL3    = (1 << 5),
512         };
513         int op = arg->val;
514         int cmd = op & FUTEX_CMD_MASK;
515         size_t printed = 0;
516 
517         switch (cmd) {
518 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
520         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
523         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
524         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
525         P_FUTEX_OP(WAKE_OP);                                                      break;
526         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
529         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
530         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
531         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
532         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
533         }
534 
535         if (op & FUTEX_PRIVATE_FLAG)
536                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
537 
538         if (op & FUTEX_CLOCK_REALTIME)
539                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
540 
541         return printed;
542 }
543 
544 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
545 
/*
 * Name tables used with the strarray formatters.  Each table is indexed by
 * (argument value - offset); the matching strarray__<name> object is defined
 * right after it.
 */

/* epoll_ctl op names; the first entry corresponds to value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* Seek origins; "DATA" and "HOLE" are only present when the headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
        "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
595 
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
598 #endif
599 
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601                                                       struct syscall_arg *arg)
602 {
603         size_t printed;
604         int type = arg->val,
605             flags = type & ~SOCK_TYPE_MASK;
606 
607         type &= SOCK_TYPE_MASK;
608         /*
609          * Can't use a strarray, MIPS may override for ABI reasons.
610          */
611         switch (type) {
612 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
613         P_SK_TYPE(STREAM);
614         P_SK_TYPE(DGRAM);
615         P_SK_TYPE(RAW);
616         P_SK_TYPE(RDM);
617         P_SK_TYPE(SEQPACKET);
618         P_SK_TYPE(DCCP);
619         P_SK_TYPE(PACKET);
620 #undef P_SK_TYPE
621         default:
622                 printed = scnprintf(bf, size, "%#x", type);
623         }
624 
625 #define P_SK_FLAG(n) \
626         if (flags & SOCK_##n) { \
627                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628                 flags &= ~SOCK_##n; \
629         }
630 
631         P_SK_FLAG(CLOEXEC);
632         P_SK_FLAG(NONBLOCK);
633 #undef P_SK_FLAG
634 
635         if (flags)
636                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
637 
638         return printed;
639 }
640 
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
642 
643 #ifndef MSG_PROBE
644 #define MSG_PROBE            0x10
645 #endif
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE  0x10000
648 #endif
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
651 #endif
652 #ifndef MSG_FASTOPEN
653 #define MSG_FASTOPEN         0x20000000
654 #endif
655 
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657                                                struct syscall_arg *arg)
658 {
659         int printed = 0, flags = arg->val;
660 
661         if (flags == 0)
662                 return scnprintf(bf, size, "NONE");
663 #define P_MSG_FLAG(n) \
664         if (flags & MSG_##n) { \
665                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
666                 flags &= ~MSG_##n; \
667         }
668 
669         P_MSG_FLAG(OOB);
670         P_MSG_FLAG(PEEK);
671         P_MSG_FLAG(DONTROUTE);
672         P_MSG_FLAG(TRYHARD);
673         P_MSG_FLAG(CTRUNC);
674         P_MSG_FLAG(PROBE);
675         P_MSG_FLAG(TRUNC);
676         P_MSG_FLAG(DONTWAIT);
677         P_MSG_FLAG(EOR);
678         P_MSG_FLAG(WAITALL);
679         P_MSG_FLAG(FIN);
680         P_MSG_FLAG(SYN);
681         P_MSG_FLAG(CONFIRM);
682         P_MSG_FLAG(RST);
683         P_MSG_FLAG(ERRQUEUE);
684         P_MSG_FLAG(NOSIGNAL);
685         P_MSG_FLAG(MORE);
686         P_MSG_FLAG(WAITFORONE);
687         P_MSG_FLAG(SENDPAGE_NOTLAST);
688         P_MSG_FLAG(FASTOPEN);
689         P_MSG_FLAG(CMSG_CLOEXEC);
690 #undef P_MSG_FLAG
691 
692         if (flags)
693                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
694 
695         return printed;
696 }
697 
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
699 
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701                                                  struct syscall_arg *arg)
702 {
703         size_t printed = 0;
704         int mode = arg->val;
705 
706         if (mode == F_OK) /* 0 */
707                 return scnprintf(bf, size, "F");
708 #define P_MODE(n) \
709         if (mode & n##_OK) { \
710                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
711                 mode &= ~n##_OK; \
712         }
713 
714         P_MODE(R);
715         P_MODE(W);
716         P_MODE(X);
717 #undef P_MODE
718 
719         if (mode)
720                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
721 
722         return printed;
723 }
724 
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
726 
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728                                                struct syscall_arg *arg)
729 {
730         int printed = 0, flags = arg->val;
731 
732         if (!(flags & O_CREAT))
733                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
734 
735         if (flags == 0)
736                 return scnprintf(bf, size, "RDONLY");
737 #define P_FLAG(n) \
738         if (flags & O_##n) { \
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
740                 flags &= ~O_##n; \
741         }
742 
743         P_FLAG(APPEND);
744         P_FLAG(ASYNC);
745         P_FLAG(CLOEXEC);
746         P_FLAG(CREAT);
747         P_FLAG(DIRECT);
748         P_FLAG(DIRECTORY);
749         P_FLAG(EXCL);
750         P_FLAG(LARGEFILE);
751         P_FLAG(NOATIME);
752         P_FLAG(NOCTTY);
753 #ifdef O_NONBLOCK
754         P_FLAG(NONBLOCK);
755 #elif O_NDELAY
756         P_FLAG(NDELAY);
757 #endif
758 #ifdef O_PATH
759         P_FLAG(PATH);
760 #endif
761         P_FLAG(RDWR);
762 #ifdef O_DSYNC
763         if ((flags & O_SYNC) == O_SYNC)
764                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
765         else {
766                 P_FLAG(DSYNC);
767         }
768 #else
769         P_FLAG(SYNC);
770 #endif
771         P_FLAG(TRUNC);
772         P_FLAG(WRONLY);
773 #undef P_FLAG
774 
775         if (flags)
776                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777 
778         return printed;
779 }
780 
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
782 
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784                                                    struct syscall_arg *arg)
785 {
786         int printed = 0, flags = arg->val;
787 
788         if (flags == 0)
789                 return scnprintf(bf, size, "NONE");
790 #define P_FLAG(n) \
791         if (flags & EFD_##n) { \
792                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
793                 flags &= ~EFD_##n; \
794         }
795 
796         P_FLAG(SEMAPHORE);
797         P_FLAG(CLOEXEC);
798         P_FLAG(NONBLOCK);
799 #undef P_FLAG
800 
801         if (flags)
802                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803 
804         return printed;
805 }
806 
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
808 
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810                                                 struct syscall_arg *arg)
811 {
812         int printed = 0, flags = arg->val;
813 
814 #define P_FLAG(n) \
815         if (flags & O_##n) { \
816                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
817                 flags &= ~O_##n; \
818         }
819 
820         P_FLAG(CLOEXEC);
821         P_FLAG(NONBLOCK);
822 #undef P_FLAG
823 
824         if (flags)
825                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
826 
827         return printed;
828 }
829 
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
831 
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
833 {
834         int sig = arg->val;
835 
836         switch (sig) {
837 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
838         P_SIGNUM(HUP);
839         P_SIGNUM(INT);
840         P_SIGNUM(QUIT);
841         P_SIGNUM(ILL);
842         P_SIGNUM(TRAP);
843         P_SIGNUM(ABRT);
844         P_SIGNUM(BUS);
845         P_SIGNUM(FPE);
846         P_SIGNUM(KILL);
847         P_SIGNUM(USR1);
848         P_SIGNUM(SEGV);
849         P_SIGNUM(USR2);
850         P_SIGNUM(PIPE);
851         P_SIGNUM(ALRM);
852         P_SIGNUM(TERM);
853         P_SIGNUM(CHLD);
854         P_SIGNUM(CONT);
855         P_SIGNUM(STOP);
856         P_SIGNUM(TSTP);
857         P_SIGNUM(TTIN);
858         P_SIGNUM(TTOU);
859         P_SIGNUM(URG);
860         P_SIGNUM(XCPU);
861         P_SIGNUM(XFSZ);
862         P_SIGNUM(VTALRM);
863         P_SIGNUM(PROF);
864         P_SIGNUM(WINCH);
865         P_SIGNUM(IO);
866         P_SIGNUM(PWR);
867         P_SIGNUM(SYS);
868 #ifdef SIGEMT
869         P_SIGNUM(EMT);
870 #endif
871 #ifdef SIGSTKFLT
872         P_SIGNUM(STKFLT);
873 #endif
874 #ifdef SIGSWI
875         P_SIGNUM(SWI);
876 #endif
877         default: break;
878         }
879 
880         return scnprintf(bf, size, "%#x", sig);
881 }
882 
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
884 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS          0x5401

/*
 * Names of the terminal ioctl commands, indexed by (cmd - 0x5401).  The
 * designated initialisers ([0x27], [0x50], [0x60]) skip over ranges of
 * indexes, so the slots in between are NULL.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
        [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

/* The offset is the value defined as TCGETS above, the first command in the table. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
911 
/*
 * Initialiser fragment for a struct syscall_fmt entry: format argument
 * number 'arg' through SCA_STRARRAY using strarray__<array>.  'name' is not
 * used in the expansion; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
915 
916 static struct syscall_fmt {
917         const char *name;
918         const char *alias;
919         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
920         void       *arg_parm[6];
921         bool       errmsg;
922         bool       timeout;
923         bool       hexret;
924 } syscall_fmts[] = {
925         { .name     = "access",     .errmsg = true,
926           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
927         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
928         { .name     = "brk",        .hexret = true,
929           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
930         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
931         { .name     = "close",      .errmsg = true,
932           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
933         { .name     = "connect",    .errmsg = true, },
934         { .name     = "dup",        .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936         { .name     = "dup2",       .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938         { .name     = "dup3",       .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
941         { .name     = "eventfd2",   .errmsg = true,
942           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
943         { .name     = "faccessat",  .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
945         { .name     = "fadvise64",  .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947         { .name     = "fallocate",  .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949         { .name     = "fchdir",     .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951         { .name     = "fchmod",     .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953         { .name     = "fchmodat",   .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955         { .name     = "fchown",     .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
957         { .name     = "fchownat",   .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959         { .name     = "fcntl",      .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FD, /* fd */
961                              [1] = SCA_STRARRAY, /* cmd */ },
962           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963         { .name     = "fdatasync",  .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965         { .name     = "flock",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FD, /* fd */
967                              [1] = SCA_FLOCK, /* cmd */ }, },
968         { .name     = "fsetxattr",  .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
971           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
973           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
974         { .name     = "fstatfs",    .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
976         { .name     = "fsync",    .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
978         { .name     = "ftruncate", .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
980         { .name     = "futex",      .errmsg = true,
981           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
982         { .name     = "futimesat", .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
984         { .name     = "getdents",   .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986         { .name     = "getdents64", .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
989         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
990         { .name     = "ioctl",      .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FD, /* fd */
992 #if defined(__i386__) || defined(__x86_64__)
993 /*
994  * FIXME: Make this available to all arches.
995  */
996                              [1] = SCA_STRHEXARRAY, /* cmd */
997                              [2] = SCA_HEX, /* arg */ },
998           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
999 #else
1000                              [2] = SCA_HEX, /* arg */ }, },
1001 #endif
1002         { .name     = "kill",       .errmsg = true,
1003           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1004         { .name     = "linkat",     .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006         { .name     = "lseek",      .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008                              [2] = SCA_STRARRAY, /* whence */ },
1009           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1010         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1011         { .name     = "madvise",    .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1013                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1014         { .name     = "mkdirat",    .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1016         { .name     = "mknodat",    .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018         { .name     = "mlock",      .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1020         { .name     = "mlockall",   .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1022         { .name     = "mmap",       .hexret = true,
1023           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1024                              [2] = SCA_MMAP_PROT, /* prot */
1025                              [3] = SCA_MMAP_FLAGS, /* flags */
1026                              [4] = SCA_FD,        /* fd */ }, },
1027         { .name     = "mprotect",   .errmsg = true,
1028           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1029                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1030         { .name     = "mremap",     .hexret = true,
1031           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1032                              [3] = SCA_MREMAP_FLAGS, /* flags */
1033                              [4] = SCA_HEX, /* new_addr */ }, },
1034         { .name     = "munlock",    .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1036         { .name     = "munmap",     .errmsg = true,
1037           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1038         { .name     = "name_to_handle_at", .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040         { .name     = "newfstatat", .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042         { .name     = "open",       .errmsg = true,
1043           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1044         { .name     = "open_by_handle_at", .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1047         { .name     = "openat",     .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050         { .name     = "pipe2",      .errmsg = true,
1051           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1052         { .name     = "poll",       .errmsg = true, .timeout = true, },
1053         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1054         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1057           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1059         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "pwritev",    .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063         { .name     = "read",       .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065         { .name     = "readlinkat", .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1067         { .name     = "readv",      .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069         { .name     = "recvfrom",   .errmsg = true,
1070           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1071         { .name     = "recvmmsg",   .errmsg = true,
1072           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1073         { .name     = "recvmsg",    .errmsg = true,
1074           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1075         { .name     = "renameat",   .errmsg = true,
1076           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1077         { .name     = "rt_sigaction", .errmsg = true,
1078           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1079         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1080         { .name     = "rt_sigqueueinfo", .errmsg = true,
1081           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1083           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1084         { .name     = "select",     .errmsg = true, .timeout = true, },
1085         { .name     = "sendmmsg",    .errmsg = true,
1086           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1087         { .name     = "sendmsg",    .errmsg = true,
1088           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1089         { .name     = "sendto",     .errmsg = true,
1090           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1091         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1092         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1093         { .name     = "shutdown",   .errmsg = true,
1094           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095         { .name     = "socket",     .errmsg = true,
1096           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1097                              [1] = SCA_SK_TYPE, /* type */ },
1098           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1099         { .name     = "socketpair", .errmsg = true,
1100           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1101                              [1] = SCA_SK_TYPE, /* type */ },
1102           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1103         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1104         { .name     = "symlinkat",  .errmsg = true,
1105           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1106         { .name     = "tgkill",     .errmsg = true,
1107           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1108         { .name     = "tkill",      .errmsg = true,
1109           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1110         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1111         { .name     = "unlinkat",   .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1113         { .name     = "utimensat",  .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1115         { .name     = "write",      .errmsg = true,
1116           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117         { .name     = "writev",     .errmsg = true,
1118           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1119 };
1120 
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1122 {
1123         const struct syscall_fmt *fmt = fmtp;
1124         return strcmp(name, fmt->name);
1125 }
1126 
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1128 {
1129         const int nmemb = ARRAY_SIZE(syscall_fmts);
1130         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1131 }
1132 
/* What is known about one syscall id; one slot of trace->syscalls.table. */
struct syscall {
	/* format of the "sys_enter_<name>" (or alias) tracepoint */
	struct event_format *tp_format;
	/* name as returned by audit_syscall_to_name() */
	const char *name;
	/* set when the event qualifier excludes this syscall */
	bool filtered;
	/* true for "exit" and "exit_group" */
	bool is_exit;
	/* matching syscall_fmts[] entry, NULL if there is none */
	struct syscall_fmt *fmt;
	/* per-argument formatters, allocated by syscall__set_arg_fmts() */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter cookies, borrowed from fmt->arg_parm */
	void **arg_parm;
};
1142 
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1144 {
1145         double duration = (double)t / NSEC_PER_MSEC;
1146         size_t printed = fprintf(fp, "(");
1147 
1148         if (duration >= 1.0)
1149                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150         else if (duration >= 0.01)
1151                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1152         else
1153                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154         return printed + fprintf(fp, "): ");
1155 }
1156 
1157 struct thread_trace {
1158         u64               entry_time;
1159         u64               exit_time;
1160         bool              entry_pending;
1161         unsigned long     nr_events;
1162         unsigned long     pfmaj, pfmin;
1163         char              *entry_str;
1164         double            runtime_ms;
1165         struct {
1166                 int       max;
1167                 char      **table;
1168         } paths;
1169 
1170         struct intlist *syscall_stats;
1171 };
1172 
1173 static struct thread_trace *thread_trace__new(void)
1174 {
1175         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1176 
1177         if (ttrace)
1178                 ttrace->paths.max = -1;
1179 
1180         ttrace->syscall_stats = intlist__new(NULL);
1181 
1182         return ttrace;
1183 }
1184 
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1186 {
1187         struct thread_trace *ttrace;
1188 
1189         if (thread == NULL)
1190                 goto fail;
1191 
1192         if (thread__priv(thread) == NULL)
1193                 thread__set_priv(thread, thread_trace__new());
1194 
1195         if (thread__priv(thread) == NULL)
1196                 goto fail;
1197 
1198         ttrace = thread__priv(thread);
1199         ++ttrace->nr_events;
1200 
1201         return ttrace;
1202 fail:
1203         color_fprintf(fp, PERF_COLOR_RED,
1204                       "WARNING: not enough memory, dropping samples!\n");
1205         return NULL;
1206 }
1207 
1208 #define TRACE_PFMAJ             (1 << 0)
1209 #define TRACE_PFMIN             (1 << 1)
1210 
1211 struct trace {
1212         struct perf_tool        tool;
1213         struct {
1214                 int             machine;
1215                 int             open_id;
1216         }                       audit;
1217         struct {
1218                 int             max;
1219                 struct syscall  *table;
1220         } syscalls;
1221         struct record_opts      opts;
1222         struct machine          *host;
1223         u64                     base_time;
1224         FILE                    *output;
1225         unsigned long           nr_events;
1226         struct strlist          *ev_qualifier;
1227         const char              *last_vfs_getname;
1228         struct intlist          *tid_list;
1229         struct intlist          *pid_list;
1230         double                  duration_filter;
1231         double                  runtime_ms;
1232         struct {
1233                 u64             vfs_getname,
1234                                 proc_getname;
1235         } stats;
1236         bool                    not_ev_qualifier;
1237         bool                    live;
1238         bool                    full_time;
1239         bool                    sched;
1240         bool                    multiple_threads;
1241         bool                    summary;
1242         bool                    summary_only;
1243         bool                    show_comm;
1244         bool                    show_tool_stats;
1245         bool                    trace_syscalls;
1246         int                     trace_pgfaults;
1247 };
1248 
1249 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1250 {
1251         struct thread_trace *ttrace = thread__priv(thread);
1252 
1253         if (fd > ttrace->paths.max) {
1254                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1255 
1256                 if (npath == NULL)
1257                         return -1;
1258 
1259                 if (ttrace->paths.max != -1) {
1260                         memset(npath + ttrace->paths.max + 1, 0,
1261                                (fd - ttrace->paths.max) * sizeof(char *));
1262                 } else {
1263                         memset(npath, 0, (fd + 1) * sizeof(char *));
1264                 }
1265 
1266                 ttrace->paths.table = npath;
1267                 ttrace->paths.max   = fd;
1268         }
1269 
1270         ttrace->paths.table[fd] = strdup(pathname);
1271 
1272         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1273 }
1274 
1275 static int thread__read_fd_path(struct thread *thread, int fd)
1276 {
1277         char linkname[PATH_MAX], pathname[PATH_MAX];
1278         struct stat st;
1279         int ret;
1280 
1281         if (thread->pid_ == thread->tid) {
1282                 scnprintf(linkname, sizeof(linkname),
1283                           "/proc/%d/fd/%d", thread->pid_, fd);
1284         } else {
1285                 scnprintf(linkname, sizeof(linkname),
1286                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1287         }
1288 
1289         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1290                 return -1;
1291 
1292         ret = readlink(linkname, pathname, sizeof(pathname));
1293 
1294         if (ret < 0 || ret > st.st_size)
1295                 return -1;
1296 
1297         pathname[ret] = '\0';
1298         return trace__set_fd_pathname(thread, fd, pathname);
1299 }
1300 
1301 static const char *thread__fd_path(struct thread *thread, int fd,
1302                                    struct trace *trace)
1303 {
1304         struct thread_trace *ttrace = thread__priv(thread);
1305 
1306         if (ttrace == NULL)
1307                 return NULL;
1308 
1309         if (fd < 0)
1310                 return NULL;
1311 
1312         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1313                 if (!trace->live)
1314                         return NULL;
1315                 ++trace->stats.proc_getname;
1316                 if (thread__read_fd_path(thread, fd))
1317                         return NULL;
1318         }
1319 
1320         return ttrace->paths.table[fd];
1321 }
1322 
1323 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1324                                         struct syscall_arg *arg)
1325 {
1326         int fd = arg->val;
1327         size_t printed = scnprintf(bf, size, "%d", fd);
1328         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1329 
1330         if (path)
1331                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1332 
1333         return printed;
1334 }
1335 
1336 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1337                                               struct syscall_arg *arg)
1338 {
1339         int fd = arg->val;
1340         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1341         struct thread_trace *ttrace = thread__priv(arg->thread);
1342 
1343         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1344                 zfree(&ttrace->paths.table[fd]);
1345 
1346         return printed;
1347 }
1348 
1349 static bool trace__filter_duration(struct trace *trace, double t)
1350 {
1351         return t < (trace->duration_filter * NSEC_PER_MSEC);
1352 }
1353 
1354 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1355 {
1356         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1357 
1358         return fprintf(fp, "%10.3f ", ts);
1359 }
1360 
/*
 * Flags set from the signal handler.  They are volatile sig_atomic_t because
 * that is the only object type a signal handler may portably write while
 * other code reads it; a plain bool may be cached in a register by the code
 * reading it, so a store made by the handler could go unnoticed.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1369 
1370 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1371                                         u64 duration, u64 tstamp, FILE *fp)
1372 {
1373         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1374         printed += fprintf_duration(duration, fp);
1375 
1376         if (trace->multiple_threads) {
1377                 if (trace->show_comm)
1378                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1379                 printed += fprintf(fp, "%d ", thread->tid);
1380         }
1381 
1382         return printed;
1383 }
1384 
1385 static int trace__process_event(struct trace *trace, struct machine *machine,
1386                                 union perf_event *event, struct perf_sample *sample)
1387 {
1388         int ret = 0;
1389 
1390         switch (event->header.type) {
1391         case PERF_RECORD_LOST:
1392                 color_fprintf(trace->output, PERF_COLOR_RED,
1393                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1394                 ret = machine__process_lost_event(machine, event, sample);
1395         default:
1396                 ret = machine__process_event(machine, event, sample);
1397                 break;
1398         }
1399 
1400         return ret;
1401 }
1402 
1403 static int trace__tool_process(struct perf_tool *tool,
1404                                union perf_event *event,
1405                                struct perf_sample *sample,
1406                                struct machine *machine)
1407 {
1408         struct trace *trace = container_of(tool, struct trace, tool);
1409         return trace__process_event(trace, machine, event, sample);
1410 }
1411 
1412 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1413 {
1414         int err = symbol__init(NULL);
1415 
1416         if (err)
1417                 return err;
1418 
1419         trace->host = machine__new_host();
1420         if (trace->host == NULL)
1421                 return -ENOMEM;
1422 
1423         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1424                                             evlist->threads, trace__tool_process, false);
1425         if (err)
1426                 symbol__exit();
1427 
1428         return err;
1429 }
1430 
1431 static int syscall__set_arg_fmts(struct syscall *sc)
1432 {
1433         struct format_field *field;
1434         int idx = 0;
1435 
1436         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1437         if (sc->arg_scnprintf == NULL)
1438                 return -1;
1439 
1440         if (sc->fmt)
1441                 sc->arg_parm = sc->fmt->arg_parm;
1442 
1443         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1444                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1445                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1446                 else if (field->flags & FIELD_IS_POINTER)
1447                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1448                 ++idx;
1449         }
1450 
1451         return 0;
1452 }
1453 
1454 static int trace__read_syscall_info(struct trace *trace, int id)
1455 {
1456         char tp_name[128];
1457         struct syscall *sc;
1458         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1459 
1460         if (name == NULL)
1461                 return -1;
1462 
1463         if (id > trace->syscalls.max) {
1464                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1465 
1466                 if (nsyscalls == NULL)
1467                         return -1;
1468 
1469                 if (trace->syscalls.max != -1) {
1470                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1471                                (id - trace->syscalls.max) * sizeof(*sc));
1472                 } else {
1473                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1474                 }
1475 
1476                 trace->syscalls.table = nsyscalls;
1477                 trace->syscalls.max   = id;
1478         }
1479 
1480         sc = trace->syscalls.table + id;
1481         sc->name = name;
1482 
1483         if (trace->ev_qualifier) {
1484                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1485 
1486                 if (!(in ^ trace->not_ev_qualifier)) {
1487                         sc->filtered = true;
1488                         /*
1489                          * No need to do read tracepoint information since this will be
1490                          * filtered out.
1491                          */
1492                         return 0;
1493                 }
1494         }
1495 
1496         sc->fmt  = syscall_fmt__find(sc->name);
1497 
1498         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1499         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1500 
1501         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1502                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1503                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1504         }
1505 
1506         if (sc->tp_format == NULL)
1507                 return -1;
1508 
1509         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1510 
1511         return syscall__set_arg_fmts(sc);
1512 }
1513 
1514 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1515                                       unsigned long *args, struct trace *trace,
1516                                       struct thread *thread)
1517 {
1518         size_t printed = 0;
1519 
1520         if (sc->tp_format != NULL) {
1521                 struct format_field *field;
1522                 u8 bit = 1;
1523                 struct syscall_arg arg = {
1524                         .idx    = 0,
1525                         .mask   = 0,
1526                         .trace  = trace,
1527                         .thread = thread,
1528                 };
1529 
1530                 for (field = sc->tp_format->format.fields->next; field;
1531                      field = field->next, ++arg.idx, bit <<= 1) {
1532                         if (arg.mask & bit)
1533                                 continue;
1534                         /*
1535                          * Suppress this argument if its value is zero and
1536                          * and we don't have a string associated in an
1537                          * strarray for it.
1538                          */
1539                         if (args[arg.idx] == 0 &&
1540                             !(sc->arg_scnprintf &&
1541                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1542                               sc->arg_parm[arg.idx]))
1543                                 continue;
1544 
1545                         printed += scnprintf(bf + printed, size - printed,
1546                                              "%s%s: ", printed ? ", " : "", field->name);
1547                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1548                                 arg.val = args[arg.idx];
1549                                 if (sc->arg_parm)
1550                                         arg.parm = sc->arg_parm[arg.idx];
1551                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1552                                                                       size - printed, &arg);
1553                         } else {
1554                                 printed += scnprintf(bf + printed, size - printed,
1555                                                      "%ld", args[arg.idx]);
1556                         }
1557                 }
1558         } else {
1559                 int i = 0;
1560 
1561                 while (i < 6) {
1562                         printed += scnprintf(bf + printed, size - printed,
1563                                              "%sarg%d: %ld",
1564                                              printed ? ", " : "", i, args[i]);
1565                         ++i;
1566                 }
1567         }
1568 
1569         return printed;
1570 }
1571 
/*
 * Signature shared by the per-tracepoint sample handlers (for instance
 * trace__sys_enter()).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1575 
1576 static struct syscall *trace__syscall_info(struct trace *trace,
1577                                            struct perf_evsel *evsel, int id)
1578 {
1579 
1580         if (id < 0) {
1581 
1582                 /*
1583                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1584                  * before that, leaving at a higher verbosity level till that is
1585                  * explained. Reproduced with plain ftrace with:
1586                  *
1587                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1588                  * grep "NR -1 " /t/trace_pipe
1589                  *
1590                  * After generating some load on the machine.
1591                  */
1592                 if (verbose > 1) {
1593                         static u64 n;
1594                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1595                                 id, perf_evsel__name(evsel), ++n);
1596                 }
1597                 return NULL;
1598         }
1599 
1600         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1601             trace__read_syscall_info(trace, id))
1602                 goto out_cant_read;
1603 
1604         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1605                 goto out_cant_read;
1606 
1607         return &trace->syscalls.table[id];
1608 
1609 out_cant_read:
1610         if (verbose) {
1611                 fprintf(trace->output, "Problems reading syscall %d", id);
1612                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1613                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1614                 fputs(" information\n", trace->output);
1615         }
1616         return NULL;
1617 }
1618 
1619 static void thread__update_stats(struct thread_trace *ttrace,
1620                                  int id, struct perf_sample *sample)
1621 {
1622         struct int_node *inode;
1623         struct stats *stats;
1624         u64 duration = 0;
1625 
1626         inode = intlist__findnew(ttrace->syscall_stats, id);
1627         if (inode == NULL)
1628                 return;
1629 
1630         stats = inode->priv;
1631         if (stats == NULL) {
1632                 stats = malloc(sizeof(struct stats));
1633                 if (stats == NULL)
1634                         return;
1635                 init_stats(stats);
1636                 inode->priv = stats;
1637         }
1638 
1639         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1640                 duration = sample->time - ttrace->entry_time;
1641 
1642         update_stats(stats, duration);
1643 }
1644 
1645 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1646                             union perf_event *event __maybe_unused,
1647                             struct perf_sample *sample)
1648 {
1649         char *msg;
1650         void *args;
1651         size_t printed = 0;
1652         struct thread *thread;
1653         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1654         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1655         struct thread_trace *ttrace;
1656 
1657         if (sc == NULL)
1658                 return -1;
1659 
1660         if (sc->filtered)
1661                 return 0;
1662 
1663         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1664         ttrace = thread__trace(thread, trace->output);
1665         if (ttrace == NULL)
1666                 return -1;
1667 
1668         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1669 
1670         if (ttrace->entry_str == NULL) {
1671                 ttrace->entry_str = malloc(1024);
1672                 if (!ttrace->entry_str)
1673                         return -1;
1674         }
1675 
1676         ttrace->entry_time = sample->time;
1677         msg = ttrace->entry_str;
1678         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1679 
1680         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1681                                            args, trace, thread);
1682 
1683         if (sc->is_exit) {
1684                 if (!trace->duration_filter && !trace->summary_only) {
1685                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1686                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1687                 }
1688         } else
1689                 ttrace->entry_pending = true;
1690 
1691         return 0;
1692 }
1693 
1694 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1695                            union perf_event *event __maybe_unused,
1696                            struct perf_sample *sample)
1697 {
1698         long ret;
1699         u64 duration = 0;
1700         struct thread *thread;
1701         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1702         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1703         struct thread_trace *ttrace;
1704 
1705         if (sc == NULL)
1706                 return -1;
1707 
1708         if (sc->filtered)
1709                 return 0;
1710 
1711         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1712         ttrace = thread__trace(thread, trace->output);
1713         if (ttrace == NULL)
1714                 return -1;
1715 
1716         if (trace->summary)
1717                 thread__update_stats(ttrace, id, sample);
1718 
1719         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1720 
1721         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1722                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1723                 trace->last_vfs_getname = NULL;
1724                 ++trace->stats.vfs_getname;
1725         }
1726 
1727         ttrace->exit_time = sample->time;
1728 
1729         if (ttrace->entry_time) {
1730                 duration = sample->time - ttrace->entry_time;
1731                 if (trace__filter_duration(trace, duration))
1732                         goto out;
1733         } else if (trace->duration_filter)
1734                 goto out;
1735 
1736         if (trace->summary_only)
1737                 goto out;
1738 
1739         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1740 
1741         if (ttrace->entry_pending) {
1742                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1743         } else {
1744                 fprintf(trace->output, " ... [");
1745                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1746                 fprintf(trace->output, "]: %s()", sc->name);
1747         }
1748 
1749         if (sc->fmt == NULL) {
1750 signed_print:
1751                 fprintf(trace->output, ") = %ld", ret);
1752         } else if (ret < 0 && sc->fmt->errmsg) {
1753                 char bf[STRERR_BUFSIZE];
1754                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1755                            *e = audit_errno_to_name(-ret);
1756 
1757                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1758         } else if (ret == 0 && sc->fmt->timeout)
1759                 fprintf(trace->output, ") = 0 Timeout");
1760         else if (sc->fmt->hexret)
1761                 fprintf(trace->output, ") = %#lx", ret);
1762         else
1763                 goto signed_print;
1764 
1765         fputc('\n', trace->output);
1766 out:
1767         ttrace->entry_pending = false;
1768 
1769         return 0;
1770 }
1771 
1772 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1773                               union perf_event *event __maybe_unused,
1774                               struct perf_sample *sample)
1775 {
1776         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1777         return 0;
1778 }
1779 
1780 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1781                                      union perf_event *event __maybe_unused,
1782                                      struct perf_sample *sample)
1783 {
1784         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1785         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1786         struct thread *thread = machine__findnew_thread(trace->host,
1787                                                         sample->pid,
1788                                                         sample->tid);
1789         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1790 
1791         if (ttrace == NULL)
1792                 goto out_dump;
1793 
1794         ttrace->runtime_ms += runtime_ms;
1795         trace->runtime_ms += runtime_ms;
1796         return 0;
1797 
1798 out_dump:
1799         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1800                evsel->name,
1801                perf_evsel__strval(evsel, sample, "comm"),
1802                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1803                runtime,
1804                perf_evsel__intval(evsel, sample, "vruntime"));
1805         return 0;
1806 }
1807 
1808 static void print_location(FILE *f, struct perf_sample *sample,
1809                            struct addr_location *al,
1810                            bool print_dso, bool print_sym)
1811 {
1812 
1813         if ((verbose || print_dso) && al->map)
1814                 fprintf(f, "%s@", al->map->dso->long_name);
1815 
1816         if ((verbose || print_sym) && al->sym)
1817                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1818                         al->addr - al->sym->start);
1819         else if (al->map)
1820                 fprintf(f, "0x%" PRIx64, al->addr);
1821         else
1822                 fprintf(f, "0x%" PRIx64, sample->addr);
1823 }
1824 
1825 static int trace__pgfault(struct trace *trace,
1826                           struct perf_evsel *evsel,
1827                           union perf_event *event,
1828                           struct perf_sample *sample)
1829 {
1830         struct thread *thread;
1831         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1832         struct addr_location al;
1833         char map_type = 'd';
1834         struct thread_trace *ttrace;
1835 
1836         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1837         ttrace = thread__trace(thread, trace->output);
1838         if (ttrace == NULL)
1839                 return -1;
1840 
1841         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1842                 ttrace->pfmaj++;
1843         else
1844                 ttrace->pfmin++;
1845 
1846         if (trace->summary_only)
1847                 return 0;
1848 
1849         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1850                               sample->ip, &al);
1851 
1852         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1853 
1854         fprintf(trace->output, "%sfault [",
1855                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1856                 "maj" : "min");
1857 
1858         print_location(trace->output, sample, &al, false, true);
1859 
1860         fprintf(trace->output, "] => ");
1861 
1862         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1863                                    sample->addr, &al);
1864 
1865         if (!al.map) {
1866                 thread__find_addr_location(thread, cpumode,
1867                                            MAP__FUNCTION, sample->addr, &al);
1868 
1869                 if (al.map)
1870                         map_type = 'x';
1871                 else
1872                         map_type = '?';
1873         }
1874 
1875         print_location(trace->output, sample, &al, true, false);
1876 
1877         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1878 
1879         return 0;
1880 }
1881 
1882 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1883 {
1884         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1885             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1886                 return false;
1887 
1888         if (trace->pid_list || trace->tid_list)
1889                 return true;
1890 
1891         return false;
1892 }
1893 
1894 static int trace__process_sample(struct perf_tool *tool,
1895                                  union perf_event *event,
1896                                  struct perf_sample *sample,
1897                                  struct perf_evsel *evsel,
1898                                  struct machine *machine __maybe_unused)
1899 {
1900         struct trace *trace = container_of(tool, struct trace, tool);
1901         int err = 0;
1902 
1903         tracepoint_handler handler = evsel->handler;
1904 
1905         if (skip_sample(trace, sample))
1906                 return 0;
1907 
1908         if (!trace->full_time && trace->base_time == 0)
1909                 trace->base_time = sample->time;
1910 
1911         if (handler) {
1912                 ++trace->nr_events;
1913                 handler(trace, evsel, event, sample);
1914         }
1915 
1916         return err;
1917 }
1918 
1919 static int parse_target_str(struct trace *trace)
1920 {
1921         if (trace->opts.target.pid) {
1922                 trace->pid_list = intlist__new(trace->opts.target.pid);
1923                 if (trace->pid_list == NULL) {
1924                         pr_err("Error parsing process id string\n");
1925                         return -EINVAL;
1926                 }
1927         }
1928 
1929         if (trace->opts.target.tid) {
1930                 trace->tid_list = intlist__new(trace->opts.target.tid);
1931                 if (trace->tid_list == NULL) {
1932                         pr_err("Error parsing thread id string\n");
1933                         return -EINVAL;
1934                 }
1935         }
1936 
1937         return 0;
1938 }
1939 
1940 static int trace__record(struct trace *trace, int argc, const char **argv)
1941 {
1942         unsigned int rec_argc, i, j;
1943         const char **rec_argv;
1944         const char * const record_args[] = {
1945                 "record",
1946                 "-R",
1947                 "-m", "1024",
1948                 "-c", "1",
1949         };
1950 
1951         const char * const sc_args[] = { "-e", };
1952         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1953         const char * const majpf_args[] = { "-e", "major-faults" };
1954         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1955         const char * const minpf_args[] = { "-e", "minor-faults" };
1956         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1957 
1958         /* +1 is for the event string below */
1959         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1960                 majpf_args_nr + minpf_args_nr + argc;
1961         rec_argv = calloc(rec_argc + 1, sizeof(char *));
1962 
1963         if (rec_argv == NULL)
1964                 return -ENOMEM;
1965 
1966         j = 0;
1967         for (i = 0; i < ARRAY_SIZE(record_args); i++)
1968                 rec_argv[j++] = record_args[i];
1969 
1970         if (trace->trace_syscalls) {
1971                 for (i = 0; i < sc_args_nr; i++)
1972                         rec_argv[j++] = sc_args[i];
1973 
1974                 /* event string may be different for older kernels - e.g., RHEL6 */
1975                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1976                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1977                 else if (is_valid_tracepoint("syscalls:sys_enter"))
1978                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1979                 else {
1980                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1981                         return -1;
1982                 }
1983         }
1984 
1985         if (trace->trace_pgfaults & TRACE_PFMAJ)
1986                 for (i = 0; i < majpf_args_nr; i++)
1987                         rec_argv[j++] = majpf_args[i];
1988 
1989         if (trace->trace_pgfaults & TRACE_PFMIN)
1990                 for (i = 0; i < minpf_args_nr; i++)
1991                         rec_argv[j++] = minpf_args[i];
1992 
1993         for (i = 0; i < (unsigned int)argc; i++)
1994                 rec_argv[j++] = argv[i];
1995 
1996         return cmd_record(j, rec_argv, NULL);
1997 }
1998 
1999 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2000 
2001 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2002 {
2003         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2004         if (evsel == NULL)
2005                 return;
2006 
2007         if (perf_evsel__field(evsel, "pathname") == NULL) {
2008                 perf_evsel__delete(evsel);
2009                 return;
2010         }
2011 
2012         evsel->handler = trace__vfs_getname;
2013         perf_evlist__add(evlist, evsel);
2014 }
2015 
2016 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2017                                     u64 config)
2018 {
2019         struct perf_evsel *evsel;
2020         struct perf_event_attr attr = {
2021                 .type = PERF_TYPE_SOFTWARE,
2022                 .mmap_data = 1,
2023         };
2024 
2025         attr.config = config;
2026         attr.sample_period = 1;
2027 
2028         event_attr_init(&attr);
2029 
2030         evsel = perf_evsel__new(&attr);
2031         if (!evsel)
2032                 return -ENOMEM;
2033 
2034         evsel->handler = trace__pgfault;
2035         perf_evlist__add(evlist, evsel);
2036 
2037         return 0;
2038 }
2039 
2040 static int trace__run(struct trace *trace, int argc, const char **argv)
2041 {
2042         struct perf_evlist *evlist = perf_evlist__new();
2043         struct perf_evsel *evsel;
2044         int err = -1, i;
2045         unsigned long before;
2046         const bool forks = argc > 0;
2047         bool draining = false;
2048 
2049         trace->live = true;
2050 
2051         if (evlist == NULL) {
2052                 fprintf(trace->output, "Not enough memory to run!\n");
2053                 goto out;
2054         }
2055 
2056         if (trace->trace_syscalls &&
2057             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2058                                            trace__sys_exit))
2059                 goto out_error_raw_syscalls;
2060 
2061         if (trace->trace_syscalls)
2062                 perf_evlist__add_vfs_getname(evlist);
2063 
2064         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2065             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2066                 goto out_error_mem;
2067         }
2068 
2069         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2070             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2071                 goto out_error_mem;
2072 
2073         if (trace->sched &&
2074             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2075                                    trace__sched_stat_runtime))
2076                 goto out_error_sched_stat_runtime;
2077 
2078         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2079         if (err < 0) {
2080                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2081                 goto out_delete_evlist;
2082         }
2083 
2084         err = trace__symbols_init(trace, evlist);
2085         if (err < 0) {
2086                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2087                 goto out_delete_evlist;
2088         }
2089 
2090         perf_evlist__config(evlist, &trace->opts);
2091 
2092         signal(SIGCHLD, sig_handler);
2093         signal(SIGINT, sig_handler);
2094 
2095         if (forks) {
2096                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2097                                                     argv, false, NULL);
2098                 if (err < 0) {
2099                         fprintf(trace->output, "Couldn't run the workload!\n");
2100                         goto out_delete_evlist;
2101                 }
2102         }
2103 
2104         err = perf_evlist__open(evlist);
2105         if (err < 0)
2106                 goto out_error_open;
2107 
2108         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2109         if (err < 0)
2110                 goto out_error_mmap;
2111 
2112         perf_evlist__enable(evlist);
2113 
2114         if (forks)
2115                 perf_evlist__start_workload(evlist);
2116 
2117         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2118 again:
2119         before = trace->nr_events;
2120 
2121         for (i = 0; i < evlist->nr_mmaps; i++) {
2122                 union perf_event *event;
2123 
2124                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2125                         const u32 type = event->header.type;
2126                         tracepoint_handler handler;
2127                         struct perf_sample sample;
2128 
2129                         ++trace->nr_events;
2130 
2131                         err = perf_evlist__parse_sample(evlist, event, &sample);
2132                         if (err) {
2133                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2134                                 goto next_event;
2135                         }
2136 
2137                         if (!trace->full_time && trace->base_time == 0)
2138                                 trace->base_time = sample.time;
2139 
2140                         if (type != PERF_RECORD_SAMPLE) {
2141                                 trace__process_event(trace, trace->host, event, &sample);
2142                                 continue;
2143                         }
2144 
2145                         evsel = perf_evlist__id2evsel(evlist, sample.id);
2146                         if (evsel == NULL) {
2147                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2148                                 goto next_event;
2149                         }
2150 
2151                         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2152                             sample.raw_data == NULL) {
2153                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154                                        perf_evsel__name(evsel), sample.tid,
2155                                        sample.cpu, sample.raw_size);
2156                                 goto next_event;
2157                         }
2158 
2159                         handler = evsel->handler;
2160                         handler(trace, evsel, event, &sample);
2161 next_event:
2162                         perf_evlist__mmap_consume(evlist, i);
2163 
2164                         if (interrupted)
2165                                 goto out_disable;
2166                 }
2167         }
2168 
2169         if (trace->nr_events == before) {
2170                 int timeout = done ? 100 : -1;
2171 
2172                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2173                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2174                                 draining = true;
2175 
2176                         goto again;
2177                 }
2178         } else {
2179                 goto again;
2180         }
2181 
2182 out_disable:
2183         perf_evlist__disable(evlist);
2184 
2185         if (!err) {
2186                 if (trace->summary)
2187                         trace__fprintf_thread_summary(trace, trace->output);
2188 
2189                 if (trace->show_tool_stats) {
2190                         fprintf(trace->output, "Stats:\n "
2191                                                " vfs_getname : %" PRIu64 "\n"
2192                                                " proc_getname: %" PRIu64 "\n",
2193                                 trace->stats.vfs_getname,
2194                                 trace->stats.proc_getname);
2195                 }
2196         }
2197 
2198 out_delete_evlist:
2199         perf_evlist__delete(evlist);
2200 out:
2201         trace->live = false;
2202         return err;
2203 {
2204         char errbuf[BUFSIZ];
2205 
2206 out_error_sched_stat_runtime:
2207         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2208         goto out_error;
2209 
2210 out_error_raw_syscalls:
2211         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2212         goto out_error;
2213 
2214 out_error_mmap:
2215         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2216         goto out_error;
2217 
2218 out_error_open:
2219         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2220 
2221 out_error:
2222         fprintf(trace->output, "%s\n", errbuf);
2223         goto out_delete_evlist;
2224 }
2225 out_error_mem:
2226         fprintf(trace->output, "Not enough memory to run!\n");
2227         goto out_delete_evlist;
2228 }
2229 
2230 static int trace__replay(struct trace *trace)
2231 {
2232         const struct perf_evsel_str_handler handlers[] = {
2233                 { "probe:vfs_getname",       trace__vfs_getname, },
2234         };
2235         struct perf_data_file file = {
2236                 .path  = input_name,
2237                 .mode  = PERF_DATA_MODE_READ,
2238         };
2239         struct perf_session *session;
2240         struct perf_evsel *evsel;
2241         int err = -1;
2242 
2243         trace->tool.sample        = trace__process_sample;
2244         trace->tool.mmap          = perf_event__process_mmap;
2245         trace->tool.mmap2         = perf_event__process_mmap2;
2246         trace->tool.comm          = perf_event__process_comm;
2247         trace->tool.exit          = perf_event__process_exit;
2248         trace->tool.fork          = perf_event__process_fork;
2249         trace->tool.attr          = perf_event__process_attr;
2250         trace->tool.tracing_data = perf_event__process_tracing_data;
2251         trace->tool.build_id      = perf_event__process_build_id;
2252 
2253         trace->tool.ordered_events = true;
2254         trace->tool.ordering_requires_timestamps = true;
2255 
2256         /* add tid to output */
2257         trace->multiple_threads = true;
2258 
2259         session = perf_session__new(&file, false, &trace->tool);
2260         if (session == NULL)
2261                 return -1;
2262 
2263         if (symbol__init(&session->header.env) < 0)
2264                 goto out;
2265 
2266         trace->host = &session->machines.host;
2267 
2268         err = perf_session__set_tracepoints_handlers(session, handlers);
2269         if (err)
2270                 goto out;
2271 
2272         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2273                                                      "raw_syscalls:sys_enter");
2274         /* older kernels have syscalls tp versus raw_syscalls */
2275         if (evsel == NULL)
2276                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2277                                                              "syscalls:sys_enter");
2278 
2279         if (evsel &&
2280             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2281             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2282                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2283                 goto out;
2284         }
2285 
2286         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2287                                                      "raw_syscalls:sys_exit");
2288         if (evsel == NULL)
2289                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2290                                                              "syscalls:sys_exit");
2291         if (evsel &&
2292             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2293             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2294                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2295                 goto out;
2296         }
2297 
2298         evlist__for_each(session->evlist, evsel) {
2299                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2300                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2301                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2302                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2303                         evsel->handler = trace__pgfault;
2304         }
2305 
2306         err = parse_target_str(trace);
2307         if (err != 0)
2308                 goto out;
2309 
2310         setup_pager();
2311 
2312         err = perf_session__process_events(session, &trace->tool);
2313         if (err)
2314                 pr_err("Failed to process events, error %d", err);
2315 
2316         else if (trace->summary)
2317                 trace__fprintf_thread_summary(trace, trace->output);
2318 
2319 out:
2320         perf_session__delete(session);
2321 
2322         return err;
2323 }
2324 
/* Print the heading of the per-thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2333 
2334 static size_t thread__dump_stats(struct thread_trace *ttrace,
2335                                  struct trace *trace, FILE *fp)
2336 {
2337         struct stats *stats;
2338         size_t printed = 0;
2339         struct syscall *sc;
2340         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2341 
2342         if (inode == NULL)
2343                 return 0;
2344 
2345         printed += fprintf(fp, "\n");
2346 
2347         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2348         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2349         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2350 
2351         /* each int_node is a syscall */
2352         while (inode) {
2353                 stats = inode->priv;
2354                 if (stats) {
2355                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2356                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2357                         double avg = avg_stats(stats);
2358                         double pct;
2359                         u64 n = (u64) stats->n;
2360 
2361                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2362                         avg /= NSEC_PER_MSEC;
2363 
2364                         sc = &trace->syscalls.table[inode->i];
2365                         printed += fprintf(fp, "   %-15s", sc->name);
2366                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2367                                            n, min, avg);
2368                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2369                 }
2370 
2371                 inode = intlist__next(inode);
2372         }
2373 
2374         printed += fprintf(fp, "\n\n");
2375 
2376         return printed;
2377 }
2378 
/* Context handed to the per-thread summary callback. */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running total of the fprintf() results */
};
2385 
2386 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2387 {
2388         struct summary_data *data = priv;
2389         FILE *fp = data->fp;
2390         size_t printed = data->printed;
2391         struct trace *trace = data->trace;
2392         struct thread_trace *ttrace = thread__priv(thread);
2393         double ratio;
2394 
2395         if (ttrace == NULL)
2396                 return 0;
2397 
2398         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2399 
2400         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2401         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2402         printed += fprintf(fp, "%.1f%%", ratio);
2403         if (ttrace->pfmaj)
2404                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2405         if (ttrace->pfmin)
2406                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2407         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2408         printed += thread__dump_stats(ttrace, trace, fp);
2409 
2410         data->printed += printed;
2411 
2412         return 0;
2413 }
2414 
2415 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2416 {
2417         struct summary_data data = {
2418                 .fp = fp,
2419                 .trace = trace
2420         };
2421         data.printed = trace__fprintf_threads_header(fp);
2422 
2423         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2424 
2425         return data.printed;
2426 }
2427 
2428 static int trace__set_duration(const struct option *opt, const char *str,
2429                                int unset __maybe_unused)
2430 {
2431         struct trace *trace = opt->value;
2432 
2433         trace->duration_filter = atof(str);
2434         return 0;
2435 }
2436 
2437 static int trace__open_output(struct trace *trace, const char *filename)
2438 {
2439         struct stat st;
2440 
2441         if (!stat(filename, &st) && st.st_size) {
2442                 char oldname[PATH_MAX];
2443 
2444                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2445                 unlink(oldname);
2446                 rename(filename, oldname);
2447         }
2448 
2449         trace->output = fopen(filename, "w");
2450 
2451         return trace->output == NULL ? -errno : 0;
2452 }
2453 
2454 static int parse_pagefaults(const struct option *opt, const char *str,
2455                             int unset __maybe_unused)
2456 {
2457         int *trace_pgfaults = opt->value;
2458 
2459         if (strcmp(str, "all") == 0)
2460                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2461         else if (strcmp(str, "maj") == 0)
2462                 *trace_pgfaults |= TRACE_PFMAJ;
2463         else if (strcmp(str, "min") == 0)
2464                 *trace_pgfaults |= TRACE_PFMIN;
2465         else
2466                 return -1;
2467 
2468         return 0;
2469 }
2470 
2471 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2472 {
2473         const char * const trace_usage[] = {
2474                 "perf trace [<options>] [<command>]",
2475                 "perf trace [<options>] -- <command> [<options>]",
2476                 "perf trace record [<options>] [<command>]",
2477                 "perf trace record [<options>] -- <command> [<options>]",
2478                 NULL
2479         };
2480         struct trace trace = {
2481                 .audit = {
2482                         .machine = audit_detect_machine(),
2483                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2484                 },
2485                 .syscalls = {
2486                         . max = -1,
2487                 },
2488                 .opts = {
2489                         .target = {
2490                                 .uid       = UINT_MAX,
2491                                 .uses_mmap = true,
2492                         },
2493                         .user_freq     = UINT_MAX,
2494                         .user_interval = ULLONG_MAX,
2495                         .no_buffering  = true,
2496                         .mmap_pages    = UINT_MAX,
2497                 },
2498                 .output = stdout,
2499                 .show_comm = true,
2500                 .trace_syscalls = true,
2501         };
2502         const char *output_name = NULL;
2503         const char *ev_qualifier_str = NULL;
2504         const struct option trace_options[] = {
2505         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2506                     "show the thread COMM next to its id"),
2507         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2508         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2509                     "list of events to trace"),
2510         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2511         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2512         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2513                     "trace events on existing process id"),
2514         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2515                     "trace events on existing thread id"),
2516         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2517                     "system-wide collection from all CPUs"),
2518         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2519                     "list of cpus to monitor"),
2520         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2521                     "child tasks do not inherit counters"),
2522         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2523                      "number of mmap data pages",
2524                      perf_evlist__parse_mmap_pages),
2525         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2526                    "user to profile"),
2527         OPT_CALLBACK(0, "duration", &trace, "float",
2528                      "show only events with duration > N.M ms",
2529                      trace__set_duration),
2530         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2531         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2532         OPT_BOOLEAN('T', "time", &trace.full_time,
2533                     "Show full timestamp, not time relative to first start"),
2534         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2535                     "Show only syscall summary with statistics"),
2536         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2537                     "Show all syscalls and summary with statistics"),
2538         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2539                      "Trace pagefaults", parse_pagefaults, "maj"),
2540         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2541         OPT_END()
2542         };
2543         int err;
2544         char bf[BUFSIZ];
2545 
2546         argc = parse_options(argc, argv, trace_options, trace_usage,
2547                              PARSE_OPT_STOP_AT_NON_OPTION);
2548 
2549         if (trace.trace_pgfaults) {
2550                 trace.opts.sample_address = true;
2551                 trace.opts.sample_time = true;
2552         }
2553 
2554         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2555                 return trace__record(&trace, argc-1, &argv[1]);
2556 
2557         /* summary_only implies summary option, but don't overwrite summary if set */
2558         if (trace.summary_only)
2559                 trace.summary = trace.summary_only;
2560 
2561         if (!trace.trace_syscalls && !trace.trace_pgfaults) {
2562                 pr_err("Please specify something to trace.\n");
2563                 return -1;
2564         }
2565 
2566         if (output_name != NULL) {
2567                 err = trace__open_output(&trace, output_name);
2568                 if (err < 0) {
2569                         perror("failed to create output file");
2570                         goto out;
2571                 }
2572         }
2573 
2574         if (ev_qualifier_str != NULL) {
2575                 const char *s = ev_qualifier_str;
2576 
2577                 trace.not_ev_qualifier = *s == '!';
2578                 if (trace.not_ev_qualifier)
2579                         ++s;
2580                 trace.ev_qualifier = strlist__new(true, s);
2581                 if (trace.ev_qualifier == NULL) {
2582                         fputs("Not enough memory to parse event qualifier",
2583                               trace.output);
2584                         err = -ENOMEM;
2585                         goto out_close;
2586                 }
2587         }
2588 
2589         err = target__validate(&trace.opts.target);
2590         if (err) {
2591                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2592                 fprintf(trace.output, "%s", bf);
2593                 goto out_close;
2594         }
2595 
2596         err = target__parse_uid(&trace.opts.target);
2597         if (err) {
2598                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2599                 fprintf(trace.output, "%s", bf);
2600                 goto out_close;
2601         }
2602 
2603         if (!argc && target__none(&trace.opts.target))
2604                 trace.opts.target.system_wide = true;
2605 
2606         if (input_name)
2607                 err = trace__replay(&trace);
2608         else
2609                 err = trace__run(&trace, argc, argv);
2610 
2611 out_close:
2612         if (output_name != NULL)
2613                 fclose(trace.output);
2614 out:
2615         return err;
2616 }
2617 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp