~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/perf/builtin-trace.c

Version: ~ [ linux-5.9-rc6 ] ~ [ linux-5.8.10 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.66 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.146 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.198 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.236 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.236 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 #include <traceevent/event-parse.h>
  2 #include "builtin.h"
  3 #include "util/color.h"
  4 #include "util/debug.h"
  5 #include "util/evlist.h"
  6 #include "util/machine.h"
  7 #include "util/session.h"
  8 #include "util/thread.h"
  9 #include "util/parse-options.h"
 10 #include "util/strlist.h"
 11 #include "util/intlist.h"
 12 #include "util/thread_map.h"
 13 #include "util/stat.h"
 14 #include "trace-event.h"
 15 #include "util/parse-events.h"
 16 
 17 #include <libaudit.h>
 18 #include <stdlib.h>
 19 #include <sys/mman.h>
 20 #include <linux/futex.h>
 21 
 22 /* For older distros: */
 23 #ifndef MAP_STACK
 24 # define MAP_STACK              0x20000
 25 #endif
 26 
 27 #ifndef MADV_HWPOISON
 28 # define MADV_HWPOISON          100
 29 #endif
 30 
 31 #ifndef MADV_MERGEABLE
 32 # define MADV_MERGEABLE         12
 33 #endif
 34 
 35 #ifndef MADV_UNMERGEABLE
 36 # define MADV_UNMERGEABLE       13
 37 #endif
 38 
 39 #ifndef EFD_SEMAPHORE
 40 # define EFD_SEMAPHORE          1
 41 #endif
 42 
 43 #ifndef EFD_NONBLOCK
 44 # define EFD_NONBLOCK           00004000
 45 #endif
 46 
 47 #ifndef EFD_CLOEXEC
 48 # define EFD_CLOEXEC            02000000
 49 #endif
 50 
 51 #ifndef O_CLOEXEC
 52 # define O_CLOEXEC              02000000
 53 #endif
 54 
 55 #ifndef SOCK_DCCP
 56 # define SOCK_DCCP              6
 57 #endif
 58 
 59 #ifndef SOCK_CLOEXEC
 60 # define SOCK_CLOEXEC           02000000
 61 #endif
 62 
 63 #ifndef SOCK_NONBLOCK
 64 # define SOCK_NONBLOCK          00004000
 65 #endif
 66 
 67 #ifndef MSG_CMSG_CLOEXEC
 68 # define MSG_CMSG_CLOEXEC       0x40000000
 69 #endif
 70 
 71 #ifndef PERF_FLAG_FD_NO_GROUP
 72 # define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
 73 #endif
 74 
 75 #ifndef PERF_FLAG_FD_OUTPUT
 76 # define PERF_FLAG_FD_OUTPUT            (1UL << 1)
 77 #endif
 78 
 79 #ifndef PERF_FLAG_PID_CGROUP
 80 # define PERF_FLAG_PID_CGROUP           (1UL << 2) /* pid=cgroup id, per-cpu mode only */
 81 #endif
 82 
 83 #ifndef PERF_FLAG_FD_CLOEXEC
 84 # define PERF_FLAG_FD_CLOEXEC           (1UL << 3) /* O_CLOEXEC */
 85 #endif
 86 
 87 
 88 struct tp_field {
 89         int offset;
 90         union {
 91                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
 92                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
 93         };
 94 };
 95 
 96 #define TP_UINT_FIELD(bits) \
 97 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 98 { \
 99         u##bits value; \
100         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
101         return value;  \
102 }
103 
104 TP_UINT_FIELD(8);
105 TP_UINT_FIELD(16);
106 TP_UINT_FIELD(32);
107 TP_UINT_FIELD(64);
108 
109 #define TP_UINT_FIELD__SWAPPED(bits) \
110 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
111 { \
112         u##bits value; \
113         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
114         return bswap_##bits(value);\
115 }
116 
117 TP_UINT_FIELD__SWAPPED(16);
118 TP_UINT_FIELD__SWAPPED(32);
119 TP_UINT_FIELD__SWAPPED(64);
120 
121 static int tp_field__init_uint(struct tp_field *field,
122                                struct format_field *format_field,
123                                bool needs_swap)
124 {
125         field->offset = format_field->offset;
126 
127         switch (format_field->size) {
128         case 1:
129                 field->integer = tp_field__u8;
130                 break;
131         case 2:
132                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
133                 break;
134         case 4:
135                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
136                 break;
137         case 8:
138                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
139                 break;
140         default:
141                 return -1;
142         }
143 
144         return 0;
145 }
146 
147 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
148 {
149         return sample->raw_data + field->offset;
150 }
151 
152 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
153 {
154         field->offset = format_field->offset;
155         field->pointer = tp_field__ptr;
156         return 0;
157 }
158 
159 struct syscall_tp {
160         struct tp_field id;
161         union {
162                 struct tp_field args, ret;
163         };
164 };
165 
166 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
167                                           struct tp_field *field,
168                                           const char *name)
169 {
170         struct format_field *format_field = perf_evsel__field(evsel, name);
171 
172         if (format_field == NULL)
173                 return -1;
174 
175         return tp_field__init_uint(field, format_field, evsel->needs_swap);
176 }
177 
178 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
179         ({ struct syscall_tp *sc = evsel->priv;\
180            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
181 
/*
 * Look up the tracepoint field called @name in @evsel and set up @field as
 * a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        if (fmt != NULL)
                return tp_field__init_ptr(field, fmt);

        return -1;
}

/*
 * Convenience wrapper: initialise the member @name of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv; \
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
197 
198 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
199 {
200         zfree(&evsel->priv);
201         perf_evsel__delete(evsel);
202 }
203 
204 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
205 {
206         evsel->priv = malloc(sizeof(struct syscall_tp));
207         if (evsel->priv != NULL) {
208                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
209                         goto out_delete;
210 
211                 evsel->handler = handler;
212                 return 0;
213         }
214 
215         return -ENOMEM;
216 
217 out_delete:
218         zfree(&evsel->priv);
219         return -ENOENT;
220 }
221 
/*
 * Create an evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and initialise its syscall_tp private data with
 * @handler.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialisation failed (the evsel is deleted in that case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
        if (evsel == NULL)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
241 
/*
 * Read the integer member @name of the syscall_tp attached to @evsel out of
 * @sample, through the accessor installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           struct tp_field *tpf__ = &fields->name; \
           tpf__->integer(tpf__, sample); })

/*
 * Same as above for a pointer member: yields the address of the field
 * inside @sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           struct tp_field *tpf__ = &fields->name; \
           tpf__->pointer(tpf__, sample); })
249 
250 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
251                                           void *sys_enter_handler,
252                                           void *sys_exit_handler)
253 {
254         int ret = -1;
255         struct perf_evsel *sys_enter, *sys_exit;
256 
257         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
258         if (sys_enter == NULL)
259                 goto out;
260 
261         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
262                 goto out_delete_sys_enter;
263 
264         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
265         if (sys_exit == NULL)
266                 goto out_delete_sys_enter;
267 
268         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
269                 goto out_delete_sys_exit;
270 
271         perf_evlist__add(evlist, sys_enter);
272         perf_evlist__add(evlist, sys_exit);
273 
274         ret = 0;
275 out:
276         return ret;
277 
278 out_delete_sys_exit:
279         perf_evsel__delete_priv(sys_exit);
280 out_delete_sys_enter:
281         perf_evsel__delete_priv(sys_enter);
282         goto out;
283 }
284 
285 
286 struct syscall_arg {
287         unsigned long val;
288         struct thread *thread;
289         struct trace  *trace;
290         void          *parm;
291         u8            idx;
292         u8            mask;
293 };
294 
295 struct strarray {
296         int         offset;
297         int         nr_entries;
298         const char **entries;
299 };
300 
301 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
302         .nr_entries = ARRAY_SIZE(array), \
303         .entries = array, \
304 }
305 
306 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
307         .offset     = off, \
308         .nr_entries = ARRAY_SIZE(array), \
309         .entries = array, \
310 }
311 
312 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
313                                                 const char *intfmt,
314                                                 struct syscall_arg *arg)
315 {
316         struct strarray *sa = arg->parm;
317         int idx = arg->val - sa->offset;
318 
319         if (idx < 0 || idx >= sa->nr_entries)
320                 return scnprintf(bf, size, intfmt, arg->val);
321 
322         return scnprintf(bf, size, "%s", sa->entries[idx]);
323 }
324 
/*
 * strarray formatter that prints values without a name in decimal.
 * Returns the number of characters written.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
332 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray formatter that prints values without a name in hexadecimal.
 * Returns the number of characters written.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
346 
347 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
348                                         struct syscall_arg *arg);
349 
350 #define SCA_FD syscall_arg__scnprintf_fd
351 
352 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
353                                            struct syscall_arg *arg)
354 {
355         int fd = arg->val;
356 
357         if (fd == AT_FDCWD)
358                 return scnprintf(bf, size, "CWD");
359 
360         return syscall_arg__scnprintf_fd(bf, size, arg);
361 }
362 
363 #define SCA_FDAT syscall_arg__scnprintf_fd_at
364 
/* Forward declaration; the definition is not part of this section. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
369 
370 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
371                                          struct syscall_arg *arg)
372 {
373         return scnprintf(bf, size, "%#lx", arg->val);
374 }
375 
376 #define SCA_HEX syscall_arg__scnprintf_hex
377 
378 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
379                                          struct syscall_arg *arg)
380 {
381         return scnprintf(bf, size, "%d", arg->val);
382 }
383 
384 #define SCA_INT syscall_arg__scnprintf_int
385 
386 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
387                                                struct syscall_arg *arg)
388 {
389         int printed = 0, prot = arg->val;
390 
391         if (prot == PROT_NONE)
392                 return scnprintf(bf, size, "NONE");
393 #define P_MMAP_PROT(n) \
394         if (prot & PROT_##n) { \
395                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
396                 prot &= ~PROT_##n; \
397         }
398 
399         P_MMAP_PROT(EXEC);
400         P_MMAP_PROT(READ);
401         P_MMAP_PROT(WRITE);
402 #ifdef PROT_SEM
403         P_MMAP_PROT(SEM);
404 #endif
405         P_MMAP_PROT(GROWSDOWN);
406         P_MMAP_PROT(GROWSUP);
407 #undef P_MMAP_PROT
408 
409         if (prot)
410                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
411 
412         return printed;
413 }
414 
415 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
416 
417 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
418                                                 struct syscall_arg *arg)
419 {
420         int printed = 0, flags = arg->val;
421 
422 #define P_MMAP_FLAG(n) \
423         if (flags & MAP_##n) { \
424                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
425                 flags &= ~MAP_##n; \
426         }
427 
428         P_MMAP_FLAG(SHARED);
429         P_MMAP_FLAG(PRIVATE);
430 #ifdef MAP_32BIT
431         P_MMAP_FLAG(32BIT);
432 #endif
433         P_MMAP_FLAG(ANONYMOUS);
434         P_MMAP_FLAG(DENYWRITE);
435         P_MMAP_FLAG(EXECUTABLE);
436         P_MMAP_FLAG(FILE);
437         P_MMAP_FLAG(FIXED);
438         P_MMAP_FLAG(GROWSDOWN);
439 #ifdef MAP_HUGETLB
440         P_MMAP_FLAG(HUGETLB);
441 #endif
442         P_MMAP_FLAG(LOCKED);
443         P_MMAP_FLAG(NONBLOCK);
444         P_MMAP_FLAG(NORESERVE);
445         P_MMAP_FLAG(POPULATE);
446         P_MMAP_FLAG(STACK);
447 #ifdef MAP_UNINITIALIZED
448         P_MMAP_FLAG(UNINITIALIZED);
449 #endif
450 #undef P_MMAP_FLAG
451 
452         if (flags)
453                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454 
455         return printed;
456 }
457 
458 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
459 
460 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
461                                                   struct syscall_arg *arg)
462 {
463         int printed = 0, flags = arg->val;
464 
465 #define P_MREMAP_FLAG(n) \
466         if (flags & MREMAP_##n) { \
467                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
468                 flags &= ~MREMAP_##n; \
469         }
470 
471         P_MREMAP_FLAG(MAYMOVE);
472 #ifdef MREMAP_FIXED
473         P_MREMAP_FLAG(FIXED);
474 #endif
475 #undef P_MREMAP_FLAG
476 
477         if (flags)
478                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
479 
480         return printed;
481 }
482 
483 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
484 
485 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
486                                                       struct syscall_arg *arg)
487 {
488         int behavior = arg->val;
489 
490         switch (behavior) {
491 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
492         P_MADV_BHV(NORMAL);
493         P_MADV_BHV(RANDOM);
494         P_MADV_BHV(SEQUENTIAL);
495         P_MADV_BHV(WILLNEED);
496         P_MADV_BHV(DONTNEED);
497         P_MADV_BHV(REMOVE);
498         P_MADV_BHV(DONTFORK);
499         P_MADV_BHV(DOFORK);
500         P_MADV_BHV(HWPOISON);
501 #ifdef MADV_SOFT_OFFLINE
502         P_MADV_BHV(SOFT_OFFLINE);
503 #endif
504         P_MADV_BHV(MERGEABLE);
505         P_MADV_BHV(UNMERGEABLE);
506 #ifdef MADV_HUGEPAGE
507         P_MADV_BHV(HUGEPAGE);
508 #endif
509 #ifdef MADV_NOHUGEPAGE
510         P_MADV_BHV(NOHUGEPAGE);
511 #endif
512 #ifdef MADV_DONTDUMP
513         P_MADV_BHV(DONTDUMP);
514 #endif
515 #ifdef MADV_DODUMP
516         P_MADV_BHV(DODUMP);
517 #endif
518 #undef P_MADV_PHV
519         default: break;
520         }
521 
522         return scnprintf(bf, size, "%#x", behavior);
523 }
524 
525 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
526 
527 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
528                                            struct syscall_arg *arg)
529 {
530         int printed = 0, op = arg->val;
531 
532         if (op == 0)
533                 return scnprintf(bf, size, "NONE");
534 #define P_CMD(cmd) \
535         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
536                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
537                 op &= ~LOCK_##cmd; \
538         }
539 
540         P_CMD(SH);
541         P_CMD(EX);
542         P_CMD(NB);
543         P_CMD(UN);
544         P_CMD(MAND);
545         P_CMD(RW);
546         P_CMD(READ);
547         P_CMD(WRITE);
548 #undef P_OP
549 
550         if (op)
551                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
552 
553         return printed;
554 }
555 
556 #define SCA_FLOCK syscall_arg__scnprintf_flock
557 
558 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
559 {
560         enum syscall_futex_args {
561                 SCF_UADDR   = (1 << 0),
562                 SCF_OP      = (1 << 1),
563                 SCF_VAL     = (1 << 2),
564                 SCF_TIMEOUT = (1 << 3),
565                 SCF_UADDR2  = (1 << 4),
566                 SCF_VAL3    = (1 << 5),
567         };
568         int op = arg->val;
569         int cmd = op & FUTEX_CMD_MASK;
570         size_t printed = 0;
571 
572         switch (cmd) {
573 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
574         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
575         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
576         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
577         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
578         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
579         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
580         P_FUTEX_OP(WAKE_OP);                                                      break;
581         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
582         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
583         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
584         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
585         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
586         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
587         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
588         }
589 
590         if (op & FUTEX_PRIVATE_FLAG)
591                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
592 
593         if (op & FUTEX_CLOCK_REALTIME)
594                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
595 
596         return printed;
597 }
598 
599 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
600 
601 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
602 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
603 
604 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
605 static DEFINE_STRARRAY(itimers);
606 
607 static const char *whences[] = { "SET", "CUR", "END",
608 #ifdef SEEK_DATA
609 "DATA",
610 #endif
611 #ifdef SEEK_HOLE
612 "HOLE",
613 #endif
614 };
615 static DEFINE_STRARRAY(whences);
616 
617 static const char *fcntl_cmds[] = {
618         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
619         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
620         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
621         "F_GETOWNER_UIDS",
622 };
623 static DEFINE_STRARRAY(fcntl_cmds);
624 
625 static const char *rlimit_resources[] = {
626         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
627         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
628         "RTTIME",
629 };
630 static DEFINE_STRARRAY(rlimit_resources);
631 
632 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
633 static DEFINE_STRARRAY(sighow);
634 
635 static const char *clockid[] = {
636         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
637         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
638 };
639 static DEFINE_STRARRAY(clockid);
640 
641 static const char *socket_families[] = {
642         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
643         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
644         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
645         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
646         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
647         "ALG", "NFC", "VSOCK",
648 };
649 static DEFINE_STRARRAY(socket_families);
650 
651 #ifndef SOCK_TYPE_MASK
652 #define SOCK_TYPE_MASK 0xf
653 #endif
654 
655 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
656                                                       struct syscall_arg *arg)
657 {
658         size_t printed;
659         int type = arg->val,
660             flags = type & ~SOCK_TYPE_MASK;
661 
662         type &= SOCK_TYPE_MASK;
663         /*
664          * Can't use a strarray, MIPS may override for ABI reasons.
665          */
666         switch (type) {
667 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
668         P_SK_TYPE(STREAM);
669         P_SK_TYPE(DGRAM);
670         P_SK_TYPE(RAW);
671         P_SK_TYPE(RDM);
672         P_SK_TYPE(SEQPACKET);
673         P_SK_TYPE(DCCP);
674         P_SK_TYPE(PACKET);
675 #undef P_SK_TYPE
676         default:
677                 printed = scnprintf(bf, size, "%#x", type);
678         }
679 
680 #define P_SK_FLAG(n) \
681         if (flags & SOCK_##n) { \
682                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
683                 flags &= ~SOCK_##n; \
684         }
685 
686         P_SK_FLAG(CLOEXEC);
687         P_SK_FLAG(NONBLOCK);
688 #undef P_SK_FLAG
689 
690         if (flags)
691                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
692 
693         return printed;
694 }
695 
696 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
697 
698 #ifndef MSG_PROBE
699 #define MSG_PROBE            0x10
700 #endif
701 #ifndef MSG_WAITFORONE
702 #define MSG_WAITFORONE  0x10000
703 #endif
704 #ifndef MSG_SENDPAGE_NOTLAST
705 #define MSG_SENDPAGE_NOTLAST 0x20000
706 #endif
707 #ifndef MSG_FASTOPEN
708 #define MSG_FASTOPEN         0x20000000
709 #endif
710 
711 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
712                                                struct syscall_arg *arg)
713 {
714         int printed = 0, flags = arg->val;
715 
716         if (flags == 0)
717                 return scnprintf(bf, size, "NONE");
718 #define P_MSG_FLAG(n) \
719         if (flags & MSG_##n) { \
720                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
721                 flags &= ~MSG_##n; \
722         }
723 
724         P_MSG_FLAG(OOB);
725         P_MSG_FLAG(PEEK);
726         P_MSG_FLAG(DONTROUTE);
727         P_MSG_FLAG(TRYHARD);
728         P_MSG_FLAG(CTRUNC);
729         P_MSG_FLAG(PROBE);
730         P_MSG_FLAG(TRUNC);
731         P_MSG_FLAG(DONTWAIT);
732         P_MSG_FLAG(EOR);
733         P_MSG_FLAG(WAITALL);
734         P_MSG_FLAG(FIN);
735         P_MSG_FLAG(SYN);
736         P_MSG_FLAG(CONFIRM);
737         P_MSG_FLAG(RST);
738         P_MSG_FLAG(ERRQUEUE);
739         P_MSG_FLAG(NOSIGNAL);
740         P_MSG_FLAG(MORE);
741         P_MSG_FLAG(WAITFORONE);
742         P_MSG_FLAG(SENDPAGE_NOTLAST);
743         P_MSG_FLAG(FASTOPEN);
744         P_MSG_FLAG(CMSG_CLOEXEC);
745 #undef P_MSG_FLAG
746 
747         if (flags)
748                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
749 
750         return printed;
751 }
752 
753 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
754 
755 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
756                                                  struct syscall_arg *arg)
757 {
758         size_t printed = 0;
759         int mode = arg->val;
760 
761         if (mode == F_OK) /* 0 */
762                 return scnprintf(bf, size, "F");
763 #define P_MODE(n) \
764         if (mode & n##_OK) { \
765                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
766                 mode &= ~n##_OK; \
767         }
768 
769         P_MODE(R);
770         P_MODE(W);
771         P_MODE(X);
772 #undef P_MODE
773 
774         if (mode)
775                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
776 
777         return printed;
778 }
779 
780 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
781 
782 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
783                                                struct syscall_arg *arg)
784 {
785         int printed = 0, flags = arg->val;
786 
787         if (!(flags & O_CREAT))
788                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
789 
790         if (flags == 0)
791                 return scnprintf(bf, size, "RDONLY");
792 #define P_FLAG(n) \
793         if (flags & O_##n) { \
794                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
795                 flags &= ~O_##n; \
796         }
797 
798         P_FLAG(APPEND);
799         P_FLAG(ASYNC);
800         P_FLAG(CLOEXEC);
801         P_FLAG(CREAT);
802         P_FLAG(DIRECT);
803         P_FLAG(DIRECTORY);
804         P_FLAG(EXCL);
805         P_FLAG(LARGEFILE);
806         P_FLAG(NOATIME);
807         P_FLAG(NOCTTY);
808 #ifdef O_NONBLOCK
809         P_FLAG(NONBLOCK);
810 #elif O_NDELAY
811         P_FLAG(NDELAY);
812 #endif
813 #ifdef O_PATH
814         P_FLAG(PATH);
815 #endif
816         P_FLAG(RDWR);
817 #ifdef O_DSYNC
818         if ((flags & O_SYNC) == O_SYNC)
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
820         else {
821                 P_FLAG(DSYNC);
822         }
823 #else
824         P_FLAG(SYNC);
825 #endif
826         P_FLAG(TRUNC);
827         P_FLAG(WRONLY);
828 #undef P_FLAG
829 
830         if (flags)
831                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
832 
833         return printed;
834 }
835 
836 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
837 
838 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
839                                                 struct syscall_arg *arg)
840 {
841         int printed = 0, flags = arg->val;
842 
843         if (flags == 0)
844                 return 0;
845 
846 #define P_FLAG(n) \
847         if (flags & PERF_FLAG_##n) { \
848                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
849                 flags &= ~PERF_FLAG_##n; \
850         }
851 
852         P_FLAG(FD_NO_GROUP);
853         P_FLAG(FD_OUTPUT);
854         P_FLAG(PID_CGROUP);
855         P_FLAG(FD_CLOEXEC);
856 #undef P_FLAG
857 
858         if (flags)
859                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 
861         return printed;
862 }
863 
864 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
865 
866 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
867                                                    struct syscall_arg *arg)
868 {
869         int printed = 0, flags = arg->val;
870 
871         if (flags == 0)
872                 return scnprintf(bf, size, "NONE");
873 #define P_FLAG(n) \
874         if (flags & EFD_##n) { \
875                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
876                 flags &= ~EFD_##n; \
877         }
878 
879         P_FLAG(SEMAPHORE);
880         P_FLAG(CLOEXEC);
881         P_FLAG(NONBLOCK);
882 #undef P_FLAG
883 
884         if (flags)
885                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
886 
887         return printed;
888 }
889 
890 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
891 
892 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
893                                                 struct syscall_arg *arg)
894 {
895         int printed = 0, flags = arg->val;
896 
897 #define P_FLAG(n) \
898         if (flags & O_##n) { \
899                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
900                 flags &= ~O_##n; \
901         }
902 
903         P_FLAG(CLOEXEC);
904         P_FLAG(NONBLOCK);
905 #undef P_FLAG
906 
907         if (flags)
908                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
909 
910         return printed;
911 }
912 
913 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
914 
915 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
916 {
917         int sig = arg->val;
918 
919         switch (sig) {
920 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
921         P_SIGNUM(HUP);
922         P_SIGNUM(INT);
923         P_SIGNUM(QUIT);
924         P_SIGNUM(ILL);
925         P_SIGNUM(TRAP);
926         P_SIGNUM(ABRT);
927         P_SIGNUM(BUS);
928         P_SIGNUM(FPE);
929         P_SIGNUM(KILL);
930         P_SIGNUM(USR1);
931         P_SIGNUM(SEGV);
932         P_SIGNUM(USR2);
933         P_SIGNUM(PIPE);
934         P_SIGNUM(ALRM);
935         P_SIGNUM(TERM);
936         P_SIGNUM(CHLD);
937         P_SIGNUM(CONT);
938         P_SIGNUM(STOP);
939         P_SIGNUM(TSTP);
940         P_SIGNUM(TTIN);
941         P_SIGNUM(TTOU);
942         P_SIGNUM(URG);
943         P_SIGNUM(XCPU);
944         P_SIGNUM(XFSZ);
945         P_SIGNUM(VTALRM);
946         P_SIGNUM(PROF);
947         P_SIGNUM(WINCH);
948         P_SIGNUM(IO);
949         P_SIGNUM(PWR);
950         P_SIGNUM(SYS);
951 #ifdef SIGEMT
952         P_SIGNUM(EMT);
953 #endif
954 #ifdef SIGSTKFLT
955         P_SIGNUM(STKFLT);
956 #endif
957 #ifdef SIGSWI
958         P_SIGNUM(SWI);
959 #endif
960         default: break;
961         }
962 
963         return scnprintf(bf, size, "%#x", sig);
964 }
965 
966 #define SCA_SIGNUM syscall_arg__scnprintf_signum
967 
968 #if defined(__i386__) || defined(__x86_64__)
969 /*
970  * FIXME: Make this available to all arches.
971  */
972 #define TCGETS          0x5401
973 
974 static const char *tioctls[] = {
975         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
976         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
977         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
978         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
979         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
980         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
981         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
982         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
983         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
984         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
985         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
986         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
987         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
988         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
989         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
990 };
991 
992 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
993 #endif /* defined(__i386__) || defined(__x86_64__) */
994 
995 #define STRARRAY(arg, name, array) \
996           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
997           .arg_parm      = { [arg] = &strarray__##array, }
998 
999 static struct syscall_fmt {
1000         const char *name;
1001         const char *alias;
1002         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1003         void       *arg_parm[6];
1004         bool       errmsg;
1005         bool       timeout;
1006         bool       hexret;
1007 } syscall_fmts[] = {
1008         { .name     = "access",     .errmsg = true,
1009           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
1010         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
1011         { .name     = "brk",        .hexret = true,
1012           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1013         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1014         { .name     = "close",      .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1016         { .name     = "connect",    .errmsg = true, },
1017         { .name     = "dup",        .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1019         { .name     = "dup2",       .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1021         { .name     = "dup3",       .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1023         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1024         { .name     = "eventfd2",   .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1026         { .name     = "faccessat",  .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1028         { .name     = "fadvise64",  .errmsg = true,
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030         { .name     = "fallocate",  .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032         { .name     = "fchdir",     .errmsg = true,
1033           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034         { .name     = "fchmod",     .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036         { .name     = "fchmodat",   .errmsg = true,
1037           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1038         { .name     = "fchown",     .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040         { .name     = "fchownat",   .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1042         { .name     = "fcntl",      .errmsg = true,
1043           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1044                              [1] = SCA_STRARRAY, /* cmd */ },
1045           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1046         { .name     = "fdatasync",  .errmsg = true,
1047           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048         { .name     = "flock",      .errmsg = true,
1049           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1050                              [1] = SCA_FLOCK, /* cmd */ }, },
1051         { .name     = "fsetxattr",  .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1054           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1056           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057         { .name     = "fstatfs",    .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059         { .name     = "fsync",    .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "ftruncate", .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063         { .name     = "futex",      .errmsg = true,
1064           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1065         { .name     = "futimesat", .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1067         { .name     = "getdents",   .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069         { .name     = "getdents64", .errmsg = true,
1070           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1071         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1072         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1073         { .name     = "ioctl",      .errmsg = true,
1074           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1075 #if defined(__i386__) || defined(__x86_64__)
1076 /*
1077  * FIXME: Make this available to all arches.
1078  */
1079                              [1] = SCA_STRHEXARRAY, /* cmd */
1080                              [2] = SCA_HEX, /* arg */ },
1081           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1082 #else
1083                              [2] = SCA_HEX, /* arg */ }, },
1084 #endif
1085         { .name     = "kill",       .errmsg = true,
1086           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1087         { .name     = "linkat",     .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1089         { .name     = "lseek",      .errmsg = true,
1090           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1091                              [2] = SCA_STRARRAY, /* whence */ },
1092           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1093         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1094         { .name     = "madvise",    .errmsg = true,
1095           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1096                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1097         { .name     = "mkdirat",    .errmsg = true,
1098           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1099         { .name     = "mknodat",    .errmsg = true,
1100           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1101         { .name     = "mlock",      .errmsg = true,
1102           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1103         { .name     = "mlockall",   .errmsg = true,
1104           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1105         { .name     = "mmap",       .hexret = true,
1106           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1107                              [2] = SCA_MMAP_PROT, /* prot */
1108                              [3] = SCA_MMAP_FLAGS, /* flags */
1109                              [4] = SCA_FD,        /* fd */ }, },
1110         { .name     = "mprotect",   .errmsg = true,
1111           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1112                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1113         { .name     = "mremap",     .hexret = true,
1114           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1115                              [3] = SCA_MREMAP_FLAGS, /* flags */
1116                              [4] = SCA_HEX, /* new_addr */ }, },
1117         { .name     = "munlock",    .errmsg = true,
1118           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1119         { .name     = "munmap",     .errmsg = true,
1120           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1121         { .name     = "name_to_handle_at", .errmsg = true,
1122           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1123         { .name     = "newfstatat", .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1125         { .name     = "open",       .errmsg = true,
1126           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1127         { .name     = "open_by_handle_at", .errmsg = true,
1128           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1129                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1130         { .name     = "openat",     .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1132                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1133         { .name     = "perf_event_open", .errmsg = true,
1134           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1135                              [2] = SCA_INT, /* cpu */
1136                              [3] = SCA_FD,  /* group_fd */
1137                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1138         { .name     = "pipe2",      .errmsg = true,
1139           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1140         { .name     = "poll",       .errmsg = true, .timeout = true, },
1141         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1142         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1143           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1144         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1145           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1146         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1147         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1148           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1149         { .name     = "pwritev",    .errmsg = true,
1150           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1151         { .name     = "read",       .errmsg = true,
1152           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1153         { .name     = "readlinkat", .errmsg = true,
1154           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1155         { .name     = "readv",      .errmsg = true,
1156           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1157         { .name     = "recvfrom",   .errmsg = true,
1158           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1159         { .name     = "recvmmsg",   .errmsg = true,
1160           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1161         { .name     = "recvmsg",    .errmsg = true,
1162           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1163         { .name     = "renameat",   .errmsg = true,
1164           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1165         { .name     = "rt_sigaction", .errmsg = true,
1166           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1167         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1168         { .name     = "rt_sigqueueinfo", .errmsg = true,
1169           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1170         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1171           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1172         { .name     = "select",     .errmsg = true, .timeout = true, },
1173         { .name     = "sendmmsg",    .errmsg = true,
1174           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1175         { .name     = "sendmsg",    .errmsg = true,
1176           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1177         { .name     = "sendto",     .errmsg = true,
1178           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1179         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1180         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1181         { .name     = "shutdown",   .errmsg = true,
1182           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1183         { .name     = "socket",     .errmsg = true,
1184           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1185                              [1] = SCA_SK_TYPE, /* type */ },
1186           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1187         { .name     = "socketpair", .errmsg = true,
1188           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1189                              [1] = SCA_SK_TYPE, /* type */ },
1190           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1191         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1192         { .name     = "symlinkat",  .errmsg = true,
1193           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1194         { .name     = "tgkill",     .errmsg = true,
1195           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1196         { .name     = "tkill",      .errmsg = true,
1197           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1198         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1199         { .name     = "unlinkat",   .errmsg = true,
1200           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1201         { .name     = "utimensat",  .errmsg = true,
1202           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1203         { .name     = "write",      .errmsg = true,
1204           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205         { .name     = "writev",     .errmsg = true,
1206           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 };
1208 
1209 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1210 {
1211         const struct syscall_fmt *fmt = fmtp;
1212         return strcmp(name, fmt->name);
1213 }
1214 
1215 static struct syscall_fmt *syscall_fmt__find(const char *name)
1216 {
1217         const int nmemb = ARRAY_SIZE(syscall_fmts);
1218         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1219 }
1220 
/*
 * Per-syscall state kept in trace->syscalls.table, indexed by syscall id
 * and filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of arguments, not counting a leading "nr" field */
	int		    nr_args;
	/* first argument field of the tracepoint format */
	struct format_field *args;
	/* name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* set when the event qualifier excludes this syscall */
	bool		    filtered;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* entry in syscall_fmts[], NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, nr_args entries */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameters, borrowed from ->fmt */
	void		    **arg_parm;
};
1232 
1233 static size_t fprintf_duration(unsigned long t, FILE *fp)
1234 {
1235         double duration = (double)t / NSEC_PER_MSEC;
1236         size_t printed = fprintf(fp, "(");
1237 
1238         if (duration >= 1.0)
1239                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1240         else if (duration >= 0.01)
1241                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1242         else
1243                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1244         return printed + fprintf(fp, "): ");
1245 }
1246 
1247 struct thread_trace {
1248         u64               entry_time;
1249         u64               exit_time;
1250         bool              entry_pending;
1251         unsigned long     nr_events;
1252         unsigned long     pfmaj, pfmin;
1253         char              *entry_str;
1254         double            runtime_ms;
1255         struct {
1256                 int       max;
1257                 char      **table;
1258         } paths;
1259 
1260         struct intlist *syscall_stats;
1261 };
1262 
1263 static struct thread_trace *thread_trace__new(void)
1264 {
1265         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1266 
1267         if (ttrace)
1268                 ttrace->paths.max = -1;
1269 
1270         ttrace->syscall_stats = intlist__new(NULL);
1271 
1272         return ttrace;
1273 }
1274 
1275 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1276 {
1277         struct thread_trace *ttrace;
1278 
1279         if (thread == NULL)
1280                 goto fail;
1281 
1282         if (thread__priv(thread) == NULL)
1283                 thread__set_priv(thread, thread_trace__new());
1284 
1285         if (thread__priv(thread) == NULL)
1286                 goto fail;
1287 
1288         ttrace = thread__priv(thread);
1289         ++ttrace->nr_events;
1290 
1291         return ttrace;
1292 fail:
1293         color_fprintf(fp, PERF_COLOR_RED,
1294                       "WARNING: not enough memory, dropping samples!\n");
1295         return NULL;
1296 }
1297 
1298 #define TRACE_PFMAJ             (1 << 0)
1299 #define TRACE_PFMIN             (1 << 1)
1300 
1301 struct trace {
1302         struct perf_tool        tool;
1303         struct {
1304                 int             machine;
1305                 int             open_id;
1306         }                       audit;
1307         struct {
1308                 int             max;
1309                 struct syscall  *table;
1310         } syscalls;
1311         struct record_opts      opts;
1312         struct perf_evlist      *evlist;
1313         struct machine          *host;
1314         struct thread           *current;
1315         u64                     base_time;
1316         FILE                    *output;
1317         unsigned long           nr_events;
1318         struct strlist          *ev_qualifier;
1319         const char              *last_vfs_getname;
1320         struct intlist          *tid_list;
1321         struct intlist          *pid_list;
1322         struct {
1323                 size_t          nr;
1324                 pid_t           *entries;
1325         }                       filter_pids;
1326         double                  duration_filter;
1327         double                  runtime_ms;
1328         struct {
1329                 u64             vfs_getname,
1330                                 proc_getname;
1331         } stats;
1332         bool                    not_ev_qualifier;
1333         bool                    live;
1334         bool                    full_time;
1335         bool                    sched;
1336         bool                    multiple_threads;
1337         bool                    summary;
1338         bool                    summary_only;
1339         bool                    show_comm;
1340         bool                    show_tool_stats;
1341         bool                    trace_syscalls;
1342         bool                    force;
1343         int                     trace_pgfaults;
1344 };
1345 
1346 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1347 {
1348         struct thread_trace *ttrace = thread__priv(thread);
1349 
1350         if (fd > ttrace->paths.max) {
1351                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1352 
1353                 if (npath == NULL)
1354                         return -1;
1355 
1356                 if (ttrace->paths.max != -1) {
1357                         memset(npath + ttrace->paths.max + 1, 0,
1358                                (fd - ttrace->paths.max) * sizeof(char *));
1359                 } else {
1360                         memset(npath, 0, (fd + 1) * sizeof(char *));
1361                 }
1362 
1363                 ttrace->paths.table = npath;
1364                 ttrace->paths.max   = fd;
1365         }
1366 
1367         ttrace->paths.table[fd] = strdup(pathname);
1368 
1369         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1370 }
1371 
1372 static int thread__read_fd_path(struct thread *thread, int fd)
1373 {
1374         char linkname[PATH_MAX], pathname[PATH_MAX];
1375         struct stat st;
1376         int ret;
1377 
1378         if (thread->pid_ == thread->tid) {
1379                 scnprintf(linkname, sizeof(linkname),
1380                           "/proc/%d/fd/%d", thread->pid_, fd);
1381         } else {
1382                 scnprintf(linkname, sizeof(linkname),
1383                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1384         }
1385 
1386         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1387                 return -1;
1388 
1389         ret = readlink(linkname, pathname, sizeof(pathname));
1390 
1391         if (ret < 0 || ret > st.st_size)
1392                 return -1;
1393 
1394         pathname[ret] = '\0';
1395         return trace__set_fd_pathname(thread, fd, pathname);
1396 }
1397 
1398 static const char *thread__fd_path(struct thread *thread, int fd,
1399                                    struct trace *trace)
1400 {
1401         struct thread_trace *ttrace = thread__priv(thread);
1402 
1403         if (ttrace == NULL)
1404                 return NULL;
1405 
1406         if (fd < 0)
1407                 return NULL;
1408 
1409         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1410                 if (!trace->live)
1411                         return NULL;
1412                 ++trace->stats.proc_getname;
1413                 if (thread__read_fd_path(thread, fd))
1414                         return NULL;
1415         }
1416 
1417         return ttrace->paths.table[fd];
1418 }
1419 
1420 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1421                                         struct syscall_arg *arg)
1422 {
1423         int fd = arg->val;
1424         size_t printed = scnprintf(bf, size, "%d", fd);
1425         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1426 
1427         if (path)
1428                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1429 
1430         return printed;
1431 }
1432 
1433 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1434                                               struct syscall_arg *arg)
1435 {
1436         int fd = arg->val;
1437         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1438         struct thread_trace *ttrace = thread__priv(arg->thread);
1439 
1440         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1441                 zfree(&ttrace->paths.table[fd]);
1442 
1443         return printed;
1444 }
1445 
1446 static bool trace__filter_duration(struct trace *trace, double t)
1447 {
1448         return t < (trace->duration_filter * NSEC_PER_MSEC);
1449 }
1450 
1451 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1452 {
1453         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1454 
1455         return fprintf(fp, "%10.3f ", ts);
1456 }
1457 
/*
 * Flags written by sig_handler().  They are volatile sig_atomic_t
 * because that is the only object type a signal handler may portably
 * store to, and volatile keeps code that tests them from caching the
 * value.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1466 
1467 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1468                                         u64 duration, u64 tstamp, FILE *fp)
1469 {
1470         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1471         printed += fprintf_duration(duration, fp);
1472 
1473         if (trace->multiple_threads) {
1474                 if (trace->show_comm)
1475                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1476                 printed += fprintf(fp, "%d ", thread->tid);
1477         }
1478 
1479         return printed;
1480 }
1481 
1482 static int trace__process_event(struct trace *trace, struct machine *machine,
1483                                 union perf_event *event, struct perf_sample *sample)
1484 {
1485         int ret = 0;
1486 
1487         switch (event->header.type) {
1488         case PERF_RECORD_LOST:
1489                 color_fprintf(trace->output, PERF_COLOR_RED,
1490                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1491                 ret = machine__process_lost_event(machine, event, sample);
1492         default:
1493                 ret = machine__process_event(machine, event, sample);
1494                 break;
1495         }
1496 
1497         return ret;
1498 }
1499 
1500 static int trace__tool_process(struct perf_tool *tool,
1501                                union perf_event *event,
1502                                struct perf_sample *sample,
1503                                struct machine *machine)
1504 {
1505         struct trace *trace = container_of(tool, struct trace, tool);
1506         return trace__process_event(trace, machine, event, sample);
1507 }
1508 
1509 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1510 {
1511         int err = symbol__init(NULL);
1512 
1513         if (err)
1514                 return err;
1515 
1516         trace->host = machine__new_host();
1517         if (trace->host == NULL)
1518                 return -ENOMEM;
1519 
1520         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1521                                             evlist->threads, trace__tool_process, false,
1522                                             trace->opts.proc_map_timeout);
1523         if (err)
1524                 symbol__exit();
1525 
1526         return err;
1527 }
1528 
1529 static int syscall__set_arg_fmts(struct syscall *sc)
1530 {
1531         struct format_field *field;
1532         int idx = 0;
1533 
1534         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1535         if (sc->arg_scnprintf == NULL)
1536                 return -1;
1537 
1538         if (sc->fmt)
1539                 sc->arg_parm = sc->fmt->arg_parm;
1540 
1541         for (field = sc->args; field; field = field->next) {
1542                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1543                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1544                 else if (field->flags & FIELD_IS_POINTER)
1545                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1546                 ++idx;
1547         }
1548 
1549         return 0;
1550 }
1551 
1552 static int trace__read_syscall_info(struct trace *trace, int id)
1553 {
1554         char tp_name[128];
1555         struct syscall *sc;
1556         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1557 
1558         if (name == NULL)
1559                 return -1;
1560 
1561         if (id > trace->syscalls.max) {
1562                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1563 
1564                 if (nsyscalls == NULL)
1565                         return -1;
1566 
1567                 if (trace->syscalls.max != -1) {
1568                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1569                                (id - trace->syscalls.max) * sizeof(*sc));
1570                 } else {
1571                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1572                 }
1573 
1574                 trace->syscalls.table = nsyscalls;
1575                 trace->syscalls.max   = id;
1576         }
1577 
1578         sc = trace->syscalls.table + id;
1579         sc->name = name;
1580 
1581         if (trace->ev_qualifier) {
1582                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1583 
1584                 if (!(in ^ trace->not_ev_qualifier)) {
1585                         sc->filtered = true;
1586                         /*
1587                          * No need to do read tracepoint information since this will be
1588                          * filtered out.
1589                          */
1590                         return 0;
1591                 }
1592         }
1593 
1594         sc->fmt  = syscall_fmt__find(sc->name);
1595 
1596         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1597         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1598 
1599         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1600                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1601                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1602         }
1603 
1604         if (sc->tp_format == NULL)
1605                 return -1;
1606 
1607         sc->args = sc->tp_format->format.fields;
1608         sc->nr_args = sc->tp_format->format.nr_fields;
1609         /* drop nr field - not relevant here; does not exist on older kernels */
1610         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1611                 sc->args = sc->args->next;
1612                 --sc->nr_args;
1613         }
1614 
1615         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1616 
1617         return syscall__set_arg_fmts(sc);
1618 }
1619 
1620 static int trace__validate_ev_qualifier(struct trace *trace)
1621 {
1622         int err = 0;
1623         struct str_node *pos;
1624 
1625         strlist__for_each(pos, trace->ev_qualifier) {
1626                 const char *sc = pos->s;
1627 
1628                 if (audit_name_to_syscall(sc, trace->audit.machine) < 0) {
1629                         if (err == 0) {
1630                                 fputs("Error:\tInvalid syscall ", trace->output);
1631                                 err = -EINVAL;
1632                         } else {
1633                                 fputs(", ", trace->output);
1634                         }
1635 
1636                         fputs(sc, trace->output);
1637                 }
1638         }
1639 
1640         if (err < 0) {
1641                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1642                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1643         }
1644 
1645         return err;
1646 }
1647 
1648 /*
1649  * args is to be interpreted as a series of longs but we need to handle
1650  * 8-byte unaligned accesses. args points to raw_data within the event
1651  * and raw_data is guaranteed to be 8-byte unaligned because it is
1652  * preceded by raw_size which is a u32. So we need to copy args to a temp
1653  * variable to read it. Most notably this avoids extended load instructions
1654  * on unaligned addresses
1655  */
1656 
1657 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1658                                       unsigned char *args, struct trace *trace,
1659                                       struct thread *thread)
1660 {
1661         size_t printed = 0;
1662         unsigned char *p;
1663         unsigned long val;
1664 
1665         if (sc->args != NULL) {
1666                 struct format_field *field;
1667                 u8 bit = 1;
1668                 struct syscall_arg arg = {
1669                         .idx    = 0,
1670                         .mask   = 0,
1671                         .trace  = trace,
1672                         .thread = thread,
1673                 };
1674 
1675                 for (field = sc->args; field;
1676                      field = field->next, ++arg.idx, bit <<= 1) {
1677                         if (arg.mask & bit)
1678                                 continue;
1679 
1680                         /* special care for unaligned accesses */
1681                         p = args + sizeof(unsigned long) * arg.idx;
1682                         memcpy(&val, p, sizeof(val));
1683 
1684                         /*
1685                          * Suppress this argument if its value is zero and
1686                          * and we don't have a string associated in an
1687                          * strarray for it.
1688                          */
1689                         if (val == 0 &&
1690                             !(sc->arg_scnprintf &&
1691                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1692                               sc->arg_parm[arg.idx]))
1693                                 continue;
1694 
1695                         printed += scnprintf(bf + printed, size - printed,
1696                                              "%s%s: ", printed ? ", " : "", field->name);
1697                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1698                                 arg.val = val;
1699                                 if (sc->arg_parm)
1700                                         arg.parm = sc->arg_parm[arg.idx];
1701                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1702                                                                       size - printed, &arg);
1703                         } else {
1704                                 printed += scnprintf(bf + printed, size - printed,
1705                                                      "%ld", val);
1706                         }
1707                 }
1708         } else {
1709                 int i = 0;
1710 
1711                 while (i < 6) {
1712                         /* special care for unaligned accesses */
1713                         p = args + sizeof(unsigned long) * i;
1714                         memcpy(&val, p, sizeof(val));
1715                         printed += scnprintf(bf + printed, size - printed,
1716                                              "%sarg%d: %ld",
1717                                              printed ? ", " : "", i, val);
1718                         ++i;
1719                 }
1720         }
1721 
1722         return printed;
1723 }
1724 
/*
 * Signature of the per-event sample handlers in this file; it is the type
 * that evsel->handler is read into before being called.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1728 
1729 static struct syscall *trace__syscall_info(struct trace *trace,
1730                                            struct perf_evsel *evsel, int id)
1731 {
1732 
1733         if (id < 0) {
1734 
1735                 /*
1736                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1737                  * before that, leaving at a higher verbosity level till that is
1738                  * explained. Reproduced with plain ftrace with:
1739                  *
1740                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1741                  * grep "NR -1 " /t/trace_pipe
1742                  *
1743                  * After generating some load on the machine.
1744                  */
1745                 if (verbose > 1) {
1746                         static u64 n;
1747                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1748                                 id, perf_evsel__name(evsel), ++n);
1749                 }
1750                 return NULL;
1751         }
1752 
1753         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1754             trace__read_syscall_info(trace, id))
1755                 goto out_cant_read;
1756 
1757         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1758                 goto out_cant_read;
1759 
1760         return &trace->syscalls.table[id];
1761 
1762 out_cant_read:
1763         if (verbose) {
1764                 fprintf(trace->output, "Problems reading syscall %d", id);
1765                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1766                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1767                 fputs(" information\n", trace->output);
1768         }
1769         return NULL;
1770 }
1771 
1772 static void thread__update_stats(struct thread_trace *ttrace,
1773                                  int id, struct perf_sample *sample)
1774 {
1775         struct int_node *inode;
1776         struct stats *stats;
1777         u64 duration = 0;
1778 
1779         inode = intlist__findnew(ttrace->syscall_stats, id);
1780         if (inode == NULL)
1781                 return;
1782 
1783         stats = inode->priv;
1784         if (stats == NULL) {
1785                 stats = malloc(sizeof(struct stats));
1786                 if (stats == NULL)
1787                         return;
1788                 init_stats(stats);
1789                 inode->priv = stats;
1790         }
1791 
1792         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1793                 duration = sample->time - ttrace->entry_time;
1794 
1795         update_stats(stats, duration);
1796 }
1797 
1798 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1799 {
1800         struct thread_trace *ttrace;
1801         u64 duration;
1802         size_t printed;
1803 
1804         if (trace->current == NULL)
1805                 return 0;
1806 
1807         ttrace = thread__priv(trace->current);
1808 
1809         if (!ttrace->entry_pending)
1810                 return 0;
1811 
1812         duration = sample->time - ttrace->entry_time;
1813 
1814         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1815         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1816         ttrace->entry_pending = false;
1817 
1818         return printed;
1819 }
1820 
1821 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1822                             union perf_event *event __maybe_unused,
1823                             struct perf_sample *sample)
1824 {
1825         char *msg;
1826         void *args;
1827         size_t printed = 0;
1828         struct thread *thread;
1829         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1830         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1831         struct thread_trace *ttrace;
1832 
1833         if (sc == NULL)
1834                 return -1;
1835 
1836         if (sc->filtered)
1837                 return 0;
1838 
1839         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1840         ttrace = thread__trace(thread, trace->output);
1841         if (ttrace == NULL)
1842                 goto out_put;
1843 
1844         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1845 
1846         if (ttrace->entry_str == NULL) {
1847                 ttrace->entry_str = malloc(1024);
1848                 if (!ttrace->entry_str)
1849                         goto out_put;
1850         }
1851 
1852         if (!trace->summary_only)
1853                 trace__printf_interrupted_entry(trace, sample);
1854 
1855         ttrace->entry_time = sample->time;
1856         msg = ttrace->entry_str;
1857         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1858 
1859         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1860                                            args, trace, thread);
1861 
1862         if (sc->is_exit) {
1863                 if (!trace->duration_filter && !trace->summary_only) {
1864                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1865                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1866                 }
1867         } else
1868                 ttrace->entry_pending = true;
1869 
1870         if (trace->current != thread) {
1871                 thread__put(trace->current);
1872                 trace->current = thread__get(thread);
1873         }
1874         err = 0;
1875 out_put:
1876         thread__put(thread);
1877         return err;
1878 }
1879 
1880 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1881                            union perf_event *event __maybe_unused,
1882                            struct perf_sample *sample)
1883 {
1884         long ret;
1885         u64 duration = 0;
1886         struct thread *thread;
1887         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1888         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1889         struct thread_trace *ttrace;
1890 
1891         if (sc == NULL)
1892                 return -1;
1893 
1894         if (sc->filtered)
1895                 return 0;
1896 
1897         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1898         ttrace = thread__trace(thread, trace->output);
1899         if (ttrace == NULL)
1900                 goto out_put;
1901 
1902         if (trace->summary)
1903                 thread__update_stats(ttrace, id, sample);
1904 
1905         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1906 
1907         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1908                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1909                 trace->last_vfs_getname = NULL;
1910                 ++trace->stats.vfs_getname;
1911         }
1912 
1913         ttrace->exit_time = sample->time;
1914 
1915         if (ttrace->entry_time) {
1916                 duration = sample->time - ttrace->entry_time;
1917                 if (trace__filter_duration(trace, duration))
1918                         goto out;
1919         } else if (trace->duration_filter)
1920                 goto out;
1921 
1922         if (trace->summary_only)
1923                 goto out;
1924 
1925         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1926 
1927         if (ttrace->entry_pending) {
1928                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1929         } else {
1930                 fprintf(trace->output, " ... [");
1931                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1932                 fprintf(trace->output, "]: %s()", sc->name);
1933         }
1934 
1935         if (sc->fmt == NULL) {
1936 signed_print:
1937                 fprintf(trace->output, ") = %ld", ret);
1938         } else if (ret < 0 && sc->fmt->errmsg) {
1939                 char bf[STRERR_BUFSIZE];
1940                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1941                            *e = audit_errno_to_name(-ret);
1942 
1943                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1944         } else if (ret == 0 && sc->fmt->timeout)
1945                 fprintf(trace->output, ") = 0 Timeout");
1946         else if (sc->fmt->hexret)
1947                 fprintf(trace->output, ") = %#lx", ret);
1948         else
1949                 goto signed_print;
1950 
1951         fputc('\n', trace->output);
1952 out:
1953         ttrace->entry_pending = false;
1954         err = 0;
1955 out_put:
1956         thread__put(thread);
1957         return err;
1958 }
1959 
1960 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1961                               union perf_event *event __maybe_unused,
1962                               struct perf_sample *sample)
1963 {
1964         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1965         return 0;
1966 }
1967 
1968 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1969                                      union perf_event *event __maybe_unused,
1970                                      struct perf_sample *sample)
1971 {
1972         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1973         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1974         struct thread *thread = machine__findnew_thread(trace->host,
1975                                                         sample->pid,
1976                                                         sample->tid);
1977         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1978 
1979         if (ttrace == NULL)
1980                 goto out_dump;
1981 
1982         ttrace->runtime_ms += runtime_ms;
1983         trace->runtime_ms += runtime_ms;
1984         thread__put(thread);
1985         return 0;
1986 
1987 out_dump:
1988         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1989                evsel->name,
1990                perf_evsel__strval(evsel, sample, "comm"),
1991                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1992                runtime,
1993                perf_evsel__intval(evsel, sample, "vruntime"));
1994         thread__put(thread);
1995         return 0;
1996 }
1997 
1998 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1999                                 union perf_event *event __maybe_unused,
2000                                 struct perf_sample *sample)
2001 {
2002         trace__printf_interrupted_entry(trace, sample);
2003         trace__fprintf_tstamp(trace, sample->time, trace->output);
2004 
2005         if (trace->trace_syscalls)
2006                 fprintf(trace->output, "(         ): ");
2007 
2008         fprintf(trace->output, "%s:", evsel->name);
2009 
2010         if (evsel->tp_format) {
2011                 event_format__fprintf(evsel->tp_format, sample->cpu,
2012                                       sample->raw_data, sample->raw_size,
2013                                       trace->output);
2014         }
2015 
2016         fprintf(trace->output, ")\n");
2017         return 0;
2018 }
2019 
2020 static void print_location(FILE *f, struct perf_sample *sample,
2021                            struct addr_location *al,
2022                            bool print_dso, bool print_sym)
2023 {
2024 
2025         if ((verbose || print_dso) && al->map)
2026                 fprintf(f, "%s@", al->map->dso->long_name);
2027 
2028         if ((verbose || print_sym) && al->sym)
2029                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2030                         al->addr - al->sym->start);
2031         else if (al->map)
2032                 fprintf(f, "0x%" PRIx64, al->addr);
2033         else
2034                 fprintf(f, "0x%" PRIx64, sample->addr);
2035 }
2036 
2037 static int trace__pgfault(struct trace *trace,
2038                           struct perf_evsel *evsel,
2039                           union perf_event *event,
2040                           struct perf_sample *sample)
2041 {
2042         struct thread *thread;
2043         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2044         struct addr_location al;
2045         char map_type = 'd';
2046         struct thread_trace *ttrace;
2047         int err = -1;
2048 
2049         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2050         ttrace = thread__trace(thread, trace->output);
2051         if (ttrace == NULL)
2052                 goto out_put;
2053 
2054         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2055                 ttrace->pfmaj++;
2056         else
2057                 ttrace->pfmin++;
2058 
2059         if (trace->summary_only)
2060                 goto out;
2061 
2062         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2063                               sample->ip, &al);
2064 
2065         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2066 
2067         fprintf(trace->output, "%sfault [",
2068                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2069                 "maj" : "min");
2070 
2071         print_location(trace->output, sample, &al, false, true);
2072 
2073         fprintf(trace->output, "] => ");
2074 
2075         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2076                                    sample->addr, &al);
2077 
2078         if (!al.map) {
2079                 thread__find_addr_location(thread, cpumode,
2080                                            MAP__FUNCTION, sample->addr, &al);
2081 
2082                 if (al.map)
2083                         map_type = 'x';
2084                 else
2085                         map_type = '?';
2086         }
2087 
2088         print_location(trace->output, sample, &al, true, false);
2089 
2090         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2091 out:
2092         err = 0;
2093 out_put:
2094         thread__put(thread);
2095         return err;
2096 }
2097 
2098 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2099 {
2100         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2101             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2102                 return false;
2103 
2104         if (trace->pid_list || trace->tid_list)
2105                 return true;
2106 
2107         return false;
2108 }
2109 
2110 static int trace__process_sample(struct perf_tool *tool,
2111                                  union perf_event *event,
2112                                  struct perf_sample *sample,
2113                                  struct perf_evsel *evsel,
2114                                  struct machine *machine __maybe_unused)
2115 {
2116         struct trace *trace = container_of(tool, struct trace, tool);
2117         int err = 0;
2118 
2119         tracepoint_handler handler = evsel->handler;
2120 
2121         if (skip_sample(trace, sample))
2122                 return 0;
2123 
2124         if (!trace->full_time && trace->base_time == 0)
2125                 trace->base_time = sample->time;
2126 
2127         if (handler) {
2128                 ++trace->nr_events;
2129                 handler(trace, evsel, event, sample);
2130         }
2131 
2132         return err;
2133 }
2134 
2135 static int parse_target_str(struct trace *trace)
2136 {
2137         if (trace->opts.target.pid) {
2138                 trace->pid_list = intlist__new(trace->opts.target.pid);
2139                 if (trace->pid_list == NULL) {
2140                         pr_err("Error parsing process id string\n");
2141                         return -EINVAL;
2142                 }
2143         }
2144 
2145         if (trace->opts.target.tid) {
2146                 trace->tid_list = intlist__new(trace->opts.target.tid);
2147                 if (trace->tid_list == NULL) {
2148                         pr_err("Error parsing thread id string\n");
2149                         return -EINVAL;
2150                 }
2151         }
2152 
2153         return 0;
2154 }
2155 
2156 static int trace__record(struct trace *trace, int argc, const char **argv)
2157 {
2158         unsigned int rec_argc, i, j;
2159         const char **rec_argv;
2160         const char * const record_args[] = {
2161                 "record",
2162                 "-R",
2163                 "-m", "1024",
2164                 "-c", "1",
2165         };
2166 
2167         const char * const sc_args[] = { "-e", };
2168         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2169         const char * const majpf_args[] = { "-e", "major-faults" };
2170         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2171         const char * const minpf_args[] = { "-e", "minor-faults" };
2172         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2173 
2174         /* +1 is for the event string below */
2175         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2176                 majpf_args_nr + minpf_args_nr + argc;
2177         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2178 
2179         if (rec_argv == NULL)
2180                 return -ENOMEM;
2181 
2182         j = 0;
2183         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2184                 rec_argv[j++] = record_args[i];
2185 
2186         if (trace->trace_syscalls) {
2187                 for (i = 0; i < sc_args_nr; i++)
2188                         rec_argv[j++] = sc_args[i];
2189 
2190                 /* event string may be different for older kernels - e.g., RHEL6 */
2191                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2192                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2193                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2194                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2195                 else {
2196                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2197                         return -1;
2198                 }
2199         }
2200 
2201         if (trace->trace_pgfaults & TRACE_PFMAJ)
2202                 for (i = 0; i < majpf_args_nr; i++)
2203                         rec_argv[j++] = majpf_args[i];
2204 
2205         if (trace->trace_pgfaults & TRACE_PFMIN)
2206                 for (i = 0; i < minpf_args_nr; i++)
2207                         rec_argv[j++] = minpf_args[i];
2208 
2209         for (i = 0; i < (unsigned int)argc; i++)
2210                 rec_argv[j++] = argv[i];
2211 
2212         return cmd_record(j, rec_argv, NULL);
2213 }
2214 
2215 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2216 
2217 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2218 {
2219         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2220         if (evsel == NULL)
2221                 return;
2222 
2223         if (perf_evsel__field(evsel, "pathname") == NULL) {
2224                 perf_evsel__delete(evsel);
2225                 return;
2226         }
2227 
2228         evsel->handler = trace__vfs_getname;
2229         perf_evlist__add(evlist, evsel);
2230 }
2231 
2232 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2233                                     u64 config)
2234 {
2235         struct perf_evsel *evsel;
2236         struct perf_event_attr attr = {
2237                 .type = PERF_TYPE_SOFTWARE,
2238                 .mmap_data = 1,
2239         };
2240 
2241         attr.config = config;
2242         attr.sample_period = 1;
2243 
2244         event_attr_init(&attr);
2245 
2246         evsel = perf_evsel__new(&attr);
2247         if (!evsel)
2248                 return -ENOMEM;
2249 
2250         evsel->handler = trace__pgfault;
2251         perf_evlist__add(evlist, evsel);
2252 
2253         return 0;
2254 }
2255 
2256 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2257 {
2258         const u32 type = event->header.type;
2259         struct perf_evsel *evsel;
2260 
2261         if (!trace->full_time && trace->base_time == 0)
2262                 trace->base_time = sample->time;
2263 
2264         if (type != PERF_RECORD_SAMPLE) {
2265                 trace__process_event(trace, trace->host, event, sample);
2266                 return;
2267         }
2268 
2269         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2270         if (evsel == NULL) {
2271                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2272                 return;
2273         }
2274 
2275         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2276             sample->raw_data == NULL) {
2277                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2278                        perf_evsel__name(evsel), sample->tid,
2279                        sample->cpu, sample->raw_size);
2280         } else {
2281                 tracepoint_handler handler = evsel->handler;
2282                 handler(trace, evsel, event, sample);
2283         }
2284 }
2285 
2286 static int trace__run(struct trace *trace, int argc, const char **argv)
2287 {
2288         struct perf_evlist *evlist = trace->evlist;
2289         int err = -1, i;
2290         unsigned long before;
2291         const bool forks = argc > 0;
2292         bool draining = false;
2293 
2294         trace->live = true;
2295 
2296         if (trace->trace_syscalls &&
2297             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2298                                            trace__sys_exit))
2299                 goto out_error_raw_syscalls;
2300 
2301         if (trace->trace_syscalls)
2302                 perf_evlist__add_vfs_getname(evlist);
2303 
2304         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2305             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2306                 goto out_error_mem;
2307         }
2308 
2309         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2310             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2311                 goto out_error_mem;
2312 
2313         if (trace->sched &&
2314             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2315                                    trace__sched_stat_runtime))
2316                 goto out_error_sched_stat_runtime;
2317 
2318         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2319         if (err < 0) {
2320                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2321                 goto out_delete_evlist;
2322         }
2323 
2324         err = trace__symbols_init(trace, evlist);
2325         if (err < 0) {
2326                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2327                 goto out_delete_evlist;
2328         }
2329 
2330         perf_evlist__config(evlist, &trace->opts);
2331 
2332         signal(SIGCHLD, sig_handler);
2333         signal(SIGINT, sig_handler);
2334 
2335         if (forks) {
2336                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2337                                                     argv, false, NULL);
2338                 if (err < 0) {
2339                         fprintf(trace->output, "Couldn't run the workload!\n");
2340                         goto out_delete_evlist;
2341                 }
2342         }
2343 
2344         err = perf_evlist__open(evlist);
2345         if (err < 0)
2346                 goto out_error_open;
2347 
2348         /*
2349          * Better not use !target__has_task() here because we need to cover the
2350          * case where no threads were specified in the command line, but a
2351          * workload was, and in that case we will fill in the thread_map when
2352          * we fork the workload in perf_evlist__prepare_workload.
2353          */
2354         if (trace->filter_pids.nr > 0)
2355                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2356         else if (thread_map__pid(evlist->threads, 0) == -1)
2357                 err = perf_evlist__set_filter_pid(evlist, getpid());
2358 
2359         if (err < 0) {
2360                 printf("err=%d,%s\n", -err, strerror(-err));
2361                 exit(1);
2362         }
2363 
2364         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2365         if (err < 0)
2366                 goto out_error_mmap;
2367 
2368         if (!target__none(&trace->opts.target))
2369                 perf_evlist__enable(evlist);
2370 
2371         if (forks)
2372                 perf_evlist__start_workload(evlist);
2373 
2374         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2375                                   evlist->threads->nr > 1 ||
2376                                   perf_evlist__first(evlist)->attr.inherit;
2377 again:
2378         before = trace->nr_events;
2379 
2380         for (i = 0; i < evlist->nr_mmaps; i++) {
2381                 union perf_event *event;
2382 
2383                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2384                         struct perf_sample sample;
2385 
2386                         ++trace->nr_events;
2387 
2388                         err = perf_evlist__parse_sample(evlist, event, &sample);
2389                         if (err) {
2390                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2391                                 goto next_event;
2392                         }
2393 
2394                         trace__handle_event(trace, event, &sample);
2395 next_event:
2396                         perf_evlist__mmap_consume(evlist, i);
2397 
2398                         if (interrupted)
2399                                 goto out_disable;
2400 
2401                         if (done && !draining) {
2402                                 perf_evlist__disable(evlist);
2403                                 draining = true;
2404                         }
2405                 }
2406         }
2407 
2408         if (trace->nr_events == before) {
2409                 int timeout = done ? 100 : -1;
2410 
2411                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2412                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2413                                 draining = true;
2414 
2415                         goto again;
2416                 }
2417         } else {
2418                 goto again;
2419         }
2420 
2421 out_disable:
2422         thread__zput(trace->current);
2423 
2424         perf_evlist__disable(evlist);
2425 
2426         if (!err) {
2427                 if (trace->summary)
2428                         trace__fprintf_thread_summary(trace, trace->output);
2429 
2430                 if (trace->show_tool_stats) {
2431                         fprintf(trace->output, "Stats:\n "
2432                                                " vfs_getname : %" PRIu64 "\n"
2433                                                " proc_getname: %" PRIu64 "\n",
2434                                 trace->stats.vfs_getname,
2435                                 trace->stats.proc_getname);
2436                 }
2437         }
2438 
2439 out_delete_evlist:
2440         perf_evlist__delete(evlist);
2441         trace->evlist = NULL;
2442         trace->live = false;
2443         return err;
2444 {
2445         char errbuf[BUFSIZ];
2446 
2447 out_error_sched_stat_runtime:
2448         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2449         goto out_error;
2450 
2451 out_error_raw_syscalls:
2452         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2453         goto out_error;
2454 
2455 out_error_mmap:
2456         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2457         goto out_error;
2458 
2459 out_error_open:
2460         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2461 
2462 out_error:
2463         fprintf(trace->output, "%s\n", errbuf);
2464         goto out_delete_evlist;
2465 }
2466 out_error_mem:
2467         fprintf(trace->output, "Not enough memory to run!\n");
2468         goto out_delete_evlist;
2469 }
2470 
2471 static int trace__replay(struct trace *trace)
2472 {
2473         const struct perf_evsel_str_handler handlers[] = {
2474                 { "probe:vfs_getname",       trace__vfs_getname, },
2475         };
2476         struct perf_data_file file = {
2477                 .path  = input_name,
2478                 .mode  = PERF_DATA_MODE_READ,
2479                 .force = trace->force,
2480         };
2481         struct perf_session *session;
2482         struct perf_evsel *evsel;
2483         int err = -1;
2484 
2485         trace->tool.sample        = trace__process_sample;
2486         trace->tool.mmap          = perf_event__process_mmap;
2487         trace->tool.mmap2         = perf_event__process_mmap2;
2488         trace->tool.comm          = perf_event__process_comm;
2489         trace->tool.exit          = perf_event__process_exit;
2490         trace->tool.fork          = perf_event__process_fork;
2491         trace->tool.attr          = perf_event__process_attr;
2492         trace->tool.tracing_data = perf_event__process_tracing_data;
2493         trace->tool.build_id      = perf_event__process_build_id;
2494 
2495         trace->tool.ordered_events = true;
2496         trace->tool.ordering_requires_timestamps = true;
2497 
2498         /* add tid to output */
2499         trace->multiple_threads = true;
2500 
2501         session = perf_session__new(&file, false, &trace->tool);
2502         if (session == NULL)
2503                 return -1;
2504 
2505         if (symbol__init(&session->header.env) < 0)
2506                 goto out;
2507 
2508         trace->host = &session->machines.host;
2509 
2510         err = perf_session__set_tracepoints_handlers(session, handlers);
2511         if (err)
2512                 goto out;
2513 
2514         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2515                                                      "raw_syscalls:sys_enter");
2516         /* older kernels have syscalls tp versus raw_syscalls */
2517         if (evsel == NULL)
2518                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2519                                                              "syscalls:sys_enter");
2520 
2521         if (evsel &&
2522             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2523             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2524                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2525                 goto out;
2526         }
2527 
2528         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2529                                                      "raw_syscalls:sys_exit");
2530         if (evsel == NULL)
2531                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2532                                                              "syscalls:sys_exit");
2533         if (evsel &&
2534             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2535             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2536                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2537                 goto out;
2538         }
2539 
2540         evlist__for_each(session->evlist, evsel) {
2541                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2542                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2543                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2544                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2545                         evsel->handler = trace__pgfault;
2546         }
2547 
2548         err = parse_target_str(trace);
2549         if (err != 0)
2550                 goto out;
2551 
2552         setup_pager();
2553 
2554         err = perf_session__process_events(session);
2555         if (err)
2556                 pr_err("Failed to process events, error %d", err);
2557 
2558         else if (trace->summary)
2559                 trace__fprintf_thread_summary(trace, trace->output);
2560 
2561 out:
2562         perf_session__delete(session);
2563 
2564         return err;
2565 }
2566 
/*
 * Write the banner that precedes the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2575 
2576 static size_t thread__dump_stats(struct thread_trace *ttrace,
2577                                  struct trace *trace, FILE *fp)
2578 {
2579         struct stats *stats;
2580         size_t printed = 0;
2581         struct syscall *sc;
2582         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2583 
2584         if (inode == NULL)
2585                 return 0;
2586 
2587         printed += fprintf(fp, "\n");
2588 
2589         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2590         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2591         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2592 
2593         /* each int_node is a syscall */
2594         while (inode) {
2595                 stats = inode->priv;
2596                 if (stats) {
2597                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2598                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2599                         double avg = avg_stats(stats);
2600                         double pct;
2601                         u64 n = (u64) stats->n;
2602 
2603                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2604                         avg /= NSEC_PER_MSEC;
2605 
2606                         sc = &trace->syscalls.table[inode->i];
2607                         printed += fprintf(fp, "   %-15s", sc->name);
2608                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2609                                            n, min, avg);
2610                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2611                 }
2612 
2613                 inode = intlist__next(inode);
2614         }
2615 
2616         printed += fprintf(fp, "\n\n");
2617 
2618         return printed;
2619 }
2620 
/*
 * struct used to pass data to per-thread function
 *
 * Handed to trace__fprintf_one_thread() through its opaque 'priv' argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace whose threads are being summarized */
        size_t printed;         /* accumulated fprintf() return values */
};
2627 
2628 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2629 {
2630         struct summary_data *data = priv;
2631         FILE *fp = data->fp;
2632         size_t printed = data->printed;
2633         struct trace *trace = data->trace;
2634         struct thread_trace *ttrace = thread__priv(thread);
2635         double ratio;
2636 
2637         if (ttrace == NULL)
2638                 return 0;
2639 
2640         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2641 
2642         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2643         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2644         printed += fprintf(fp, "%.1f%%", ratio);
2645         if (ttrace->pfmaj)
2646                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2647         if (ttrace->pfmin)
2648                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2649         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2650         printed += thread__dump_stats(ttrace, trace, fp);
2651 
2652         data->printed += printed;
2653 
2654         return 0;
2655 }
2656 
2657 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2658 {
2659         struct summary_data data = {
2660                 .fp = fp,
2661                 .trace = trace
2662         };
2663         data.printed = trace__fprintf_threads_header(fp);
2664 
2665         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2666 
2667         return data.printed;
2668 }
2669 
2670 static int trace__set_duration(const struct option *opt, const char *str,
2671                                int unset __maybe_unused)
2672 {
2673         struct trace *trace = opt->value;
2674 
2675         trace->duration_filter = atof(str);
2676         return 0;
2677 }
2678 
2679 static int trace__set_filter_pids(const struct option *opt, const char *str,
2680                                   int unset __maybe_unused)
2681 {
2682         int ret = -1;
2683         size_t i;
2684         struct trace *trace = opt->value;
2685         /*
2686          * FIXME: introduce a intarray class, plain parse csv and create a
2687          * { int nr, int entries[] } struct...
2688          */
2689         struct intlist *list = intlist__new(str);
2690 
2691         if (list == NULL)
2692                 return -1;
2693 
2694         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2695         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2696 
2697         if (trace->filter_pids.entries == NULL)
2698                 goto out;
2699 
2700         trace->filter_pids.entries[0] = getpid();
2701 
2702         for (i = 1; i < trace->filter_pids.nr; ++i)
2703                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2704 
2705         intlist__delete(list);
2706         ret = 0;
2707 out:
2708         return ret;
2709 }
2710 
2711 static int trace__open_output(struct trace *trace, const char *filename)
2712 {
2713         struct stat st;
2714 
2715         if (!stat(filename, &st) && st.st_size) {
2716                 char oldname[PATH_MAX];
2717 
2718                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2719                 unlink(oldname);
2720                 rename(filename, oldname);
2721         }
2722 
2723         trace->output = fopen(filename, "w");
2724 
2725         return trace->output == NULL ? -errno : 0;
2726 }
2727 
2728 static int parse_pagefaults(const struct option *opt, const char *str,
2729                             int unset __maybe_unused)
2730 {
2731         int *trace_pgfaults = opt->value;
2732 
2733         if (strcmp(str, "all") == 0)
2734                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2735         else if (strcmp(str, "maj") == 0)
2736                 *trace_pgfaults |= TRACE_PFMAJ;
2737         else if (strcmp(str, "min") == 0)
2738                 *trace_pgfaults |= TRACE_PFMIN;
2739         else
2740                 return -1;
2741 
2742         return 0;
2743 }
2744 
2745 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2746 {
2747         struct perf_evsel *evsel;
2748 
2749         evlist__for_each(evlist, evsel)
2750                 evsel->handler = handler;
2751 }
2752 
2753 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2754 {
2755         const char *trace_usage[] = {
2756                 "perf trace [<options>] [<command>]",
2757                 "perf trace [<options>] -- <command> [<options>]",
2758                 "perf trace record [<options>] [<command>]",
2759                 "perf trace record [<options>] -- <command> [<options>]",
2760                 NULL
2761         };
2762         struct trace trace = {
2763                 .audit = {
2764                         .machine = audit_detect_machine(),
2765                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2766                 },
2767                 .syscalls = {
2768                         . max = -1,
2769                 },
2770                 .opts = {
2771                         .target = {
2772                                 .uid       = UINT_MAX,
2773                                 .uses_mmap = true,
2774                         },
2775                         .user_freq     = UINT_MAX,
2776                         .user_interval = ULLONG_MAX,
2777                         .no_buffering  = true,
2778                         .mmap_pages    = UINT_MAX,
2779                         .proc_map_timeout  = 500,
2780                 },
2781                 .output = stdout,
2782                 .show_comm = true,
2783                 .trace_syscalls = true,
2784         };
2785         const char *output_name = NULL;
2786         const char *ev_qualifier_str = NULL;
2787         const struct option trace_options[] = {
2788         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2789                      "event selector. use 'perf list' to list available events",
2790                      parse_events_option),
2791         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2792                     "show the thread COMM next to its id"),
2793         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2794         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2795         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2796         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2797         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2798                     "trace events on existing process id"),
2799         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2800                     "trace events on existing thread id"),
2801         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2802                      "pids to filter (by the kernel)", trace__set_filter_pids),
2803         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2804                     "system-wide collection from all CPUs"),
2805         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2806                     "list of cpus to monitor"),
2807         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2808                     "child tasks do not inherit counters"),
2809         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2810                      "number of mmap data pages",
2811                      perf_evlist__parse_mmap_pages),
2812         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2813                    "user to profile"),
2814         OPT_CALLBACK(0, "duration", &trace, "float",
2815                      "show only events with duration > N.M ms",
2816                      trace__set_duration),
2817         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2818         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2819         OPT_BOOLEAN('T', "time", &trace.full_time,
2820                     "Show full timestamp, not time relative to first start"),
2821         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2822                     "Show only syscall summary with statistics"),
2823         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2824                     "Show all syscalls and summary with statistics"),
2825         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2826                      "Trace pagefaults", parse_pagefaults, "maj"),
2827         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2828         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2829         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2830                         "per thread proc mmap processing timeout in ms"),
2831         OPT_END()
2832         };
2833         const char * const trace_subcommands[] = { "record", NULL };
2834         int err;
2835         char bf[BUFSIZ];
2836 
2837         signal(SIGSEGV, sighandler_dump_stack);
2838         signal(SIGFPE, sighandler_dump_stack);
2839 
2840         trace.evlist = perf_evlist__new();
2841 
2842         if (trace.evlist == NULL) {
2843                 pr_err("Not enough memory to run!\n");
2844                 err = -ENOMEM;
2845                 goto out;
2846         }
2847 
2848         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2849                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2850 
2851         if (trace.trace_pgfaults) {
2852                 trace.opts.sample_address = true;
2853                 trace.opts.sample_time = true;
2854         }
2855 
2856         if (trace.evlist->nr_entries > 0)
2857                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2858 
2859         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2860                 return trace__record(&trace, argc-1, &argv[1]);
2861 
2862         /* summary_only implies summary option, but don't overwrite summary if set */
2863         if (trace.summary_only)
2864                 trace.summary = trace.summary_only;
2865 
2866         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2867             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2868                 pr_err("Please specify something to trace.\n");
2869                 return -1;
2870         }
2871 
2872         if (output_name != NULL) {
2873                 err = trace__open_output(&trace, output_name);
2874                 if (err < 0) {
2875                         perror("failed to create output file");
2876                         goto out;
2877                 }
2878         }
2879 
2880         if (ev_qualifier_str != NULL) {
2881                 const char *s = ev_qualifier_str;
2882 
2883                 trace.not_ev_qualifier = *s == '!';
2884                 if (trace.not_ev_qualifier)
2885                         ++s;
2886                 trace.ev_qualifier = strlist__new(true, s);
2887                 if (trace.ev_qualifier == NULL) {
2888                         fputs("Not enough memory to parse event qualifier",
2889                               trace.output);
2890                         err = -ENOMEM;
2891                         goto out_close;
2892                 }
2893 
2894                 err = trace__validate_ev_qualifier(&trace);
2895                 if (err)
2896                         goto out_close;
2897         }
2898 
2899         err = target__validate(&trace.opts.target);
2900         if (err) {
2901                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2902                 fprintf(trace.output, "%s", bf);
2903                 goto out_close;
2904         }
2905 
2906         err = target__parse_uid(&trace.opts.target);
2907         if (err) {
2908                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2909                 fprintf(trace.output, "%s", bf);
2910                 goto out_close;
2911         }
2912 
2913         if (!argc && target__none(&trace.opts.target))
2914                 trace.opts.target.system_wide = true;
2915 
2916         if (input_name)
2917                 err = trace__replay(&trace);
2918         else
2919                 err = trace__run(&trace, argc, argv);
2920 
2921 out_close:
2922         if (output_name != NULL)
2923                 fclose(trace.output);
2924 out:
2925         return err;
2926 }
2927 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp