
TOMOYO Linux Cross Reference
Linux/tools/perf/builtin-trace.c


  1 /*
  2  * builtin-trace.c
  3  *
  4  * Builtin 'trace' command:
  5  *
  6  * Display a continuously updated trace of any workload, CPU, specific PID,
  7  * system wide, etc.  The default format is loosely strace-like, but any other
  8  * event may be specified using --event.
  9  *
 10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 11  *
 12  * Initially based on the 'trace' prototype by Thomas Gleixner:
 13  *
 14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 15  *
 16  * Released under the GPL v2. (and only v2, not any later version)
 17  */
 18 
 19 #include <traceevent/event-parse.h>
 20 #include <api/fs/tracing_path.h>
 21 #include "builtin.h"
 22 #include "util/color.h"
 23 #include "util/debug.h"
 24 #include "util/event.h"
 25 #include "util/evlist.h"
 26 #include <subcmd/exec-cmd.h>
 27 #include "util/machine.h"
 28 #include "util/path.h"
 29 #include "util/session.h"
 30 #include "util/thread.h"
 31 #include <subcmd/parse-options.h>
 32 #include "util/strlist.h"
 33 #include "util/intlist.h"
 34 #include "util/thread_map.h"
 35 #include "util/stat.h"
 36 #include "trace/beauty/beauty.h"
 37 #include "trace-event.h"
 38 #include "util/parse-events.h"
 39 #include "util/bpf-loader.h"
 40 #include "callchain.h"
 41 #include "print_binary.h"
 42 #include "string2.h"
 43 #include "syscalltbl.h"
 44 #include "rb_resort.h"
 45 
 46 #include <errno.h>
 47 #include <inttypes.h>
 48 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
 49 #include <poll.h>
 50 #include <signal.h>
 51 #include <stdlib.h>
 52 #include <string.h>
 53 #include <linux/err.h>
 54 #include <linux/filter.h>
 55 #include <linux/audit.h>
 56 #include <linux/kernel.h>
 57 #include <linux/random.h>
 58 #include <linux/stringify.h>
 59 #include <linux/time64.h>
 60 
 61 #include "sane_ctype.h"
 62 
 63 #ifndef O_CLOEXEC
 64 # define O_CLOEXEC              02000000
 65 #endif
 66 
 67 struct trace {
 68         struct perf_tool        tool;
 69         struct syscalltbl       *sctbl;
 70         struct {
 71                 int             max;
 72                 struct syscall  *table;
 73                 struct {
 74                         struct perf_evsel *sys_enter,
 75                                           *sys_exit;
 76                 }               events;
 77         } syscalls;
 78         struct record_opts      opts;
 79         struct perf_evlist      *evlist;
 80         struct machine          *host;
 81         struct thread           *current;
 82         u64                     base_time;
 83         FILE                    *output;
 84         unsigned long           nr_events;
 85         struct strlist          *ev_qualifier;
 86         struct {
 87                 size_t          nr;
 88                 int             *entries;
 89         }                       ev_qualifier_ids;
 90         struct {
 91                 size_t          nr;
 92                 pid_t           *entries;
 93         }                       filter_pids;
 94         double                  duration_filter;
 95         double                  runtime_ms;
 96         struct {
 97                 u64             vfs_getname,
 98                                 proc_getname;
 99         } stats;
100         unsigned int            max_stack;
101         unsigned int            min_stack;
102         bool                    not_ev_qualifier;
103         bool                    live;
104         bool                    full_time;
105         bool                    sched;
106         bool                    multiple_threads;
107         bool                    summary;
108         bool                    summary_only;
109         bool                    show_comm;
110         bool                    show_tool_stats;
111         bool                    trace_syscalls;
112         bool                    kernel_syscallchains;
113         bool                    force;
114         bool                    vfs_getname;
115         int                     trace_pgfaults;
116         int                     open_id;
117 };
118 
119 struct tp_field {
120         int offset;
121         union {
122                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
123                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
124         };
125 };
126 
127 #define TP_UINT_FIELD(bits) \
128 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
129 { \
130         u##bits value; \
131         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
132         return value;  \
133 }
134 
135 TP_UINT_FIELD(8);
136 TP_UINT_FIELD(16);
137 TP_UINT_FIELD(32);
138 TP_UINT_FIELD(64);
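/*
 * For reference, TP_UINT_FIELD(16) above expands to roughly the following
 * accessor, which copies the 16-bit field out of the raw sample data and
 * widens it to u64 (shown here only as a sketch of the macro expansion):
 *
 *   static u64 tp_field__u16(struct tp_field *field, struct perf_sample *sample)
 *   {
 *           u16 value;
 *           memcpy(&value, sample->raw_data + field->offset, sizeof(value));
 *           return value;
 *   }
 */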
139 
140 #define TP_UINT_FIELD__SWAPPED(bits) \
141 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
142 { \
143         u##bits value; \
144         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
145         return bswap_##bits(value);\
146 }
147 
148 TP_UINT_FIELD__SWAPPED(16);
149 TP_UINT_FIELD__SWAPPED(32);
150 TP_UINT_FIELD__SWAPPED(64);
151 
152 static int tp_field__init_uint(struct tp_field *field,
153                                struct format_field *format_field,
154                                bool needs_swap)
155 {
156         field->offset = format_field->offset;
157 
158         switch (format_field->size) {
159         case 1:
160                 field->integer = tp_field__u8;
161                 break;
162         case 2:
163                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
164                 break;
165         case 4:
166                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
167                 break;
168         case 8:
169                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
170                 break;
171         default:
172                 return -1;
173         }
174 
175         return 0;
176 }
177 
178 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
179 {
180         return sample->raw_data + field->offset;
181 }
182 
183 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
184 {
185         field->offset = format_field->offset;
186         field->pointer = tp_field__ptr;
187         return 0;
188 }
189 
190 struct syscall_tp {
191         struct tp_field id;
192         union {
193                 struct tp_field args, ret;
194         };
195 };
196 
197 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
198                                           struct tp_field *field,
199                                           const char *name)
200 {
201         struct format_field *format_field = perf_evsel__field(evsel, name);
202 
203         if (format_field == NULL)
204                 return -1;
205 
206         return tp_field__init_uint(field, format_field, evsel->needs_swap);
207 }
208 
209 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
210         ({ struct syscall_tp *sc = evsel->priv;\
211            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
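/*
 * Illustrative expansion: perf_evsel__init_sc_tp_uint_field(evsel, id)
 * becomes, roughly,
 *
 *   ({ struct syscall_tp *sc = evsel->priv;
 *      perf_evsel__init_tp_uint_field(evsel, &sc->id, "id"); })
 *
 * i.e. the same token selects the struct syscall_tp member and, via #name,
 * the tracepoint field name looked up with perf_evsel__field().
 */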
212 
213 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
214                                          struct tp_field *field,
215                                          const char *name)
216 {
217         struct format_field *format_field = perf_evsel__field(evsel, name);
218 
219         if (format_field == NULL)
220                 return -1;
221 
222         return tp_field__init_ptr(field, format_field);
223 }
224 
225 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
226         ({ struct syscall_tp *sc = evsel->priv;\
227            perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
228 
229 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
230 {
231         zfree(&evsel->priv);
232         perf_evsel__delete(evsel);
233 }
234 
235 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
236 {
237         evsel->priv = malloc(sizeof(struct syscall_tp));
238         if (evsel->priv != NULL) {
239                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
240                         goto out_delete;
241 
242                 evsel->handler = handler;
243                 return 0;
244         }
245 
246         return -ENOMEM;
247 
248 out_delete:
249         zfree(&evsel->priv);
250         return -ENOENT;
251 }
252 
253 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
254 {
255         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
256 
 257         /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
258         if (IS_ERR(evsel))
259                 evsel = perf_evsel__newtp("syscalls", direction);
260 
261         if (IS_ERR(evsel))
262                 return NULL;
263 
264         if (perf_evsel__init_syscall_tp(evsel, handler))
265                 goto out_delete;
266 
267         return evsel;
268 
269 out_delete:
270         perf_evsel__delete_priv(evsel);
271         return NULL;
272 }
273 
274 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
275         ({ struct syscall_tp *fields = evsel->priv; \
276            fields->name.integer(&fields->name, sample); })
277 
278 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
279         ({ struct syscall_tp *fields = evsel->priv; \
280            fields->name.pointer(&fields->name, sample); })
281 
282 struct strarray {
283         int         offset;
284         int         nr_entries;
285         const char **entries;
286 };
287 
288 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
289         .nr_entries = ARRAY_SIZE(array), \
290         .entries = array, \
291 }
292 
293 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
294         .offset     = off, \
295         .nr_entries = ARRAY_SIZE(array), \
296         .entries = array, \
297 }
298 
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300                                                 const char *intfmt,
301                                                 struct syscall_arg *arg)
302 {
303         struct strarray *sa = arg->parm;
304         int idx = arg->val - sa->offset;
305 
306         if (idx < 0 || idx >= sa->nr_entries)
307                 return scnprintf(bf, size, intfmt, arg->val);
308 
309         return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311 
312 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
313                                               struct syscall_arg *arg)
314 {
315         return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
316 }
317 
318 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
319 
320 #if defined(__i386__) || defined(__x86_64__)
321 /*
322  * FIXME: Make this available to all arches as soon as the ioctl beautifier
323  *        gets rewritten to support all arches.
324  */
325 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
326                                                  struct syscall_arg *arg)
327 {
328         return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
329 }
330 
331 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
332 #endif /* defined(__i386__) || defined(__x86_64__) */
333 
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335                                         struct syscall_arg *arg);
336 
337 #define SCA_FD syscall_arg__scnprintf_fd
338 
339 #ifndef AT_FDCWD
340 #define AT_FDCWD        -100
341 #endif
342 
343 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
344                                            struct syscall_arg *arg)
345 {
346         int fd = arg->val;
347 
348         if (fd == AT_FDCWD)
349                 return scnprintf(bf, size, "CWD");
350 
351         return syscall_arg__scnprintf_fd(bf, size, arg);
352 }
353 
354 #define SCA_FDAT syscall_arg__scnprintf_fd_at
355 
356 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
357                                               struct syscall_arg *arg);
358 
359 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
360 
361 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
362                                          struct syscall_arg *arg)
363 {
364         return scnprintf(bf, size, "%#lx", arg->val);
365 }
366 
367 #define SCA_HEX syscall_arg__scnprintf_hex
368 
369 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
370                                          struct syscall_arg *arg)
371 {
372         return scnprintf(bf, size, "%d", arg->val);
373 }
374 
375 #define SCA_INT syscall_arg__scnprintf_int
376 
377 static const char *bpf_cmd[] = {
378         "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
379         "MAP_GET_NEXT_KEY", "PROG_LOAD",
380 };
381 static DEFINE_STRARRAY(bpf_cmd);
382 
383 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
384 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
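/*
 * With the offset of 1 above, an epoll_ctl() 'op' value of 1 (EPOLL_CTL_ADD)
 * indexes entry 0 and is printed as "ADD"; values outside the table fall
 * back to the plain integer format in __syscall_arg__scnprintf_strarray().
 */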
385 
386 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
387 static DEFINE_STRARRAY(itimers);
388 
389 static const char *keyctl_options[] = {
390         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
391         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
392         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
393         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
394         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
395 };
396 static DEFINE_STRARRAY(keyctl_options);
397 
398 static const char *whences[] = { "SET", "CUR", "END",
399 #ifdef SEEK_DATA
400 "DATA",
401 #endif
402 #ifdef SEEK_HOLE
403 "HOLE",
404 #endif
405 };
406 static DEFINE_STRARRAY(whences);
407 
408 static const char *fcntl_cmds[] = {
409         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
410         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
411         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
412         "F_GETOWNER_UIDS",
413 };
414 static DEFINE_STRARRAY(fcntl_cmds);
415 
416 static const char *rlimit_resources[] = {
417         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
418         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
419         "RTTIME",
420 };
421 static DEFINE_STRARRAY(rlimit_resources);
422 
423 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
424 static DEFINE_STRARRAY(sighow);
425 
426 static const char *clockid[] = {
427         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
428         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
429         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
430 };
431 static DEFINE_STRARRAY(clockid);
432 
433 static const char *socket_families[] = {
434         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
435         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
436         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
437         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
438         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
439         "ALG", "NFC", "VSOCK",
440 };
441 static DEFINE_STRARRAY(socket_families);
442 
443 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
444                                                  struct syscall_arg *arg)
445 {
446         size_t printed = 0;
447         int mode = arg->val;
448 
449         if (mode == F_OK) /* 0 */
450                 return scnprintf(bf, size, "F");
451 #define P_MODE(n) \
452         if (mode & n##_OK) { \
453                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
454                 mode &= ~n##_OK; \
455         }
456 
457         P_MODE(R);
458         P_MODE(W);
459         P_MODE(X);
460 #undef P_MODE
461 
462         if (mode)
463                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
464 
465         return printed;
466 }
467 
468 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
469 
470 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
471                                               struct syscall_arg *arg);
472 
473 #define SCA_FILENAME syscall_arg__scnprintf_filename
474 
475 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
476                                                 struct syscall_arg *arg)
477 {
478         int printed = 0, flags = arg->val;
479 
480 #define P_FLAG(n) \
481         if (flags & O_##n) { \
482                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
483                 flags &= ~O_##n; \
484         }
485 
486         P_FLAG(CLOEXEC);
487         P_FLAG(NONBLOCK);
488 #undef P_FLAG
489 
490         if (flags)
491                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
492 
493         return printed;
494 }
495 
496 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
497 
498 #if defined(__i386__) || defined(__x86_64__)
499 /*
500  * FIXME: Make this available to all arches.
501  */
502 #define TCGETS          0x5401
503 
504 static const char *tioctls[] = {
505         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
506         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
507         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
508         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
509         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
510         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
511         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
512         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
513         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
514         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
515         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
516         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
517         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
518         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
519         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
520 };
521 
522 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
523 #endif /* defined(__i386__) || defined(__x86_64__) */
524 
525 #ifndef GRND_NONBLOCK
526 #define GRND_NONBLOCK   0x0001
527 #endif
528 #ifndef GRND_RANDOM
529 #define GRND_RANDOM     0x0002
530 #endif
531 
532 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
533                                                    struct syscall_arg *arg)
534 {
535         int printed = 0, flags = arg->val;
536 
537 #define P_FLAG(n) \
538         if (flags & GRND_##n) { \
539                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
540                 flags &= ~GRND_##n; \
541         }
542 
543         P_FLAG(RANDOM);
544         P_FLAG(NONBLOCK);
545 #undef P_FLAG
546 
547         if (flags)
548                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 
550         return printed;
551 }
552 
553 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
554 
555 #define STRARRAY(arg, name, array) \
556           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
557           .arg_parm      = { [arg] = &strarray__##array, }
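/*
 * For example, STRARRAY(0, cmd, bpf_cmd) in a syscall_fmts[] entry below
 * expands to roughly:
 *
 *   .arg_scnprintf = { [0] = SCA_STRARRAY, },
 *   .arg_parm      = { [0] = &strarray__bpf_cmd, }
 *
 * so argument 0 of bpf(2) is printed via the bpf_cmd string table above.
 */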
558 
559 #include "trace/beauty/eventfd.c"
560 #include "trace/beauty/flock.c"
561 #include "trace/beauty/futex_op.c"
562 #include "trace/beauty/mmap.c"
563 #include "trace/beauty/mode_t.c"
564 #include "trace/beauty/msg_flags.c"
565 #include "trace/beauty/open_flags.c"
566 #include "trace/beauty/perf_event_open.c"
567 #include "trace/beauty/pid.c"
568 #include "trace/beauty/sched_policy.c"
569 #include "trace/beauty/seccomp.c"
570 #include "trace/beauty/signum.c"
571 #include "trace/beauty/socket_type.c"
572 #include "trace/beauty/waitid_options.c"
573 
574 static struct syscall_fmt {
575         const char *name;
576         const char *alias;
577         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
578         void       *arg_parm[6];
579         bool       errmsg;
580         bool       errpid;
581         bool       timeout;
582         bool       hexret;
583 } syscall_fmts[] = {
584         { .name     = "access",     .errmsg = true,
585           .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
586         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
587         { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
588         { .name     = "brk",        .hexret = true,
589           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
590         { .name     = "chdir",      .errmsg = true, },
591         { .name     = "chmod",      .errmsg = true, },
592         { .name     = "chroot",     .errmsg = true, },
593         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
594         { .name     = "clone",      .errpid = true, },
595         { .name     = "close",      .errmsg = true,
596           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
597         { .name     = "connect",    .errmsg = true, },
598         { .name     = "creat",      .errmsg = true, },
599         { .name     = "dup",        .errmsg = true, },
600         { .name     = "dup2",       .errmsg = true, },
601         { .name     = "dup3",       .errmsg = true, },
602         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
603         { .name     = "eventfd2",   .errmsg = true,
604           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
605         { .name     = "faccessat",  .errmsg = true, },
606         { .name     = "fadvise64",  .errmsg = true, },
607         { .name     = "fallocate",  .errmsg = true, },
608         { .name     = "fchdir",     .errmsg = true, },
609         { .name     = "fchmod",     .errmsg = true, },
610         { .name     = "fchmodat",   .errmsg = true,
611           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
612         { .name     = "fchown",     .errmsg = true, },
613         { .name     = "fchownat",   .errmsg = true,
614           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
615         { .name     = "fcntl",      .errmsg = true,
616           .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
617           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
618         { .name     = "fdatasync",  .errmsg = true, },
619         { .name     = "flock",      .errmsg = true,
620           .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
621         { .name     = "fsetxattr",  .errmsg = true, },
622         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
623         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
624         { .name     = "fstatfs",    .errmsg = true, },
625         { .name     = "fsync",    .errmsg = true, },
626         { .name     = "ftruncate", .errmsg = true, },
627         { .name     = "futex",      .errmsg = true,
628           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
629         { .name     = "futimesat", .errmsg = true,
630           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
631         { .name     = "getdents",   .errmsg = true, },
632         { .name     = "getdents64", .errmsg = true, },
633         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
634         { .name     = "getpid",     .errpid = true, },
635         { .name     = "getpgid",    .errpid = true, },
636         { .name     = "getppid",    .errpid = true, },
637         { .name     = "getrandom",  .errmsg = true,
638           .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
639         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
640         { .name     = "getxattr",   .errmsg = true, },
641         { .name     = "inotify_add_watch",          .errmsg = true, },
642         { .name     = "ioctl",      .errmsg = true,
643           .arg_scnprintf = {
644 #if defined(__i386__) || defined(__x86_64__)
645 /*
646  * FIXME: Make this available to all arches.
647  */
648                              [1] = SCA_STRHEXARRAY, /* cmd */
649                              [2] = SCA_HEX, /* arg */ },
650           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
651 #else
652                              [2] = SCA_HEX, /* arg */ }, },
653 #endif
654         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
655         { .name     = "kill",       .errmsg = true,
656           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
657         { .name     = "lchown",    .errmsg = true, },
658         { .name     = "lgetxattr",  .errmsg = true, },
659         { .name     = "linkat",     .errmsg = true,
660           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
661         { .name     = "listxattr",  .errmsg = true, },
662         { .name     = "llistxattr", .errmsg = true, },
663         { .name     = "lremovexattr",  .errmsg = true, },
664         { .name     = "lseek",      .errmsg = true,
665           .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
666           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
667         { .name     = "lsetxattr",  .errmsg = true, },
668         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
669         { .name     = "lsxattr",    .errmsg = true, },
670         { .name     = "madvise",    .errmsg = true,
671           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
672                              [2] = SCA_MADV_BHV, /* behavior */ }, },
673         { .name     = "mkdir",    .errmsg = true, },
674         { .name     = "mkdirat",    .errmsg = true,
675           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
676         { .name     = "mknod",      .errmsg = true, },
677         { .name     = "mknodat",    .errmsg = true,
678           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
679         { .name     = "mlock",      .errmsg = true,
680           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
681         { .name     = "mlockall",   .errmsg = true,
682           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
683         { .name     = "mmap",       .hexret = true,
684 /* The standard mmap maps to old_mmap on s390x */
685 #if defined(__s390x__)
686         .alias = "old_mmap",
687 #endif
688           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
689                              [2] = SCA_MMAP_PROT, /* prot */
690                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
691         { .name     = "mprotect",   .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_HEX, /* start */
693                              [2] = SCA_MMAP_PROT, /* prot */ }, },
694         { .name     = "mq_unlink", .errmsg = true,
695           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
696         { .name     = "mremap",     .hexret = true,
697           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
698                              [3] = SCA_MREMAP_FLAGS, /* flags */
699                              [4] = SCA_HEX, /* new_addr */ }, },
700         { .name     = "munlock",    .errmsg = true,
701           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
702         { .name     = "munmap",     .errmsg = true,
703           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
704         { .name     = "name_to_handle_at", .errmsg = true,
705           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
706         { .name     = "newfstatat", .errmsg = true,
707           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
708         { .name     = "open",       .errmsg = true,
709           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
710         { .name     = "open_by_handle_at", .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
712                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
713         { .name     = "openat",     .errmsg = true,
714           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
715                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
716         { .name     = "perf_event_open", .errmsg = true,
717           .arg_scnprintf = { [2] = SCA_INT, /* cpu */
718                              [3] = SCA_FD,  /* group_fd */
719                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
720         { .name     = "pipe2",      .errmsg = true,
721           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
722         { .name     = "poll",       .errmsg = true, .timeout = true, },
723         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
724         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
725         { .name     = "preadv",     .errmsg = true, .alias = "pread", },
726         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
727         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
728         { .name     = "pwritev",    .errmsg = true, },
729         { .name     = "read",       .errmsg = true, },
730         { .name     = "readlink",   .errmsg = true, },
731         { .name     = "readlinkat", .errmsg = true,
732           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
733         { .name     = "readv",      .errmsg = true, },
734         { .name     = "recvfrom",   .errmsg = true,
735           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
736         { .name     = "recvmmsg",   .errmsg = true,
737           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
738         { .name     = "recvmsg",    .errmsg = true,
739           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
740         { .name     = "removexattr", .errmsg = true, },
741         { .name     = "renameat",   .errmsg = true,
742           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
743         { .name     = "rmdir",    .errmsg = true, },
744         { .name     = "rt_sigaction", .errmsg = true,
745           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
746         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
747         { .name     = "rt_sigqueueinfo", .errmsg = true,
748           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
749         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
750           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
751         { .name     = "sched_getattr",        .errmsg = true, },
752         { .name     = "sched_setattr",        .errmsg = true, },
753         { .name     = "sched_setscheduler",   .errmsg = true,
754           .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
755         { .name     = "seccomp", .errmsg = true,
756           .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
757                              [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
758         { .name     = "select",     .errmsg = true, .timeout = true, },
759         { .name     = "sendmmsg",    .errmsg = true,
760           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
761         { .name     = "sendmsg",    .errmsg = true,
762           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
763         { .name     = "sendto",     .errmsg = true,
764           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
765         { .name     = "set_tid_address", .errpid = true, },
766         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
767         { .name     = "setpgid",    .errmsg = true, },
768         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
769         { .name     = "setxattr",   .errmsg = true, },
770         { .name     = "shutdown",   .errmsg = true, },
771         { .name     = "socket",     .errmsg = true,
772           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
773                              [1] = SCA_SK_TYPE, /* type */ },
774           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
775         { .name     = "socketpair", .errmsg = true,
776           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
777                              [1] = SCA_SK_TYPE, /* type */ },
778           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
779         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
780         { .name     = "statfs",     .errmsg = true, },
781         { .name     = "statx",      .errmsg = true,
782           .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
783                              [2] = SCA_STATX_FLAGS, /* flags */
784                              [3] = SCA_STATX_MASK, /* mask */ }, },
785         { .name     = "swapoff",    .errmsg = true,
786           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
787         { .name     = "swapon",     .errmsg = true,
788           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
789         { .name     = "symlinkat",  .errmsg = true,
790           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
791         { .name     = "tgkill",     .errmsg = true,
792           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
793         { .name     = "tkill",      .errmsg = true,
794           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
795         { .name     = "truncate",   .errmsg = true, },
796         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
797         { .name     = "unlinkat",   .errmsg = true,
798           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
799         { .name     = "utime",  .errmsg = true, },
800         { .name     = "utimensat",  .errmsg = true,
801           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
802         { .name     = "utimes",  .errmsg = true, },
803         { .name     = "vmsplice",  .errmsg = true, },
804         { .name     = "wait4",      .errpid = true,
805           .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
806         { .name     = "waitid",     .errpid = true,
807           .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
808         { .name     = "write",      .errmsg = true, },
809         { .name     = "writev",     .errmsg = true, },
810 };
811 
812 static int syscall_fmt__cmp(const void *name, const void *fmtp)
813 {
814         const struct syscall_fmt *fmt = fmtp;
815         return strcmp(name, fmt->name);
816 }
817 
818 static struct syscall_fmt *syscall_fmt__find(const char *name)
819 {
820         const int nmemb = ARRAY_SIZE(syscall_fmts);
821         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
822 }
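/*
 * Note: the bsearch() above relies on syscall_fmts[] being kept sorted by
 * ->name in plain strcmp() order.
 */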
823 
824 struct syscall {
825         struct event_format *tp_format;
826         int                 nr_args;
827         struct format_field *args;
828         const char          *name;
829         bool                is_exit;
830         struct syscall_fmt  *fmt;
831         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
832         void                **arg_parm;
833 };
834 
835 /*
 836  * We need this 'calculated' boolean because in some cases we really
 837  * don't know the duration of a syscall, for instance, when we start a
 838  * session and some threads are already waiting for a syscall to finish,
 839  * say 'poll', in which case all we can do is print "( ? )" for the
 840  * duration and for the start timestamp.
841  */
842 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
843 {
844         double duration = (double)t / NSEC_PER_MSEC;
845         size_t printed = fprintf(fp, "(");
846 
847         if (!calculated)
848                 printed += fprintf(fp, "     ?   ");
849         else if (duration >= 1.0)
850                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
851         else if (duration >= 0.01)
852                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
853         else
854                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
855         return printed + fprintf(fp, "): ");
856 }
857 
858 /**
859  * filename.ptr: The filename char pointer that will be vfs_getname'd
860  * filename.entry_str_pos: Where to insert the string translated from
861  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
862  */
863 struct thread_trace {
864         u64               entry_time;
865         bool              entry_pending;
866         unsigned long     nr_events;
867         unsigned long     pfmaj, pfmin;
868         char              *entry_str;
869         double            runtime_ms;
870         struct {
871                 unsigned long ptr;
872                 short int     entry_str_pos;
873                 bool          pending_open;
874                 unsigned int  namelen;
875                 char          *name;
876         } filename;
877         struct {
878                 int       max;
879                 char      **table;
880         } paths;
881 
882         struct intlist *syscall_stats;
883 };
884 
885 static struct thread_trace *thread_trace__new(void)
886 {
887         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
888 
 889         if (ttrace) {
 890                 ttrace->paths.max = -1;
 891                 ttrace->syscall_stats = intlist__new(NULL);
 892         }
 893 
894         return ttrace;
895 }
896 
897 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
898 {
899         struct thread_trace *ttrace;
900 
901         if (thread == NULL)
902                 goto fail;
903 
904         if (thread__priv(thread) == NULL)
905                 thread__set_priv(thread, thread_trace__new());
906 
907         if (thread__priv(thread) == NULL)
908                 goto fail;
909 
910         ttrace = thread__priv(thread);
911         ++ttrace->nr_events;
912 
913         return ttrace;
914 fail:
915         color_fprintf(fp, PERF_COLOR_RED,
916                       "WARNING: not enough memory, dropping samples!\n");
917         return NULL;
918 }
919 
920 #define TRACE_PFMAJ             (1 << 0)
921 #define TRACE_PFMIN             (1 << 1)
922 
923 static const size_t trace__entry_str_size = 2048;
924 
925 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
926 {
927         struct thread_trace *ttrace = thread__priv(thread);
928 
929         if (fd > ttrace->paths.max) {
930                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
931 
932                 if (npath == NULL)
933                         return -1;
934 
935                 if (ttrace->paths.max != -1) {
936                         memset(npath + ttrace->paths.max + 1, 0,
937                                (fd - ttrace->paths.max) * sizeof(char *));
938                 } else {
939                         memset(npath, 0, (fd + 1) * sizeof(char *));
940                 }
941 
942                 ttrace->paths.table = npath;
943                 ttrace->paths.max   = fd;
944         }
945 
946         ttrace->paths.table[fd] = strdup(pathname);
947 
948         return ttrace->paths.table[fd] != NULL ? 0 : -1;
949 }
950 
951 static int thread__read_fd_path(struct thread *thread, int fd)
952 {
953         char linkname[PATH_MAX], pathname[PATH_MAX];
954         struct stat st;
955         int ret;
956 
957         if (thread->pid_ == thread->tid) {
958                 scnprintf(linkname, sizeof(linkname),
959                           "/proc/%d/fd/%d", thread->pid_, fd);
960         } else {
961                 scnprintf(linkname, sizeof(linkname),
962                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
963         }
964 
965         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
966                 return -1;
967 
968         ret = readlink(linkname, pathname, sizeof(pathname));
969 
970         if (ret < 0 || ret > st.st_size)
971                 return -1;
972 
973         pathname[ret] = '\0';
974         return trace__set_fd_pathname(thread, fd, pathname);
975 }
976 
977 static const char *thread__fd_path(struct thread *thread, int fd,
978                                    struct trace *trace)
979 {
980         struct thread_trace *ttrace = thread__priv(thread);
981 
982         if (ttrace == NULL)
983                 return NULL;
984 
985         if (fd < 0)
986                 return NULL;
987 
988         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
989                 if (!trace->live)
990                         return NULL;
991                 ++trace->stats.proc_getname;
992                 if (thread__read_fd_path(thread, fd))
993                         return NULL;
994         }
995 
996         return ttrace->paths.table[fd];
997 }
998 
999 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1000                                         struct syscall_arg *arg)
1001 {
1002         int fd = arg->val;
1003         size_t printed = scnprintf(bf, size, "%d", fd);
1004         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1005 
1006         if (path)
1007                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1008 
1009         return printed;
1010 }
1011 
1012 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1013                                               struct syscall_arg *arg)
1014 {
1015         int fd = arg->val;
1016         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1017         struct thread_trace *ttrace = thread__priv(arg->thread);
1018 
1019         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1020                 zfree(&ttrace->paths.table[fd]);
1021 
1022         return printed;
1023 }
1024 
1025 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1026                                      unsigned long ptr)
1027 {
1028         struct thread_trace *ttrace = thread__priv(thread);
1029 
1030         ttrace->filename.ptr = ptr;
1031         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1032 }
1033 
1034 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1035                                               struct syscall_arg *arg)
1036 {
1037         unsigned long ptr = arg->val;
1038 
1039         if (!arg->trace->vfs_getname)
1040                 return scnprintf(bf, size, "%#lx", ptr);
1041 
1042         thread__set_filename_pos(arg->thread, bf, ptr);
1043         return 0;
1044 }
1045 
1046 static bool trace__filter_duration(struct trace *trace, double t)
1047 {
1048         return t < (trace->duration_filter * NSEC_PER_MSEC);
1049 }
1050 
1051 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1052 {
1053         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1054 
1055         return fprintf(fp, "%10.3f ", ts);
1056 }
1057 
1058 /*
1059  * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1060  * using ttrace->entry_time for a thread that receives a sys_exit without
1061  * first having received a sys_enter ("poll" issued before tracing session
1062  * starts, sys_enter lost due to ring buffer overflow).
1063  */
1064 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1065 {
1066         if (tstamp > 0)
1067                 return __trace__fprintf_tstamp(trace, tstamp, fp);
1068 
1069         return fprintf(fp, "         ? ");
1070 }
1071 
1072 static bool done = false;
1073 static bool interrupted = false;
1074 
1075 static void sig_handler(int sig)
1076 {
1077         done = true;
1078         interrupted = sig == SIGINT;
1079 }
1080 
1081 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1082                                         u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1083 {
1084         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1085         printed += fprintf_duration(duration, duration_calculated, fp);
1086 
1087         if (trace->multiple_threads) {
1088                 if (trace->show_comm)
1089                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1090                 printed += fprintf(fp, "%d ", thread->tid);
1091         }
1092 
1093         return printed;
1094 }
1095 
1096 static int trace__process_event(struct trace *trace, struct machine *machine,
1097                                 union perf_event *event, struct perf_sample *sample)
1098 {
1099         int ret = 0;
1100 
1101         switch (event->header.type) {
1102         case PERF_RECORD_LOST:
1103                 color_fprintf(trace->output, PERF_COLOR_RED,
1104                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1105                 ret = machine__process_lost_event(machine, event, sample);
1106                 break;
1107         default:
1108                 ret = machine__process_event(machine, event, sample);
1109                 break;
1110         }
1111 
1112         return ret;
1113 }
1114 
1115 static int trace__tool_process(struct perf_tool *tool,
1116                                union perf_event *event,
1117                                struct perf_sample *sample,
1118                                struct machine *machine)
1119 {
1120         struct trace *trace = container_of(tool, struct trace, tool);
1121         return trace__process_event(trace, machine, event, sample);
1122 }
1123 
1124 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1125 {
1126         struct machine *machine = vmachine;
1127 
1128         if (machine->kptr_restrict_warned)
1129                 return NULL;
1130 
1131         if (symbol_conf.kptr_restrict) {
1132                 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1133                            "Check /proc/sys/kernel/kptr_restrict.\n\n"
1134                            "Kernel samples will not be resolved.\n");
1135                 machine->kptr_restrict_warned = true;
1136                 return NULL;
1137         }
1138 
1139         return machine__resolve_kernel_addr(vmachine, addrp, modp);
1140 }
1141 
1142 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1143 {
1144         int err = symbol__init(NULL);
1145 
1146         if (err)
1147                 return err;
1148 
1149         trace->host = machine__new_host();
1150         if (trace->host == NULL)
1151                 return -ENOMEM;
1152 
1153         if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1154                 return -errno;
1155 
1156         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1157                                             evlist->threads, trace__tool_process, false,
1158                                             trace->opts.proc_map_timeout);
1159         if (err)
1160                 symbol__exit();
1161 
1162         return err;
1163 }
1164 
1165 static int syscall__set_arg_fmts(struct syscall *sc)
1166 {
1167         struct format_field *field;
1168         int idx = 0, len;
1169 
1170         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1171         if (sc->arg_scnprintf == NULL)
1172                 return -1;
1173 
1174         if (sc->fmt)
1175                 sc->arg_parm = sc->fmt->arg_parm;
1176 
1177         for (field = sc->args; field; field = field->next) {
1178                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1179                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1180                 else if (strcmp(field->type, "const char *") == 0 &&
1181                          (strcmp(field->name, "filename") == 0 ||
1182                           strcmp(field->name, "path") == 0 ||
1183                           strcmp(field->name, "pathname") == 0))
1184                         sc->arg_scnprintf[idx] = SCA_FILENAME;
1185                 else if (field->flags & FIELD_IS_POINTER)
1186                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1187                 else if (strcmp(field->type, "pid_t") == 0)
1188                         sc->arg_scnprintf[idx] = SCA_PID;
1189                 else if (strcmp(field->type, "umode_t") == 0)
1190                         sc->arg_scnprintf[idx] = SCA_MODE_T;
1191                 else if ((strcmp(field->type, "int") == 0 ||
1192                           strcmp(field->type, "unsigned int") == 0 ||
1193                           strcmp(field->type, "long") == 0) &&
1194                          (len = strlen(field->name)) >= 2 &&
1195                          strcmp(field->name + len - 2, "fd") == 0) {
1196                         /*
1197                          * /sys/kernel/tracing/events/syscalls/sys_enter*
1198                          * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1199                          * 65 int
1200                          * 23 unsigned int
1201                          * 7 unsigned long
1202                          */
1203                         sc->arg_scnprintf[idx] = SCA_FD;
1204                 }
1205                 ++idx;
1206         }
1207 
1208         return 0;
1209 }
1210 
1211 static int trace__read_syscall_info(struct trace *trace, int id)
1212 {
1213         char tp_name[128];
1214         struct syscall *sc;
1215         const char *name = syscalltbl__name(trace->sctbl, id);
1216 
1217         if (name == NULL)
1218                 return -1;
1219 
1220         if (id > trace->syscalls.max) {
1221                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1222 
1223                 if (nsyscalls == NULL)
1224                         return -1;
1225 
1226                 if (trace->syscalls.max != -1) {
1227                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1228                                (id - trace->syscalls.max) * sizeof(*sc));
1229                 } else {
1230                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1231                 }
1232 
1233                 trace->syscalls.table = nsyscalls;
1234                 trace->syscalls.max   = id;
1235         }
1236 
1237         sc = trace->syscalls.table + id;
1238         sc->name = name;
1239 
1240         sc->fmt  = syscall_fmt__find(sc->name);
1241 
1242         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1243         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1244 
1245         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1246                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1247                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1248         }
1249 
1250         if (IS_ERR(sc->tp_format))
1251                 return -1;
1252 
1253         sc->args = sc->tp_format->format.fields;
1254         sc->nr_args = sc->tp_format->format.nr_fields;
1255         /*
1256          * We need to check and discard the first field, '__syscall_nr'
1257          * or 'nr', which holds the syscall number and is redundant here,
1258          * so drop it. Note that this field does not exist on older kernels.
1259          */
1260         if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1261                 sc->args = sc->args->next;
1262                 --sc->nr_args;
1263         }
1264 
1265         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1266 
1267         return syscall__set_arg_fmts(sc);
1268 }
1269 
1270 static int trace__validate_ev_qualifier(struct trace *trace)
1271 {
1272         int err = 0, i;
1273         struct str_node *pos;
1274 
1275         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1276         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1277                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1278 
1279         if (trace->ev_qualifier_ids.entries == NULL) {
1280                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1281                        trace->output);
1282                 err = -EINVAL;
1283                 goto out;
1284         }
1285 
1286         i = 0;
1287 
1288         strlist__for_each_entry(pos, trace->ev_qualifier) {
1289                 const char *sc = pos->s;
1290                 int id = syscalltbl__id(trace->sctbl, sc);
1291 
1292                 if (id < 0) {
1293                         if (err == 0) {
1294                                 fputs("Error:\tInvalid syscall ", trace->output);
1295                                 err = -EINVAL;
1296                         } else {
1297                                 fputs(", ", trace->output);
1298                         }
1299 
1300                         fputs(sc, trace->output);
1301                 }
1302 
1303                 trace->ev_qualifier_ids.entries[i++] = id;
1304         }
1305 
1306         if (err < 0) {
1307                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1308                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1309                 zfree(&trace->ev_qualifier_ids.entries);
1310                 trace->ev_qualifier_ids.nr = 0;
1311         }
1312 out:
1313         return err;
1314 }
1315 
1316 /*
1317  * args is to be interpreted as a series of longs but we need to handle
1318  * 8-byte unaligned accesses. args points to raw_data within the event
1319  * and raw_data is guaranteed to be 8-byte unaligned because it is
1320  * preceded by raw_size which is a u32. So we need to copy args to a temp
1321  * variable to read it. Most notably this avoids extended load instructions
1322  * on unaligned addresses.
1323  */
1324 
1325 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1326                                       unsigned char *args, struct trace *trace,
1327                                       struct thread *thread)
1328 {
1329         size_t printed = 0;
1330         unsigned char *p;
1331         unsigned long val;
1332 
1333         if (sc->args != NULL) {
1334                 struct format_field *field;
1335                 u8 bit = 1;
1336                 struct syscall_arg arg = {
1337                         .idx    = 0,
1338                         .mask   = 0,
1339                         .trace  = trace,
1340                         .thread = thread,
1341                 };
1342 
1343                 for (field = sc->args; field;
1344                      field = field->next, ++arg.idx, bit <<= 1) {
1345                         if (arg.mask & bit)
1346                                 continue;
1347 
1348                         /* special care for unaligned accesses */
1349                         p = args + sizeof(unsigned long) * arg.idx;
1350                         memcpy(&val, p, sizeof(val));
1351 
1352                         /*
1353                          * Suppress this argument if its value is zero and
1354                          * we don't have a string associated with it in a
1355                          * strarray.
1356                          */
1357                         if (val == 0 &&
1358                             !(sc->arg_scnprintf &&
1359                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1360                               sc->arg_parm[arg.idx]))
1361                                 continue;
1362 
1363                         printed += scnprintf(bf + printed, size - printed,
1364                                              "%s%s: ", printed ? ", " : "", field->name);
1365                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1366                                 arg.val = val;
1367                                 if (sc->arg_parm)
1368                                         arg.parm = sc->arg_parm[arg.idx];
1369                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1370                                                                       size - printed, &arg);
1371                         } else {
1372                                 printed += scnprintf(bf + printed, size - printed,
1373                                                      "%ld", val);
1374                         }
1375                 }
1376         } else if (IS_ERR(sc->tp_format)) {
1377                 /*
1378                  * If we managed to read the tracepoint /format file, then we
1379                  * may end up not having any args, like with gettid(), so only
1380                  * print the raw args when we didn't manage to read it.
1381                  */
1382                 int i = 0;
1383 
1384                 while (i < 6) {
1385                         /* special care for unaligned accesses */
1386                         p = args + sizeof(unsigned long) * i;
1387                         memcpy(&val, p, sizeof(val));
1388                         printed += scnprintf(bf + printed, size - printed,
1389                                              "%sarg%d: %ld",
1390                                              printed ? ", " : "", i, val);
1391                         ++i;
1392                 }
1393         }
1394 
1395         return printed;
1396 }
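/*
 * Editor's sketch (illustrative only, compiled out, not part of
 * builtin-trace.c): the memcpy()-into-a-local pattern used above is the
 * portable way to read a long from a buffer that may not be suitably
 * aligned; the compiler lowers the fixed-size memcpy() to whatever load
 * sequence is safe for the target, instead of a plain dereference that
 * could trap or be slow on strict-alignment architectures.
 */
#if 0
#include <string.h>

static unsigned long read_arg(const unsigned char *args, unsigned int idx)
{
	unsigned long val;

	/* args + idx * sizeof(long) may only be 4-byte aligned */
	memcpy(&val, args + sizeof(unsigned long) * idx, sizeof(val));
	return val;
}
#endif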
1397 
1398 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1399                                   union perf_event *event,
1400                                   struct perf_sample *sample);
1401 
1402 static struct syscall *trace__syscall_info(struct trace *trace,
1403                                            struct perf_evsel *evsel, int id)
1404 {
1405 
1406         if (id < 0) {
1407 
1408                 /*
1409                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1410                  * before that, leaving at a higher verbosity level till that is
1411                  * explained. Reproduced with plain ftrace with:
1412                  *
1413                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1414                  * grep "NR -1 " /t/trace_pipe
1415                  *
1416                  * After generating some load on the machine.
1417                  */
1418                 if (verbose > 1) {
1419                         static u64 n;
1420                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1421                                 id, perf_evsel__name(evsel), ++n);
1422                 }
1423                 return NULL;
1424         }
1425 
1426         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1427             trace__read_syscall_info(trace, id))
1428                 goto out_cant_read;
1429 
1430         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1431                 goto out_cant_read;
1432 
1433         return &trace->syscalls.table[id];
1434 
1435 out_cant_read:
1436         if (verbose > 0) {
1437                 fprintf(trace->output, "Problems reading syscall %d", id);
1438                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1439                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1440                 fputs(" information\n", trace->output);
1441         }
1442         return NULL;
1443 }
1444 
1445 static void thread__update_stats(struct thread_trace *ttrace,
1446                                  int id, struct perf_sample *sample)
1447 {
1448         struct int_node *inode;
1449         struct stats *stats;
1450         u64 duration = 0;
1451 
1452         inode = intlist__findnew(ttrace->syscall_stats, id);
1453         if (inode == NULL)
1454                 return;
1455 
1456         stats = inode->priv;
1457         if (stats == NULL) {
1458                 stats = malloc(sizeof(struct stats));
1459                 if (stats == NULL)
1460                         return;
1461                 init_stats(stats);
1462                 inode->priv = stats;
1463         }
1464 
1465         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1466                 duration = sample->time - ttrace->entry_time;
1467 
1468         update_stats(stats, duration);
1469 }
1470 
1471 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1472 {
1473         struct thread_trace *ttrace;
1474         u64 duration;
1475         size_t printed;
1476 
1477         if (trace->current == NULL)
1478                 return 0;
1479 
1480         ttrace = thread__priv(trace->current);
1481 
1482         if (!ttrace->entry_pending)
1483                 return 0;
1484 
1485         duration = sample->time - ttrace->entry_time;
1486 
1487         printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1488         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1489         ttrace->entry_pending = false;
1490 
1491         return printed;
1492 }
1493 
1494 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1495                             union perf_event *event __maybe_unused,
1496                             struct perf_sample *sample)
1497 {
1498         char *msg;
1499         void *args;
1500         size_t printed = 0;
1501         struct thread *thread;
1502         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1503         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1504         struct thread_trace *ttrace;
1505 
1506         if (sc == NULL)
1507                 return -1;
1508 
1509         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1510         ttrace = thread__trace(thread, trace->output);
1511         if (ttrace == NULL)
1512                 goto out_put;
1513 
1514         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1515 
1516         if (ttrace->entry_str == NULL) {
1517                 ttrace->entry_str = malloc(trace__entry_str_size);
1518                 if (!ttrace->entry_str)
1519                         goto out_put;
1520         }
1521 
1522         if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1523                 trace__printf_interrupted_entry(trace, sample);
1524 
1525         ttrace->entry_time = sample->time;
1526         msg = ttrace->entry_str;
1527         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1528 
1529         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1530                                            args, trace, thread);
1531 
1532         if (sc->is_exit) {
1533                 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1534                         trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1535                         fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1536                 }
1537         } else {
1538                 ttrace->entry_pending = true;
1539                 /* See trace__vfs_getname & trace__sys_exit */
1540                 ttrace->filename.pending_open = false;
1541         }
1542 
1543         if (trace->current != thread) {
1544                 thread__put(trace->current);
1545                 trace->current = thread__get(thread);
1546         }
1547         err = 0;
1548 out_put:
1549         thread__put(thread);
1550         return err;
1551 }
1552 
1553 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1554                                     struct perf_sample *sample,
1555                                     struct callchain_cursor *cursor)
1556 {
1557         struct addr_location al;
1558 
1559         if (machine__resolve(trace->host, &al, sample) < 0 ||
1560             thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1561                 return -1;
1562 
1563         return 0;
1564 }
1565 
1566 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1567 {
1568         /* TODO: user-configurable print_opts */
1569         const unsigned int print_opts = EVSEL__PRINT_SYM |
1570                                         EVSEL__PRINT_DSO |
1571                                         EVSEL__PRINT_UNKNOWN_AS_ADDR;
1572 
1573         return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1574 }
1575 
1576 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1577                            union perf_event *event __maybe_unused,
1578                            struct perf_sample *sample)
1579 {
1580         long ret;
1581         u64 duration = 0;
1582         bool duration_calculated = false;
1583         struct thread *thread;
1584         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1585         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1586         struct thread_trace *ttrace;
1587 
1588         if (sc == NULL)
1589                 return -1;
1590 
1591         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1592         ttrace = thread__trace(thread, trace->output);
1593         if (ttrace == NULL)
1594                 goto out_put;
1595 
1596         if (trace->summary)
1597                 thread__update_stats(ttrace, id, sample);
1598 
1599         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1600 
1601         if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1602                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1603                 ttrace->filename.pending_open = false;
1604                 ++trace->stats.vfs_getname;
1605         }
1606 
1607         if (ttrace->entry_time) {
1608                 duration = sample->time - ttrace->entry_time;
1609                 if (trace__filter_duration(trace, duration))
1610                         goto out;
1611                 duration_calculated = true;
1612         } else if (trace->duration_filter)
1613                 goto out;
1614 
1615         if (sample->callchain) {
1616                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1617                 if (callchain_ret == 0) {
1618                         if (callchain_cursor.nr < trace->min_stack)
1619                                 goto out;
1620                         callchain_ret = 1;
1621                 }
1622         }
1623 
1624         if (trace->summary_only)
1625                 goto out;
1626 
1627         trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1628 
1629         if (ttrace->entry_pending) {
1630                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1631         } else {
1632                 fprintf(trace->output, " ... [");
1633                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1634                 fprintf(trace->output, "]: %s()", sc->name);
1635         }
1636 
1637         if (sc->fmt == NULL) {
1638 signed_print:
1639                 fprintf(trace->output, ") = %ld", ret);
1640         } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1641                 char bf[STRERR_BUFSIZE];
1642                 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1643                            *e = audit_errno_to_name(-ret);
1644 
1645                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1646         } else if (ret == 0 && sc->fmt->timeout)
1647                 fprintf(trace->output, ") = 0 Timeout");
1648         else if (sc->fmt->hexret)
1649                 fprintf(trace->output, ") = %#lx", ret);
1650         else if (sc->fmt->errpid) {
1651                 struct thread *child = machine__find_thread(trace->host, ret, ret);
1652 
1653                 if (child != NULL) {
1654                         fprintf(trace->output, ") = %ld", ret);
1655                         if (child->comm_set)
1656                                 fprintf(trace->output, " (%s)", thread__comm_str(child));
1657                         thread__put(child);
1658                 }
1659         } else
1660                 goto signed_print;
1661 
1662         fputc('\n', trace->output);
1663 
1664         if (callchain_ret > 0)
1665                 trace__fprintf_callchain(trace, sample);
1666         else if (callchain_ret < 0)
1667                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1668 out:
1669         ttrace->entry_pending = false;
1670         err = 0;
1671 out_put:
1672         thread__put(thread);
1673         return err;
1674 }
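/*
 * Editor's note (illustrative, not part of builtin-trace.c): paired with
 * trace__sys_enter() above, this prints the strace-like line, roughly
 *
 *     <tstamp> ( <duration> ms): <comm>/<tid> name(args) = ret
 *
 * with negative returns decoded via audit_errno_to_name()/str_error_r(),
 * and entries interrupted by another thread's syscall finished later on a
 * separate " ... [continued]: name()" line.
 */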
1675 
1676 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1677                               union perf_event *event __maybe_unused,
1678                               struct perf_sample *sample)
1679 {
1680         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1681         struct thread_trace *ttrace;
1682         size_t filename_len, entry_str_len, to_move;
1683         ssize_t remaining_space;
1684         char *pos;
1685         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1686 
1687         if (!thread)
1688                 goto out;
1689 
1690         ttrace = thread__priv(thread);
1691         if (!ttrace)
1692                 goto out_put;
1693 
1694         filename_len = strlen(filename);
1695         if (filename_len == 0)
1696                 goto out_put;
1697 
1698         if (ttrace->filename.namelen < filename_len) {
1699                 char *f = realloc(ttrace->filename.name, filename_len + 1);
1700 
1701                 if (f == NULL)
1702                         goto out_put;
1703 
1704                 ttrace->filename.namelen = filename_len;
1705                 ttrace->filename.name = f;
1706         }
1707 
1708         strcpy(ttrace->filename.name, filename);
1709         ttrace->filename.pending_open = true;
1710 
1711         if (!ttrace->filename.ptr)
1712                 goto out_put;
1713 
1714         entry_str_len = strlen(ttrace->entry_str);
1715         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1716         if (remaining_space <= 0)
1717                 goto out_put;
1718 
1719         if (filename_len > (size_t)remaining_space) {
1720                 filename += filename_len - remaining_space;
1721                 filename_len = remaining_space;
1722         }
1723 
1724         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1725         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1726         memmove(pos + filename_len, pos, to_move);
1727         memcpy(pos, filename, filename_len);
1728 
1729         ttrace->filename.ptr = 0;
1730         ttrace->filename.entry_str_pos = 0;
1731 out_put:
1732         thread__put(thread);
1733 out:
1734         return 0;
1735 }
1736 
1737 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1738                                      union perf_event *event __maybe_unused,
1739                                      struct perf_sample *sample)
1740 {
1741         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1742         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1743         struct thread *thread = machine__findnew_thread(trace->host,
1744                                                         sample->pid,
1745                                                         sample->tid);
1746         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1747 
1748         if (ttrace == NULL)
1749                 goto out_dump;
1750 
1751         ttrace->runtime_ms += runtime_ms;
1752         trace->runtime_ms += runtime_ms;
1753 out_put:
1754         thread__put(thread);
1755         return 0;
1756 
1757 out_dump:
1758         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1759                evsel->name,
1760                perf_evsel__strval(evsel, sample, "comm"),
1761                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1762                runtime,
1763                perf_evsel__intval(evsel, sample, "vruntime"));
1764         goto out_put;
1765 }
1766 
1767 static void bpf_output__printer(enum binary_printer_ops op,
1768                                 unsigned int val, void *extra)
1769 {
1770         FILE *output = extra;
1771         unsigned char ch = (unsigned char)val;
1772 
1773         switch (op) {
1774         case BINARY_PRINT_CHAR_DATA:
1775                 fprintf(output, "%c", isprint(ch) ? ch : '.');
1776                 break;
1777         case BINARY_PRINT_DATA_BEGIN:
1778         case BINARY_PRINT_LINE_BEGIN:
1779         case BINARY_PRINT_ADDR:
1780         case BINARY_PRINT_NUM_DATA:
1781         case BINARY_PRINT_NUM_PAD:
1782         case BINARY_PRINT_SEP:
1783         case BINARY_PRINT_CHAR_PAD:
1784         case BINARY_PRINT_LINE_END:
1785         case BINARY_PRINT_DATA_END:
1786         default:
1787                 break;
1788         }
1789 }
1790 
1791 static void bpf_output__fprintf(struct trace *trace,
1792                                 struct perf_sample *sample)
1793 {
1794         print_binary(sample->raw_data, sample->raw_size, 8,
1795                      bpf_output__printer, trace->output);
1796 }
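/*
 * Editor's note (illustrative, not part of builtin-trace.c): since the
 * printer callback above only acts on BINARY_PRINT_CHAR_DATA, print_binary()
 * here renders a BPF output event's raw payload as a flat character dump,
 * with non-printable bytes shown as '.'.
 */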
1797 
1798 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1799                                 union perf_event *event __maybe_unused,
1800                                 struct perf_sample *sample)
1801 {
1802         int callchain_ret = 0;
1803 
1804         if (sample->callchain) {
1805                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1806                 if (callchain_ret == 0) {
1807                         if (callchain_cursor.nr < trace->min_stack)
1808                                 goto out;
1809                         callchain_ret = 1;
1810                 }
1811         }
1812 
1813         trace__printf_interrupted_entry(trace, sample);
1814         trace__fprintf_tstamp(trace, sample->time, trace->output);
1815 
1816         if (trace->trace_syscalls)
1817                 fprintf(trace->output, "(         ): ");
1818 
1819         fprintf(trace->output, "%s:", evsel->name);
1820 
1821         if (perf_evsel__is_bpf_output(evsel)) {
1822                 bpf_output__fprintf(trace, sample);
1823         } else if (evsel->tp_format) {
1824                 event_format__fprintf(evsel->tp_format, sample->cpu,
1825                                       sample->raw_data, sample->raw_size,
1826                                       trace->output);
1827         }
1828 
1829         fprintf(trace->output, ")\n");
1830 
1831         if (callchain_ret > 0)
1832                 trace__fprintf_callchain(trace, sample);
1833         else if (callchain_ret < 0)
1834                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1835 out:
1836         return 0;
1837 }
1838 
1839 static void print_location(FILE *f, struct perf_sample *sample,
1840                            struct addr_location *al,
1841                            bool print_dso, bool print_sym)
1842 {
1843 
1844         if ((verbose > 0 || print_dso) && al->map)
1845                 fprintf(f, "%s@", al->map->dso->long_name);
1846 
1847         if ((verbose > 0 || print_sym) && al->sym)
1848                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1849                         al->addr - al->sym->start);
1850         else if (al->map)
1851                 fprintf(f, "0x%" PRIx64, al->addr);
1852         else
1853                 fprintf(f, "0x%" PRIx64, sample->addr);
1854 }
1855 
1856 static int trace__pgfault(struct trace *trace,
1857                           struct perf_evsel *evsel,
1858                           union perf_event *event __maybe_unused,
1859                           struct perf_sample *sample)
1860 {
1861         struct thread *thread;
1862         struct addr_location al;
1863         char map_type = 'd';
1864         struct thread_trace *ttrace;
1865         int err = -1;
1866         int callchain_ret = 0;
1867 
1868         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1869 
1870         if (sample->callchain) {
1871                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1872                 if (callchain_ret == 0) {
1873                         if (callchain_cursor.nr < trace->min_stack)
1874                                 goto out_put;
1875                         callchain_ret = 1;
1876                 }
1877         }
1878 
1879         ttrace = thread__trace(thread, trace->output);
1880         if (ttrace == NULL)
1881                 goto out_put;
1882 
1883         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1884                 ttrace->pfmaj++;
1885         else
1886                 ttrace->pfmin++;
1887 
1888         if (trace->summary_only)
1889                 goto out;
1890 
1891         thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1892                               sample->ip, &al);
1893 
1894         trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1895 
1896         fprintf(trace->output, "%sfault [",
1897                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1898                 "maj" : "min");
1899 
1900         print_location(trace->output, sample, &al, false, true);
1901 
1902         fprintf(trace->output, "] => ");
1903 
1904         thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1905                                    sample->addr, &al);
1906 
1907         if (!al.map) {
1908                 thread__find_addr_location(thread, sample->cpumode,
1909                                            MAP__FUNCTION, sample->addr, &al);
1910 
1911                 if (al.map)
1912                         map_type = 'x';
1913                 else
1914                         map_type = '?';
1915         }
1916 
1917         print_location(trace->output, sample, &al, true, false);
1918 
1919         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1920 
1921         if (callchain_ret > 0)
1922                 trace__fprintf_callchain(trace, sample);
1923         else if (callchain_ret < 0)
1924                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1925 out:
1926         err = 0;
1927 out_put:
1928         thread__put(thread);
1929         return err;
1930 }
1931 
1932 static void trace__set_base_time(struct trace *trace,
1933                                  struct perf_evsel *evsel,
1934                                  struct perf_sample *sample)
1935 {
1936         /*
1937          * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1938          * and don't use sample->time unconditionally, we may end up having
1939          * some other event in the future without PERF_SAMPLE_TIME for good
1940          * reason, i.e. we may not be interested in its timestamps, just in
1941          * it taking place, picking some piece of information when it
1942          * appears in our event stream (vfs_getname comes to mind).
1943          */
1944         if (trace->base_time == 0 && !trace->full_time &&
1945             (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1946                 trace->base_time = sample->time;
1947 }
1948 
1949 static int trace__process_sample(struct perf_tool *tool,
1950                                  union perf_event *event,
1951                                  struct perf_sample *sample,
1952                                  struct perf_evsel *evsel,
1953                                  struct machine *machine __maybe_unused)
1954 {
1955         struct trace *trace = container_of(tool, struct trace, tool);
1956         struct thread *thread;
1957         int err = 0;
1958 
1959         tracepoint_handler handler = evsel->handler;
1960 
1961         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1962         if (thread && thread__is_filtered(thread))
1963                 goto out;
1964 
1965         trace__set_base_time(trace, evsel, sample);
1966 
1967         if (handler) {
1968                 ++trace->nr_events;
1969                 handler(trace, evsel, event, sample);
1970         }
1971 out:
1972         thread__put(thread);
1973         return err;
1974 }
1975 
1976 static int trace__record(struct trace *trace, int argc, const char **argv)
1977 {
1978         unsigned int rec_argc, i, j;
1979         const char **rec_argv;
1980         const char * const record_args[] = {
1981                 "record",
1982                 "-R",
1983                 "-m", "1024",
1984                 "-c", "1",
1985         };
1986 
1987         const char * const sc_args[] = { "-e", };
1988         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1989         const char * const majpf_args[] = { "-e", "major-faults" };
1990         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1991         const char * const minpf_args[] = { "-e", "minor-faults" };
1992         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1993 
1994         /* +1 is for the event string below */
1995         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1996                 majpf_args_nr + minpf_args_nr + argc;
1997         rec_argv = calloc(rec_argc + 1, sizeof(char *));
1998 
1999         if (rec_argv == NULL)
2000                 return -ENOMEM;
2001 
2002         j = 0;
2003         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2004                 rec_argv[j++] = record_args[i];
2005 
2006         if (trace->trace_syscalls) {
2007                 for (i = 0; i < sc_args_nr; i++)
2008                         rec_argv[j++] = sc_args[i];
2009 
2010                 /* event string may be different for older kernels - e.g., RHEL6 */
2011                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2012                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2013                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2014                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2015                 else {
2016                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2017                         return -1;
2018                 }
2019         }
2020 
2021         if (trace->trace_pgfaults & TRACE_PFMAJ)
2022                 for (i = 0; i < majpf_args_nr; i++)
2023                         rec_argv[j++] = majpf_args[i];
2024 
2025         if (trace->trace_pgfaults & TRACE_PFMIN)
2026                 for (i = 0; i < minpf_args_nr; i++)
2027                         rec_argv[j++] = minpf_args[i];
2028 
2029         for (i = 0; i < (unsigned int)argc; i++)
2030                 rec_argv[j++] = argv[i];
2031 
2032         return cmd_record(j, rec_argv);
2033 }
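/*
 * Editor's illustration (not part of builtin-trace.c): with syscall tracing
 * on and both page fault classes requested, the argv assembled above is
 * equivalent to running something like
 *
 *   perf record -R -m 1024 -c 1 \
 *       -e raw_syscalls:sys_enter,raw_syscalls:sys_exit \
 *       -e major-faults -e minor-faults <workload>
 *
 * falling back to syscalls:sys_enter,syscalls:sys_exit on kernels that
 * lack the raw_syscalls tracepoints.
 */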
2034 
2035 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2036 
2037 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2038 {
2039         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2040 
2041         if (IS_ERR(evsel))
2042                 return false;
2043 
2044         if (perf_evsel__field(evsel, "pathname") == NULL) {
2045                 perf_evsel__delete(evsel);
2046                 return false;
2047         }
2048 
2049         evsel->handler = trace__vfs_getname;
2050         perf_evlist__add(evlist, evsel);
2051         return true;
2052 }
2053 
2054 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2055 {
2056         struct perf_evsel *evsel;
2057         struct perf_event_attr attr = {
2058                 .type = PERF_TYPE_SOFTWARE,
2059                 .mmap_data = 1,
2060         };
2061 
2062         attr.config = config;
2063         attr.sample_period = 1;
2064 
2065         event_attr_init(&attr);
2066 
2067         evsel = perf_evsel__new(&attr);
2068         if (evsel)
2069                 evsel->handler = trace__pgfault;
2070 
2071         return evsel;
2072 }
2073 
2074 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2075 {
2076         const u32 type = event->header.type;
2077         struct perf_evsel *evsel;
2078 
2079         if (type != PERF_RECORD_SAMPLE) {
2080                 trace__process_event(trace, trace->host, event, sample);
2081                 return;
2082         }
2083 
2084         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2085         if (evsel == NULL) {
2086                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2087                 return;
2088         }
2089 
2090         trace__set_base_time(trace, evsel, sample);
2091 
2092         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2093             sample->raw_data == NULL) {
2094                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2095                        perf_evsel__name(evsel), sample->tid,
2096                        sample->cpu, sample->raw_size);
2097         } else {
2098                 tracepoint_handler handler = evsel->handler;
2099                 handler(trace, evsel, event, sample);
2100         }
2101 }
2102 
2103 static int trace__add_syscall_newtp(struct trace *trace)
2104 {
2105         int ret = -1;
2106         struct perf_evlist *evlist = trace->evlist;
2107         struct perf_evsel *sys_enter, *sys_exit;
2108 
2109         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2110         if (sys_enter == NULL)
2111                 goto out;
2112 
2113         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2114                 goto out_delete_sys_enter;
2115 
2116         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2117         if (sys_exit == NULL)
2118                 goto out_delete_sys_enter;
2119 
2120         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2121                 goto out_delete_sys_exit;
2122 
2123         perf_evlist__add(evlist, sys_enter);
2124         perf_evlist__add(evlist, sys_exit);
2125 
2126         if (callchain_param.enabled && !trace->kernel_syscallchains) {
2127                 /*
2128                  * We're interested only in the user space callchain
2129                  * leading to the syscall, allow overriding that for
2130                  * debugging reasons using --kernel_syscall_callchains
2131                  */
2132                 sys_exit->attr.exclude_callchain_kernel = 1;
2133         }
2134 
2135         trace->syscalls.events.sys_enter = sys_enter;
2136         trace->syscalls.events.sys_exit  = sys_exit;
2137 
2138         ret = 0;
2139 out:
2140         return ret;
2141 
2142 out_delete_sys_exit:
2143         perf_evsel__delete_priv(sys_exit);
2144 out_delete_sys_enter:
2145         perf_evsel__delete_priv(sys_enter);
2146         goto out;
2147 }
2148 
2149 static int trace__set_ev_qualifier_filter(struct trace *trace)
2150 {
2151         int err = -1;
2152         struct perf_evsel *sys_exit;
2153         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2154                                                 trace->ev_qualifier_ids.nr,
2155                                                 trace->ev_qualifier_ids.entries);
2156 
2157         if (filter == NULL)
2158                 goto out_enomem;
2159 
2160         if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2161                                           filter)) {
2162                 sys_exit = trace->syscalls.events.sys_exit;
2163                 err = perf_evsel__append_tp_filter(sys_exit, filter);
2164         }
2165 
2166         free(filter);
2167 out:
2168         return err;
2169 out_enomem:
2170         errno = ENOMEM;
2171         goto out;
2172 }
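/*
 * Editor's note (assumed shape of the generated string, not part of
 * builtin-trace.c): asprintf_expr_inout_ints() turns the validated id list
 * into a tracepoint filter on the "id" field, e.g. for "-e open,close"
 * something like "id == 2 || id == 3", or the negated
 * "id != 2 && id != 3" form when the qualifier starts with '!', and the
 * same filter is appended to both the sys_enter and sys_exit tracepoints.
 */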
2173 
2174 static int trace__run(struct trace *trace, int argc, const char **argv)
2175 {
2176         struct perf_evlist *evlist = trace->evlist;
2177         struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2178         int err = -1, i;
2179         unsigned long before;
2180         const bool forks = argc > 0;
2181         bool draining = false;
2182 
2183         trace->live = true;
2184 
2185         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2186                 goto out_error_raw_syscalls;
2187 
2188         if (trace->trace_syscalls)
2189                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2190 
2191         if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2192                 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2193                 if (pgfault_maj == NULL)
2194                         goto out_error_mem;
2195                 perf_evlist__add(evlist, pgfault_maj);
2196         }
2197 
2198         if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2199                 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2200                 if (pgfault_min == NULL)
2201                         goto out_error_mem;
2202                 perf_evlist__add(evlist, pgfault_min);
2203         }
2204 
2205         if (trace->sched &&
2206             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2207                                    trace__sched_stat_runtime))
2208                 goto out_error_sched_stat_runtime;
2209 
2210         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2211         if (err < 0) {
2212                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2213                 goto out_delete_evlist;
2214         }
2215 
2216         err = trace__symbols_init(trace, evlist);
2217         if (err < 0) {
2218                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2219                 goto out_delete_evlist;
2220         }
2221 
2222         perf_evlist__config(evlist, &trace->opts, NULL);
2223 
2224         if (callchain_param.enabled) {
2225                 bool use_identifier = false;
2226 
2227                 if (trace->syscalls.events.sys_exit) {
2228                         perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2229                                                      &trace->opts, &callchain_param);
2230                         use_identifier = true;
2231                 }
2232 
2233                 if (pgfault_maj) {
2234                         perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2235                         use_identifier = true;
2236                 }
2237 
2238                 if (pgfault_min) {
2239                         perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2240                         use_identifier = true;
2241                 }
2242 
2243                 if (use_identifier) {
2244                        /*
2245                         * Now we have evsels with different sample_ids, use
2246                         * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2247                         * from a fixed position in each ring buffer record.
2248                         *
2249                         * As of the changeset introducing this comment, this
2250                         * isn't strictly needed, as the fields that can come before
2251                         * PERF_SAMPLE_ID are all used, but we'll probably disable
2252                         * some of those for things like copying the payload of
2253                         * pointer syscall arguments, and for vfs_getname we don't
2254                         * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2255                         * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2256                         */
2257                         perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2258                         perf_evlist__reset_sample_bit(evlist, ID);
2259                 }
2260         }
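        /*
         * Editor's note (illustrative, not part of builtin-trace.c): with
         * PERF_SAMPLE_IDENTIFIER the sample id sits at a fixed offset at
         * the start of each record's sample data, so perf_evlist__id2evsel()
         * in trace__handle_event() can map a sample back to its evsel even
         * though the evsels configured above now carry different
         * sample_type bits.
         */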
2261 
2262         signal(SIGCHLD, sig_handler);
2263         signal(SIGINT, sig_handler);
2264 
2265         if (forks) {
2266                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2267                                                     argv, false, NULL);
2268                 if (err < 0) {
2269                         fprintf(trace->output, "Couldn't run the workload!\n");
2270                         goto out_delete_evlist;
2271                 }
2272         }
2273 
2274         err = perf_evlist__open(evlist);
2275         if (err < 0)
2276                 goto out_error_open;
2277 
2278         err = bpf__apply_obj_config();
2279         if (err) {
2280                 char errbuf[BUFSIZ];
2281 
2282                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2283                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2284                          errbuf);
2285                 goto out_error_open;
2286         }
2287 
2288         /*
2289          * Better not use !target__has_task() here because we need to cover the
2290          * case where no threads were specified in the command line, but a
2291          * workload was, and in that case we will fill in the thread_map when
2292          * we fork the workload in perf_evlist__prepare_workload.
2293          */
2294         if (trace->filter_pids.nr > 0)
2295                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2296         else if (thread_map__pid(evlist->threads, 0) == -1)
2297                 err = perf_evlist__set_filter_pid(evlist, getpid());
2298 
2299         if (err < 0)
2300                 goto out_error_mem;
2301 
2302         if (trace->ev_qualifier_ids.nr > 0) {
2303                 err = trace__set_ev_qualifier_filter(trace);
2304                 if (err < 0)
2305                         goto out_errno;
2306 
2307                 pr_debug("event qualifier tracepoint filter: %s\n",
2308                          trace->syscalls.events.sys_exit->filter);
2309         }
2310 
2311         err = perf_evlist__apply_filters(evlist, &evsel);
2312         if (err < 0)
2313                 goto out_error_apply_filters;
2314 
2315         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2316         if (err < 0)
2317                 goto out_error_mmap;
2318 
2319         if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2320                 perf_evlist__enable(evlist);
2321 
2322         if (forks)
2323                 perf_evlist__start_workload(evlist);
2324 
2325         if (trace->opts.initial_delay) {
2326                 usleep(trace->opts.initial_delay * 1000);
2327                 perf_evlist__enable(evlist);
2328         }
2329 
2330         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2331                                   evlist->threads->nr > 1 ||
2332                                   perf_evlist__first(evlist)->attr.inherit;
2333 again:
2334         before = trace->nr_events;
2335 
2336         for (i = 0; i < evlist->nr_mmaps; i++) {
2337                 union perf_event *event;
2338 
2339                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2340                         struct perf_sample sample;
2341 
2342                         ++trace->nr_events;
2343 
2344                         err = perf_evlist__parse_sample(evlist, event, &sample);
2345                         if (err) {
2346                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2347                                 goto next_event;
2348                         }
2349 
2350                         trace__handle_event(trace, event, &sample);
2351 next_event:
2352                         perf_evlist__mmap_consume(evlist, i);
2353 
2354                         if (interrupted)
2355                                 goto out_disable;
2356 
2357                         if (done && !draining) {
2358                                 perf_evlist__disable(evlist);
2359                                 draining = true;
2360                         }
2361                 }
2362         }
2363 
2364         if (trace->nr_events == before) {
2365                 int timeout = done ? 100 : -1;
2366 
2367                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2368                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2369                                 draining = true;
2370 
2371                         goto again;
2372                 }
2373         } else {
2374                 goto again;
2375         }
2376 
2377 out_disable:
2378         thread__zput(trace->current);
2379 
2380         perf_evlist__disable(evlist);
2381 
2382         if (!err) {
2383                 if (trace->summary)
2384                         trace__fprintf_thread_summary(trace, trace->output);
2385 
2386                 if (trace->show_tool_stats) {
2387                         fprintf(trace->output, "Stats:\n "
2388                                                " vfs_getname : %" PRIu64 "\n"
2389                                                " proc_getname: %" PRIu64 "\n",
2390                                 trace->stats.vfs_getname,
2391                                 trace->stats.proc_getname);
2392                 }
2393         }
2394 
2395 out_delete_evlist:
2396         perf_evlist__delete(evlist);
2397         trace->evlist = NULL;
2398         trace->live = false;
2399         return err;
2400 {
2401         char errbuf[BUFSIZ];
2402 
2403 out_error_sched_stat_runtime:
2404         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2405         goto out_error;
2406 
2407 out_error_raw_syscalls:
2408         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2409         goto out_error;
2410 
2411 out_error_mmap:
2412         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2413         goto out_error;
2414 
2415 out_error_open:
2416         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2417 
2418 out_error:
2419         fprintf(trace->output, "%s\n", errbuf);
2420         goto out_delete_evlist;
2421 
2422 out_error_apply_filters:
2423         fprintf(trace->output,
2424                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2425                 evsel->filter, perf_evsel__name(evsel), errno,
2426                 str_error_r(errno, errbuf, sizeof(errbuf)));
2427         goto out_delete_evlist;
2428 }
2429 out_error_mem:
2430         fprintf(trace->output, "Not enough memory to run!\n");
2431         goto out_delete_evlist;
2432 
2433 out_errno:
2434         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2435         goto out_delete_evlist;
2436 }
2437 
2438 static int trace__replay(struct trace *trace)
2439 {
2440         const struct perf_evsel_str_handler handlers[] = {
2441                 { "probe:vfs_getname",       trace__vfs_getname, },
2442         };
2443         struct perf_data_file file = {
2444                 .path  = input_name,
2445                 .mode  = PERF_DATA_MODE_READ,
2446                 .force = trace->force,
2447         };
2448         struct perf_session *session;
2449         struct perf_evsel *evsel;
2450         int err = -1;
2451 
2452         trace->tool.sample        = trace__process_sample;
2453         trace->tool.mmap          = perf_event__process_mmap;
2454         trace->tool.mmap2         = perf_event__process_mmap2;
2455         trace->tool.comm          = perf_event__process_comm;
2456         trace->tool.exit          = perf_event__process_exit;
2457         trace->tool.fork          = perf_event__process_fork;
2458         trace->tool.attr          = perf_event__process_attr;
2459         trace->tool.tracing_data  = perf_event__process_tracing_data;
2460         trace->tool.build_id      = perf_event__process_build_id;
2461         trace->tool.namespaces    = perf_event__process_namespaces;
2462 
2463         trace->tool.ordered_events = true;
2464         trace->tool.ordering_requires_timestamps = true;
2465 
2466         /* add tid to output */
2467         trace->multiple_threads = true;
2468 
2469         session = perf_session__new(&file, false, &trace->tool);
2470         if (session == NULL)
2471                 return -1;
2472 
2473         if (trace->opts.target.pid)
2474                 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2475 
2476         if (trace->opts.target.tid)
2477                 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2478 
2479         if (symbol__init(&session->header.env) < 0)
2480                 goto out;
2481 
2482         trace->host = &session->machines.host;
2483 
2484         err = perf_session__set_tracepoints_handlers(session, handlers);
2485         if (err)
2486                 goto out;
2487 
2488         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2489                                                      "raw_syscalls:sys_enter");
2490         /* older kernels have syscalls tp versus raw_syscalls */
2491         if (evsel == NULL)
2492                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2493                                                              "syscalls:sys_enter");
2494 
2495         if (evsel &&
2496             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2497             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2498                 pr_err("Error during initialization of the raw_syscalls:sys_enter event\n");
2499                 goto out;
2500         }
2501 
2502         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2503                                                      "raw_syscalls:sys_exit");
2504         if (evsel == NULL)
2505                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2506                                                              "syscalls:sys_exit");
2507         if (evsel &&
2508             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2509             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2510                 pr_err("Error during initialization of the raw_syscalls:sys_exit event\n");
2511                 goto out;
2512         }
2513 
2514         evlist__for_each_entry(session->evlist, evsel) {
2515                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2516                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2517                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2518                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2519                         evsel->handler = trace__pgfault;
2520         }
2521 
2522         setup_pager();
2523 
2524         err = perf_session__process_events(session);
2525         if (err)
2526                 pr_err("Failed to process events, error %d", err);
2527 
2528         else if (trace->summary)
2529                 trace__fprintf_thread_summary(trace, trace->output);
2530 
2531 out:
2532         perf_session__delete(session);
2533 
2534         return err;
2535 }
2536 
2537 static size_t trace__fprintf_threads_header(FILE *fp)
2538 {
2539         size_t printed;
2540 
2541         printed  = fprintf(fp, "\n Summary of events:\n\n");
2542 
2543         return printed;
2544 }
2545 
2546 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2547         struct stats    *stats;
2548         double          msecs;
2549         int             syscall;
2550 )
2551 {
2552         struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2553         struct stats *stats = source->priv;
2554 
2555         entry->syscall = source->i;
2556         entry->stats   = stats;
2557         entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2558 }
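/*
 * Editor's note (illustrative, not part of builtin-trace.c): the
 * DEFINE_RESORT_RB()/DECLARE_RESORT_RB_INTLIST() pair from rb_resort.h
 * builds a temporary rb tree re-sorted by the msecs comparison given above,
 * so thread__dump_stats() below walks the per-thread syscall_stats intlist
 * in time order rather than in syscall id order.
 */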
2559 
2560 static size_t thread__dump_stats(struct thread_trace *ttrace,
2561                                  struct trace *trace, FILE *fp)
2562 {
2563         size_t printed = 0;
2564         struct syscall *sc;
2565         struct rb_node *nd;
2566         DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2567 
2568         if (syscall_stats == NULL)
2569                 return 0;
2570 
2571         printed += fprintf(fp, "\n");
2572 
2573         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2574         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2575         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2576 
2577         resort_rb__for_each_entry(nd, syscall_stats) {
2578                 struct stats *stats = syscall_stats_entry->stats;
2579                 if (stats) {
2580                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2581                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2582                         double avg = avg_stats(stats);
2583                         double pct;
2584                         u64 n = (u64) stats->n;
2585 
2586                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2587                         avg /= NSEC_PER_MSEC;
2588 
2589                         sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2590                         printed += fprintf(fp, "   %-15s", sc->name);
2591                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2592                                            n, syscall_stats_entry->msecs, min, avg);
2593                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2594                 }
2595         }
2596 
2597         resort_rb__delete(syscall_stats);
2598         printed += fprintf(fp, "\n\n");
2599 
2600         return printed;
2601 }
2602 
2603 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2604 {
2605         size_t printed = 0;
2606         struct thread_trace *ttrace = thread__priv(thread);
2607         double ratio;
2608 
2609         if (ttrace == NULL)
2610                 return 0;
2611 
2612         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2613 
2614         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2615         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2616         printed += fprintf(fp, "%.1f%%", ratio);
2617         if (ttrace->pfmaj)
2618                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2619         if (ttrace->pfmin)
2620                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2621         if (trace->sched)
2622                 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2623         else if (fputc('\n', fp) != EOF)
2624                 ++printed;
2625 
2626         printed += thread__dump_stats(ttrace, trace, fp);
2627 
2628         return printed;
2629 }
2630 
2631 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2632 {
2633         return ttrace ? ttrace->nr_events : 0;
2634 }
2635 
2636 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2637         struct thread *thread;
2638 )
2639 {
2640         entry->thread = rb_entry(nd, struct thread, rb_node);
2641 }
2642 
2643 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2644 {
2645         DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2646         size_t printed = trace__fprintf_threads_header(fp);
2647         struct rb_node *nd;
2648 
2649         if (threads == NULL) {
2650                 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2651                 return 0;
2652         }
2653 
2654         resort_rb__for_each_entry(nd, threads)
2655                 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2656 
2657         resort_rb__delete(threads);
2658 
2659         return printed;
2660 }
2661 
2662 static int trace__set_duration(const struct option *opt, const char *str,
2663                                int unset __maybe_unused)
2664 {
2665         struct trace *trace = opt->value;
2666 
2667         trace->duration_filter = atof(str);
2668         return 0;
2669 }
2670 
2671 static int trace__set_filter_pids(const struct option *opt, const char *str,
2672                                   int unset __maybe_unused)
2673 {
2674         int ret = -1;
2675         size_t i;
2676         struct trace *trace = opt->value;
2677         /*
2678          * FIXME: introduce an intarray class, plain parse the csv and create a
2679          * { int nr, int entries[] } struct...
2680          */
2681         struct intlist *list = intlist__new(str);
2682 
2683         if (list == NULL)
2684                 return -1;
2685 
2686         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2687         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2688 
2689         if (trace->filter_pids.entries == NULL)
2690                 goto out;
2691 
2692         trace->filter_pids.entries[0] = getpid();
2693 
2694         for (i = 1; i < trace->filter_pids.nr; ++i)
2695                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2696 
2697         intlist__delete(list);
2698         ret = 0;
2699 out:
2700         return ret;
2701 }
2702 
2703 static int trace__open_output(struct trace *trace, const char *filename)
2704 {
2705         struct stat st;
2706 
2707         if (!stat(filename, &st) && st.st_size) {
2708                 char oldname[PATH_MAX];
2709 
2710                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2711                 unlink(oldname);
2712                 rename(filename, oldname);
2713         }
2714 
2715         trace->output = fopen(filename, "w");
2716 
2717         return trace->output == NULL ? -errno : 0;
2718 }
2719 
2720 static int parse_pagefaults(const struct option *opt, const char *str,
2721                             int unset __maybe_unused)
2722 {
2723         int *trace_pgfaults = opt->value;
2724 
2725         if (strcmp(str, "all") == 0)
2726                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2727         else if (strcmp(str, "maj") == 0)
2728                 *trace_pgfaults |= TRACE_PFMAJ;
2729         else if (strcmp(str, "min") == 0)
2730                 *trace_pgfaults |= TRACE_PFMIN;
2731         else
2732                 return -1;
2733 
2734         return 0;
2735 }
2736 
2737 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2738 {
2739         struct perf_evsel *evsel;
2740 
2741         evlist__for_each_entry(evlist, evsel)
2742                 evsel->handler = handler;
2743 }
2744 
2745 /*
2746  * XXX: Hackish, just splitting the combined -e/--event list (syscalls
2747  * (raw_syscalls:{sys_{enter,exit}}) + events (tracepoints, HW, SW, etc.)) to
2748  * use the existing facilities unchanged (trace->ev_qualifier + parse_options()).
2749  *
2750  * It'd be better to introduce a parse_options() variant that would return a
2751  * list with the terms it didn't match to an event...
2752  */
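     /*
      * For example, "-e open,close,sched:sched_switch" is split below into
      * lists[1] = "open,close" (names that resolve via syscalltbl__id() or as
      * a strace group file, later turned into trace->ev_qualifier) and
      * lists[0] = "sched:sched_switch", which is handed back to
      * parse_events_option() as an ordinary event selector.
      */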
2753 static int trace__parse_events_option(const struct option *opt, const char *str,
2754                                       int unset __maybe_unused)
2755 {
2756         struct trace *trace = (struct trace *)opt->value;
2757         const char *s = str;
2758         char *sep = NULL, *lists[2] = { NULL, NULL, };
2759         int len = strlen(str), err = -1, list;
2760         char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2761         char group_name[PATH_MAX];
2762 
2763         if (strace_groups_dir == NULL)
2764                 return -1;
2765 
2766         if (*s == '!') {
2767                 ++s;
2768                 trace->not_ev_qualifier = true;
2769         }
2770 
2771         while (1) {
2772                 if ((sep = strchr(s, ',')) != NULL)
2773                         *sep = '\0';
2774 
2775                 list = 0;
2776                 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2777                         list = 1;
2778                 } else {
2779                         path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2780                         if (access(group_name, R_OK) == 0)
2781                                 list = 1;
2782                 }
2783 
2784                 if (lists[list]) {
2785                         sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2786                 } else {
2787                         lists[list] = malloc(len + 1); /* worst case: all of str plus the NUL */
2788                         if (lists[list] == NULL)
2789                                 goto out;
2790                         strcpy(lists[list], s);
2791                 }
2792 
2793                 if (!sep)
2794                         break;
2795 
2796                 *sep = ',';
2797                 s = sep + 1;
2798         }
2799 
2800         if (lists[1] != NULL) {
2801                 struct strlist_config slist_config = {
2802                         .dirname = strace_groups_dir,
2803                 };
2804 
2805                 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2806                 if (trace->ev_qualifier == NULL) {
2807                         fputs("Not enough memory to parse event qualifier\n", trace->output);
2808                         goto out;
2809                 }
2810 
2811                 if (trace__validate_ev_qualifier(trace))
2812                         goto out;
2813         }
2814 
2815         err = 0;
2816 
2817         if (lists[0]) {
2818                 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2819                                                "event selector. use 'perf list' to list available events",
2820                                                parse_events_option);
2821                 err = parse_events_option(&o, lists[0], 0);
2822         }
2823 out:
2824         if (sep)
2825                 *sep = ',';
2826 
2827         return err;
2828 }
2829 
2830 int cmd_trace(int argc, const char **argv)
2831 {
2832         const char *trace_usage[] = {
2833                 "perf trace [<options>] [<command>]",
2834                 "perf trace [<options>] -- <command> [<options>]",
2835                 "perf trace record [<options>] [<command>]",
2836                 "perf trace record [<options>] -- <command> [<options>]",
2837                 NULL
2838         };
2839         struct trace trace = {
2840                 .syscalls = {
2841                         .max = -1,
2842                 },
2843                 .opts = {
2844                         .target = {
2845                                 .uid       = UINT_MAX,
2846                                 .uses_mmap = true,
2847                         },
2848                         .user_freq     = UINT_MAX,
2849                         .user_interval = ULLONG_MAX,
2850                         .no_buffering  = true,
2851                         .mmap_pages    = UINT_MAX,
2852                         .proc_map_timeout  = 500,
2853                 },
2854                 .output = stderr,
2855                 .show_comm = true,
2856                 .trace_syscalls = true,
2857                 .kernel_syscallchains = false,
2858                 .max_stack = UINT_MAX,
2859         };
2860         const char *output_name = NULL;
2861         const struct option trace_options[] = {
2862         OPT_CALLBACK('e', "event", &trace, "event",
2863                      "event/syscall selector. use 'perf list' to list available events",
2864                      trace__parse_events_option),
2865         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2866                     "show the thread COMM next to its id"),
2867         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2868         OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2869                      trace__parse_events_option),
2870         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2871         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2872         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2873                     "trace events on existing process id"),
2874         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2875                     "trace events on existing thread id"),
2876         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2877                      "pids to filter (by the kernel)", trace__set_filter_pids),
2878         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2879                     "system-wide collection from all CPUs"),
2880         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2881                     "list of cpus to monitor"),
2882         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2883                     "child tasks do not inherit counters"),
2884         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2885                      "number of mmap data pages",
2886                      perf_evlist__parse_mmap_pages),
2887         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2888                    "user to profile"),
2889         OPT_CALLBACK(0, "duration", &trace, "float",
2890                      "show only events with duration > N.M ms",
2891                      trace__set_duration),
2892         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2893         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2894         OPT_BOOLEAN('T', "time", &trace.full_time,
2895                     "Show full timestamp, not time relative to first start"),
2896         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2897                     "Show only syscall summary with statistics"),
2898         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2899                     "Show all syscalls and summary with statistics"),
2900         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2901                      "Trace pagefaults", parse_pagefaults, "maj"),
2902         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2903         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2904         OPT_CALLBACK(0, "call-graph", &trace.opts,
2905                      "record_mode[,record_size]", record_callchain_help,
2906                      &record_parse_callchain_opt),
2907         OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2908                     "Show the kernel callchains on the syscall exit path"),
2909         OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2910                      "Set the minimum stack depth when parsing the callchain, "
2911                      "anything below the specified depth will be ignored."),
2912         OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2913                      "Set the maximum stack depth when parsing the callchain, "
2914                      "anything beyond the specified depth will be ignored. "
2915                      "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2916         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2917                         "per thread proc mmap processing timeout in ms"),
2918         OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2919                      "ms to wait before starting measurement after program "
2920                      "start"),
2921         OPT_END()
2922         };
2923         bool __maybe_unused max_stack_user_set = true;
2924         bool mmap_pages_user_set = true;
2925         const char * const trace_subcommands[] = { "record", NULL };
2926         int err;
2927         char bf[BUFSIZ];
2928 
2929         signal(SIGSEGV, sighandler_dump_stack);
2930         signal(SIGFPE, sighandler_dump_stack);
2931 
2932         trace.evlist = perf_evlist__new();
2933         trace.sctbl = syscalltbl__new();
2934 
2935         if (trace.evlist == NULL || trace.sctbl == NULL) {
2936                 pr_err("Not enough memory to run!\n");
2937                 err = -ENOMEM;
2938                 goto out;
2939         }
2940 
2941         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2942                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2943 
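             /*
              * bpf__setup_stdout() wires up an event on the evlist so that
              * output from eBPF programs (e.g. scriptlets passed via -e foo.c)
              * can be collected alongside the other events.
              */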
2944         err = bpf__setup_stdout(trace.evlist);
2945         if (err) {
2946                 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2947                 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2948                 goto out;
2949         }
2950 
2951         err = -1;
2952 
2953         if (trace.trace_pgfaults) {
2954                 trace.opts.sample_address = true;
2955                 trace.opts.sample_time = true;
2956         }
2957 
2958         if (trace.opts.mmap_pages == UINT_MAX)
2959                 mmap_pages_user_set = false;
2960 
2961         if (trace.max_stack == UINT_MAX) {
2962                 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2963                 max_stack_user_set = false;
2964         }
2965 
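             /*
              * If --min-stack or --max-stack was given while tracing syscalls
              * but no --call-graph mode was chosen, default to DWARF based
              * callchains (when built with DWARF unwind support), as if
              * "--call-graph dwarf" had been passed.
              */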
2966 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2967         if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2968                 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2969 #endif
2970 
2971         if (callchain_param.enabled) {
2972                 if (!mmap_pages_user_set && geteuid() == 0)
2973                         trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2974 
2975                 symbol_conf.use_callchain = true;
2976         }
2977 
2978         if (trace.evlist->nr_entries > 0)
2979                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2980 
2981         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2982                 return trace__record(&trace, argc-1, &argv[1]);
2983 
2984         /* summary_only implies summary option, but don't overwrite summary if set */
2985         if (trace.summary_only)
2986                 trace.summary = trace.summary_only;
2987 
2988         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2989             trace.evlist->nr_entries == 0 /* Was -e/--event used? */) {
2990                 pr_err("Please specify something to trace.\n");
2991                 return -1;
2992         }
2993 
2994         if (!trace.trace_syscalls && trace.ev_qualifier) {
2995                 pr_err("The -e option can't be used with --no-syscalls.\n");
2996                 goto out;
2997         }
2998 
2999         if (output_name != NULL) {
3000                 err = trace__open_output(&trace, output_name);
3001                 if (err < 0) {
3002                         perror("failed to create output file");
3003                         goto out;
3004                 }
3005         }
3006 
3007         trace.open_id = syscalltbl__id(trace.sctbl, "open");
3008 
3009         err = target__validate(&trace.opts.target);
3010         if (err) {
3011                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3012                 fprintf(trace.output, "%s", bf);
3013                 goto out_close;
3014         }
3015 
3016         err = target__parse_uid(&trace.opts.target);
3017         if (err) {
3018                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3019                 fprintf(trace.output, "%s", bf);
3020                 goto out_close;
3021         }
3022 
3023         if (!argc && target__none(&trace.opts.target))
3024                 trace.opts.target.system_wide = true;
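             /* No workload and no explicit target: default to system wide tracing. */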
3025 
3026         if (input_name)
3027                 err = trace__replay(&trace);
3028         else
3029                 err = trace__run(&trace, argc, argv);
3030 
3031 out_close:
3032         if (output_name != NULL)
3033                 fclose(trace.output);
3034 out:
3035         return err;
3036 }
3037 
