
TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c


  1 /*
  2  * ring buffer based function tracer
  3  *
  4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  6  *
  7  * Originally taken from the RT patch by:
  8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
  9  *
 10  * Based on code from the latency_tracer, that is:
 11  *  Copyright (C) 2004-2006 Ingo Molnar
 12  *  Copyright (C) 2004 Nadia Yvette Chambers
 13  */
 14 #include <linux/ring_buffer.h>
 15 #include <generated/utsrelease.h>
 16 #include <linux/stacktrace.h>
 17 #include <linux/writeback.h>
 18 #include <linux/kallsyms.h>
 19 #include <linux/seq_file.h>
 20 #include <linux/notifier.h>
 21 #include <linux/irqflags.h>
 22 #include <linux/debugfs.h>
 23 #include <linux/tracefs.h>
 24 #include <linux/pagemap.h>
 25 #include <linux/hardirq.h>
 26 #include <linux/linkage.h>
 27 #include <linux/uaccess.h>
 28 #include <linux/vmalloc.h>
 29 #include <linux/ftrace.h>
 30 #include <linux/module.h>
 31 #include <linux/percpu.h>
 32 #include <linux/splice.h>
 33 #include <linux/kdebug.h>
 34 #include <linux/string.h>
 35 #include <linux/mount.h>
 36 #include <linux/rwsem.h>
 37 #include <linux/slab.h>
 38 #include <linux/ctype.h>
 39 #include <linux/init.h>
 40 #include <linux/poll.h>
 41 #include <linux/nmi.h>
 42 #include <linux/fs.h>
 43 #include <linux/trace.h>
 44 #include <linux/sched/rt.h>
 45 
 46 #include "trace.h"
 47 #include "trace_output.h"
 48 
 49 /*
 50  * On boot up, the ring buffer is set to the minimum size, so that
 51  * we do not waste memory on systems that are not using tracing.
 52  */
 53 bool ring_buffer_expanded;
 54 
 55 /*
 56  * We need to change this state when a selftest is running.
 57  * A selftest will peek into the ring buffer to count the
 58  * entries inserted during the selftest, although concurrent
 59  * insertions into the ring buffer, such as trace_printk(), could occur
 60  * at the same time, giving false positive or negative results.
 61  */
 62 static bool __read_mostly tracing_selftest_running;
 63 
 64 /*
 65  * If a tracer is running, we do not want to run SELFTEST.
 66  */
 67 bool __read_mostly tracing_selftest_disabled;
 68 
 69 /* Pipe tracepoints to printk */
 70 struct trace_iterator *tracepoint_print_iter;
 71 int tracepoint_printk;
 72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 73 
 74 /* For tracers that don't implement custom flags */
 75 static struct tracer_opt dummy_tracer_opt[] = {
 76         { }
 77 };
 78 
 79 static int
 80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 81 {
 82         return 0;
 83 }
 84 
 85 /*
 86  * To prevent the comm cache from being overwritten when no
 87  * tracing is active, only save the comm when a trace event
 88  * occurred.
 89  */
 90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 91 
 92 /*
 93  * Kill all tracing for good (never come back).
 94  * It is initialized to 1 but will be set to zero if the initialization
 95  * of the tracer is successful. That is the only place that sets
 96  * it back to zero.
 97  */
 98 static int tracing_disabled = 1;
 99 
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
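/*
 * For illustration, the dump can be enabled either at boot or at run
 * time (assuming the usual sysctl interface is available):
 *
 *      ftrace_dump_on_oops               (command line, dump all CPUs)
 *      ftrace_dump_on_oops=orig_cpu      (dump only the CPU that oopsed)
 *      echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */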
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129 
130 union trace_eval_map_item;
131 
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_eval_mutex);
142 
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155 
156 static union trace_eval_map_item *trace_eval_maps;
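/*
 * For illustration, a saved array of N maps is laid out roughly as:
 *
 *      [ head (mod, length = N) ][ map 0 ] ... [ map N-1 ][ tail (next, end) ]
 *
 * so a walker skips the head item, reads head.length map items, and then
 * follows tail.next to the next saved array, if any.
 */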
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183 
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188 
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
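/*
 * For illustration, the boot options above can be combined on a single
 * kernel command line; the tracer, option and clock names here are just
 * examples of valid values:
 *
 *      ftrace=function trace_options=sym-addr trace_clock=global alloc_snapshot
 */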
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276 
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286 
287         return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302 
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312 
313         return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338 
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360 
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363 
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382 
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388 
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392 
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415 
416         (*pos)++;
417 
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424 
425         return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443 
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447 
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466 
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
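/*
 * For illustration, these three helpers are meant to back a seq_file;
 * a user (the my_* names below are hypothetical) wraps start/next to
 * supply its own pid list and plugs trace_pid_show() in directly:
 *
 *      static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *      {
 *              return trace_pid_start(my_pid_list, pos);
 *      }
 *
 *      static const struct seq_operations my_pid_seq_ops = {
 *              .start  = my_pid_start,
 *              .next   = my_pid_next,
 *              .stop   = my_pid_stop,
 *              .show   = trace_pid_show,
 *      };
 */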
470 
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473 
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486 
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489 
490         /*
491          * Always create a new array. The write is an all-or-nothing
492          * operation: a new array is built whenever the user adds pids,
493          * and if the operation fails, the current list is left
494          * unmodified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499 
500         pid_list->pid_max = READ_ONCE(pid_max);
501 
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505 
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511 
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520 
521         while (cnt > 0) {
522 
523                 pos = 0;
524 
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528 
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532 
533                 parser.buffer[parser.idx] = 0;
534 
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540 
541                 pid = (pid_t)val;
542 
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545 
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550 
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555 
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562 
563         *new_pid_list = pid_list;
564 
565         return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571 
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575 
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579         return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so that it can be used in fast paths
592  * such as the irqsoff tracer, but it may be inaccurate due to races.
593  * If you need to know the accurate state, use tracing_is_on(), which
594  * is a little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value (16384 entries) so
613  * that, if the dump on oops happens, we do not have to wait for
614  * all that output. It is configurable at both boot time and run
615  * time anyway.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (which are returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662 
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665 
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684 
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
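/*
 * For illustration, a reader consuming events from one cpu buffer is
 * expected to bracket the access like this:
 *
 *      trace_access_lock(cpu);
 *      ... peek at or consume events of @cpu ...
 *      trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS takes exclusive access to all of
 * the cpu buffers at once.
 */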
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740 
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752 
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756 
757         return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794 
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819 
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822 
823         pc = preempt_count();
824 
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827 
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836 
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839 
840         memcpy(&entry->buf, str, size);
841 
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848 
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869 
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872 
873         pc = preempt_count();
874 
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877 
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884 
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888 
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
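/*
 * For illustration, these two are normally reached through the
 * trace_puts() helper rather than called directly:
 *
 *      trace_puts("reached the slow path\n");
 *
 * which resolves to __trace_bputs() for compile-time constant strings
 * and to __trace_puts() otherwise.
 */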
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901 
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907 
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914 
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921 
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944 
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
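/*
 * For illustration, a caller that has already allocated the snapshot
 * buffer (via tracing_snapshot_alloc() or
 * "echo 1 > /sys/kernel/debug/tracing/snapshot") can capture the trace
 * at an interesting moment with something like:
 *
 *      if (looks_broken(data))
 *              tracing_snapshot();
 *
 * where looks_broken() stands in for whatever condition is being
 * debugged.
 */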
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956 
957         if (!tr->allocated_snapshot) {
958 
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964 
965                 tr->allocated_snapshot = true;
966         }
967 
968         return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998 
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001 
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020 
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024 
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show the real state of the ring buffer
1087  * @tr: the trace array whose ring buffer state is checked
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
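/*
 * For illustration, a common debugging pattern is to stop the buffers
 * as soon as a problem is detected, so the events leading up to it are
 * preserved:
 *
 *      if (looks_corrupted(buf))
 *              tracing_off();
 *
 * looks_corrupted() stands in for the caller's own check, and
 * tracing_is_on() can later confirm whether the buffers were stopped.
 */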
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110 
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126 
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
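/*
 * For illustration, both options above are given on the kernel command
 * line; trace_buf_size accepts memparse() suffixes and tracing_thresh
 * is in microseconds:
 *
 *      trace_buf_size=10M tracing_thresh=100
 */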
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
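/*
 * For illustration, with TRACE_FLAGS defined in trace.h as a list of
 * C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"), ...
 * entries, redefining C(a, b) as just "b" expands the array above to:
 *
 *      { "print-parent", "sym-offset", ..., NULL };
 */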
1156 
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172 
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179 
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183 
1184         parser->size = size;
1185         return 0;
1186 }
1187 
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196 
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214 
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217 
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221 
1222         read++;
1223         cnt--;
1224 
1225         /*
1226          * If the parser did not finish with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238 
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245 
1246                 parser->idx = 0;
1247         }
1248 
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263 
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275 
1276         *ppos += read;
1277         ret = read;
1278 
1279 out:
1280         return ret;
1281 }
1282 
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287 
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290 
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295 
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299 
1300 unsigned long __read_mostly     tracing_thresh;
1301 
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315 
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318 
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322 
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333 
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337 
1338         /* record this tasks comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341 
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355 
1356         if (tr->stop_count)
1357                 return;
1358 
1359         WARN_ON_ONCE(!irqs_disabled());
1360 
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366 
1367         arch_spin_lock(&tr->max_lock);
1368 
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372 
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376 
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389 
1390         if (tr->stop_count)
1391                 return;
1392 
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399 
1400         arch_spin_lock(&tr->max_lock);
1401 
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403 
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414 
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416 
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421 
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427 
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431 
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434 
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439 
1440 static LIST_HEAD(postponed_selftests);
1441 
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445 
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449 
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454 
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460 
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463 
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471 
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480 
1481         tr->current_trace = type;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492 
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506 
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510 
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517 
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521 
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527 
1528         selftests_can_run = true;
1529 
1530         mutex_lock(&trace_types_lock);
1531 
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534 
1535         pr_info("Running postponed tracer tests:\n");
1536 
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555 
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558 
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568 
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570 
1571 static void __init apply_trace_boot_options(void);
1572 
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583 
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588 
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593 
1594         mutex_lock(&trace_types_lock);
1595 
1596         tracing_selftest_running = true;
1597 
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607 
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622 
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625 
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629 
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633 
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637 
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640 
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643 
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648 
1649         apply_trace_boot_options();
1650 
1651         /* disable other selftests, since running this tracer will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657 
1658  out_unlock:
1659         return ret;
1660 }
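
/*
 * Illustrative sketch (editor addition, not part of trace.c): the minimal
 * shape of a plugin tracer registration.  Only fields visible above
 * (.name, .flags, .set_flag) are relied on; a real tracer would normally
 * also provide callbacks such as .init/.reset, omitted here for brevity.
 */
static struct tracer example_nop_tracer __read_mostly = {
        .name   = "example_nop",
};

static int __init example_nop_tracer_init(void)
{
        return register_tracer(&example_nop_tracer);
}
core_initcall(example_nop_tracer_init);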
1661 
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665 
1666         if (!buffer)
1667                 return;
1668 
1669         ring_buffer_record_disable(buffer);
1670 
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674 
1675         ring_buffer_record_enable(buffer);
1676 }
1677 
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682 
1683         if (!buffer)
1684                 return;
1685 
1686         ring_buffer_record_disable(buffer);
1687 
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690 
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692 
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695 
1696         ring_buffer_record_enable(buffer);
1697 }
1698 
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703 
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 if (!tr->clear_trace)
1706                         continue;
1707                 tr->clear_trace = false;
1708                 tracing_reset_online_cpus(&tr->trace_buffer);
1709 #ifdef CONFIG_TRACER_MAX_TRACE
1710                 tracing_reset_online_cpus(&tr->max_buffer);
1711 #endif
1712         }
1713 }
1714 
1715 static int *tgid_map;
1716 
1717 #define SAVED_CMDLINES_DEFAULT 128
1718 #define NO_CMDLINE_MAP UINT_MAX
1719 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1720 struct saved_cmdlines_buffer {
1721         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1722         unsigned *map_cmdline_to_pid;
1723         unsigned cmdline_num;
1724         int cmdline_idx;
1725         char *saved_cmdlines;
1726 };
1727 static struct saved_cmdlines_buffer *savedcmd;
1728 
1729 /* temporary disable recording */
1730 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1731 
1732 static inline char *get_saved_cmdlines(int idx)
1733 {
1734         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1735 }
1736 
1737 static inline void set_cmdline(int idx, const char *cmdline)
1738 {
1739         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1740 }
1741 
1742 static int allocate_cmdlines_buffer(unsigned int val,
1743                                     struct saved_cmdlines_buffer *s)
1744 {
1745         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1746                                         GFP_KERNEL);
1747         if (!s->map_cmdline_to_pid)
1748                 return -ENOMEM;
1749 
1750         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1751         if (!s->saved_cmdlines) {
1752                 kfree(s->map_cmdline_to_pid);
1753                 return -ENOMEM;
1754         }
1755 
1756         s->cmdline_idx = 0;
1757         s->cmdline_num = val;
1758         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1759                sizeof(s->map_pid_to_cmdline));
1760         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1761                val * sizeof(*s->map_cmdline_to_pid));
1762 
1763         return 0;
1764 }
1765 
1766 static int trace_create_savedcmd(void)
1767 {
1768         int ret;
1769 
1770         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1771         if (!savedcmd)
1772                 return -ENOMEM;
1773 
1774         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1775         if (ret < 0) {
1776                 kfree(savedcmd);
1777                 savedcmd = NULL;
1778                 return -ENOMEM;
1779         }
1780 
1781         return 0;
1782 }
1783 
1784 int is_tracing_stopped(void)
1785 {
1786         return global_trace.stop_count;
1787 }
1788 
1789 /**
1790  * tracing_start - quick start of the tracer
1791  *
1792  * If tracing is enabled but was stopped by tracing_stop,
1793  * this will start the tracer back up.
1794  */
1795 void tracing_start(void)
1796 {
1797         struct ring_buffer *buffer;
1798         unsigned long flags;
1799 
1800         if (tracing_disabled)
1801                 return;
1802 
1803         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1804         if (--global_trace.stop_count) {
1805                 if (global_trace.stop_count < 0) {
1806                         /* Someone screwed up their debugging */
1807                         WARN_ON_ONCE(1);
1808                         global_trace.stop_count = 0;
1809                 }
1810                 goto out;
1811         }
1812 
1813         /* Prevent the buffers from switching */
1814         arch_spin_lock(&global_trace.max_lock);
1815 
1816         buffer = global_trace.trace_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         buffer = global_trace.max_buffer.buffer;
1822         if (buffer)
1823                 ring_buffer_record_enable(buffer);
1824 #endif
1825 
1826         arch_spin_unlock(&global_trace.max_lock);
1827 
1828  out:
1829         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1830 }
1831 
1832 static void tracing_start_tr(struct trace_array *tr)
1833 {
1834         struct ring_buffer *buffer;
1835         unsigned long flags;
1836 
1837         if (tracing_disabled)
1838                 return;
1839 
1840         /* If global, we need to also start the max tracer */
1841         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1842                 return tracing_start();
1843 
1844         raw_spin_lock_irqsave(&tr->start_lock, flags);
1845 
1846         if (--tr->stop_count) {
1847                 if (tr->stop_count < 0) {
1848                         /* Someone screwed up their debugging */
1849                         WARN_ON_ONCE(1);
1850                         tr->stop_count = 0;
1851                 }
1852                 goto out;
1853         }
1854 
1855         buffer = tr->trace_buffer.buffer;
1856         if (buffer)
1857                 ring_buffer_record_enable(buffer);
1858 
1859  out:
1860         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1861 }
1862 
1863 /**
1864  * tracing_stop - quick stop of the tracer
1865  *
1866  * Light weight way to stop tracing. Use in conjunction with
1867  * tracing_start.
1868  */
1869 void tracing_stop(void)
1870 {
1871         struct ring_buffer *buffer;
1872         unsigned long flags;
1873 
1874         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1875         if (global_trace.stop_count++)
1876                 goto out;
1877 
1878         /* Prevent the buffers from switching */
1879         arch_spin_lock(&global_trace.max_lock);
1880 
1881         buffer = global_trace.trace_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 
1885 #ifdef CONFIG_TRACER_MAX_TRACE
1886         buffer = global_trace.max_buffer.buffer;
1887         if (buffer)
1888                 ring_buffer_record_disable(buffer);
1889 #endif
1890 
1891         arch_spin_unlock(&global_trace.max_lock);
1892 
1893  out:
1894         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1895 }
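
/*
 * Usage sketch (illustration only): pairing tracing_stop() with
 * tracing_start() to freeze the global ring buffers around an inspection
 * step.  The stop_count handling above makes the pair nestable.
 */
static void __maybe_unused example_freeze_trace_and_inspect(void)
{
        tracing_stop();         /* recording off for the global buffers */

        /* ... read or copy the interesting trace data here ... */

        tracing_start();        /* resumes once the stop counts balance */
}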
1896 
1897 static void tracing_stop_tr(struct trace_array *tr)
1898 {
1899         struct ring_buffer *buffer;
1900         unsigned long flags;
1901 
1902         /* If global, we need to also stop the max tracer */
1903         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1904                 return tracing_stop();
1905 
1906         raw_spin_lock_irqsave(&tr->start_lock, flags);
1907         if (tr->stop_count++)
1908                 goto out;
1909 
1910         buffer = tr->trace_buffer.buffer;
1911         if (buffer)
1912                 ring_buffer_record_disable(buffer);
1913 
1914  out:
1915         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1916 }
1917 
1918 static int trace_save_cmdline(struct task_struct *tsk)
1919 {
1920         unsigned pid, idx;
1921 
1922         /* treat recording of idle task as a success */
1923         if (!tsk->pid)
1924                 return 1;
1925 
1926         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1927                 return 0;
1928 
1929         /*
1930          * It's not the end of the world if we don't get
1931          * the lock, but we also don't want to spin
1932          * nor do we want to disable interrupts,
1933          * so if we miss here, then better luck next time.
1934          */
1935         if (!arch_spin_trylock(&trace_cmdline_lock))
1936                 return 0;
1937 
1938         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1939         if (idx == NO_CMDLINE_MAP) {
1940                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1941 
1942                 /*
1943                  * Check whether the cmdline buffer at idx has a pid
1944                  * mapped. We are going to overwrite that entry so we
1945                  * need to clear the map_pid_to_cmdline. Otherwise we
1946                  * would read the new comm for the old pid.
1947                  */
1948                 pid = savedcmd->map_cmdline_to_pid[idx];
1949                 if (pid != NO_CMDLINE_MAP)
1950                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1951 
1952                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1953                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1954 
1955                 savedcmd->cmdline_idx = idx;
1956         }
1957 
1958         set_cmdline(idx, tsk->comm);
1959 
1960         arch_spin_unlock(&trace_cmdline_lock);
1961 
1962         return 1;
1963 }
1964 
1965 static void __trace_find_cmdline(int pid, char comm[])
1966 {
1967         unsigned map;
1968 
1969         if (!pid) {
1970                 strcpy(comm, "<idle>");
1971                 return;
1972         }
1973 
1974         if (WARN_ON_ONCE(pid < 0)) {
1975                 strcpy(comm, "<XXX>");
1976                 return;
1977         }
1978 
1979         if (pid > PID_MAX_DEFAULT) {
1980                 strcpy(comm, "<...>");
1981                 return;
1982         }
1983 
1984         map = savedcmd->map_pid_to_cmdline[pid];
1985         if (map != NO_CMDLINE_MAP)
1986                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1987         else
1988                 strcpy(comm, "<...>");
1989 }
1990 
1991 void trace_find_cmdline(int pid, char comm[])
1992 {
1993         preempt_disable();
1994         arch_spin_lock(&trace_cmdline_lock);
1995 
1996         __trace_find_cmdline(pid, comm);
1997 
1998         arch_spin_unlock(&trace_cmdline_lock);
1999         preempt_enable();
2000 }
2001 
2002 int trace_find_tgid(int pid)
2003 {
2004         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2005                 return 0;
2006 
2007         return tgid_map[pid];
2008 }
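
/*
 * Usage sketch: turning a recorded pid back into a comm and tgid with the
 * lookup helpers above.  The comm buffer must be TASK_COMM_LEN bytes, as
 * required by __trace_find_cmdline().
 */
static void __maybe_unused example_print_task_info(int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        pr_info("pid %d: comm=%s tgid=%d\n", pid, comm, trace_find_tgid(pid));
}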
2009 
2010 static int trace_save_tgid(struct task_struct *tsk)
2011 {
2012         /* treat recording of idle task as a success */
2013         if (!tsk->pid)
2014                 return 1;
2015 
2016         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2017                 return 0;
2018 
2019         tgid_map[tsk->pid] = tsk->tgid;
2020         return 1;
2021 }
2022 
2023 static bool tracing_record_taskinfo_skip(int flags)
2024 {
2025         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2026                 return true;
2027         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2028                 return true;
2029         if (!__this_cpu_read(trace_taskinfo_save))
2030                 return true;
2031         return false;
2032 }
2033 
2034 /**
2035  * tracing_record_taskinfo - record the task info of a task
2036  *
2037  * @task:  task to record
2038  * @flags: TRACE_RECORD_CMDLINE for recording comm
2039  *         TRACE_RECORD_TGID for recording tgid
2040  */
2041 void tracing_record_taskinfo(struct task_struct *task, int flags)
2042 {
2043         bool done;
2044 
2045         if (tracing_record_taskinfo_skip(flags))
2046                 return;
2047 
2048         /*
2049          * Record as much task information as possible. If some fail, continue
2050          * to try to record the others.
2051          */
2052         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2053         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2054 
2055         /* If recording any information failed, retry again soon. */
2056         if (!done)
2057                 return;
2058 
2059         __this_cpu_write(trace_taskinfo_save, false);
2060 }
2061 
2062 /**
2063  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2064  *
2065  * @prev:  previous task during sched_switch
2066  * @next:  next task during sched_switch
2067  * @flags: TRACE_RECORD_CMDLINE for recording comm
2068  *          TRACE_RECORD_TGID for recording tgid
2069  */
2070 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2071                                           struct task_struct *next, int flags)
2072 {
2073         bool done;
2074 
2075         if (tracing_record_taskinfo_skip(flags))
2076                 return;
2077 
2078         /*
2079          * Record as much task information as possible. If some fail, continue
2080          * to try to record the others.
2081          */
2082         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2083         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2084         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2085         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2086 
2087         /* If recording any information failed, retry again soon. */
2088         if (!done)
2089                 return;
2090 
2091         __this_cpu_write(trace_taskinfo_save, false);
2092 }
2093 
2094 /* Helpers to record a specific task information */
2095 void tracing_record_cmdline(struct task_struct *task)
2096 {
2097         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2098 }
2099 
2100 void tracing_record_tgid(struct task_struct *task)
2101 {
2102         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2103 }
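
/*
 * Usage sketch: a simplified version of the sched_switch probe found in
 * trace_sched_switch.c, feeding the helpers above.  In the real probe the
 * flags depend on which record_* reference counts are currently enabled;
 * here both are assumed on.
 */
static void __maybe_unused
example_sched_switch_probe(void *ignore, bool preempt,
                           struct task_struct *prev, struct task_struct *next)
{
        int flags = TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID;

        tracing_record_taskinfo_sched_switch(prev, next, flags);
}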
2104 
2105 /*
2106  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2107  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2108  * simplifies those functions and keeps them in sync.
2109  */
2110 enum print_line_t trace_handle_return(struct trace_seq *s)
2111 {
2112         return trace_seq_has_overflowed(s) ?
2113                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2114 }
2115 EXPORT_SYMBOL_GPL(trace_handle_return);
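
/*
 * Usage sketch: the pattern trace_handle_return() exists to simplify.  An
 * event's output callback (see the funcs->trace() call further below)
 * writes into iter->seq and reports overflow via this helper.  The event
 * format printed here is purely illustrative.
 */
static enum print_line_t __maybe_unused
example_trace_output(struct trace_iterator *iter, int flags,
                     struct trace_event *event)
{
        trace_seq_printf(&iter->seq, "example event on cpu %d\n", iter->cpu);

        return trace_handle_return(&iter->seq);
}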
2116 
2117 void
2118 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2119                              int pc)
2120 {
2121         struct task_struct *tsk = current;
2122 
2123         entry->preempt_count            = pc & 0xff;
2124         entry->pid                      = (tsk) ? tsk->pid : 0;
2125         entry->flags =
2126 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2127                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2128 #else
2129                 TRACE_FLAG_IRQS_NOSUPPORT |
2130 #endif
2131                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2132                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2133                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2134                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2135                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2136 }
2137 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
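
/*
 * Illustration: decoding one of the flag bits packed above, the way the
 * latency-format output in trace_output.c does ('X' means the architecture
 * provides no irq-flags support).  Sketch only.
 */
static char __maybe_unused example_irqs_off_char(struct trace_entry *entry)
{
        if (entry->flags & TRACE_FLAG_IRQS_OFF)
                return 'd';
        if (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT)
                return 'X';
        return '.';
}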
2138 
2139 struct ring_buffer_event *
2140 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2141                           int type,
2142                           unsigned long len,
2143                           unsigned long flags, int pc)
2144 {
2145         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2146 }
2147 
2148 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2149 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2150 static int trace_buffered_event_ref;
2151 
2152 /**
2153  * trace_buffered_event_enable - enable buffering events
2154  *
2155  * When events are being filtered, it is quicker to use a temporary
2156  * buffer to write the event data into if there's a likely chance
2157  * that it will not be committed. The discard of the ring buffer
2158  * is not as fast as committing, and is much slower than copying
2159  * a commit.
2160  *
2161  * When an event is to be filtered, per cpu buffers are allocated to
2162  * write the event data into. If the event is filtered and discarded,
2163  * it is simply dropped; otherwise, the entire data is committed
2164  * in one shot.
2165  */
2166 void trace_buffered_event_enable(void)
2167 {
2168         struct ring_buffer_event *event;
2169         struct page *page;
2170         int cpu;
2171 
2172         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2173 
2174         if (trace_buffered_event_ref++)
2175                 return;
2176 
2177         for_each_tracing_cpu(cpu) {
2178                 page = alloc_pages_node(cpu_to_node(cpu),
2179                                         GFP_KERNEL | __GFP_NORETRY, 0);
2180                 if (!page)
2181                         goto failed;
2182 
2183                 event = page_address(page);
2184                 memset(event, 0, sizeof(*event));
2185 
2186                 per_cpu(trace_buffered_event, cpu) = event;
2187 
2188                 preempt_disable();
2189                 if (cpu == smp_processor_id() &&
2190                     this_cpu_read(trace_buffered_event) !=
2191                     per_cpu(trace_buffered_event, cpu))
2192                         WARN_ON_ONCE(1);
2193                 preempt_enable();
2194         }
2195 
2196         return;
2197  failed:
2198         trace_buffered_event_disable();
2199 }
2200 
2201 static void enable_trace_buffered_event(void *data)
2202 {
2203         /* Probably not needed, but do it anyway */
2204         smp_rmb();
2205         this_cpu_dec(trace_buffered_event_cnt);
2206 }
2207 
2208 static void disable_trace_buffered_event(void *data)
2209 {
2210         this_cpu_inc(trace_buffered_event_cnt);
2211 }
2212 
2213 /**
2214  * trace_buffered_event_disable - disable buffering events
2215  *
2216  * When a filter is removed, it is faster to not use the buffered
2217  * events, and to commit directly into the ring buffer. Free up
2218  * the temp buffers when there are no more users. This requires
2219  * special synchronization with current events.
2220  */
2221 void trace_buffered_event_disable(void)
2222 {
2223         int cpu;
2224 
2225         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2226 
2227         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2228                 return;
2229 
2230         if (--trace_buffered_event_ref)
2231                 return;
2232 
2233         preempt_disable();
2234         /* For each CPU, set the buffer as used. */
2235         smp_call_function_many(tracing_buffer_mask,
2236                                disable_trace_buffered_event, NULL, 1);
2237         preempt_enable();
2238 
2239         /* Wait for all current users to finish */
2240         synchronize_sched();
2241 
2242         for_each_tracing_cpu(cpu) {
2243                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2244                 per_cpu(trace_buffered_event, cpu) = NULL;
2245         }
2246         /*
2247          * Make sure trace_buffered_event is NULL before clearing
2248          * trace_buffered_event_cnt.
2249          */
2250         smp_wmb();
2251 
2252         preempt_disable();
2253         /* Do the work on each cpu */
2254         smp_call_function_many(tracing_buffer_mask,
2255                                enable_trace_buffered_event, NULL, 1);
2256         preempt_enable();
2257 }
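
/*
 * Usage sketch: the buffered-event helpers are reference counted and, per
 * the WARN_ON_ONCE() checks above, must run under event_mutex.  A
 * hypothetical caller toggling event filtering might do:
 */
static void __maybe_unused example_toggle_event_buffering(bool enable)
{
        mutex_lock(&event_mutex);
        if (enable)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}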
2258 
2259 static struct ring_buffer *temp_buffer;
2260 
2261 struct ring_buffer_event *
2262 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2263                           struct trace_event_file *trace_file,
2264                           int type, unsigned long len,
2265                           unsigned long flags, int pc)
2266 {
2267         struct ring_buffer_event *entry;
2268         int val;
2269 
2270         *current_rb = trace_file->tr->trace_buffer.buffer;
2271 
2272         if ((trace_file->flags &
2273              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2274             (entry = this_cpu_read(trace_buffered_event))) {
2275                 /* Try to use the per cpu buffer first */
2276                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2277                 if (val == 1) {
2278                         trace_event_setup(entry, type, flags, pc);
2279                         entry->array[0] = len;
2280                         return entry;
2281                 }
2282                 this_cpu_dec(trace_buffered_event_cnt);
2283         }
2284 
2285         entry = __trace_buffer_lock_reserve(*current_rb,
2286                                             type, len, flags, pc);
2287         /*
2288          * If tracing is off, but we have triggers enabled
2289          * we still need to look at the event data. Use the temp_buffer
2290          * to store the trace event for the trigger to use. It is
2291          * recursion safe and will not be recorded anywhere.
2292          */
2293         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2294                 *current_rb = temp_buffer;
2295                 entry = __trace_buffer_lock_reserve(*current_rb,
2296                                                     type, len, flags, pc);
2297         }
2298         return entry;
2299 }
2300 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2301 
2302 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2303 static DEFINE_MUTEX(tracepoint_printk_mutex);
2304 
2305 static void output_printk(struct trace_event_buffer *fbuffer)
2306 {
2307         struct trace_event_call *event_call;
2308         struct trace_event *event;
2309         unsigned long flags;
2310         struct trace_iterator *iter = tracepoint_print_iter;
2311 
2312         /* We should never get here if iter is NULL */
2313         if (WARN_ON_ONCE(!iter))
2314                 return;
2315 
2316         event_call = fbuffer->trace_file->event_call;
2317         if (!event_call || !event_call->event.funcs ||
2318             !event_call->event.funcs->trace)
2319                 return;
2320 
2321         event = &fbuffer->trace_file->event_call->event;
2322 
2323         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2324         trace_seq_init(&iter->seq);
2325         iter->ent = fbuffer->entry;
2326         event_call->event.funcs->trace(iter, 0, event);
2327         trace_seq_putc(&iter->seq, 0);
2328         printk("%s", iter->seq.buffer);
2329 
2330         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2331 }
2332 
2333 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2334                              void __user *buffer, size_t *lenp,
2335                              loff_t *ppos)
2336 {
2337         int save_tracepoint_printk;
2338         int ret;
2339 
2340         mutex_lock(&tracepoint_printk_mutex);
2341         save_tracepoint_printk = tracepoint_printk;
2342 
2343         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2344 
2345         /*
2346          * This will force exiting early, as tracepoint_printk
2347          * is always zero when tracepoint_print_iter is not allocated.
2348          */
2349         if (!tracepoint_print_iter)
2350                 tracepoint_printk = 0;
2351 
2352         if (save_tracepoint_printk == tracepoint_printk)
2353                 goto out;
2354 
2355         if (tracepoint_printk)
2356                 static_key_enable(&tracepoint_printk_key.key);
2357         else
2358                 static_key_disable(&tracepoint_printk_key.key);
2359 
2360  out:
2361         mutex_unlock(&tracepoint_printk_mutex);
2362 
2363         return ret;
2364 }
2365 
2366 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2367 {
2368         if (static_key_false(&tracepoint_printk_key.key))
2369                 output_printk(fbuffer);
2370 
2371         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2372                                     fbuffer->event, fbuffer->entry,
2373                                     fbuffer->flags, fbuffer->pc);
2374 }
2375 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2376 
2377 /*
2378  * Skip 3:
2379  *
2380  *   trace_buffer_unlock_commit_regs()
2381  *   trace_event_buffer_commit()
2382  *   trace_event_raw_event_xxx()
2383  */
2384 # define STACK_SKIP 3
2385 
2386 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2387                                      struct ring_buffer *buffer,
2388                                      struct ring_buffer_event *event,
2389                                      unsigned long flags, int pc,
2390                                      struct pt_regs *regs)
2391 {
2392         __buffer_unlock_commit(buffer, event);
2393 
2394         /*
2395          * If regs is not set, then skip the necessary functions.
2396          * Note, we can still get here via blktrace, wakeup tracer
2397          * and mmiotrace, but that's ok if they lose a function or
2398          * two. They are not that meaningful.
2399          */
2400         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2401         ftrace_trace_userstack(buffer, flags, pc);
2402 }
2403 
2404 /*
2405  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2406  */
2407 void
2408 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2409                                    struct ring_buffer_event *event)
2410 {
2411         __buffer_unlock_commit(buffer, event);
2412 }
2413 
2414 static void
2415 trace_process_export(struct trace_export *export,
2416                struct ring_buffer_event *event)
2417 {
2418         struct trace_entry *entry;
2419         unsigned int size = 0;
2420 
2421         entry = ring_buffer_event_data(event);
2422         size = ring_buffer_event_length(event);
2423         export->write(export, entry, size);
2424 }
2425 
2426 static DEFINE_MUTEX(ftrace_export_lock);
2427 
2428 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2429 
2430 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2431 
2432 static inline void ftrace_exports_enable(void)
2433 {
2434         static_branch_enable(&ftrace_exports_enabled);
2435 }
2436 
2437 static inline void ftrace_exports_disable(void)
2438 {
2439         static_branch_disable(&ftrace_exports_enabled);
2440 }
2441 
2442 void ftrace_exports(struct ring_buffer_event *event)
2443 {
2444         struct trace_export *export;
2445 
2446         preempt_disable_notrace();
2447 
2448         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2449         while (export) {
2450                 trace_process_export(export, event);
2451                 export = rcu_dereference_raw_notrace(export->next);
2452         }
2453 
2454         preempt_enable_notrace();
2455 }
2456 
2457 static inline void
2458 add_trace_export(struct trace_export **list, struct trace_export *export)
2459 {
2460         rcu_assign_pointer(export->next, *list);
2461         /*
2462          * We are entering export into the list but another
2463          * CPU might be walking that list. We need to make sure
2464          * the export->next pointer is valid before another CPU sees
2465          * the export pointer inserted into the list.
2466          */
2467         rcu_assign_pointer(*list, export);
2468 }
2469 
2470 static inline int
2471 rm_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473         struct trace_export **p;
2474 
2475         for (p = list; *p != NULL; p = &(*p)->next)
2476                 if (*p == export)
2477                         break;
2478 
2479         if (*p != export)
2480                 return -1;
2481 
2482         rcu_assign_pointer(*p, (*p)->next);
2483 
2484         return 0;
2485 }
2486 
2487 static inline void
2488 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2489 {
2490         if (*list == NULL)
2491                 ftrace_exports_enable();
2492 
2493         add_trace_export(list, export);
2494 }
2495 
2496 static inline int
2497 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499         int ret;
2500 
2501         ret = rm_trace_export(list, export);
2502         if (*list == NULL)
2503                 ftrace_exports_disable();
2504 
2505         return ret;
2506 }
2507 
2508 int register_ftrace_export(struct trace_export *export)
2509 {
2510         if (WARN_ON_ONCE(!export->write))
2511                 return -1;
2512 
2513         mutex_lock(&ftrace_export_lock);
2514 
2515         add_ftrace_export(&ftrace_exports_list, export);
2516 
2517         mutex_unlock(&ftrace_export_lock);
2518 
2519         return 0;
2520 }
2521 EXPORT_SYMBOL_GPL(register_ftrace_export);
2522 
2523 int unregister_ftrace_export(struct trace_export *export)
2524 {
2525         int ret;
2526 
2527         mutex_lock(&ftrace_export_lock);
2528 
2529         ret = rm_ftrace_export(&ftrace_exports_list, export);
2530 
2531         mutex_unlock(&ftrace_export_lock);
2532 
2533         return ret;
2534 }
2535 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
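
/*
 * Illustrative sketch: a minimal ftrace export.  The ->write() prototype is
 * assumed from include/linux/trace.h in this kernel (it receives the raw
 * entry and its length, matching trace_process_export() above).  The body
 * only accounts bytes; a real export would hand the entry to its transport.
 */
static atomic64_t example_exported_bytes = ATOMIC64_INIT(0);

static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        atomic64_add(size, &example_exported_bytes);
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

static int __init example_export_init(void)
{
        return register_ftrace_export(&example_export);
}
late_initcall(example_export_init);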
2536 
2537 void
2538 trace_function(struct trace_array *tr,
2539                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2540                int pc)
2541 {
2542         struct trace_event_call *call = &event_function;
2543         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2544         struct ring_buffer_event *event;
2545         struct ftrace_entry *entry;
2546 
2547         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2548                                             flags, pc);
2549         if (!event)
2550                 return;
2551         entry   = ring_buffer_event_data(event);
2552         entry->ip                       = ip;
2553         entry->parent_ip                = parent_ip;
2554 
2555         if (!call_filter_check_discard(call, entry, buffer, event)) {
2556                 if (static_branch_unlikely(&ftrace_exports_enabled))
2557                         ftrace_exports(event);
2558                 __buffer_unlock_commit(buffer, event);
2559         }
2560 }
2561 
2562 #ifdef CONFIG_STACKTRACE
2563 
2564 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2565 struct ftrace_stack {
2566         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2567 };
2568 
2569 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2570 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2571 
2572 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2573                                  unsigned long flags,
2574                                  int skip, int pc, struct pt_regs *regs)
2575 {
2576         struct trace_event_call *call = &event_kernel_stack;
2577         struct ring_buffer_event *event;
2578         struct stack_entry *entry;
2579         struct stack_trace trace;
2580         int use_stack;
2581         int size = FTRACE_STACK_ENTRIES;
2582 
2583         trace.nr_entries        = 0;
2584         trace.skip              = skip;
2585 
2586         /*
2587          * Add one, for this function and the call to save_stack_trace().
2588          * If regs is set, then these functions will not be in the way.
2589          */
2590 #ifndef CONFIG_UNWINDER_ORC
2591         if (!regs)
2592                 trace.skip++;
2593 #endif
2594 
2595         /*
2596          * Since events can happen in NMIs, there's no safe way to
2597          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2598          * or NMI comes in, it will just have to use the default
2599          * FTRACE_STACK_ENTRIES.
2600          */
2601         preempt_disable_notrace();
2602 
2603         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2604         /*
2605          * We don't need any atomic variables, just a barrier.
2606          * If an interrupt comes in, we don't care, because it would
2607          * have exited and put the counter back to what we want.
2608          * We just need a barrier to keep gcc from moving things
2609          * around.
2610          */
2611         barrier();
2612         if (use_stack == 1) {
2613                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2614                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2615 
2616                 if (regs)
2617                         save_stack_trace_regs(regs, &trace);
2618                 else
2619                         save_stack_trace(&trace);
2620 
2621                 if (trace.nr_entries > size)
2622                         size = trace.nr_entries;
2623         } else
2624                 /* From now on, use_stack is a boolean */
2625                 use_stack = 0;
2626 
2627         size *= sizeof(unsigned long);
2628 
2629         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2630                                             sizeof(*entry) + size, flags, pc);
2631         if (!event)
2632                 goto out;
2633         entry = ring_buffer_event_data(event);
2634 
2635         memset(&entry->caller, 0, size);
2636 
2637         if (use_stack)
2638                 memcpy(&entry->caller, trace.entries,
2639                        trace.nr_entries * sizeof(unsigned long));
2640         else {
2641                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2642                 trace.entries           = entry->caller;
2643                 if (regs)
2644                         save_stack_trace_regs(regs, &trace);
2645                 else
2646                         save_stack_trace(&trace);
2647         }
2648 
2649         entry->size = trace.nr_entries;
2650 
2651         if (!call_filter_check_discard(call, entry, buffer, event))
2652                 __buffer_unlock_commit(buffer, event);
2653 
2654  out:
2655         /* Again, don't let gcc optimize things here */
2656         barrier();
2657         __this_cpu_dec(ftrace_stack_reserve);
2658         preempt_enable_notrace();
2659 
2660 }
2661 
2662 static inline void ftrace_trace_stack(struct trace_array *tr,
2663                                       struct ring_buffer *buffer,
2664                                       unsigned long flags,
2665                                       int skip, int pc, struct pt_regs *regs)
2666 {
2667         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2668                 return;
2669 
2670         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2671 }
2672 
2673 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2674                    int pc)
2675 {
2676         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2677 
2678         if (rcu_is_watching()) {
2679                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2680                 return;
2681         }
2682 
2683         /*
2684          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2685          * but if the above rcu_is_watching() failed, then the NMI
2686          * triggered someplace critical, and rcu_irq_enter() should
2687          * not be called from NMI.
2688          */
2689         if (unlikely(in_nmi()))
2690                 return;
2691 
2692         /*
2693          * It is possible that a function is being traced in a
2694          * location that RCU is not watching. A call to
2695          * rcu_irq_enter() will make sure that it is, but there are
2696          * a few internal rcu functions that could be traced
2697          * where that won't work either. In those cases, we just
2698          * do nothing.
2699          */
2700         if (unlikely(rcu_irq_enter_disabled()))
2701                 return;
2702 
2703         rcu_irq_enter_irqson();
2704         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2705         rcu_irq_exit_irqson();
2706 }
2707 
2708 /**
2709  * trace_dump_stack - record a stack back trace in the trace buffer
2710  * @skip: Number of functions to skip (helper handlers)
2711  */
2712 void trace_dump_stack(int skip)
2713 {
2714         unsigned long flags;
2715 
2716         if (tracing_disabled || tracing_selftest_running)
2717                 return;
2718 
2719         local_save_flags(flags);
2720 
2721 #ifndef CONFIG_UNWINDER_ORC
2722         /* Skip 1 to skip this function. */
2723         skip++;
2724 #endif
2725         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2726                              flags, skip, preempt_count(), NULL);
2727 }
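
/*
 * Usage sketch: recording the caller's backtrace into the trace buffer at
 * an interesting point.  skip=0 keeps everything from the caller upward;
 * trace_dump_stack() already compensates for its own frame above.
 */
static void __maybe_unused example_report_unexpected_state(void)
{
        pr_warn("example: unexpected state, capturing stack in trace buffer\n");
        trace_dump_stack(0);
}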
2728 
2729 static DEFINE_PER_CPU(int, user_stack_count);
2730 
2731 void
2732 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2733 {
2734         struct trace_event_call *call = &event_user_stack;
2735         struct ring_buffer_event *event;
2736         struct userstack_entry *entry;
2737         struct stack_trace trace;
2738 
2739         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2740                 return;
2741 
2742         /*
2743          * NMIs cannot handle page faults, even with fixups.
2744          * Saving the user stack can (and often does) fault.
2745          */
2746         if (unlikely(in_nmi()))
2747                 return;
2748 
2749         /*
2750          * prevent recursion, since the user stack tracing may
2751          * trigger other kernel events.
2752          */
2753         preempt_disable();
2754         if (__this_cpu_read(user_stack_count))
2755                 goto out;
2756 
2757         __this_cpu_inc(user_stack_count);
2758 
2759         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2760                                             sizeof(*entry), flags, pc);
2761         if (!event)
2762                 goto out_drop_count;
2763         entry   = ring_buffer_event_data(event);
2764 
2765         entry->tgid             = current->tgid;
2766         memset(&entry->caller, 0, sizeof(entry->caller));
2767 
2768         trace.nr_entries        = 0;
2769         trace.max_entries       = FTRACE_STACK_ENTRIES;
2770         trace.skip              = 0;
2771         trace.entries           = entry->caller;
2772 
2773         save_stack_trace_user(&trace);
2774         if (!call_filter_check_discard(call, entry, buffer, event))
2775                 __buffer_unlock_commit(buffer, event);
2776 
2777  out_drop_count:
2778         __this_cpu_dec(user_stack_count);
2779  out:
2780         preempt_enable();
2781 }
2782 
2783 #ifdef UNUSED
2784 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2785 {
2786         ftrace_trace_userstack(tr, flags, preempt_count());
2787 }
2788 #endif /* UNUSED */
2789 
2790 #endif /* CONFIG_STACKTRACE */
2791 
2792 /* created for use with alloc_percpu */
2793 struct trace_buffer_struct {
2794         int nesting;
2795         char buffer[4][TRACE_BUF_SIZE];
2796 };
2797 
2798 static struct trace_buffer_struct *trace_percpu_buffer;
2799 
2800 /*
2801  * This allows for lockless recording.  If we're nested too deeply, then
2802  * this returns NULL.
2803  */
2804 static char *get_trace_buf(void)
2805 {
2806         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2807 
2808         if (!buffer || buffer->nesting >= 4)
2809                 return NULL;
2810 
2811         buffer->nesting++;
2812 
2813         /* Interrupts must see nesting incremented before we use the buffer */
2814         barrier();
2815         return &buffer->buffer[buffer->nesting][0];
2816 }
2817 
2818 static void put_trace_buf(void)
2819 {
2820         /* Don't let the decrement of nesting leak before this */
2821         barrier();
2822         this_cpu_dec(trace_percpu_buffer->nesting);
2823 }
2824 
2825 static int alloc_percpu_trace_buffer(void)
2826 {
2827         struct trace_buffer_struct *buffers;
2828 
2829         buffers = alloc_percpu(struct trace_buffer_struct);
2830         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2831                 return -ENOMEM;
2832 
2833         trace_percpu_buffer = buffers;
2834         return 0;
2835 }
2836 
2837 static int buffers_allocated;
2838 
2839 void trace_printk_init_buffers(void)
2840 {
2841         if (buffers_allocated)
2842                 return;
2843 
2844         if (alloc_percpu_trace_buffer())
2845                 return;
2846 
2847         /* trace_printk() is for debug use only. Don't use it in production. */
2848 
2849         pr_warn("\n");
2850         pr_warn("**********************************************************\n");
2851         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2852         pr_warn("**                                                      **\n");
2853         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2854         pr_warn("**                                                      **\n");
2855         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2856         pr_warn("** unsafe for production use.                           **\n");
2857         pr_warn("**                                                      **\n");
2858         pr_warn("** If you see this message and you are not debugging    **\n");
2859         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2860         pr_warn("**                                                      **\n");
2861         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2862         pr_warn("**********************************************************\n");
2863 
2864         /* Expand the buffers to set size */
2865         tracing_update_buffers();
2866 
2867         buffers_allocated = 1;
2868 
2869         /*
2870          * trace_printk_init_buffers() can be called by modules.
2871          * If that happens, then we need to start cmdline recording
2872          * directly here. If the global_trace.buffer is already
2873          * allocated here, then this was called by module code.
2874          */
2875         if (global_trace.trace_buffer.buffer)
2876                 tracing_start_cmdline_record();
2877 }
2878 
2879 void trace_printk_start_comm(void)
2880 {
2881         /* Start tracing comms if trace printk is set */
2882         if (!buffers_allocated)
2883                 return;
2884         tracing_start_cmdline_record();
2885 }
2886 
2887 static void trace_printk_start_stop_comm(int enabled)
2888 {
2889         if (!buffers_allocated)
2890                 return;
2891 
2892         if (enabled)
2893                 tracing_start_cmdline_record();
2894         else
2895                 tracing_stop_cmdline_record();
2896 }
2897 
2898 /**
2899  * trace_vbprintk - write binary msg to tracing buffer
2900  *
2901  */
2902 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2903 {
2904         struct trace_event_call *call = &event_bprint;
2905         struct ring_buffer_event *event;
2906         struct ring_buffer *buffer;
2907         struct trace_array *tr = &global_trace;
2908         struct bprint_entry *entry;
2909         unsigned long flags;
2910         char *tbuffer;
2911         int len = 0, size, pc;
2912 
2913         if (unlikely(tracing_selftest_running || tracing_disabled))
2914                 return 0;
2915 
2916         /* Don't pollute graph traces with trace_vprintk internals */
2917         pause_graph_tracing();
2918 
2919         pc = preempt_count();
2920         preempt_disable_notrace();
2921 
2922         tbuffer = get_trace_buf();
2923         if (!tbuffer) {
2924                 len = 0;
2925                 goto out_nobuffer;
2926         }
2927 
2928         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2929 
2930         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2931                 goto out;
2932 
2933         local_save_flags(flags);
2934         size = sizeof(*entry) + sizeof(u32) * len;
2935         buffer = tr->trace_buffer.buffer;
2936         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2937                                             flags, pc);
2938         if (!event)
2939                 goto out;
2940         entry = ring_buffer_event_data(event);
2941         entry->ip                       = ip;
2942         entry->fmt                      = fmt;
2943 
2944         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2945         if (!call_filter_check_discard(call, entry, buffer, event)) {
2946                 __buffer_unlock_commit(buffer, event);
2947                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2948         }
2949 
2950 out:
2951         put_trace_buf();
2952 
2953 out_nobuffer:
2954         preempt_enable_notrace();
2955         unpause_graph_tracing();
2956 
2957         return len;
2958 }
2959 EXPORT_SYMBOL_GPL(trace_vbprintk);
2960 
2961 static int
2962 __trace_array_vprintk(struct ring_buffer *buffer,
2963                       unsigned long ip, const char *fmt, va_list args)
2964 {
2965         struct trace_event_call *call = &event_print;
2966         struct ring_buffer_event *event;
2967         int len = 0, size, pc;
2968         struct print_entry *entry;
2969         unsigned long flags;
2970         char *tbuffer;
2971 
2972         if (tracing_disabled || tracing_selftest_running)
2973                 return 0;
2974 
2975         /* Don't pollute graph traces with trace_vprintk internals */
2976         pause_graph_tracing();
2977 
2978         pc = preempt_count();
2979         preempt_disable_notrace();
2980 
2981 
2982         tbuffer = get_trace_buf();
2983         if (!tbuffer) {
2984                 len = 0;
2985                 goto out_nobuffer;
2986         }
2987 
2988         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2989 
2990         local_save_flags(flags);
2991         size = sizeof(*entry) + len + 1;
2992         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2993                                             flags, pc);
2994         if (!event)
2995                 goto out;
2996         entry = ring_buffer_event_data(event);
2997         entry->ip = ip;
2998 
2999         memcpy(&entry->buf, tbuffer, len + 1);
3000         if (!call_filter_check_discard(call, entry, buffer, event)) {
3001                 __buffer_unlock_commit(buffer, event);
3002                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3003         }
3004 
3005 out:
3006         put_trace_buf();
3007 
3008 out_nobuffer:
3009         preempt_enable_notrace();
3010         unpause_graph_tracing();
3011 
3012         return len;
3013 }
3014 
3015 int trace_array_vprintk(struct trace_array *tr,
3016                         unsigned long ip, const char *fmt, va_list args)
3017 {
3018         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3019 }
3020 
3021 int trace_array_printk(struct trace_array *tr,
3022                        unsigned long ip, const char *fmt, ...)
3023 {
3024         int ret;
3025         va_list ap;
3026 
3027         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3028                 return 0;
3029 
3030         va_start(ap, fmt);
3031         ret = trace_array_vprintk(tr, ip, fmt, ap);
3032         va_end(ap);
3033         return ret;
3034 }
3035 
3036 int trace_array_printk_buf(struct ring_buffer *buffer,
3037                            unsigned long ip, const char *fmt, ...)
3038 {
3039         int ret;
3040         va_list ap;
3041 
3042         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3043                 return 0;
3044 
3045         va_start(ap, fmt);
3046         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3047         va_end(ap);
3048         return ret;
3049 }
3050 
3051 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3052 {
3053         return trace_array_vprintk(&global_trace, ip, fmt, args);
3054 }
3055 EXPORT_SYMBOL_GPL(trace_vprintk);
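
/*
 * Usage sketch: trace_printk() (a macro from linux/kernel.h) typically
 * lands in trace_vbprintk() above when the format string is a compile-time
 * constant with arguments, and falls back to trace_vprintk() otherwise.
 * Debug-only, as the banner in trace_printk_init_buffers() makes clear.
 */
static void __maybe_unused example_debug_trace(int irq, unsigned long addr)
{
        trace_printk("irq %d handled, addr=%lx\n", irq, addr);
}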
3056 
3057 static void trace_iterator_increment(struct trace_iterator *iter)
3058 {
3059         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3060 
3061         iter->idx++;
3062         if (buf_iter)
3063                 ring_buffer_read(buf_iter, NULL);
3064 }
3065 
3066 static struct trace_entry *
3067 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3068                 unsigned long *lost_events)
3069 {
3070         struct ring_buffer_event *event;
3071         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3072 
3073         if (buf_iter)
3074                 event = ring_buffer_iter_peek(buf_iter, ts);
3075         else
3076                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3077                                          lost_events);
3078 
3079         if (event) {
3080                 iter->ent_size = ring_buffer_event_length(event);
3081                 return ring_buffer_event_data(event);
3082         }
3083         iter->ent_size = 0;
3084         return NULL;
3085 }
3086 
3087 static struct trace_entry *
3088 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3089                   unsigned long *missing_events, u64 *ent_ts)
3090 {
3091         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3092         struct trace_entry *ent, *next = NULL;
3093         unsigned long lost_events = 0, next_lost = 0;
3094         int cpu_file = iter->cpu_file;
3095         u64 next_ts = 0, ts;
3096         int next_cpu = -1;
3097         int next_size = 0;
3098         int cpu;
3099 
3100         /*
3101          * If we are in a per_cpu trace file, don't bother iterating over
3102          * all the CPUs; peek at the requested CPU directly.
3103          */
3104         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3105                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3106                         return NULL;
3107                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3108                 if (ent_cpu)
3109                         *ent_cpu = cpu_file;
3110 
3111                 return ent;
3112         }
3113 
3114         for_each_tracing_cpu(cpu) {
3115 
3116                 if (ring_buffer_empty_cpu(buffer, cpu))
3117                         continue;
3118 
3119                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3120 
3121                 /*
3122                  * Pick the entry with the smallest timestamp:
3123                  */
3124                 if (ent && (!next || ts < next_ts)) {
3125                         next = ent;
3126                         next_cpu = cpu;
3127                         next_ts = ts;
3128                         next_lost = lost_events;
3129                         next_size = iter->ent_size;
3130                 }
3131         }
3132 
3133         iter->ent_size = next_size;
3134 
3135         if (ent_cpu)
3136                 *ent_cpu = next_cpu;
3137 
3138         if (ent_ts)
3139                 *ent_ts = next_ts;
3140 
3141         if (missing_events)
3142                 *missing_events = next_lost;
3143 
3144         return next;
3145 }
3146 
3147 /* Find the next real entry, without updating the iterator itself */
3148 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3149                                           int *ent_cpu, u64 *ent_ts)
3150 {
3151         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3152 }
3153 
3154 /* Find the next real entry, and increment the iterator to the next entry */
3155 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3156 {
3157         iter->ent = __find_next_entry(iter, &iter->cpu,
3158                                       &iter->lost_events, &iter->ts);
3159 
3160         if (iter->ent)
3161                 trace_iterator_increment(iter);
3162 
3163         return iter->ent ? iter : NULL;
3164 }
3165 
3166 static void trace_consume(struct trace_iterator *iter)
3167 {
3168         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3169                             &iter->lost_events);
3170 }
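
/*
 * Usage sketch: the peek-then-consume pattern these helpers support,
 * similar to what ftrace_dump() does later in this file.  'iter' is
 * assumed to be a fully initialized consuming iterator with no per-cpu
 * buffer iterators, so peeking does not consume.
 */
static void __maybe_unused example_drain_iterator(struct trace_iterator *iter)
{
        while (trace_find_next_entry_inc(iter)) {
                /* print_trace_line(iter) would render iter->ent here */
                trace_consume(iter);
        }
}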
3171 
3172 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3173 {
3174         struct trace_iterator *iter = m->private;
3175         int i = (int)*pos;
3176         void *ent;
3177 
3178         WARN_ON_ONCE(iter->leftover);
3179 
3180         (*pos)++;
3181 
3182         /* can't go backwards */
3183         if (iter->idx > i)
3184                 return NULL;
3185 
3186         if (iter->idx < 0)
3187                 ent = trace_find_next_entry_inc(iter);
3188         else
3189                 ent = iter;
3190 
3191         while (ent && iter->idx < i)
3192                 ent = trace_find_next_entry_inc(iter);
3193 
3194         iter->pos = *pos;
3195 
3196         return ent;
3197 }
3198 
3199 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3200 {
3201         struct ring_buffer_event *event;
3202         struct ring_buffer_iter *buf_iter;
3203         unsigned long entries = 0;
3204         u64 ts;
3205 
3206         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3207 
3208         buf_iter = trace_buffer_iter(iter, cpu);
3209         if (!buf_iter)
3210                 return;
3211 
3212         ring_buffer_iter_reset(buf_iter);
3213 
3214         /*
3215          * We could have the case with the max latency tracers
3216          * that a reset never took place on a cpu. This is evident
3217          * by the timestamp being before the start of the buffer.
3218          */
3219         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3220                 if (ts >= iter->trace_buffer->time_start)
3221                         break;
3222                 entries++;
3223                 ring_buffer_read(buf_iter, NULL);
3224         }
3225 
3226         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3227 }
3228 
3229 /*
3230  * The current tracer is copied to avoid global locking
3231  * all around.
3232  */
3233 static void *s_start(struct seq_file *m, loff_t *pos)
3234 {
3235         struct trace_iterator *iter = m->private;
3236         struct trace_array *tr = iter->tr;
3237         int cpu_file = iter->cpu_file;
3238         void *p = NULL;
3239         loff_t l = 0;
3240         int cpu;
3241 
3242         /*
3243          * Copy the tracer to avoid using a global lock all around.
3244          * iter->trace is a copy of current_trace; the pointer to the
3245          * name may be compared instead of using strcmp(), as iter->trace->name
3246          * will point to the same string as current_trace->name.
3247          */
3248         mutex_lock(&trace_types_lock);
3249         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3250                 *iter->trace = *tr->current_trace;
3251         mutex_unlock(&trace_types_lock);
3252 
3253 #ifdef CONFIG_TRACER_MAX_TRACE
3254         if (iter->snapshot && iter->trace->use_max_tr)
3255                 return ERR_PTR(-EBUSY);
3256 #endif
3257 
3258         if (!iter->snapshot)
3259                 atomic_inc(&trace_record_taskinfo_disabled);
3260 
3261         if (*pos != iter->pos) {
3262                 iter->ent = NULL;
3263                 iter->cpu = 0;
3264                 iter->idx = -1;
3265 
3266                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3267                         for_each_tracing_cpu(cpu)
3268                                 tracing_iter_reset(iter, cpu);
3269                 } else
3270                         tracing_iter_reset(iter, cpu_file);
3271 
3272                 iter->leftover = 0;
3273                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3274                         ;
3275 
3276         } else {
3277                 /*
3278                  * If we overflowed the seq_file before, then we want
3279                  * to just reuse the trace_seq buffer again.
3280                  */
3281                 if (iter->leftover)
3282                         p = iter;
3283                 else {
3284                         l = *pos - 1;
3285                         p = s_next(m, p, &l);
3286                 }
3287         }
3288 
3289         trace_event_read_lock();
3290         trace_access_lock(cpu_file);
3291         return p;
3292 }
3293 
3294 static void s_stop(struct seq_file *m, void *p)
3295 {
3296         struct trace_iterator *iter = m->private;
3297 
3298 #ifdef CONFIG_TRACER_MAX_TRACE
3299         if (iter->snapshot && iter->trace->use_max_tr)
3300                 return;
3301 #endif
3302 
3303         if (!iter->snapshot)
3304                 atomic_dec(&trace_record_taskinfo_disabled);
3305 
3306         trace_access_unlock(iter->cpu_file);
3307         trace_event_read_unlock();
3308 }
3309 
3310 static void
3311 get_total_entries(struct trace_buffer *buf,
3312                   unsigned long *total, unsigned long *entries)
3313 {
3314         unsigned long count;
3315         int cpu;
3316 
3317         *total = 0;
3318         *entries = 0;
3319 
3320         for_each_tracing_cpu(cpu) {
3321                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3322                 /*
3323                  * If this buffer has skipped entries, then we hold all
3324                  * entries for the trace and we need to ignore the
3325                  * ones before the time stamp.
3326                  */
3327                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3328                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3329                         /* total is the same as the entries */
3330                         *total += count;
3331                 } else
3332                         *total += count +
3333                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3334                 *entries += count;
3335         }
3336 }
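/*
 * Worked example (illustrative numbers, not from the source): if one CPU's
 * buffer currently holds 1000 readable events and 250 older events were
 * overwritten (its overrun count), this accumulates *entries += 1000 and
 * *total += 1250 for that CPU, which print_event_info() below reports as
 * "entries-in-buffer/entries-written: 1000/1250".  When skipped_entries was
 * set by a latency tracer, the skipped events are subtracted first and the
 * CPU contributes the same amount to both counters.
 */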
3337 
3338 static void print_lat_help_header(struct seq_file *m)
3339 {
3340         seq_puts(m, "#                  _------=> CPU#            \n"
3341                     "#                 / _-----=> irqs-off        \n"
3342                     "#                | / _----=> need-resched    \n"
3343                     "#                || / _---=> hardirq/softirq \n"
3344                     "#                ||| / _--=> preempt-depth   \n"
3345                     "#                |||| /     delay            \n"
3346                     "#  cmd     pid   ||||| time  |   caller      \n"
3347                     "#     \\   /      |||||  \\    |   /         \n");
3348 }
3349 
3350 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3351 {
3352         unsigned long total;
3353         unsigned long entries;
3354 
3355         get_total_entries(buf, &total, &entries);
3356         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3357                    entries, total, num_online_cpus());
3358         seq_puts(m, "#\n");
3359 }
3360 
3361 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3362                                    unsigned int flags)
3363 {
3364         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3365 
3366         print_event_info(buf, m);
3367 
3368         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3369         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3370 }
3371 
3372 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3373                                        unsigned int flags)
3374 {
3375         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3376         const char tgid_space[] = "          ";
3377         const char space[] = "  ";
3378 
3379         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3380                    tgid ? tgid_space : space);
3381         seq_printf(m, "#                          %s / _----=> need-resched\n",
3382                    tgid ? tgid_space : space);
3383         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3384                    tgid ? tgid_space : space);
3385         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3386                    tgid ? tgid_space : space);
3387         seq_printf(m, "#                          %s||| /     delay\n",
3388                    tgid ? tgid_space : space);
3389         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3390                    tgid ? "   TGID   " : space);
3391         seq_printf(m, "#              | |       | %s||||       |         |\n",
3392                    tgid ? "     |    " : space);
3393 }
3394 
3395 void
3396 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3397 {
3398         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3399         struct trace_buffer *buf = iter->trace_buffer;
3400         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3401         struct tracer *type = iter->trace;
3402         unsigned long entries;
3403         unsigned long total;
3404         const char *name = type->name;
3407 
3408         get_total_entries(buf, &total, &entries);
3409 
3410         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3411                    name, UTS_RELEASE);
3412         seq_puts(m, "# -----------------------------------"
3413                  "---------------------------------\n");
3414         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3415                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3416                    nsecs_to_usecs(data->saved_latency),
3417                    entries,
3418                    total,
3419                    buf->cpu,
3420 #if defined(CONFIG_PREEMPT_NONE)
3421                    "server",
3422 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3423                    "desktop",
3424 #elif defined(CONFIG_PREEMPT)
3425                    "preempt",
3426 #else
3427                    "unknown",
3428 #endif
3429                    /* These are reserved for later use */
3430                    0, 0, 0, 0);
3431 #ifdef CONFIG_SMP
3432         seq_printf(m, " #P:%d)\n", num_online_cpus());
3433 #else
3434         seq_puts(m, ")\n");
3435 #endif
3436         seq_puts(m, "#    -----------------\n");
3437         seq_printf(m, "#    | task: %.16s-%d "
3438                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3439                    data->comm, data->pid,
3440                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3441                    data->policy, data->rt_priority);
3442         seq_puts(m, "#    -----------------\n");
3443 
3444         if (data->critical_start) {
3445                 seq_puts(m, "#  => started at: ");
3446                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3447                 trace_print_seq(m, &iter->seq);
3448                 seq_puts(m, "\n#  => ended at:   ");
3449                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3450                 trace_print_seq(m, &iter->seq);
3451                 seq_puts(m, "\n#\n");
3452         }
3453 
3454         seq_puts(m, "#\n");
3455 }
3456 
3457 static void test_cpu_buff_start(struct trace_iterator *iter)
3458 {
3459         struct trace_seq *s = &iter->seq;
3460         struct trace_array *tr = iter->tr;
3461 
3462         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3463                 return;
3464 
3465         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3466                 return;
3467 
3468         if (cpumask_available(iter->started) &&
3469             cpumask_test_cpu(iter->cpu, iter->started))
3470                 return;
3471 
3472         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3473                 return;
3474 
3475         if (cpumask_available(iter->started))
3476                 cpumask_set_cpu(iter->cpu, iter->started);
3477 
3478         /* Don't print started cpu buffer for the first entry of the trace */
3479         if (iter->idx > 1)
3480                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3481                                 iter->cpu);
3482 }
3483 
3484 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3485 {
3486         struct trace_array *tr = iter->tr;
3487         struct trace_seq *s = &iter->seq;
3488         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3489         struct trace_entry *entry;
3490         struct trace_event *event;
3491 
3492         entry = iter->ent;
3493 
3494         test_cpu_buff_start(iter);
3495 
3496         event = ftrace_find_event(entry->type);
3497 
3498         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3499                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3500                         trace_print_lat_context(iter);
3501                 else
3502                         trace_print_context(iter);
3503         }
3504 
3505         if (trace_seq_has_overflowed(s))
3506                 return TRACE_TYPE_PARTIAL_LINE;
3507 
3508         if (event)
3509                 return event->funcs->trace(iter, sym_flags, event);
3510 
3511         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3512 
3513         return trace_handle_return(s);
3514 }
3515 
3516 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3517 {
3518         struct trace_array *tr = iter->tr;
3519         struct trace_seq *s = &iter->seq;
3520         struct trace_entry *entry;
3521         struct trace_event *event;
3522 
3523         entry = iter->ent;
3524 
3525         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3526                 trace_seq_printf(s, "%d %d %llu ",
3527                                  entry->pid, iter->cpu, iter->ts);
3528 
3529         if (trace_seq_has_overflowed(s))
3530                 return TRACE_TYPE_PARTIAL_LINE;
3531 
3532         event = ftrace_find_event(entry->type);
3533         if (event)
3534                 return event->funcs->raw(iter, 0, event);
3535 
3536         trace_seq_printf(s, "%d ?\n", entry->type);
3537 
3538         return trace_handle_return(s);
3539 }
3540 
3541 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3542 {
3543         struct trace_array *tr = iter->tr;
3544         struct trace_seq *s = &iter->seq;
3545         unsigned char newline = '\n';
3546         struct trace_entry *entry;
3547         struct trace_event *event;
3548 
3549         entry = iter->ent;
3550 
3551         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3552                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3553                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3554                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3555                 if (trace_seq_has_overflowed(s))
3556                         return TRACE_TYPE_PARTIAL_LINE;
3557         }
3558 
3559         event = ftrace_find_event(entry->type);
3560         if (event) {
3561                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3562                 if (ret != TRACE_TYPE_HANDLED)
3563                         return ret;
3564         }
3565 
3566         SEQ_PUT_FIELD(s, newline);
3567 
3568         return trace_handle_return(s);
3569 }
3570 
3571 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3572 {
3573         struct trace_array *tr = iter->tr;
3574         struct trace_seq *s = &iter->seq;
3575         struct trace_entry *entry;
3576         struct trace_event *event;
3577 
3578         entry = iter->ent;
3579 
3580         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3581                 SEQ_PUT_FIELD(s, entry->pid);
3582                 SEQ_PUT_FIELD(s, iter->cpu);
3583                 SEQ_PUT_FIELD(s, iter->ts);
3584                 if (trace_seq_has_overflowed(s))
3585                         return TRACE_TYPE_PARTIAL_LINE;
3586         }
3587 
3588         event = ftrace_find_event(entry->type);
3589         return event ? event->funcs->binary(iter, 0, event) :
3590                 TRACE_TYPE_HANDLED;
3591 }
3592 
3593 int trace_empty(struct trace_iterator *iter)
3594 {
3595         struct ring_buffer_iter *buf_iter;
3596         int cpu;
3597 
3598         /* If we are looking at one CPU buffer, only check that one */
3599         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3600                 cpu = iter->cpu_file;
3601                 buf_iter = trace_buffer_iter(iter, cpu);
3602                 if (buf_iter) {
3603                         if (!ring_buffer_iter_empty(buf_iter))
3604                                 return 0;
3605                 } else {
3606                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3607                                 return 0;
3608                 }
3609                 return 1;
3610         }
3611 
3612         for_each_tracing_cpu(cpu) {
3613                 buf_iter = trace_buffer_iter(iter, cpu);
3614                 if (buf_iter) {
3615                         if (!ring_buffer_iter_empty(buf_iter))
3616                                 return 0;
3617                 } else {
3618                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619                                 return 0;
3620                 }
3621         }
3622 
3623         return 1;
3624 }
3625 
3626 /*  Called with trace_event_read_lock() held. */
3627 enum print_line_t print_trace_line(struct trace_iterator *iter)
3628 {
3629         struct trace_array *tr = iter->tr;
3630         unsigned long trace_flags = tr->trace_flags;
3631         enum print_line_t ret;
3632 
3633         if (iter->lost_events) {
3634                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3635                                  iter->cpu, iter->lost_events);
3636                 if (trace_seq_has_overflowed(&iter->seq))
3637                         return TRACE_TYPE_PARTIAL_LINE;
3638         }
3639 
3640         if (iter->trace && iter->trace->print_line) {
3641                 ret = iter->trace->print_line(iter);
3642                 if (ret != TRACE_TYPE_UNHANDLED)
3643                         return ret;
3644         }
3645 
3646         if (iter->ent->type == TRACE_BPUTS &&
3647                         trace_flags & TRACE_ITER_PRINTK &&
3648                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649                 return trace_print_bputs_msg_only(iter);
3650 
3651         if (iter->ent->type == TRACE_BPRINT &&
3652                         trace_flags & TRACE_ITER_PRINTK &&
3653                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654                 return trace_print_bprintk_msg_only(iter);
3655 
3656         if (iter->ent->type == TRACE_PRINT &&
3657                         trace_flags & TRACE_ITER_PRINTK &&
3658                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659                 return trace_print_printk_msg_only(iter);
3660 
3661         if (trace_flags & TRACE_ITER_BIN)
3662                 return print_bin_fmt(iter);
3663 
3664         if (trace_flags & TRACE_ITER_HEX)
3665                 return print_hex_fmt(iter);
3666 
3667         if (trace_flags & TRACE_ITER_RAW)
3668                 return print_raw_fmt(iter);
3669 
3670         return print_trace_fmt(iter);
3671 }
3672 
3673 void trace_latency_header(struct seq_file *m)
3674 {
3675         struct trace_iterator *iter = m->private;
3676         struct trace_array *tr = iter->tr;
3677 
3678         /* print nothing if the buffers are empty */
3679         if (trace_empty(iter))
3680                 return;
3681 
3682         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3683                 print_trace_header(m, iter);
3684 
3685         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3686                 print_lat_help_header(m);
3687 }
3688 
3689 void trace_default_header(struct seq_file *m)
3690 {
3691         struct trace_iterator *iter = m->private;
3692         struct trace_array *tr = iter->tr;
3693         unsigned long trace_flags = tr->trace_flags;
3694 
3695         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3696                 return;
3697 
3698         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3699                 /* print nothing if the buffers are empty */
3700                 if (trace_empty(iter))
3701                         return;
3702                 print_trace_header(m, iter);
3703                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3704                         print_lat_help_header(m);
3705         } else {
3706                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3707                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3708                                 print_func_help_header_irq(iter->trace_buffer,
3709                                                            m, trace_flags);
3710                         else
3711                                 print_func_help_header(iter->trace_buffer, m,
3712                                                        trace_flags);
3713                 }
3714         }
3715 }
3716 
3717 static void test_ftrace_alive(struct seq_file *m)
3718 {
3719         if (!ftrace_is_dead())
3720                 return;
3721         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3722                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3723 }
3724 
3725 #ifdef CONFIG_TRACER_MAX_TRACE
3726 static void show_snapshot_main_help(struct seq_file *m)
3727 {
3728         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3729                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3730                     "#                      Takes a snapshot of the main buffer.\n"
3731                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3732                     "#                      (Doesn't have to be '2'; works with any number that\n"
3733                     "#                       is not a '0' or '1')\n");
3734 }
3735 
3736 static void show_snapshot_percpu_help(struct seq_file *m)
3737 {
3738         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3739 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3740         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3741                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3742 #else
3743         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3744                     "#                     Must use main snapshot file to allocate.\n");
3745 #endif
3746         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3747                     "#                      (Doesn't have to be '2'; works with any number that\n"
3748                     "#                       is not a '0' or '1')\n");
3749 }
3750 
3751 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3752 {
3753         if (iter->tr->allocated_snapshot)
3754                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3755         else
3756                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3757 
3758         seq_puts(m, "# Snapshot commands:\n");
3759         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3760                 show_snapshot_main_help(m);
3761         else
3762                 show_snapshot_percpu_help(m);
3763 }
3764 #else
3765 /* Should never be called */
3766 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3767 #endif
3768 
3769 static int s_show(struct seq_file *m, void *v)
3770 {
3771         struct trace_iterator *iter = v;
3772         int ret;
3773 
3774         if (iter->ent == NULL) {
3775                 if (iter->tr) {
3776                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3777                         seq_puts(m, "#\n");
3778                         test_ftrace_alive(m);
3779                 }
3780                 if (iter->snapshot && trace_empty(iter))
3781                         print_snapshot_help(m, iter);
3782                 else if (iter->trace && iter->trace->print_header)
3783                         iter->trace->print_header(m);
3784                 else
3785                         trace_default_header(m);
3786 
3787         } else if (iter->leftover) {
3788                 /*
3789                  * If we filled the seq_file buffer earlier, we
3790                  * want to just show it now.
3791                  */
3792                 ret = trace_print_seq(m, &iter->seq);
3793 
3794                 /* ret should this time be zero, but you never know */
3795                 iter->leftover = ret;
3796 
3797         } else {
3798                 print_trace_line(iter);
3799                 ret = trace_print_seq(m, &iter->seq);
3800                 /*
3801                  * If we overflow the seq_file buffer, then it will
3802                  * ask us for this data again at start up.
3803                  * Use that instead.
3804                  *  ret is 0 if seq_file write succeeded.
3805                  *        -1 otherwise.
3806                  */
3807                 iter->leftover = ret;
3808         }
3809 
3810         return 0;
3811 }
3812 
3813 /*
3814  * Should be used after trace_array_get(), trace_types_lock
3815  * ensures that i_cdev was already initialized.
3816  */
3817 static inline int tracing_get_cpu(struct inode *inode)
3818 {
3819         if (inode->i_cdev) /* See trace_create_cpu_file() */
3820                 return (long)inode->i_cdev - 1;
3821         return RING_BUFFER_ALL_CPUS;
3822 }
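/*
 * A small sketch of the encoding this relies on (assuming the convention set
 * up by trace_create_cpu_file(), referenced above, where i_cdev holds
 * "cpu + 1" so that a NULL i_cdev can stand for "all CPUs"):
 *
 *	i_cdev = (void *)(long)(cpu + 1);	at file-creation time
 *	cpu    = (long)inode->i_cdev - 1;	here, when the file is opened
 *
 * e.g. a per-cpu "cpu3/trace" file stores the pointer value 4 and decodes
 * back to cpu 3, while the top-level "trace" file leaves i_cdev NULL and
 * falls through to RING_BUFFER_ALL_CPUS.
 */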
3823 
3824 static const struct seq_operations tracer_seq_ops = {
3825         .start          = s_start,
3826         .next           = s_next,
3827         .stop           = s_stop,
3828         .show           = s_show,
3829 };
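/*
 * Roughly, the seq_file core drives these callbacks on each read() as:
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * and restarts from s_start() with the saved position once its output page
 * fills, which is why s_start() compares *pos against iter->pos and why
 * s_show() keeps "leftover" output around.
 */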
3830 
3831 static struct trace_iterator *
3832 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3833 {
3834         struct trace_array *tr = inode->i_private;
3835         struct trace_iterator *iter;
3836         int cpu;
3837 
3838         if (tracing_disabled)
3839                 return ERR_PTR(-ENODEV);
3840 
3841         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3842         if (!iter)
3843                 return ERR_PTR(-ENOMEM);
3844 
3845         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3846                                     GFP_KERNEL);
3847         if (!iter->buffer_iter)
3848                 goto release;
3849 
3850         /*
3851          * We make a copy of the current tracer to avoid concurrent
3852          * changes to it while we are reading.
3853          */
3854         mutex_lock(&trace_types_lock);
3855         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3856         if (!iter->trace)
3857                 goto fail;
3858 
3859         *iter->trace = *tr->current_trace;
3860 
3861         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3862                 goto fail;
3863 
3864         iter->tr = tr;
3865 
3866 #ifdef CONFIG_TRACER_MAX_TRACE
3867         /* Currently only the top directory has a snapshot */
3868         if (tr->current_trace->print_max || snapshot)
3869                 iter->trace_buffer = &tr->max_buffer;
3870         else
3871 #endif
3872                 iter->trace_buffer = &tr->trace_buffer;
3873         iter->snapshot = snapshot;
3874         iter->pos = -1;
3875         iter->cpu_file = tracing_get_cpu(inode);
3876         mutex_init(&iter->mutex);
3877 
3878         /* Notify the tracer early; before we stop tracing. */
3879         if (iter->trace && iter->trace->open)
3880                 iter->trace->open(iter);
3881 
3882         /* Annotate start of buffers if we had overruns */
3883         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3884                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3885 
3886         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3887         if (trace_clocks[tr->clock_id].in_ns)
3888                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3889 
3890         /* stop the trace while dumping if we are not opening "snapshot" */
3891         if (!iter->snapshot)
3892                 tracing_stop_tr(tr);
3893 
3894         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3895                 for_each_tracing_cpu(cpu) {
3896                         iter->buffer_iter[cpu] =
3897                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3898                 }
3899                 ring_buffer_read_prepare_sync();
3900                 for_each_tracing_cpu(cpu) {
3901                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3902                         tracing_iter_reset(iter, cpu);
3903                 }
3904         } else {
3905                 cpu = iter->cpu_file;
3906                 iter->buffer_iter[cpu] =
3907                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908                 ring_buffer_read_prepare_sync();
3909                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3910                 tracing_iter_reset(iter, cpu);
3911         }
3912 
3913         mutex_unlock(&trace_types_lock);
3914 
3915         return iter;
3916 
3917  fail:
3918         mutex_unlock(&trace_types_lock);
3919         kfree(iter->trace);
3920         kfree(iter->buffer_iter);
3921 release:
3922         seq_release_private(inode, file);
3923         return ERR_PTR(-ENOMEM);
3924 }
3925 
3926 int tracing_open_generic(struct inode *inode, struct file *filp)
3927 {
3928         if (tracing_disabled)
3929                 return -ENODEV;
3930 
3931         filp->private_data = inode->i_private;
3932         return 0;
3933 }
3934 
3935 bool tracing_is_disabled(void)
3936 {
3937         return tracing_disabled;
3938 }
3939 
3940 /*
3941  * Open and update trace_array ref count.
3942  * Must have the current trace_array passed to it.
3943  */
3944 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3945 {
3946         struct trace_array *tr = inode->i_private;
3947 
3948         if (tracing_disabled)
3949                 return -ENODEV;
3950 
3951         if (trace_array_get(tr) < 0)
3952                 return -ENODEV;
3953 
3954         filp->private_data = inode->i_private;
3955 
3956         return 0;
3957 }
3958 
3959 static int tracing_release(struct inode *inode, struct file *file)
3960 {
3961         struct trace_array *tr = inode->i_private;
3962         struct seq_file *m = file->private_data;
3963         struct trace_iterator *iter;
3964         int cpu;
3965 
3966         if (!(file->f_mode & FMODE_READ)) {
3967                 trace_array_put(tr);
3968                 return 0;
3969         }
3970 
3971         /* Writes do not use seq_file */
3972         iter = m->private;
3973         mutex_lock(&trace_types_lock);
3974 
3975         for_each_tracing_cpu(cpu) {
3976                 if (iter->buffer_iter[cpu])
3977                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3978         }
3979 
3980         if (iter->trace && iter->trace->close)
3981                 iter->trace->close(iter);
3982 
3983         if (!iter->snapshot)
3984                 /* reenable tracing if it was previously enabled */
3985                 tracing_start_tr(tr);
3986 
3987         __trace_array_put(tr);
3988 
3989         mutex_unlock(&trace_types_lock);
3990 
3991         mutex_destroy(&iter->mutex);
3992         free_cpumask_var(iter->started);
3993         kfree(iter->trace);
3994         kfree(iter->buffer_iter);
3995         seq_release_private(inode, file);
3996 
3997         return 0;
3998 }
3999 
4000 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4001 {
4002         struct trace_array *tr = inode->i_private;
4003 
4004         trace_array_put(tr);
4005         return 0;
4006 }
4007 
4008 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4009 {
4010         struct trace_array *tr = inode->i_private;
4011 
4012         trace_array_put(tr);
4013 
4014         return single_release(inode, file);
4015 }
4016 
4017 static int tracing_open(struct inode *inode, struct file *file)
4018 {
4019         struct trace_array *tr = inode->i_private;
4020         struct trace_iterator *iter;
4021         int ret = 0;
4022 
4023         if (trace_array_get(tr) < 0)
4024                 return -ENODEV;
4025 
4026         /* If this file was open for write, then erase contents */
4027         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4028                 int cpu = tracing_get_cpu(inode);
4029                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4030 
4031 #ifdef CONFIG_TRACER_MAX_TRACE
4032                 if (tr->current_trace->print_max)
4033                         trace_buf = &tr->max_buffer;
4034 #endif
4035 
4036                 if (cpu == RING_BUFFER_ALL_CPUS)
4037                         tracing_reset_online_cpus(trace_buf);
4038                 else
4039                         tracing_reset(trace_buf, cpu);
4040         }
4041 
4042         if (file->f_mode & FMODE_READ) {
4043                 iter = __tracing_open(inode, file, false);
4044                 if (IS_ERR(iter))
4045                         ret = PTR_ERR(iter);
4046                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4047                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4048         }
4049 
4050         if (ret < 0)
4051                 trace_array_put(tr);
4052 
4053         return ret;
4054 }
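/*
 * A minimal userspace sketch of the read path above (illustrative only;
 * assumes tracefs is mounted at /sys/kernel/tracing -- older setups expose
 * the same files under /sys/kernel/debug/tracing).  Opening "trace" for read
 * lands in tracing_open()/__tracing_open(), and each fgets() below is served
 * by the s_start()/s_show()/s_next()/s_stop() callbacks via seq_read():
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int main(void)
 *	{
 *		char line[4096];
 *		FILE *f = fopen("/sys/kernel/tracing/trace", "r");
 *
 *		if (!f) {
 *			perror("fopen");
 *			return EXIT_FAILURE;
 *		}
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);
 *		fclose(f);
 *		return EXIT_SUCCESS;
 *	}
 */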
4055 
4056 /*
4057  * Some tracers are not suitable for instance buffers.
4058  * A tracer is always available for the global array (toplevel)
4059  * or if it explicitly states that it is.
4060  */
4061 static bool
4062 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4063 {
4064         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4065 }
4066 
4067 /* Find the next tracer that this trace array may use */
4068 static struct tracer *
4069 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4070 {
4071         while (t && !trace_ok_for_array(t, tr))
4072                 t = t->next;
4073 
4074         return t;
4075 }
4076 
4077 static void *
4078 t_next(struct seq_file *m, void *v, loff_t *pos)
4079 {
4080         struct trace_array *tr = m->private;
4081         struct tracer *t = v;
4082 
4083         (*pos)++;
4084 
4085         if (t)
4086                 t = get_tracer_for_array(tr, t->next);
4087 
4088         return t;
4089 }
4090 
4091 static void *t_start(struct seq_file *m, loff_t *pos)
4092 {
4093         struct trace_array *tr = m->private;
4094         struct tracer *t;
4095         loff_t l = 0;
4096 
4097         mutex_lock(&trace_types_lock);
4098 
4099         t = get_tracer_for_array(tr, trace_types);
4100         for (; t && l < *pos; t = t_next(m, t, &l))
4101                         ;
4102 
4103         return t;
4104 }
4105 
4106 static void t_stop(struct seq_file *m, void *p)
4107 {
4108         mutex_unlock(&trace_types_lock);
4109 }
4110 
4111 static int t_show(struct seq_file *m, void *v)
4112 {
4113         struct tracer *t = v;
4114 
4115         if (!t)
4116                 return 0;
4117 
4118         seq_puts(m, t->name);
4119         if (t->next)
4120                 seq_putc(m, ' ');
4121         else
4122                 seq_putc(m, '\n');
4123 
4124         return 0;
4125 }
4126 
4127 static const struct seq_operations show_traces_seq_ops = {
4128         .start          = t_start,
4129         .next           = t_next,
4130         .stop           = t_stop,
4131         .show           = t_show,
4132 };
4133 
4134 static int show_traces_open(struct inode *inode, struct file *file)
4135 {
4136         struct trace_array *tr = inode->i_private;
4137         struct seq_file *m;
4138         int ret;
4139 
4140         if (tracing_disabled)
4141                 return -ENODEV;
4142 
4143         ret = seq_open(file, &show_traces_seq_ops);
4144         if (ret)
4145                 return ret;
4146 
4147         m = file->private_data;
4148         m->private = tr;
4149 
4150         return 0;
4151 }
4152 
4153 static ssize_t
4154 tracing_write_stub(struct file *filp, const char __user *ubuf,
4155                    size_t count, loff_t *ppos)
4156 {
4157         return count;
4158 }
4159 
4160 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4161 {
4162         int ret;
4163 
4164         if (file->f_mode & FMODE_READ)
4165                 ret = seq_lseek(file, offset, whence);
4166         else
4167                 file->f_pos = ret = 0;
4168 
4169         return ret;
4170 }
4171 
4172 static const struct file_operations tracing_fops = {
4173         .open           = tracing_open,
4174         .read           = seq_read,
4175         .write          = tracing_write_stub,
4176         .llseek         = tracing_lseek,
4177         .release        = tracing_release,
4178 };
4179 
4180 static const struct file_operations show_traces_fops = {
4181         .open           = show_traces_open,
4182         .read           = seq_read,
4183         .release        = seq_release,
4184         .llseek         = seq_lseek,
4185 };
4186 
4187 static ssize_t
4188 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4189                      size_t count, loff_t *ppos)
4190 {
4191         struct trace_array *tr = file_inode(filp)->i_private;
4192         char *mask_str;
4193         int len;
4194 
4195         len = snprintf(NULL, 0, "%*pb\n",
4196                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4197         mask_str = kmalloc(len, GFP_KERNEL);
4198         if (!mask_str)
4199                 return -ENOMEM;
4200 
4201         len = snprintf(mask_str, len, "%*pb\n",
4202                        cpumask_pr_args(tr->tracing_cpumask));
4203         if (len >= count) {
4204                 count = -EINVAL;
4205                 goto out_err;
4206         }
4207         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4208 
4209 out_err:
4210         kfree(mask_str);
4211 
4212         return count;
4213 }
4214 
4215 static ssize_t
4216 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4217                       size_t count, loff_t *ppos)
4218 {
4219         struct trace_array *tr = file_inode(filp)->i_private;
4220         cpumask_var_t tracing_cpumask_new;
4221         int err, cpu;
4222 
4223         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4224                 return -ENOMEM;
4225 
4226         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4227         if (err)
4228                 goto err_unlock;
4229 
4230         local_irq_disable();
4231         arch_spin_lock(&tr->max_lock);
4232         for_each_tracing_cpu(cpu) {
4233                 /*
4234                  * Increase/decrease the disabled counter if we are
4235                  * about to flip a bit in the cpumask:
4236                  */
4237                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4241                 }
4242                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4243                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4244                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4245                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4246                 }
4247         }
4248         arch_spin_unlock(&tr->max_lock);
4249         local_irq_enable();
4250 
4251         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4252         free_cpumask_var(tracing_cpumask_new);
4253 
4254         return count;
4255 
4256 err_unlock:
4257         free_cpumask_var(tracing_cpumask_new);
4258 
4259         return err;
4260 }
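/*
 * Worked example of the mask format used here (a hex CPU bitmask, parsed by
 * cpumask_parse_user() and printed back with "%*pb"): on an 8-CPU box,
 *
 *	echo 3 > tracing_cpumask
 *
 * keeps only CPUs 0 and 1 traced (bit n corresponds to CPU n); reading the
 * file back shows the mask in hex (e.g. "03"), and the loop above disables
 * the per-cpu buffers whose bits were just cleared.
 */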
4261 
4262 static const struct file_operations tracing_cpumask_fops = {
4263         .open           = tracing_open_generic_tr,
4264         .read           = tracing_cpumask_read,
4265         .write          = tracing_cpumask_write,
4266         .release        = tracing_release_generic_tr,
4267         .llseek         = generic_file_llseek,
4268 };
4269 
4270 static int tracing_trace_options_show(struct seq_file *m, void *v)
4271 {
4272         struct tracer_opt *trace_opts;
4273         struct trace_array *tr = m->private;
4274         u32 tracer_flags;
4275         int i;
4276 
4277         mutex_lock(&trace_types_lock);
4278         tracer_flags = tr->current_trace->flags->val;
4279         trace_opts = tr->current_trace->flags->opts;
4280 
4281         for (i = 0; trace_options[i]; i++) {
4282                 if (tr->trace_flags & (1 << i))
4283                         seq_printf(m, "%s\n", trace_options[i]);
4284                 else
4285                         seq_printf(m, "no%s\n", trace_options[i]);
4286         }
4287 
4288         for (i = 0; trace_opts[i].name; i++) {
4289                 if (tracer_flags & trace_opts[i].bit)
4290                         seq_printf(m, "%s\n", trace_opts[i].name);
4291                 else
4292                         seq_printf(m, "no%s\n", trace_opts[i].name);
4293         }
4294         mutex_unlock(&trace_types_lock);
4295 
4296         return 0;
4297 }
4298 
4299 static int __set_tracer_option(struct trace_array *tr,
4300                                struct tracer_flags *tracer_flags,
4301                                struct tracer_opt *opts, int neg)
4302 {
4303         struct tracer *trace = tracer_flags->trace;
4304         int ret;
4305 
4306         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4307         if (ret)
4308                 return ret;
4309 
4310         if (neg)
4311                 tracer_flags->val &= ~opts->bit;
4312         else
4313                 tracer_flags->val |= opts->bit;
4314         return 0;
4315 }
4316 
4317 /* Try to assign a tracer specific option */
4318 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4319 {
4320         struct tracer *trace = tr->current_trace;
4321         struct tracer_flags *tracer_flags = trace->flags;
4322         struct tracer_opt *opts = NULL;
4323         int i;
4324 
4325         for (i = 0; tracer_flags->opts[i].name; i++) {
4326                 opts = &tracer_flags->opts[i];
4327 
4328                 if (strcmp(cmp, opts->name) == 0)
4329                         return __set_tracer_option(tr, trace->flags, opts, neg);
4330         }
4331 
4332         return -EINVAL;
4333 }
4334 
4335 /* Some tracers require overwrite to stay enabled */
4336 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4337 {
4338         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4339                 return -1;
4340 
4341         return 0;
4342 }
4343 
4344 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4345 {
4346         /* do nothing if flag is already set */
4347         if (!!(tr->trace_flags & mask) == !!enabled)
4348                 return 0;
4349 
4350         /* Give the tracer a chance to approve the change */
4351         if (tr->current_trace->flag_changed)
4352                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4353                         return -EINVAL;
4354 
4355         if (enabled)
4356                 tr->trace_flags |= mask;
4357         else
4358                 tr->trace_flags &= ~mask;
4359 
4360         if (mask == TRACE_ITER_RECORD_CMD)
4361                 trace_event_enable_cmd_record(enabled);
4362 
4363         if (mask == TRACE_ITER_RECORD_TGID) {
4364                 if (!tgid_map)
4365                         tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4366                                            GFP_KERNEL);
4367                 if (!tgid_map) {
4368                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4369                         return -ENOMEM;
4370                 }
4371 
4372                 trace_event_enable_tgid_record(enabled);
4373         }
4374 
4375         if (mask == TRACE_ITER_EVENT_FORK)
4376                 trace_event_follow_fork(tr, enabled);
4377 
4378         if (mask == TRACE_ITER_FUNC_FORK)
4379                 ftrace_pid_follow_fork(tr, enabled);
4380 
4381         if (mask == TRACE_ITER_OVERWRITE) {
4382                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4383 #ifdef CONFIG_TRACER_MAX_TRACE
4384                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4385 #endif
4386         }
4387 
4388         if (mask == TRACE_ITER_PRINTK) {
4389                 trace_printk_start_stop_comm(enabled);
4390                 trace_printk_control(enabled);
4391         }
4392 
4393         return 0;
4394 }
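/*
 * Back-of-the-envelope cost of the RECORD_TGID branch above (assuming
 * tgid_map is an array of int, as in this version, and the default
 * PID_MAX_DEFAULT of 0x8000): the allocation is (32768 + 1) * sizeof(int)
 * bytes, roughly 128 KiB, done once the first time the option is enabled
 * and then reused.
 */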
4395 
4396 static int trace_set_options(struct trace_array *tr, char *option)
4397 {
4398         char *cmp;
4399         int neg = 0;
4400         int ret = -ENODEV;
4401         int i;
4402         size_t orig_len = strlen(option);
4403 
4404         cmp = strstrip(option);
4405 
4406         if (strncmp(cmp, "no", 2) == 0) {
4407                 neg = 1;
4408                 cmp += 2;
4409         }
4410 
4411         mutex_lock(&trace_types_lock);
4412 
4413         for (i = 0; trace_options[i]; i++) {
4414                 if (strcmp(cmp, trace_options[i]) == 0) {
4415                         ret = set_tracer_flag(tr, 1 << i, !neg);
4416                         break;
4417                 }
4418         }
4419 
4420         /* If no option could be set, test the specific tracer options */
4421         if (!trace_options[i])
4422                 ret = set_tracer_option(tr, cmp, neg);
4423 
4424         mutex_unlock(&trace_types_lock);
4425 
4426         /*
4427          * If the first trailing whitespace is replaced with '\0' by strstrip,
4428          * turn it back into a space.
4429          */
4430         if (orig_len > strlen(option))
4431                 option[strlen(option)] = ' ';
4432 
4433         return ret;
4434 }
4435 
4436 static void __init apply_trace_boot_options(void)
4437 {
4438         char *buf = trace_boot_options_buf;
4439         char *option;
4440 
4441         while (true) {
4442                 option = strsep(&buf, ",");
4443 
4444                 if (!option)
4445                         break;
4446 
4447                 if (*option)
4448                         trace_set_options(&global_trace, option);
4449 
4450                 /* Put back the comma to allow this to be called again */
4451                 if (buf)
4452                         *(buf - 1) = ',';
4453         }
4454 }
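/*
 * Example (hypothetical command line): booting with
 *
 *	trace_options=sym-offset,noirq-info
 *
 * makes this loop pass "sym-offset" and then "noirq-info" to
 * trace_set_options(); the comma that strsep() turned into '\0' is put back
 * so the saved boot string stays intact for any later re-parse.
 */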
4455 
4456 static ssize_t
4457 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4458                         size_t cnt, loff_t *ppos)
4459 {
4460         struct seq_file *m = filp->private_data;
4461         struct trace_array *tr = m->private;
4462         char buf[64];
4463         int ret;
4464 
4465         if (cnt >= sizeof(buf))
4466                 return -EINVAL;
4467 
4468         if (copy_from_user(buf, ubuf, cnt))
4469                 return -EFAULT;
4470 
4471         buf[cnt] = 0;
4472 
4473         ret = trace_set_options(tr, buf);
4474         if (ret < 0)
4475                 return ret;
4476 
4477         *ppos += cnt;
4478 
4479         return cnt;
4480 }
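/*
 * A minimal userspace sketch of driving this write path (illustrative;
 * assumes tracefs at /sys/kernel/tracing and that "sym-offset" is one of the
 * generic option names in this build).  The kernel side strips the trailing
 * newline with strstrip(), and a "no" prefix clears the flag:
 *
 *	#include <stdio.h>
 *
 *	static int set_trace_option(const char *opt)
 *	{
 *		FILE *f = fopen("/sys/kernel/tracing/trace_options", "w");
 *
 *		if (!f)
 *			return -1;
 *		fprintf(f, "%s\n", opt);
 *		return fclose(f);
 *	}
 *
 *	int main(void)
 *	{
 *		set_trace_option("sym-offset");		// set the flag
 *		set_trace_option("nosym-offset");	// clear it again
 *		return 0;
 *	}
 */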
4481 
4482 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4483 {
4484         struct trace_array *tr = inode->i_private;
4485         int ret;
4486 
4487         if (tracing_disabled)
4488                 return -ENODEV;
4489 
4490         if (trace_array_get(tr) < 0)
4491                 return -ENODEV;
4492 
4493         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4494         if (ret < 0)
4495                 trace_array_put(tr);
4496 
4497         return ret;
4498 }
4499 
4500 static const struct file_operations tracing_iter_fops = {
4501         .open           = tracing_trace_options_open,
4502         .read           = seq_read,
4503         .llseek         = seq_lseek,
4504         .release        = tracing_single_release_tr,
4505         .write          = tracing_trace_options_write,
4506 };
4507 
4508 static const char readme_msg[] =
4509         "tracing mini-HOWTO:\n\n"
4510         "# echo 0 > tracing_on : quick way to disable tracing\n"
4511         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4512         " Important files:\n"
4513         "  trace\t\t\t- The static contents of the buffer\n"
4514         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4515         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4516         "  current_tracer\t- function and latency tracers\n"
4517         "  available_tracers\t- list of configured tracers for current_tracer\n"
4518         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4519         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4520         "  trace_clock\t\t- change the clock used to order events\n"
4521         "       local:   Per cpu clock but may not be synced across CPUs\n"
4522         "      global:   Synced across CPUs but slows tracing down.\n"
4523         "     counter:   Not a clock, but just an increment\n"
4524         "      uptime:   Jiffy counter from time of boot\n"
4525         "        perf:   Same clock that perf events use\n"
4526 #ifdef CONFIG_X86_64
4527         "     x86-tsc:   TSC cycle counter\n"
4528 #endif
4529         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4530         "\n  trace_marker_raw\t\t- Writes into this file are inserted into the kernel buffer as binary data\n"
4531         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4532         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4533         "\t\t\t  Remove sub-buffer with rmdir\n"
4534         "  trace_options\t\t- Set format or modify how tracing happens\n"
4535         "\t\t\t  Disable an option by prefixing the option name\n"
4536         "\t\t\t  with 'no'\n"
4537         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4538 #ifdef CONFIG_DYNAMIC_FTRACE
4539         "\n  available_filter_functions - list of functions that can be filtered on\n"
4540         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4541         "\t\t\t  functions\n"
4542         "\t     accepts: func_full_name or glob-matching-pattern\n"
4543         "\t     modules: Can select a group via module\n"
4544         "\t      Format: :mod:<module-name>\n"
4545         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4546         "\t    triggers: a command to perform when function is hit\n"
4547         "\t      Format: <function>:<trigger>[:count]\n"
4548         "\t     trigger: traceon, traceoff\n"
4549         "\t\t      enable_event:<system>:<event>\n"
4550         "\t\t      disable_event:<system>:<event>\n"
4551 #ifdef CONFIG_STACKTRACE
4552         "\t\t      stacktrace\n"
4553 #endif
4554 #ifdef CONFIG_TRACER_SNAPSHOT
4555         "\t\t      snapshot\n"
4556 #endif
4557         "\t\t      dump\n"
4558         "\t\t      cpudump\n"
4559         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4560         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4561         "\t     The first one will disable tracing every time do_fault is hit\n"
4562         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4563         "\t       The first time do_trap is hit and it disables tracing, the\n"
4564         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4565         "\t       the counter will not decrement. It only decrements when the\n"
4566         "\t       trigger did work\n"
4567         "\t     To remove trigger without count:\n"
4568         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4569         "\t     To remove trigger with a count:\n"
4570         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4571         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4572         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4573         "\t    modules: Can select a group via module command :mod:\n"
4574         "\t    Does not accept triggers\n"
4575 #endif /* CONFIG_DYNAMIC_FTRACE */
4576 #ifdef CONFIG_FUNCTION_TRACER
4577         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4578         "\t\t    (function)\n"
4579 #endif
4580 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4581         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4582         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4583         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4584 #endif
4585 #ifdef CONFIG_TRACER_SNAPSHOT
4586         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4587         "\t\t\t  snapshot buffer. Read the contents for more\n"
4588         "\t\t\t  information\n"
4589 #endif
4590 #ifdef CONFIG_STACK_TRACER
4591         "  stack_trace\t\t- Shows the max stack trace when active\n"
4592         "  stack_max_size\t- Shows current max stack size that was traced\n"
4593         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4594         "\t\t\t  new trace)\n"
4595 #ifdef CONFIG_DYNAMIC_FTRACE
4596         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4597         "\t\t\t  traces\n"
4598 #endif
4599 #endif /* CONFIG_STACK_TRACER */
4600 #ifdef CONFIG_KPROBE_EVENTS
4601         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4602         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4603 #endif
4604 #ifdef CONFIG_UPROBE_EVENTS
4605         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4606         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4607 #endif
4608 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4609         "\t  accepts: event-definitions (one definition per line)\n"
4610         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4611         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4612         "\t           -:[<group>/]<event>\n"
4613 #ifdef CONFIG_KPROBE_EVENTS
4614         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4615         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4616 #endif
4617 #ifdef CONFIG_UPROBE_EVENTS
4618         "\t    place: <path>:<offset>\n"
4619 #endif
4620         "\t     args: <name>=fetcharg[:type]\n"
4621         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4622         "\t           $stack<index>, $stack, $retval, $comm\n"
4623         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4624         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4625 #endif
4626         "  events/\t\t- Directory containing all trace event subsystems:\n"
4627         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4628         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4629         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4630         "\t\t\t  events\n"
4631         "      filter\t\t- If set, only events passing filter are traced\n"
4632         "  events/<system>/<event>/\t- Directory containing control files for\n"
4633         "\t\t\t  <event>:\n"
4634         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4635         "      filter\t\t- If set, only events passing filter are traced\n"
4636         "      trigger\t\t- If set, a command to perform when event is hit\n"
4637         "\t    Format: <trigger>[:count][if <filter>]\n"
4638         "\t   trigger: traceon, traceoff\n"
4639         "\t            enable_event:<system>:<event>\n"
4640         "\t            disable_event:<system>:<event>\n"
4641 #ifdef CONFIG_HIST_TRIGGERS
4642         "\t            enable_hist:<system>:<event>\n"
4643         "\t            disable_hist:<system>:<event>\n"
4644 #endif
4645 #ifdef CONFIG_STACKTRACE
4646         "\t\t    stacktrace\n"
4647 #endif
4648 #ifdef CONFIG_TRACER_SNAPSHOT
4649         "\t\t    snapshot\n"
4650 #endif
4651 #ifdef CONFIG_HIST_TRIGGERS
4652         "\t\t    hist (see below)\n"
4653 #endif
4654         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4655         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4656         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4657         "\t                  events/block/block_unplug/trigger\n"
4658         "\t   The first disables tracing every time block_unplug is hit.\n"
4659         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4660         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4661         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4662         "\t   Like function triggers, the counter is only decremented if it\n"
4663         "\t    enabled or disabled tracing.\n"
4664         "\t   To remove a trigger without a count:\n"
4665         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4666         "\t   To remove a trigger with a count:\n"
4667         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4668         "\t   Filters can be ignored when removing a trigger.\n"
4669 #ifdef CONFIG_HIST_TRIGGERS
4670         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4671         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4672         "\t            [:values=<field1[,field2,...]>]\n"
4673         "\t            [:sort=<field1[,field2,...]>]\n"
4674         "\t            [:size=#entries]\n"
4675         "\t            [:pause][:continue][:clear]\n"
4676         "\t            [:name=histname1]\n"
4677         "\t            [if <filter>]\n\n"
4678         "\t    When a matching event is hit, an entry is added to a hash\n"
4679         "\t    table using the key(s) and value(s) named, and the value of a\n"
4680         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4681         "\t    correspond to fields in the event's format description.  Keys\n"
4682         "\t    can be any field, or the special string 'stacktrace'.\n"
4683         "\t    Compound keys consisting of up to two fields can be specified\n"
4684         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4685         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4686         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4687         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4688         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4689         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4690         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4691         "\t    its histogram data will be shared with other triggers of the\n"
4692         "\t    same name, and trigger hits will update this common data.\n\n"
4693         "\t    Reading the 'hist' file for the event will dump the hash\n"
4694         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4695         "\t    triggers attached to an event, there will be a table for each\n"
4696         "\t    trigger in the output.  The table displayed for a named\n"
4697         "\t    trigger will be the same as any other instance having the\n"
4698         "\t    same name.  The default format used to display a given field\n"
4699         "\t    can be modified by appending any of the following modifiers\n"
4700         "\t    to the field name, as applicable:\n\n"
4701         "\t            .hex        display a number as a hex value\n"
4702         "\t            .sym        display an address as a symbol\n"
4703         "\t            .sym-offset display an address as a symbol and offset\n"
4704         "\t            .execname   display a common_pid as a program name\n"
4705         "\t            .syscall    display a syscall id as a syscall name\n"
4706         "\t            .log2       display log2 value rather than raw number\n\n"
4707         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4708         "\t    trigger or to start a hist trigger but not log any events\n"
4709         "\t    until told to do so.  'continue' can be used to start or\n"
4710         "\t    restart a paused hist trigger.\n\n"
4711         "\t    The 'clear' parameter will clear the contents of a running\n"
4712         "\t    hist trigger and leave its current paused/active state\n"
4713         "\t    unchanged.\n\n"
4714         "\t    The enable_hist and disable_hist triggers can be used to\n"
4715         "\t    have one event conditionally start and stop another event's\n"
4716         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4717         "\t    the enable_event and disable_event triggers.\n"
4718 #endif
4719 ;
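
     /*
      * A minimal usage sketch of the trigger and hist syntax documented
      * above, assuming tracefs is mounted at /sys/kernel/tracing and that
      * the kmem:kmalloc event exists on the running kernel:
      *
      *   # cd /sys/kernel/tracing
      *   # echo 'hist:keys=call_site.sym:values=bytes_req' \
      *           > events/kmem/kmalloc/trigger
      *   # cat events/kmem/kmalloc/hist
      *   # echo '!hist:keys=call_site.sym:values=bytes_req' \
      *           > events/kmem/kmalloc/trigger
      */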
4720 
4721 static ssize_t
4722 tracing_readme_read(struct file *filp, char __user *ubuf,
4723                        size_t cnt, loff_t *ppos)
4724 {
4725         return simple_read_from_buffer(ubuf, cnt, ppos,
4726                                         readme_msg, strlen(readme_msg));
4727 }
4728 
4729 static const struct file_operations tracing_readme_fops = {
4730         .open           = tracing_open_generic,
4731         .read           = tracing_readme_read,
4732         .llseek         = generic_file_llseek,
4733 };
4734 
4735 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4736 {
4737         int *ptr = v;
4738 
4739         if (*pos || m->count)
4740                 ptr++;
4741 
4742         (*pos)++;
4743 
4744         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4745                 if (trace_find_tgid(*ptr))
4746                         return ptr;
4747         }
4748 
4749         return NULL;
4750 }
4751 
4752 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4753 {
4754         void *v;
4755         loff_t l = 0;
4756 
4757         if (!tgid_map)
4758                 return NULL;
4759 
4760         v = &tgid_map[0];
4761         while (l <= *pos) {
4762                 v = saved_tgids_next(m, v, &l);
4763                 if (!v)
4764                         return NULL;
4765         }
4766 
4767         return v;
4768 }
4769 
4770 static void saved_tgids_stop(struct seq_file *m, void *v)
4771 {
4772 }
4773 
4774 static int saved_tgids_show(struct seq_file *m, void *v)
4775 {
4776         int pid = (int *)v - tgid_map;
4777 
4778         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4779         return 0;
4780 }
4781 
4782 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4783         .start          = saved_tgids_start,
4784         .stop           = saved_tgids_stop,
4785         .next           = saved_tgids_next,
4786         .show           = saved_tgids_show,
4787 };
4788 
4789 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4790 {
4791         if (tracing_disabled)
4792                 return -ENODEV;
4793 
4794         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4795 }
4796 
4797 
4798 static const struct file_operations tracing_saved_tgids_fops = {
4799         .open           = tracing_saved_tgids_open,
4800         .read           = seq_read,
4801         .llseek         = seq_lseek,
4802         .release        = seq_release,
4803 };
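
     /*
      * Sample of what the seq_ops above produce when saved_tgids is read,
      * assuming the record-tgid option has been enabled; the pid/tgid
      * values are hypothetical:
      *
      *   # cat /sys/kernel/tracing/saved_tgids
      *   1203 1200
      *   1207 1200
      */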
4804 
4805 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4806 {
4807         unsigned int *ptr = v;
4808 
4809         if (*pos || m->count)
4810                 ptr++;
4811 
4812         (*pos)++;
4813 
4814         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4815              ptr++) {
4816                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4817                         continue;
4818 
4819                 return ptr;
4820         }
4821 
4822         return NULL;
4823 }
4824 
4825 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4826 {
4827         void *v;
4828         loff_t l = 0;
4829 
4830         preempt_disable();
4831         arch_spin_lock(&trace_cmdline_lock);
4832 
4833         v = &savedcmd->map_cmdline_to_pid[0];
4834         while (l <= *pos) {
4835                 v = saved_cmdlines_next(m, v, &l);
4836                 if (!v)
4837                         return NULL;
4838         }
4839 
4840         return v;
4841 }
4842 
4843 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4844 {
4845         arch_spin_unlock(&trace_cmdline_lock);
4846         preempt_enable();
4847 }
4848 
4849 static int saved_cmdlines_show(struct seq_file *m, void *v)
4850 {
4851         char buf[TASK_COMM_LEN];
4852         unsigned int *pid = v;
4853 
4854         __trace_find_cmdline(*pid, buf);
4855         seq_printf(m, "%d %s\n", *pid, buf);
4856         return 0;
4857 }
4858 
4859 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4860         .start          = saved_cmdlines_start,
4861         .next           = saved_cmdlines_next,
4862         .stop           = saved_cmdlines_stop,
4863         .show           = saved_cmdlines_show,
4864 };
4865 
4866 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4867 {
4868         if (tracing_disabled)
4869                 return -ENODEV;
4870 
4871         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4872 }
4873 
4874 static const struct file_operations tracing_saved_cmdlines_fops = {
4875         .open           = tracing_saved_cmdlines_open,
4876         .read           = seq_read,
4877         .llseek         = seq_lseek,
4878         .release        = seq_release,
4879 };
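
     /*
      * Likewise for saved_cmdlines: each line is "<pid> <comm>" as printed
      * by saved_cmdlines_show(); the entries below are hypothetical:
      *
      *   # cat /sys/kernel/tracing/saved_cmdlines
      *   1203 bash
      *   1240 kworker/0:1
      */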
4880 
4881 static ssize_t
4882 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4883                                  size_t cnt, loff_t *ppos)
4884 {
4885         char buf[64];
4886         int r;
4887 
4888         arch_spin_lock(&trace_cmdline_lock);
4889         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4890         arch_spin_unlock(&trace_cmdline_lock);
4891 
4892         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4893 }
4894 
4895 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4896 {
4897         kfree(s->saved_cmdlines);
4898         kfree(s->map_cmdline_to_pid);
4899         kfree(s);
4900 }
4901 
4902 static int tracing_resize_saved_cmdlines(unsigned int val)
4903 {
4904         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4905 
4906         s = kmalloc(sizeof(*s), GFP_KERNEL);
4907         if (!s)
4908                 return -ENOMEM;
4909 
4910         if (allocate_cmdlines_buffer(val, s) < 0) {
4911                 kfree(s);
4912                 return -ENOMEM;
4913         }
4914 
4915         arch_spin_lock(&trace_cmdline_lock);
4916         savedcmd_temp = savedcmd;
4917         savedcmd = s;
4918         arch_spin_unlock(&trace_cmdline_lock);
4919         free_saved_cmdlines_buffer(savedcmd_temp);
4920 
4921         return 0;
4922 }
4923 
4924 static ssize_t
4925 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4926                                   size_t cnt, loff_t *ppos)
4927 {
4928         unsigned long val;
4929         int ret;
4930 
4931         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4932         if (ret)
4933                 return ret;
4934 
4935         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4936         if (!val || val > PID_MAX_DEFAULT)
4937                 return -EINVAL;
4938 
4939         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4940         if (ret < 0)
4941                 return ret;
4942 
4943         *ppos += cnt;
4944 
4945         return cnt;
4946 }
4947 
4948 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4949         .open           = tracing_open_generic,
4950         .read           = tracing_saved_cmdlines_size_read,
4951         .write          = tracing_saved_cmdlines_size_write,
4952 };
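
     /*
      * A short sketch of the saved_cmdlines_size file, assuming the default
      * of 128 entries; writes above PID_MAX_DEFAULT are rejected by
      * tracing_saved_cmdlines_size_write():
      *
      *   # cat /sys/kernel/tracing/saved_cmdlines_size
      *   128
      *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
      */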
4953 
4954 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4955 static union trace_eval_map_item *
4956 update_eval_map(union trace_eval_map_item *ptr)
4957 {
4958         if (!ptr->map.eval_string) {
4959                 if (ptr->tail.next) {
4960                         ptr = ptr->tail.next;
4961                         /* Set ptr to the next real item (skip head) */
4962                         ptr++;
4963                 } else
4964                         return NULL;
4965         }
4966         return ptr;
4967 }
4968 
4969 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4970 {
4971         union trace_eval_map_item *ptr = v;
4972 
4973         /*
4974          * Paranoid! If ptr points to end, we don't want to increment past it.
4975          * This really should never happen.
4976          */
4977         ptr = update_eval_map(ptr);
4978         if (WARN_ON_ONCE(!ptr))
4979                 return NULL;
4980 
4981         ptr++;
4982 
4983         (*pos)++;
4984 
4985         ptr = update_eval_map(ptr);
4986 
4987         return ptr;
4988 }
4989 
4990 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4991 {
4992         union trace_eval_map_item *v;
4993         loff_t l = 0;
4994 
4995         mutex_lock(&trace_eval_mutex);
4996 
4997         v = trace_eval_maps;
4998         if (v)
4999                 v++;
5000 
5001         while (v && l < *pos) {
5002                 v = eval_map_next(m, v, &l);
5003         }
5004 
5005         return v;
5006 }
5007 
5008 static void eval_map_stop(struct seq_file *m, void *v)
5009 {
5010         mutex_unlock(&trace_eval_mutex);
5011 }
5012 
5013 static int eval_map_show(struct seq_file *m, void *v)
5014 {
5015         union trace_eval_map_item *ptr = v;
5016 
5017         seq_printf(m, "%s %ld (%s)\n",
5018                    ptr->map.eval_string, ptr->map.eval_value,
5019                    ptr->map.system);
5020 
5021         return 0;
5022 }
5023 
5024 static const struct seq_operations tracing_eval_map_seq_ops = {
5025         .start          = eval_map_start,
5026         .next           = eval_map_next,
5027         .stop           = eval_map_stop,
5028         .show           = eval_map_show,
5029 };
5030 
5031 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5032 {
5033         if (tracing_disabled)
5034                 return -ENODEV;
5035 
5036         return seq_open(filp, &tracing_eval_map_seq_ops);
5037 }
5038 
5039 static const struct file_operations tracing_eval_map_fops = {
5040         .open           = tracing_eval_map_open,
5041         .read           = seq_read,
5042         .llseek         = seq_lseek,
5043         .release        = seq_release,
5044 };
5045 
5046 static inline union trace_eval_map_item *
5047 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5048 {
5049         /* Return tail of array given the head */
5050         return ptr + ptr->head.length + 1;
5051 }
5052 
5053 static void
5054 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5055                            int len)
5056 {
5057         struct trace_eval_map **stop;
5058         struct trace_eval_map **map;
5059         union trace_eval_map_item *map_array;
5060         union trace_eval_map_item *ptr;
5061 
5062         stop = start + len;
5063 
5064         /*
5065          * The trace_eval_maps contains the map plus a head and tail item,
5066          * where the head holds the module and length of array, and the
5067          * tail holds a pointer to the next list.
5068          */
5069         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5070         if (!map_array) {
5071                 pr_warn("Unable to allocate trace eval mapping\n");
5072                 return;
5073         }
5074 
5075         mutex_lock(&trace_eval_mutex);
5076 
5077         if (!trace_eval_maps)
5078                 trace_eval_maps = map_array;
5079         else {
5080                 ptr = trace_eval_maps;
5081                 for (;;) {
5082                         ptr = trace_eval_jmp_to_tail(ptr);
5083                         if (!ptr->tail.next)
5084                                 break;
5085                         ptr = ptr->tail.next;
5086 
5087                 }
5088                 ptr->tail.next = map_array;
5089         }
5090         map_array->head.mod = mod;
5091         map_array->head.length = len;
5092         map_array++;
5093 
5094         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5095                 map_array->map = **map;
5096                 map_array++;
5097         }
5098         memset(map_array, 0, sizeof(*map_array));
5099 
5100         mutex_unlock(&trace_eval_mutex);
5101 }
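
     /*
      * Layout built by trace_insert_eval_map_file(), illustrated here for
      * len == 2 (indices relative to the allocated map_array):
      *
      *   [0] head:  .mod = mod, .length = 2
      *   [1] map:   copy of *start[0]
      *   [2] map:   copy of *start[1]
      *   [3] tail:  zeroed; .tail.next chains the next module's array
      *
      * trace_eval_jmp_to_tail() relies on this: head + length + 1 lands on
      * the tail element.
      */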
5102 
5103 static void trace_create_eval_file(struct dentry *d_tracer)
5104 {
5105         trace_create_file("eval_map", 0444, d_tracer,
5106                           NULL, &tracing_eval_map_fops);
5107 }
5108 
5109 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5110 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5111 static inline void trace_insert_eval_map_file(struct module *mod,
5112                               struct trace_eval_map **start, int len) { }
5113 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5114 
5115 static void trace_insert_eval_map(struct module *mod,
5116                                   struct trace_eval_map **start, int len)
5117 {
5118         struct trace_eval_map **map;
5119 
5120         if (len <= 0)
5121                 return;
5122 
5123         map = start;
5124 
5125         trace_event_eval_update(map, len);
5126 
5127         trace_insert_eval_map_file(mod, start, len);
5128 }
5129 
5130 static ssize_t
5131 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5132                        size_t cnt, loff_t *ppos)
5133 {
5134         struct trace_array *tr = filp->private_data;
5135         char buf[MAX_TRACER_SIZE+2];
5136         int r;
5137 
5138         mutex_lock(&trace_types_lock);
5139         r = sprintf(buf, "%s\n", tr->current_trace->name);
5140         mutex_unlock(&trace_types_lock);
5141 
5142         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5143 }
5144 
5145 int tracer_init(struct tracer *t, struct trace_array *tr)
5146 {
5147         tracing_reset_online_cpus(&tr->trace_buffer);
5148         return t->init(tr);
5149 }
5150 
5151 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5152 {
5153         int cpu;
5154 
5155         for_each_tracing_cpu(cpu)
5156                 per_cpu_ptr(buf->data, cpu)->entries = val;
5157 }
5158 
5159 #ifdef CONFIG_TRACER_MAX_TRACE
5160 /* resize @trace_buf's per-cpu entries to match those of @size_buf */
5161 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5162                                         struct trace_buffer *size_buf, int cpu_id)
5163 {
5164         int cpu, ret = 0;
5165 
5166         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5167                 for_each_tracing_cpu(cpu) {
5168                         ret = ring_buffer_resize(trace_buf->buffer,
5169                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5170                         if (ret < 0)
5171                                 break;
5172                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5173                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5174                 }
5175         } else {
5176                 ret = ring_buffer_resize(trace_buf->buffer,
5177                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5178                 if (ret == 0)
5179                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5180                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5181         }
5182 
5183         return ret;
5184 }
5185 #endif /* CONFIG_TRACER_MAX_TRACE */
5186 
5187 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5188                                         unsigned long size, int cpu)
5189 {
5190         int ret;
5191 
5192         /*
5193          * If kernel or user changes the size of the ring buffer
5194          * we use the size that was given, and we can forget about
5195          * expanding it later.
5196          */
5197         ring_buffer_expanded = true;
5198 
5199         /* May be called before buffers are initialized */
5200         if (!tr->trace_buffer.buffer)
5201                 return 0;
5202 
5203         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5204         if (ret < 0)
5205                 return ret;
5206 
5207 #ifdef CONFIG_TRACER_MAX_TRACE
5208         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5209             !tr->current_trace->use_max_tr)
5210                 goto out;
5211 
5212         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5213         if (ret < 0) {
5214                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5215                                                      &tr->trace_buffer, cpu);
5216                 if (r < 0) {
5217                         /*
5218                          * AARGH! We are left with different
5219                          * size max buffer!!!!
5220                          * The max buffer is our "snapshot" buffer.
5221                          * When a tracer needs a snapshot (one of the
5222                          * latency tracers), it swaps the max buffer
5223                          * with the saved snapshot. We succeeded in
5224                          * updating the size of the main buffer, but failed to
5225                          * update the size of the max buffer. But when we tried
5226                          * to reset the main buffer to the original size, we
5227                          * failed there too. This is very unlikely to
5228                          * happen, but if it does, warn and kill all
5229                          * tracing.
5230                          */
5231                         WARN_ON(1);
5232                         tracing_disabled = 1;
5233                 }
5234                 return ret;
5235         }
5236 
5237         if (cpu == RING_BUFFER_ALL_CPUS)
5238                 set_buffer_entries(&tr->max_buffer, size);
5239         else
5240                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5241 
5242  out:
5243 #endif /* CONFIG_TRACER_MAX_TRACE */
5244 
5245         if (cpu == RING_BUFFER_ALL_CPUS)
5246                 set_buffer_entries(&tr->trace_buffer, size);
5247         else
5248                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5249 
5250         return ret;
5251 }
5252 
5253 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5254                                           unsigned long size, int cpu_id)
5255 {
5256         int ret = size;
5257 
5258         mutex_lock(&trace_types_lock);
5259 
5260         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5261                 /* make sure this cpu is enabled in the mask */
5262                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5263                         ret = -EINVAL;
5264                         goto out;
5265                 }
5266         }
5267 
5268         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5269         if (ret < 0)
5270                 ret = -ENOMEM;
5271 
5272 out:
5273         mutex_unlock(&trace_types_lock);
5274 
5275         return ret;
5276 }
5277 
5278 
5279 /**
5280  * tracing_update_buffers - used by tracing facility to expand ring buffers
5281  *
5282  * To save memory when tracing is never used on a system that has it
5283  * configured in, the ring buffers are initially set to a minimum size.
5284  * Once a user starts to use the tracing facility, they need to grow
5285  * to their default size.
5286  *
5287  * This function is to be called when a tracer is about to be used.
5288  */
5289 int tracing_update_buffers(void)
5290 {
5291         int ret = 0;
5292 
5293         mutex_lock(&trace_types_lock);
5294         if (!ring_buffer_expanded)
5295                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5296                                                 RING_BUFFER_ALL_CPUS);
5297         mutex_unlock(&trace_types_lock);
5298 
5299         return ret;
5300 }
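
     /*
      * A minimal sketch of the intended call pattern; the caller below is
      * hypothetical, but any code about to start using tracing expands the
      * buffers first:
      *
      *   static int my_enable_tracing_feature(void)
      *   {
      *           int ret = tracing_update_buffers();
      *
      *           if (ret < 0)
      *                   return ret;
      *           return 0;
      *   }
      */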
5301 
5302 struct trace_option_dentry;
5303 
5304 static void
5305 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5306 
5307 /*
5308  * Used to clear out the tracer before deletion of an instance.
5309  * Must have trace_types_lock held.
5310  */
5311 static void tracing_set_nop(struct trace_array *tr)
5312 {
5313         if (tr->current_trace == &nop_trace)
5314                 return;
5315
5316         tr->current_trace->enabled--;
5317 
5318         if (tr->current_trace->reset)
5319                 tr->current_trace->reset(tr);
5320 
5321         tr->current_trace = &nop_trace;
5322 }
5323 
5324 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5325 {
5326         /* Only enable if the directory has been created already. */
5327         if (!tr->dir)
5328                 return;
5329 
5330         create_trace_option_files(tr, t);
5331 }
5332 
5333 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5334 {
5335         struct tracer *t;
5336 #ifdef CONFIG_TRACER_MAX_TRACE
5337         bool had_max_tr;
5338 #endif
5339         int ret = 0;
5340 
5341         mutex_lock(&trace_types_lock);
5342 
5343         if (!ring_buffer_expanded) {
5344                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5345                                                 RING_BUFFER_ALL_CPUS);
5346                 if (ret < 0)
5347                         goto out;
5348                 ret = 0;
5349         }
5350 
5351         for (t = trace_types; t; t = t->next) {
5352                 if (strcmp(t->name, buf) == 0)
5353                         break;
5354         }
5355         if (!t) {
5356                 ret = -EINVAL;
5357                 goto out;
5358         }
5359         if (t == tr->current_trace)
5360                 goto out;
5361 
5362         /* Some tracers won't work when set from the kernel command line */
5363         if (system_state < SYSTEM_RUNNING && t->noboot) {
5364                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5365                         t->name);
5366                 goto out;
5367         }
5368 
5369         /* Some tracers are only allowed for the top level buffer */
5370         if (!trace_ok_for_array(t, tr)) {
5371                 ret = -EINVAL;
5372                 goto out;
5373         }
5374 
5375         /* If trace pipe files are being read, we can't change the tracer */
5376         if (tr->current_trace->ref) {
5377                 ret = -EBUSY;
5378                 goto out;
5379         }
5380 
5381         trace_branch_disable();
5382 
5383         tr->current_trace->enabled--;
5384 
5385         if (tr->current_trace->reset)
5386                 tr->current_trace->reset(tr);
5387 
5388         /* Current trace needs to be nop_trace before synchronize_sched */
5389         tr->current_trace = &nop_trace;
5390 
5391 #ifdef CONFIG_TRACER_MAX_TRACE
5392         had_max_tr = tr->allocated_snapshot;
5393 
5394         if (had_max_tr && !t->use_max_tr) {
5395                 /*
5396                  * We need to make sure that the update_max_tr sees that
5397                  * current_trace changed to nop_trace to keep it from
5398                  * swapping the buffers after we resize it.
5399                  * update_max_tr() is called with interrupts disabled,
5400                  * so a synchronize_sched() is sufficient.
5401                  */
5402                 synchronize_sched();
5403                 free_snapshot(tr);
5404         }
5405 #endif
5406 
5407 #ifdef CONFIG_TRACER_MAX_TRACE
5408         if (t->use_max_tr && !had_max_tr) {
5409                 ret = alloc_snapshot(tr);
5410                 if (ret < 0)
5411                         goto out;
5412         }
5413 #endif
5414 
5415         if (t->init) {
5416                 ret = tracer_init(t, tr);
5417                 if (ret)
5418                         goto out;
5419         }
5420 
5421         tr->current_trace = t;
5422         tr->current_trace->enabled++;
5423         trace_branch_enable(tr);
5424  out:
5425         mutex_unlock(&trace_types_lock);
5426 
5427         return ret;
5428 }
5429 
5430 static ssize_t
5431 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5432                         size_t cnt, loff_t *ppos)
5433 {
5434         struct trace_array *tr = filp->private_data;
5435         char buf[MAX_TRACER_SIZE+1];
5436         int i;
5437         size_t ret;
5438         int err;
5439 
5440         ret = cnt;
5441 
5442         if (cnt > MAX_TRACER_SIZE)
5443                 cnt = MAX_TRACER_SIZE;
5444 
5445         if (copy_from_user(buf, ubuf, cnt))
5446                 return -EFAULT;
5447 
5448         buf[cnt] = 0;
5449 
5450         /* strip ending whitespace. */
5451         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5452                 buf[i] = 0;
5453 
5454         err = tracing_set_tracer(tr, buf);
5455         if (err)
5456                 return err;
5457 
5458         *ppos += ret;
5459 
5460         return ret;
5461 }
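
     /*
      * Userspace view of the write path above, assuming tracefs is mounted
      * at /sys/kernel/tracing and the function tracer is configured in:
      *
      *   # echo function > /sys/kernel/tracing/current_tracer
      *   # cat /sys/kernel/tracing/current_tracer
      *   function
      *   # echo nop > /sys/kernel/tracing/current_tracer
      */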
5462 
5463 static ssize_t
5464 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5465                    size_t cnt, loff_t *ppos)
5466 {
5467         char buf[64];
5468         int r;
5469 
5470         r = snprintf(buf, sizeof(buf), "%ld\n",
5471                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5472         if (r > sizeof(buf))
5473                 r = sizeof(buf);
5474         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5475 }
5476 
5477 static ssize_t
5478 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5479                     size_t cnt, loff_t *ppos)
5480 {
5481         unsigned long val;
5482         int ret;
5483 
5484         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5485         if (ret)
5486                 return ret;
5487 
5488         *ptr = val * 1000;
5489 
5490         return cnt;
5491 }
5492 
5493 static ssize_t
5494 tracing_thresh_read(struct file *filp, char __user *ubuf,
5495                     size_t cnt, loff_t *ppos)
5496 {
5497         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5498 }
5499 
5500 static ssize_t
5501 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5502                      size_t cnt, loff_t *ppos)
5503 {
5504         struct trace_array *tr = filp->private_data;
5505         int ret;
5506 
5507         mutex_lock(&trace_types_lock);
5508         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5509         if (ret < 0)
5510                 goto out;
5511 
5512         if (tr->current_trace->update_thresh) {
5513                 ret = tr->current_trace->update_thresh(tr);
5514                 if (ret < 0)
5515                         goto out;
5516         }
5517 
5518         ret = cnt;
5519 out:
5520         mutex_unlock(&trace_types_lock);
5521 
5522         return ret;
5523 }
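
     /*
      * Units sketch for the helpers above: the files are read and written
      * in microseconds while the backing variables hold nanoseconds
      * (tracing_nsecs_write() multiplies by 1000). Hypothetical example:
      *
      *   # echo 50 > /sys/kernel/tracing/tracing_thresh     (stores 50000 ns)
      *   # cat /sys/kernel/tracing/tracing_thresh
      *   50
      */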
5524 
5525 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5526 
5527 static ssize_t
5528 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5529                      size_t cnt, loff_t *ppos)
5530 {
5531         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5532 }
5533 
5534 static ssize_t
5535 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5536                       size_t cnt, loff_t *ppos)
5537 {
5538         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5539 }
5540 
5541 #endif
5542 
5543 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5544 {
5545         struct trace_array *tr = inode->i_private;
5546         struct trace_iterator *iter;
5547         int ret = 0;
5548 
5549         if (tracing_disabled)
5550                 return -ENODEV;
5551 
5552         if (trace_array_get(tr) < 0)
5553                 return -ENODEV;
5554 
5555         mutex_lock(&trace_types_lock);
5556 
5557         /* create a buffer to store the information to pass to userspace */
5558         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5559         if (!iter) {
5560                 ret = -ENOMEM;
5561                 __trace_array_put(tr);
5562                 goto out;
5563         }
5564 
5565         trace_seq_init(&iter->seq);
5566         iter->trace = tr->current_trace;
5567 
5568         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5569                 ret = -ENOMEM;
5570                 goto fail;
5571         }
5572 
5573         /* trace pipe does not show start of buffer */
5574         cpumask_setall(iter->started);
5575 
5576         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5577                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5578 
5579         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5580         if (trace_clocks[tr->clock_id].in_ns)
5581                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5582 
5583         iter->tr = tr;
5584         iter->trace_buffer = &tr->trace_buffer;
5585         iter->cpu_file = tracing_get_cpu(inode);
5586         mutex_init(&iter->mutex);
5587         filp->private_data = iter;
5588 
5589         if (iter->trace->pipe_open)
5590                 iter->trace->pipe_open(iter);
5591 
5592         nonseekable_open(inode, filp);
5593 
5594         tr->current_trace->ref++;
5595 out:
5596         mutex_unlock(&trace_types_lock);
5597         return ret;
5598 
5599 fail:
5600         kfree(iter->trace);
5601         kfree(iter);
5602         __trace_array_put(tr);
5603         mutex_unlock(&trace_types_lock);
5604         return ret;
5605 }
5606 
5607 static int tracing_release_pipe(struct inode *inode, struct file *file)
5608 {
5609         struct trace_iterator *iter = file->private_data;
5610         struct trace_array *tr = inode->i_private;
5611 
5612         mutex_lock(&trace_types_lock);
5613 
5614         tr->current_trace->ref--;
5615 
5616         if (iter->trace->pipe_close)
5617                 iter->trace->pipe_close(iter);
5618 
5619         mutex_unlock(&trace_types_lock);
5620 
5621         free_cpumask_var(iter->started);
5622         mutex_destroy(&iter->mutex);
5623         kfree(iter);
5624 
5625         trace_array_put(tr);
5626 
5627         return 0;
5628 }
5629 
5630 static unsigned int
5631 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5632 {
5633         struct trace_array *tr = iter->tr;
5634 
5635         /* Iterators are static; they are either filled or empty */
5636         if (trace_buffer_iter(iter, iter->cpu_file))
5637                 return POLLIN | POLLRDNORM;
5638 
5639         if (tr->trace_flags & TRACE_ITER_BLOCK)
5640                 /*
5641                  * Always select as readable when in blocking mode
5642                  */
5643                 return POLLIN | POLLRDNORM;
5644         else
5645                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5646                                              filp, poll_table);
5647 }
5648 
5649 static unsigned int
5650 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5651 {
5652         struct trace_iterator *iter = filp->private_data;
5653 
5654         return trace_poll(iter, filp, poll_table);
5655 }
5656 
5657 /* Must be called with iter->mutex held. */
5658 static int tracing_wait_pipe(struct file *filp)
5659 {
5660         struct trace_iterator *iter = filp->private_data;
5661         int ret;
5662 
5663         while (trace_empty(iter)) {
5664 
5665                 if ((filp->f_flags & O_NONBLOCK)) {
5666                         return -EAGAIN;
5667                 }
5668 
5669                 /*
5670                  * We only return EOF after something has been read and
5671                  * tracing has been disabled. If tracing is disabled but
5672                  * nothing has been read yet, we keep blocking. This allows
5673                  * a user to cat this file and then enable tracing; once
5674                  * something has been read, a later disable gives an EOF.
5675                  *
5676                  * iter->pos will be 0 if we haven't read anything.
5677                  */
5678                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5679                         break;
5680 
5681                 mutex_unlock(&iter->mutex);
5682 
5683                 ret = wait_on_pipe(iter, false);
5684 
5685                 mutex_lock(&iter->mutex);
5686 
5687                 if (ret)
5688                         return ret;
5689         }
5690 
5691         return 1;
5692 }
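
     /*
      * The blocking rules above in shell terms, assuming some events or a
      * tracer are already producing data; values are illustrative:
      *
      *   # cat /sys/kernel/tracing/trace_pipe &      (blocks: nothing read yet)
      *   # echo 1 > /sys/kernel/tracing/tracing_on   (reader starts streaming)
      *   # echo 0 > /sys/kernel/tracing/tracing_on   (reader sees EOF once drained)
      */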
5693 
5694 /*
5695  * Consumer reader.
5696  */
5697 static ssize_t
5698 tracing_read_pipe(struct file *filp, char __user *ubuf,
5699                   size_t cnt, loff_t *ppos)
5700 {
5701         struct trace_iterator *iter = filp->private_data;
5702         ssize_t sret;
5703 
5704         /*
5705          * Avoid more than one consumer on a single file descriptor.
5706          * This is just a matter of trace coherency; the ring buffer itself
5707          * is protected.
5708          */
5709         mutex_lock(&iter->mutex);
5710 
5711         /* return any leftover data */
5712         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5713         if (sret != -EBUSY)
5714                 goto out;
5715 
5716         trace_seq_init(&iter->seq);
5717 
5718         if (iter->trace->read) {
5719                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5720                 if (sret)
5721                         goto out;
5722         }
5723 
5724 waitagain:
5725         sret = tracing_wait_pipe(filp);
5726         if (sret <= 0)
5727                 goto out;
5728 
5729         /* stop when tracing is finished */
5730         if (trace_empty(iter)) {
5731                 sret = 0;
5732                 goto out;
5733         }
5734 
5735         if (cnt >= PAGE_SIZE)
5736                 cnt = PAGE_SIZE - 1;
5737 
5738         /* reset all but tr, trace, and overruns */
5739         memset(&iter->seq, 0,
5740                sizeof(struct trace_iterator) -
5741                offsetof(struct trace_iterator, seq));
5742         cpumask_clear(iter->started);
5743         iter->pos = -1;
5744 
5745         trace_event_read_lock();
5746         trace_access_lock(iter->cpu_file);
5747         while (trace_find_next_entry_inc(iter) != NULL) {
5748                 enum print_line_t ret;
5749                 int save_len = iter->seq.seq.len;
5750 
5751                 ret = print_trace_line(iter);
5752                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5753                         /* don't print partial lines */
5754                         iter->seq.seq.len = save_len;
5755                         break;
5756                 }
5757                 if (ret != TRACE_TYPE_NO_CONSUME)
5758                         trace_consume(iter);
5759 
5760                 if (trace_seq_used(&iter->seq) >= cnt)
5761                         break;
5762 
5763                 /*
5764                  * Setting the full flag means we reached the trace_seq buffer
5765                  * size, so we should have left via the partial-line check above.
5766                  * Hitting this means a trace_seq_* function was not used properly.
5767                  */
5768                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5769                           iter->ent->type);
5770         }
5771         trace_access_unlock(iter->cpu_file);
5772         trace_event_read_unlock();
5773 
5774         /* Now copy what we have to the user */
5775         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5776         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5777                 trace_seq_init(&iter->seq);
5778 
5779         /*
5780          * If there was nothing to send to user, in spite of consuming trace
5781          * entries, go back to wait for more entries.
5782          */
5783         if (sret == -EBUSY)
5784                 goto waitagain;
5785 
5786 out:
5787         mutex_unlock(&iter->mutex);
5788 
5789         return sret;
5790 }
5791 
5792 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5793                                      unsigned int idx)
5794 {
5795         __free_page(spd->pages[idx]);
5796 }
5797 
5798 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5799         .can_merge              = 0,
5800         .confirm                = generic_pipe_buf_confirm,
5801         .release                = generic_pipe_buf_release,
5802         .steal                  = generic_pipe_buf_steal,
5803         .get                    = generic_pipe_buf_get,
5804 };
5805 
5806 static size_t
5807 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5808 {
5809         size_t count;
5810         int save_len;
5811         int ret;
5812 
5813         /* Seq buffer is page-sized, exactly what we need. */
5814         for (;;) {
5815                 save_len = iter->seq.seq.len;
5816                 ret = print_trace_line(iter);
5817 
5818                 if (trace_seq_has_overflowed(&iter->seq)) {
5819                         iter->seq.seq.len = save_len;
5820                         break;
5821                 }
5822 
5823                 /*
5824                  * This should not be hit: a partial line should only be
5825                  * returned if iter->seq overflowed, which is handled above.
5826                  * But check it anyway to be safe.
5827                  */
5828                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5829                         iter->seq.seq.len = save_len;
5830                         break;
5831                 }
5832 
5833                 count = trace_seq_used(&iter->seq) - save_len;
5834                 if (rem < count) {
5835                         rem = 0;
5836                         iter->seq.seq.len = save_len;
5837                         break;
5838                 }
5839 
5840                 if (ret != TRACE_TYPE_NO_CONSUME)
5841                         trace_consume(iter);
5842                 rem -= count;
5843                 if (!trace_find_next_entry_inc(iter))   {
5844                         rem = 0;
5845                         iter->ent = NULL;
5846                         break;
5847                 }
5848         }
5849 
5850         return rem;
5851 }
5852 
5853 static ssize_t tracing_splice_read_pipe(struct file *filp,
5854                                         loff_t *ppos,
5855                                         struct pipe_inode_info *pipe,
5856                                         size_t len,
5857                                         unsigned int flags)
5858 {
5859         struct page *pages_def[PIPE_DEF_BUFFERS];