TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * ring buffer based function tracer
  4  *
  5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  7  *
  8  * Originally taken from the RT patch by:
  9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
 10  *
 11  * Based on code from the latency_tracer, that is:
 12  *  Copyright (C) 2004-2006 Ingo Molnar
 13  *  Copyright (C) 2004 Nadia Yvette Chambers
 14  */
 15 #include <linux/ring_buffer.h>
 16 #include <generated/utsrelease.h>
 17 #include <linux/stacktrace.h>
 18 #include <linux/writeback.h>
 19 #include <linux/kallsyms.h>
 20 #include <linux/seq_file.h>
 21 #include <linux/notifier.h>
 22 #include <linux/irqflags.h>
 23 #include <linux/debugfs.h>
 24 #include <linux/tracefs.h>
 25 #include <linux/pagemap.h>
 26 #include <linux/hardirq.h>
 27 #include <linux/linkage.h>
 28 #include <linux/uaccess.h>
 29 #include <linux/vmalloc.h>
 30 #include <linux/ftrace.h>
 31 #include <linux/module.h>
 32 #include <linux/percpu.h>
 33 #include <linux/splice.h>
 34 #include <linux/kdebug.h>
 35 #include <linux/string.h>
 36 #include <linux/mount.h>
 37 #include <linux/rwsem.h>
 38 #include <linux/slab.h>
 39 #include <linux/ctype.h>
 40 #include <linux/init.h>
 41 #include <linux/poll.h>
 42 #include <linux/nmi.h>
 43 #include <linux/fs.h>
 44 #include <linux/trace.h>
 45 #include <linux/sched/clock.h>
 46 #include <linux/sched/rt.h>
 47 
 48 #include "trace.h"
 49 #include "trace_output.h"
 50 
 51 /*
 52  * On boot up, the ring buffer is set to the minimum size, so that
 53  * we do not waste memory on systems that are not using tracing.
 54  */
 55 bool ring_buffer_expanded;
 56 
 57 /*
 58  * We need to change this state when a selftest is running.
 59  * A selftest will look into the ring-buffer to count the
 60  * entries inserted during the selftest, although concurrent
 61  * insertions into the ring-buffer, such as trace_printk, could occur
 62  * at the same time, giving false positive or negative results.
 63  */
 64 static bool __read_mostly tracing_selftest_running;
 65 
 66 /*
 67  * If a tracer is running, we do not want to run SELFTEST.
 68  */
 69 bool __read_mostly tracing_selftest_disabled;
 70 
 71 /* Pipe tracepoints to printk */
 72 struct trace_iterator *tracepoint_print_iter;
 73 int tracepoint_printk;
 74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 75 
 76 /* For tracers that don't implement custom flags */
 77 static struct tracer_opt dummy_tracer_opt[] = {
 78         { }
 79 };
 80 
 81 static int
 82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 83 {
 84         return 0;
 85 }
 86 
 87 /*
 88  * To prevent the comm cache from being overwritten when no
 89  * tracing is active, only save the comm when a trace event
 90  * occurred.
 91  */
 92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 93 
 94 /*
 95  * Kill all tracing for good (never come back).
 96  * It is initialized to 1 but will turn to zero if the initialization
 97  * of the tracer is successful. But that is the only place that sets
 98  * this back to zero.
 99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
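/*
 * Illustrative usage only; the values follow set_ftrace_dump_on_oops()
 * below and the paths come from the comment above:
 *
 *   ftrace_dump_on_oops                    boot parameter: dump all CPU buffers
 *   ftrace_dump_on_oops=orig_cpu           dump only the CPU that triggered the oops
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops     enable at run time
 */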
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
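/*
 * Purely illustrative sketch of one saved array, per the layout comment
 * above (N is head.length):
 *
 *   [ head: .mod, .length = N ][ map 0 ] ... [ map N-1 ][ tail: .next --> next saved array ]
 */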
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
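/*
 * Example boot usage (illustrative; assumes the named tracer is built in):
 *
 *   ftrace=function
 *
 * This stores "function" in bootup_tracer_buf, and register_tracer() will
 * switch to it once that tracer registers later during boot.
 */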
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185 
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190 
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
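/*
 * Worked example of the rounding above (nearest microsecond, halves round up):
 *
 *   ns2usecs(1499) == 1
 *   ns2usecs(1500) == 2
 */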
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278 
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288 
289         return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314 
315         return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340 
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362 
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384 
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390 
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394 
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
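/*
 * Illustrative call pattern (hypothetical call sites; the real callers are
 * the sched fork/exit probes in the event filtering code):
 *
 *   on fork:  trace_filter_add_remove_task(pid_list, current, child);
 *   on exit:  trace_filter_add_remove_task(pid_list, NULL, current);
 */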
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417 
418         (*pos)++;
419 
 420         /* pid is already +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426 
427         return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445 
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449 
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468 
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
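/*
 * A minimal sketch of how the three helpers above slot into seq_file
 * operations (hypothetical names; the real users live in the ftrace and
 * event filtering code):
 *
 *   static const struct seq_operations example_pid_seq_ops = {
 *           .start = example_pid_start,   // wraps trace_pid_start()
 *           .next  = example_pid_next,    // wraps trace_pid_next()
 *           .stop  = example_pid_stop,
 *           .show  = trace_pid_show,      // prints (unsigned long)v - 1
 *   };
 */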
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488 
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491 
492         /*
 493          * Always create a new array. The write is an all-or-nothing
 494          * operation: a new array is created whenever the user adds new
 495          * pids, and if the operation fails, the current list is left
 496          * unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501 
502         pid_list->pid_max = READ_ONCE(pid_max);
503 
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507 
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513 
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522 
523         while (cnt > 0) {
524 
525                 pos = 0;
526 
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530 
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534 
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540 
541                 pid = (pid_t)val;
542 
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545 
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550 
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555 
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562 
563         *new_pid_list = pid_list;
564 
565         return read;
566 }
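/*
 * Illustrative user-space usage; "set_event_pid" is one example of a
 * tracefs file whose write path ends up here:
 *
 *   # echo 123 456 > set_event_pid
 *
 * Each whitespace-separated token is parsed via trace_get_user() and the
 * corresponding bit is set in the new pid list.
 */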
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571 
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575 
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579         return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
 590  * Shows whether the global trace has been enabled or not. It uses the
 591  * mirror flag "buffer_disabled" so it can be checked in fast paths such
 592  * as the irqsoff tracer, but it may be inaccurate due to races. If you
 593  * need to know the accurate state, use tracing_is_on(), which is a little
 594  * slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
 613  * If a dump on oops happens, it is much appreciated not to have
 614  * to wait for all that output. Anyway, this is configurable at
 615  * both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
 630  * Serialize access to the ring buffer.
 631  *
 632  * The ring buffer serializes readers, but that is only low-level protection.
 633  * The validity of the events (returned by ring_buffer_peek() etc.)
 634  * is not protected by the ring buffer.
 635  *
 636  * The content of events may become garbage if we allow another process to
 637  * consume these events concurrently:
 638  *   A) the page of the consumed events may become a normal page
 639  *      (not a reader page) in the ring buffer, and this page will be
 640  *      rewritten by the events producer.
 641  *   B) the page of the consumed events may become a page for splice_read,
 642  *      and this page will be returned to the system.
 643  *
 644  * These primitives allow multiple processes to access different cpu ring
 645  * buffers concurrently.
 646  *
 647  * These primitives don't distinguish read-only and read-consume access.
 648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662 
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665 
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684 
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
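/*
 * Typical reader-side usage sketch of the primitives above (illustrative):
 *
 *   trace_access_lock(cpu);          // a cpu number or RING_BUFFER_ALL_CPUS
 *   ... consume events from the per-cpu buffer(s) ...
 *   trace_access_unlock(cpu);
 */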
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740 
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752 
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756 
757         return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
 768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794 
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819 
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822 
823         pc = preempt_count();
824 
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827 
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
 832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836 
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839 
840         memcpy(&entry->buf, str, size);
841 
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848 
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
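/*
 * Callers normally go through the trace_puts() macro rather than calling
 * __trace_puts() directly; an illustrative use from kernel code:
 *
 *   trace_puts("reached the slow path\n");
 */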
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
 859  * @str:   The constant string whose pointer is written to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869 
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872 
873         pc = preempt_count();
874 
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877 
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884 
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888 
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901 
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907 
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914 
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921 
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926 
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944 
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948 
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952 
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955         int ret;
956 
957         if (!tr->allocated_snapshot) {
958 
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964 
965                 tr->allocated_snapshot = true;
966         }
967 
968         return 0;
969 }
970 
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
 974          * We don't free the ring buffer; instead, we resize it because
 975          * the max_tr ring buffer has some state (e.g. ring->clock) and
 976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983 
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998 
999         ret = tracing_alloc_snapshot_instance(tr);
1000         WARN_ON(ret < 0);
1001 
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020 
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024 
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
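/*
 * Minimal usage sketch from kernel code (illustrative only; see the
 * kernel-doc comments above):
 *
 *   tracing_snapshot_alloc();    // sleepable context: allocate + snapshot
 *
 * or, when the snapshot must be taken from a context that cannot sleep:
 *
 *   tracing_alloc_snapshot();    // earlier, from a sleepable context
 *   ...
 *   tracing_snapshot();          // later, at the trigger point
 */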
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047 
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064 
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078 
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084 
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 bool tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097 
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106 
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110 
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
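/*
 * Example boot usage (illustrative; memparse() accepts K/M/G suffixes):
 *
 *   trace_buf_size=1M
 */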
1121 
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126 
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
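/*
 * Example boot usage (illustrative): the value is given in microseconds and
 * converted to nanoseconds above, so
 *
 *   tracing_thresh=100
 *
 * sets tracing_thresh to 100000 ns.
 */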
1136 
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141 
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150 
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156 
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
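/*
 * Illustrative run-time selection of one of the clocks above, using the
 * same tracefs path style as the comments elsewhere in this file:
 *
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 */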
1172 
1173 bool trace_clock_in_ns(struct trace_array *tr)
1174 {
1175         if (trace_clocks[tr->clock_id].in_ns)
1176                 return true;
1177 
1178         return false;
1179 }
1180 
1181 /*
1182  * trace_parser_get_init - gets the buffer for trace parser
1183  */
1184 int trace_parser_get_init(struct trace_parser *parser, int size)
1185 {
1186         memset(parser, 0, sizeof(*parser));
1187 
1188         parser->buffer = kmalloc(size, GFP_KERNEL);
1189         if (!parser->buffer)
1190                 return 1;
1191 
1192         parser->size = size;
1193         return 0;
1194 }
1195 
1196 /*
1197  * trace_parser_put - frees the buffer for trace parser
1198  */
1199 void trace_parser_put(struct trace_parser *parser)
1200 {
1201         kfree(parser->buffer);
1202         parser->buffer = NULL;
1203 }
1204 
1205 /*
1206  * trace_get_user - reads the user input string separated by space
1207  * (matched by isspace(ch))
1208  *
1209  * For each string found the 'struct trace_parser' is updated,
1210  * and the function returns.
1211  *
1212  * Returns number of bytes read.
1213  *
1214  * See kernel/trace/trace.h for 'struct trace_parser' details.
1215  */
1216 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217         size_t cnt, loff_t *ppos)
1218 {
1219         char ch;
1220         size_t read = 0;
1221         ssize_t ret;
1222 
1223         if (!*ppos)
1224                 trace_parser_clear(parser);
1225 
1226         ret = get_user(ch, ubuf++);
1227         if (ret)
1228                 goto out;
1229 
1230         read++;
1231         cnt--;
1232 
1233         /*
1234          * The parser is not finished with the last write,
1235          * continue reading the user input without skipping spaces.
1236          */
1237         if (!parser->cont) {
1238                 /* skip white space */
1239                 while (cnt && isspace(ch)) {
1240                         ret = get_user(ch, ubuf++);
1241                         if (ret)
1242                                 goto out;
1243                         read++;
1244                         cnt--;
1245                 }
1246 
1247                 parser->idx = 0;
1248 
1249                 /* only spaces were written */
1250                 if (isspace(ch) || !ch) {
1251                         *ppos += read;
1252                         ret = read;
1253                         goto out;
1254                 }
1255         }
1256 
1257         /* read the non-space input */
1258         while (cnt && !isspace(ch) && ch) {
1259                 if (parser->idx < parser->size - 1)
1260                         parser->buffer[parser->idx++] = ch;
1261                 else {
1262                         ret = -EINVAL;
1263                         goto out;
1264                 }
1265                 ret = get_user(ch, ubuf++);
1266                 if (ret)
1267                         goto out;
1268                 read++;
1269                 cnt--;
1270         }
1271 
1272         /* We either got finished input or we have to wait for another call. */
1273         if (isspace(ch) || !ch) {
1274                 parser->buffer[parser->idx] = 0;
1275                 parser->cont = false;
1276         } else if (parser->idx < parser->size - 1) {
1277                 parser->cont = true;
1278                 parser->buffer[parser->idx++] = ch;
1279                 /* Make sure the parsed string always terminates with '\0'. */
1280                 parser->buffer[parser->idx] = 0;
1281         } else {
1282                 ret = -EINVAL;
1283                 goto out;
1284         }
1285 
1286         *ppos += read;
1287         ret = read;
1288 
1289 out:
1290         return ret;
1291 }
1292 
1293 /* TODO add a seq_buf_to_buffer() */
1294 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295 {
1296         int len;
1297 
1298         if (trace_seq_used(s) <= s->seq.readpos)
1299                 return -EBUSY;
1300 
1301         len = trace_seq_used(s) - s->seq.readpos;
1302         if (cnt > len)
1303                 cnt = len;
1304         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305 
1306         s->seq.readpos += cnt;
1307         return cnt;
1308 }
1309 
1310 unsigned long __read_mostly     tracing_thresh;
1311 
1312 #ifdef CONFIG_TRACER_MAX_TRACE
1313 /*
1314  * Copy the new maximum trace into the separate maximum-trace
1315  * structure. (this way the maximum trace is permanently saved,
1316  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317  */
1318 static void
1319 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320 {
1321         struct trace_buffer *trace_buf = &tr->trace_buffer;
1322         struct trace_buffer *max_buf = &tr->max_buffer;
1323         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325 
1326         max_buf->cpu = cpu;
1327         max_buf->time_start = data->preempt_timestamp;
1328 
1329         max_data->saved_latency = tr->max_latency;
1330         max_data->critical_start = data->critical_start;
1331         max_data->critical_end = data->critical_end;
1332 
1333         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334         max_data->pid = tsk->pid;
1335         /*
1336          * If tsk == current, then use current_uid(), as that does not use
1337          * RCU. The irq tracer can be called out of RCU scope.
1338          */
1339         if (tsk == current)
1340                 max_data->uid = current_uid();
1341         else
1342                 max_data->uid = task_uid(tsk);
1343 
1344         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345         max_data->policy = tsk->policy;
1346         max_data->rt_priority = tsk->rt_priority;
1347 
1348         /* record this task's comm */
1349         tracing_record_cmdline(tsk);
1350 }
1351 
1352 /**
1353  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354  * @tr: tracer
1355  * @tsk: the task with the latency
1356  * @cpu: The cpu that initiated the trace.
1357  *
1358  * Flip the buffers between the @tr and the max_tr and record information
1359  * about which task was the cause of this latency.
1360  */
1361 void
1362 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363 {
1364         if (tr->stop_count)
1365                 return;
1366 
1367         WARN_ON_ONCE(!irqs_disabled());
1368 
1369         if (!tr->allocated_snapshot) {
1370                 /* Only the nop tracer should hit this when disabling */
1371                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372                 return;
1373         }
1374 
1375         arch_spin_lock(&tr->max_lock);
1376 
1377         /* Inherit the recordable setting from trace_buffer */
1378         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379                 ring_buffer_record_on(tr->max_buffer.buffer);
1380         else
1381                 ring_buffer_record_off(tr->max_buffer.buffer);
1382 
1383         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384 
1385         __update_max_tr(tr, tsk, cpu);
1386         arch_spin_unlock(&tr->max_lock);
1387 }
1388 
1389 /**
1390  * update_max_tr_single - only copy one trace over, and reset the rest
1391  * @tr: tracer
1392  * @tsk: task with the latency
1393  * @cpu: the cpu of the buffer to copy.
1394  *
1395  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396  */
1397 void
1398 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399 {
1400         int ret;
1401 
1402         if (tr->stop_count)
1403                 return;
1404 
1405         WARN_ON_ONCE(!irqs_disabled());
1406         if (!tr->allocated_snapshot) {
1407                 /* Only the nop tracer should hit this when disabling */
1408                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409                 return;
1410         }
1411 
1412         arch_spin_lock(&tr->max_lock);
1413 
1414         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415 
1416         if (ret == -EBUSY) {
1417                 /*
1418                  * We failed to swap the buffer due to a commit taking
1419                  * place on this CPU. We fail to record, but we reset
1420                  * the max trace buffer (no one writes directly to it)
1421                  * and flag that it failed.
1422                  */
1423                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424                         "Failed to swap buffers due to commit in progress\n");
1425         }
1426 
1427         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428 
1429         __update_max_tr(tr, tsk, cpu);
1430         arch_spin_unlock(&tr->max_lock);
1431 }
1432 #endif /* CONFIG_TRACER_MAX_TRACE */
1433 
1434 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1435 {
1436         /* Iterators are static, they should be filled or empty */
1437         if (trace_buffer_iter(iter, iter->cpu_file))
1438                 return 0;
1439 
1440         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441                                 full);
1442 }
1443 
1444 #ifdef CONFIG_FTRACE_STARTUP_TEST
1445 static bool selftests_can_run;
1446 
1447 struct trace_selftests {
1448         struct list_head                list;
1449         struct tracer                   *type;
1450 };
1451 
1452 static LIST_HEAD(postponed_selftests);
1453 
1454 static int save_selftest(struct tracer *type)
1455 {
1456         struct trace_selftests *selftest;
1457 
1458         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459         if (!selftest)
1460                 return -ENOMEM;
1461 
1462         selftest->type = type;
1463         list_add(&selftest->list, &postponed_selftests);
1464         return 0;
1465 }
1466 
1467 static int run_tracer_selftest(struct tracer *type)
1468 {
1469         struct trace_array *tr = &global_trace;
1470         struct tracer *saved_tracer = tr->current_trace;
1471         int ret;
1472 
1473         if (!type->selftest || tracing_selftest_disabled)
1474                 return 0;
1475 
1476         /*
1477          * If a tracer registers early in boot up (before scheduling is
1478          * initialized and such), then do not run its selftests yet.
1479          * Instead, run it a little later in the boot process.
1480          */
1481         if (!selftests_can_run)
1482                 return save_selftest(type);
1483 
1484         /*
1485          * Run a selftest on this tracer.
1486          * Here we reset the trace buffer, and set the current
1487          * tracer to be this tracer. The tracer can then run some
1488          * internal tracing to verify that everything is in order.
1489          * If we fail, we do not register this tracer.
1490          */
1491         tracing_reset_online_cpus(&tr->trace_buffer);
1492 
1493         tr->current_trace = type;
1494 
1495 #ifdef CONFIG_TRACER_MAX_TRACE
1496         if (type->use_max_tr) {
1497                 /* If we expanded the buffers, make sure the max is expanded too */
1498                 if (ring_buffer_expanded)
1499                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500                                            RING_BUFFER_ALL_CPUS);
1501                 tr->allocated_snapshot = true;
1502         }
1503 #endif
1504 
1505         /* the test is responsible for initializing and enabling */
1506         pr_info("Testing tracer %s: ", type->name);
1507         ret = type->selftest(type, tr);
1508         /* the test is responsible for resetting too */
1509         tr->current_trace = saved_tracer;
1510         if (ret) {
1511                 printk(KERN_CONT "FAILED!\n");
1512                 /* Add the warning after printing 'FAILED' */
1513                 WARN_ON(1);
1514                 return -1;
1515         }
1516         /* Only reset on passing, to avoid touching corrupted buffers */
1517         tracing_reset_online_cpus(&tr->trace_buffer);
1518 
1519 #ifdef CONFIG_TRACER_MAX_TRACE
1520         if (type->use_max_tr) {
1521                 tr->allocated_snapshot = false;
1522 
1523                 /* Shrink the max buffer again */
1524                 if (ring_buffer_expanded)
1525                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1526                                            RING_BUFFER_ALL_CPUS);
1527         }
1528 #endif
1529 
1530         printk(KERN_CONT "PASSED\n");
1531         return 0;
1532 }
1533 
1534 static __init int init_trace_selftests(void)
1535 {
1536         struct trace_selftests *p, *n;
1537         struct tracer *t, **last;
1538         int ret;
1539 
1540         selftests_can_run = true;
1541 
1542         mutex_lock(&trace_types_lock);
1543 
1544         if (list_empty(&postponed_selftests))
1545                 goto out;
1546 
1547         pr_info("Running postponed tracer tests:\n");
1548 
1549         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550                 ret = run_tracer_selftest(p->type);
1551                 /* If the test fails, then warn and remove from available_tracers */
1552                 if (ret < 0) {
1553                         WARN(1, "tracer: %s failed selftest, disabling\n",
1554                              p->type->name);
1555                         last = &trace_types;
1556                         for (t = trace_types; t; t = t->next) {
1557                                 if (t == p->type) {
1558                                         *last = t->next;
1559                                         break;
1560                                 }
1561                                 last = &t->next;
1562                         }
1563                 }
1564                 list_del(&p->list);
1565                 kfree(p);
1566         }
1567 
1568  out:
1569         mutex_unlock(&trace_types_lock);
1570 
1571         return 0;
1572 }
1573 core_initcall(init_trace_selftests);
1574 #else
1575 static inline int run_tracer_selftest(struct tracer *type)
1576 {
1577         return 0;
1578 }
1579 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1580 
1581 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582 
1583 static void __init apply_trace_boot_options(void);
1584 
1585 /**
1586  * register_tracer - register a tracer with the ftrace system.
1587  * @type: the plugin for the tracer
1588  *
1589  * Register a new plugin tracer.
1590  */
1591 int __init register_tracer(struct tracer *type)
1592 {
1593         struct tracer *t;
1594         int ret = 0;
1595 
1596         if (!type->name) {
1597                 pr_info("Tracer must have a name\n");
1598                 return -1;
1599         }
1600 
1601         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603                 return -1;
1604         }
1605 
1606         mutex_lock(&trace_types_lock);
1607 
1608         tracing_selftest_running = true;
1609 
1610         for (t = trace_types; t; t = t->next) {
1611                 if (strcmp(type->name, t->name) == 0) {
1612                         /* already found */
1613                         pr_info("Tracer %s already registered\n",
1614                                 type->name);
1615                         ret = -1;
1616                         goto out;
1617                 }
1618         }
1619 
1620         if (!type->set_flag)
1621                 type->set_flag = &dummy_set_flag;
1622         if (!type->flags) {
1623                 /* Allocate a dummy tracer_flags */
1624                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625                 if (!type->flags) {
1626                         ret = -ENOMEM;
1627                         goto out;
1628                 }
1629                 type->flags->val = 0;
1630                 type->flags->opts = dummy_tracer_opt;
1631         } else
1632                 if (!type->flags->opts)
1633                         type->flags->opts = dummy_tracer_opt;
1634 
1635         /* store the tracer for __set_tracer_option */
1636         type->flags->trace = type;
1637 
1638         ret = run_tracer_selftest(type);
1639         if (ret < 0)
1640                 goto out;
1641 
1642         type->next = trace_types;
1643         trace_types = type;
1644         add_tracer_options(&global_trace, type);
1645 
1646  out:
1647         tracing_selftest_running = false;
1648         mutex_unlock(&trace_types_lock);
1649 
1650         if (ret || !default_bootup_tracer)
1651                 goto out_unlock;
1652 
1653         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654                 goto out_unlock;
1655 
1656         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657         /* Do we want this tracer to start on bootup? */
1658         tracing_set_tracer(&global_trace, type->name);
1659         default_bootup_tracer = NULL;
1660 
1661         apply_trace_boot_options();
1662 
1663         /* disable other selftests, since this will break it. */
1664         tracing_selftest_disabled = true;
1665 #ifdef CONFIG_FTRACE_STARTUP_TEST
1666         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667                type->name);
1668 #endif
1669 
1670  out_unlock:
1671         return ret;
1672 }
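/*
 * A minimal sketch of a tracer plugin, modeled on the in-tree nop tracer
 * (kernel/trace/trace_nop.c).  The example_tracer_* names are illustrative
 * only.  Since register_tracer() is __init, a tracer must be built in and
 * registered from an initcall rather than from a loadable module.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* Arm whatever hooks the tracer needs here. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo what example_tracer_init() set up. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);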
1673 
1674 void tracing_reset(struct trace_buffer *buf, int cpu)
1675 {
1676         struct ring_buffer *buffer = buf->buffer;
1677 
1678         if (!buffer)
1679                 return;
1680 
1681         ring_buffer_record_disable(buffer);
1682 
1683         /* Make sure all commits have finished */
1684         synchronize_sched();
1685         ring_buffer_reset_cpu(buffer, cpu);
1686 
1687         ring_buffer_record_enable(buffer);
1688 }
1689 
1690 void tracing_reset_online_cpus(struct trace_buffer *buf)
1691 {
1692         struct ring_buffer *buffer = buf->buffer;
1693         int cpu;
1694 
1695         if (!buffer)
1696                 return;
1697 
1698         ring_buffer_record_disable(buffer);
1699 
1700         /* Make sure all commits have finished */
1701         synchronize_sched();
1702 
1703         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704 
1705         for_each_online_cpu(cpu)
1706                 ring_buffer_reset_cpu(buffer, cpu);
1707 
1708         ring_buffer_record_enable(buffer);
1709 }
1710 
1711 /* Must have trace_types_lock held */
1712 void tracing_reset_all_online_cpus(void)
1713 {
1714         struct trace_array *tr;
1715 
1716         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717                 if (!tr->clear_trace)
1718                         continue;
1719                 tr->clear_trace = false;
1720                 tracing_reset_online_cpus(&tr->trace_buffer);
1721 #ifdef CONFIG_TRACER_MAX_TRACE
1722                 tracing_reset_online_cpus(&tr->max_buffer);
1723 #endif
1724         }
1725 }
1726 
1727 static int *tgid_map;
1728 
1729 #define SAVED_CMDLINES_DEFAULT 128
1730 #define NO_CMDLINE_MAP UINT_MAX
1731 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732 struct saved_cmdlines_buffer {
1733         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734         unsigned *map_cmdline_to_pid;
1735         unsigned cmdline_num;
1736         int cmdline_idx;
1737         char *saved_cmdlines;
1738 };
1739 static struct saved_cmdlines_buffer *savedcmd;
1740 
1741 /* temporarily disable recording */
1742 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743 
1744 static inline char *get_saved_cmdlines(int idx)
1745 {
1746         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747 }
1748 
1749 static inline void set_cmdline(int idx, const char *cmdline)
1750 {
1751         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752 }
1753 
1754 static int allocate_cmdlines_buffer(unsigned int val,
1755                                     struct saved_cmdlines_buffer *s)
1756 {
1757         s->map_cmdline_to_pid = kmalloc_array(val,
1758                                               sizeof(*s->map_cmdline_to_pid),
1759                                               GFP_KERNEL);
1760         if (!s->map_cmdline_to_pid)
1761                 return -ENOMEM;
1762 
1763         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764         if (!s->saved_cmdlines) {
1765                 kfree(s->map_cmdline_to_pid);
1766                 return -ENOMEM;
1767         }
1768 
1769         s->cmdline_idx = 0;
1770         s->cmdline_num = val;
1771         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772                sizeof(s->map_pid_to_cmdline));
1773         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774                val * sizeof(*s->map_cmdline_to_pid));
1775 
1776         return 0;
1777 }
1778 
1779 static int trace_create_savedcmd(void)
1780 {
1781         int ret;
1782 
1783         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784         if (!savedcmd)
1785                 return -ENOMEM;
1786 
1787         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788         if (ret < 0) {
1789                 kfree(savedcmd);
1790                 savedcmd = NULL;
1791                 return -ENOMEM;
1792         }
1793 
1794         return 0;
1795 }
1796 
1797 int is_tracing_stopped(void)
1798 {
1799         return global_trace.stop_count;
1800 }
1801 
1802 /**
1803  * tracing_start - quick start of the tracer
1804  *
1805  * If tracing is enabled but was stopped by tracing_stop,
1806  * this will start the tracer back up.
1807  */
1808 void tracing_start(void)
1809 {
1810         struct ring_buffer *buffer;
1811         unsigned long flags;
1812 
1813         if (tracing_disabled)
1814                 return;
1815 
1816         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817         if (--global_trace.stop_count) {
1818                 if (global_trace.stop_count < 0) {
1819                         /* Someone screwed up their debugging */
1820                         WARN_ON_ONCE(1);
1821                         global_trace.stop_count = 0;
1822                 }
1823                 goto out;
1824         }
1825 
1826         /* Prevent the buffers from switching */
1827         arch_spin_lock(&global_trace.max_lock);
1828 
1829         buffer = global_trace.trace_buffer.buffer;
1830         if (buffer)
1831                 ring_buffer_record_enable(buffer);
1832 
1833 #ifdef CONFIG_TRACER_MAX_TRACE
1834         buffer = global_trace.max_buffer.buffer;
1835         if (buffer)
1836                 ring_buffer_record_enable(buffer);
1837 #endif
1838 
1839         arch_spin_unlock(&global_trace.max_lock);
1840 
1841  out:
1842         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843 }
1844 
1845 static void tracing_start_tr(struct trace_array *tr)
1846 {
1847         struct ring_buffer *buffer;
1848         unsigned long flags;
1849 
1850         if (tracing_disabled)
1851                 return;
1852 
1853         /* If global, we need to also start the max tracer */
1854         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855                 return tracing_start();
1856 
1857         raw_spin_lock_irqsave(&tr->start_lock, flags);
1858 
1859         if (--tr->stop_count) {
1860                 if (tr->stop_count < 0) {
1861                         /* Someone screwed up their debugging */
1862                         WARN_ON_ONCE(1);
1863                         tr->stop_count = 0;
1864                 }
1865                 goto out;
1866         }
1867 
1868         buffer = tr->trace_buffer.buffer;
1869         if (buffer)
1870                 ring_buffer_record_enable(buffer);
1871 
1872  out:
1873         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874 }
1875 
1876 /**
1877  * tracing_stop - quick stop of the tracer
1878  *
1879  * Lightweight way to stop tracing. Use in conjunction with
1880  * tracing_start.
1881  */
1882 void tracing_stop(void)
1883 {
1884         struct ring_buffer *buffer;
1885         unsigned long flags;
1886 
1887         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888         if (global_trace.stop_count++)
1889                 goto out;
1890 
1891         /* Prevent the buffers from switching */
1892         arch_spin_lock(&global_trace.max_lock);
1893 
1894         buffer = global_trace.trace_buffer.buffer;
1895         if (buffer)
1896                 ring_buffer_record_disable(buffer);
1897 
1898 #ifdef CONFIG_TRACER_MAX_TRACE
1899         buffer = global_trace.max_buffer.buffer;
1900         if (buffer)
1901                 ring_buffer_record_disable(buffer);
1902 #endif
1903 
1904         arch_spin_unlock(&global_trace.max_lock);
1905 
1906  out:
1907         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908 }
1909 
1910 static void tracing_stop_tr(struct trace_array *tr)
1911 {
1912         struct ring_buffer *buffer;
1913         unsigned long flags;
1914 
1915         /* If global, we need to also stop the max tracer */
1916         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917                 return tracing_stop();
1918 
1919         raw_spin_lock_irqsave(&tr->start_lock, flags);
1920         if (tr->stop_count++)
1921                 goto out;
1922 
1923         buffer = tr->trace_buffer.buffer;
1924         if (buffer)
1925                 ring_buffer_record_disable(buffer);
1926 
1927  out:
1928         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929 }
1930 
1931 static int trace_save_cmdline(struct task_struct *tsk)
1932 {
1933         unsigned pid, idx;
1934 
1935         /* treat recording of idle task as a success */
1936         if (!tsk->pid)
1937                 return 1;
1938 
1939         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940                 return 0;
1941 
1942         /*
1943          * It's not the end of the world if we don't get
1944          * the lock, but we also don't want to spin
1945          * nor do we want to disable interrupts,
1946          * so if we miss here, then better luck next time.
1947          */
1948         if (!arch_spin_trylock(&trace_cmdline_lock))
1949                 return 0;
1950 
1951         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952         if (idx == NO_CMDLINE_MAP) {
1953                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954 
1955                 /*
1956                  * Check whether the cmdline buffer at idx has a pid
1957                  * mapped. We are going to overwrite that entry so we
1958                  * need to clear the map_pid_to_cmdline. Otherwise we
1959                  * would read the new comm for the old pid.
1960                  */
1961                 pid = savedcmd->map_cmdline_to_pid[idx];
1962                 if (pid != NO_CMDLINE_MAP)
1963                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964 
1965                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967 
1968                 savedcmd->cmdline_idx = idx;
1969         }
1970 
1971         set_cmdline(idx, tsk->comm);
1972 
1973         arch_spin_unlock(&trace_cmdline_lock);
1974 
1975         return 1;
1976 }
1977 
1978 static void __trace_find_cmdline(int pid, char comm[])
1979 {
1980         unsigned map;
1981 
1982         if (!pid) {
1983                 strcpy(comm, "<idle>");
1984                 return;
1985         }
1986 
1987         if (WARN_ON_ONCE(pid < 0)) {
1988                 strcpy(comm, "<XXX>");
1989                 return;
1990         }
1991 
1992         if (pid > PID_MAX_DEFAULT) {
1993                 strcpy(comm, "<...>");
1994                 return;
1995         }
1996 
1997         map = savedcmd->map_pid_to_cmdline[pid];
1998         if (map != NO_CMDLINE_MAP)
1999                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000         else
2001                 strcpy(comm, "<...>");
2002 }
2003 
2004 void trace_find_cmdline(int pid, char comm[])
2005 {
2006         preempt_disable();
2007         arch_spin_lock(&trace_cmdline_lock);
2008 
2009         __trace_find_cmdline(pid, comm);
2010 
2011         arch_spin_unlock(&trace_cmdline_lock);
2012         preempt_enable();
2013 }
2014 
2015 int trace_find_tgid(int pid)
2016 {
2017         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018                 return 0;
2019 
2020         return tgid_map[pid];
2021 }
2022 
2023 static int trace_save_tgid(struct task_struct *tsk)
2024 {
2025         /* treat recording of idle task as a success */
2026         if (!tsk->pid)
2027                 return 1;
2028 
2029         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030                 return 0;
2031 
2032         tgid_map[tsk->pid] = tsk->tgid;
2033         return 1;
2034 }
2035 
2036 static bool tracing_record_taskinfo_skip(int flags)
2037 {
2038         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039                 return true;
2040         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041                 return true;
2042         if (!__this_cpu_read(trace_taskinfo_save))
2043                 return true;
2044         return false;
2045 }
2046 
2047 /**
2048  * tracing_record_taskinfo - record the task info of a task
2049  *
2050  * @task:  task to record
2051  * @flags: TRACE_RECORD_CMDLINE for recording comm
2052  *         TRACE_RECORD_TGID for recording tgid
2053  */
2054 void tracing_record_taskinfo(struct task_struct *task, int flags)
2055 {
2056         bool done;
2057 
2058         if (tracing_record_taskinfo_skip(flags))
2059                 return;
2060 
2061         /*
2062          * Record as much task information as possible. If some fail, continue
2063          * to try to record the others.
2064          */
2065         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067 
2068         /* If recording any information failed, retry again soon. */
2069         if (!done)
2070                 return;
2071 
2072         __this_cpu_write(trace_taskinfo_save, false);
2073 }
2074 
2075 /**
2076  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077  *
2078  * @prev:  previous task during sched_switch
2079  * @next:  next task during sched_switch
2080  * @flags: TRACE_RECORD_CMDLINE for recording comm
2081  *         TRACE_RECORD_TGID for recording tgid
2082  */
2083 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084                                           struct task_struct *next, int flags)
2085 {
2086         bool done;
2087 
2088         if (tracing_record_taskinfo_skip(flags))
2089                 return;
2090 
2091         /*
2092          * Record as much task information as possible. If some fail, continue
2093          * to try to record the others.
2094          */
2095         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099 
2100         /* If recording any information failed, retry again soon. */
2101         if (!done)
2102                 return;
2103 
2104         __this_cpu_write(trace_taskinfo_save, false);
2105 }
2106 
2107 /* Helpers to record a specific task information */
2108 void tracing_record_cmdline(struct task_struct *task)
2109 {
2110         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111 }
2112 
2113 void tracing_record_tgid(struct task_struct *task)
2114 {
2115         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116 }
2117 
2118 /*
2119  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121  * simplifies those functions and keeps them in sync.
2122  */
2123 enum print_line_t trace_handle_return(struct trace_seq *s)
2124 {
2125         return trace_seq_has_overflowed(s) ?
2126                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127 }
2128 EXPORT_SYMBOL_GPL(trace_handle_return);
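/*
 * Illustrative sketch only: a typical trace_event output callback builds
 * its line in iter->seq and then lets trace_handle_return() map a
 * trace_seq overflow onto TRACE_TYPE_PARTIAL_LINE.  The callback name
 * below is hypothetical.
 */
static enum print_line_t example_event_print(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);

	return trace_handle_return(s);
}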
2129 
2130 void
2131 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132                              int pc)
2133 {
2134         struct task_struct *tsk = current;
2135 
2136         entry->preempt_count            = pc & 0xff;
2137         entry->pid                      = (tsk) ? tsk->pid : 0;
2138         entry->flags =
2139 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141 #else
2142                 TRACE_FLAG_IRQS_NOSUPPORT |
2143 #endif
2144                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149 }
2150 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2151 
2152 struct ring_buffer_event *
2153 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154                           int type,
2155                           unsigned long len,
2156                           unsigned long flags, int pc)
2157 {
2158         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159 }
2160 
2161 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163 static int trace_buffered_event_ref;
2164 
2165 /**
2166  * trace_buffered_event_enable - enable buffering events
2167  *
2168  * When events are being filtered, it is quicker to use a temporary
2169  * buffer to write the event data into if there's a likely chance
2170  * that it will not be committed. Discarding an event from the ring
2171  * buffer is not as fast as committing, and is much slower than
2172  * copying a completed event in and committing it.
2173  *
2174  * When an event is to be filtered, per-CPU buffers are allocated to
2175  * write the event data into; if the event is filtered and discarded
2176  * it is simply dropped, otherwise the entire event is committed
2177  * in one shot.
2178  */
2179 void trace_buffered_event_enable(void)
2180 {
2181         struct ring_buffer_event *event;
2182         struct page *page;
2183         int cpu;
2184 
2185         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186 
2187         if (trace_buffered_event_ref++)
2188                 return;
2189 
2190         for_each_tracing_cpu(cpu) {
2191                 page = alloc_pages_node(cpu_to_node(cpu),
2192                                         GFP_KERNEL | __GFP_NORETRY, 0);
2193                 if (!page)
2194                         goto failed;
2195 
2196                 event = page_address(page);
2197                 memset(event, 0, sizeof(*event));
2198 
2199                 per_cpu(trace_buffered_event, cpu) = event;
2200 
2201                 preempt_disable();
2202                 if (cpu == smp_processor_id() &&
2203                     this_cpu_read(trace_buffered_event) !=
2204                     per_cpu(trace_buffered_event, cpu))
2205                         WARN_ON_ONCE(1);
2206                 preempt_enable();
2207         }
2208 
2209         return;
2210  failed:
2211         trace_buffered_event_disable();
2212 }
2213 
2214 static void enable_trace_buffered_event(void *data)
2215 {
2216         /* Probably not needed, but do it anyway */
2217         smp_rmb();
2218         this_cpu_dec(trace_buffered_event_cnt);
2219 }
2220 
2221 static void disable_trace_buffered_event(void *data)
2222 {
2223         this_cpu_inc(trace_buffered_event_cnt);
2224 }
2225 
2226 /**
2227  * trace_buffered_event_disable - disable buffering events
2228  *
2229  * When a filter is removed, it is faster to not use the buffered
2230  * events, and to commit directly into the ring buffer. Free up
2231  * the temp buffers when there are no more users. This requires
2232  * special synchronization with current events.
2233  */
2234 void trace_buffered_event_disable(void)
2235 {
2236         int cpu;
2237 
2238         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239 
2240         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241                 return;
2242 
2243         if (--trace_buffered_event_ref)
2244                 return;
2245 
2246         preempt_disable();
2247         /* For each CPU, set the buffer as used. */
2248         smp_call_function_many(tracing_buffer_mask,
2249                                disable_trace_buffered_event, NULL, 1);
2250         preempt_enable();
2251 
2252         /* Wait for all current users to finish */
2253         synchronize_sched();
2254 
2255         for_each_tracing_cpu(cpu) {
2256                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257                 per_cpu(trace_buffered_event, cpu) = NULL;
2258         }
2259         /*
2260          * Make sure trace_buffered_event is NULL before clearing
2261          * trace_buffered_event_cnt.
2262          */
2263         smp_wmb();
2264 
2265         preempt_disable();
2266         /* Do the work on each cpu */
2267         smp_call_function_many(tracing_buffer_mask,
2268                                enable_trace_buffered_event, NULL, 1);
2269         preempt_enable();
2270 }
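/*
 * Hedged usage sketch: the event-filter code is expected to pair these
 * calls while holding event_mutex (both functions assert it), enabling
 * buffering when a filter is attached and disabling it when the filter
 * is removed.  The filter-installation step is only a placeholder.
 */
static void example_attach_filter(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... install the event filter ... */
	mutex_unlock(&event_mutex);
}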
2271 
2272 static struct ring_buffer *temp_buffer;
2273 
2274 struct ring_buffer_event *
2275 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276                           struct trace_event_file *trace_file,
2277                           int type, unsigned long len,
2278                           unsigned long flags, int pc)
2279 {
2280         struct ring_buffer_event *entry;
2281         int val;
2282 
2283         *current_rb = trace_file->tr->trace_buffer.buffer;
2284 
2285         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287             (entry = this_cpu_read(trace_buffered_event))) {
2288                 /* Try to use the per cpu buffer first */
2289                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2290                 if (val == 1) {
2291                         trace_event_setup(entry, type, flags, pc);
2292                         entry->array[0] = len;
2293                         return entry;
2294                 }
2295                 this_cpu_dec(trace_buffered_event_cnt);
2296         }
2297 
2298         entry = __trace_buffer_lock_reserve(*current_rb,
2299                                             type, len, flags, pc);
2300         /*
2301          * If tracing is off, but we have triggers enabled
2302          * we still need to look at the event data. Use the temp_buffer
2303          * to store the trace event for the trigger to use. It's recursion
2304          * safe and will not be recorded anywhere.
2305          */
2306         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307                 *current_rb = temp_buffer;
2308                 entry = __trace_buffer_lock_reserve(*current_rb,
2309                                                     type, len, flags, pc);
2310         }
2311         return entry;
2312 }
2313 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314 
2315 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316 static DEFINE_MUTEX(tracepoint_printk_mutex);
2317 
2318 static void output_printk(struct trace_event_buffer *fbuffer)
2319 {
2320         struct trace_event_call *event_call;
2321         struct trace_event *event;
2322         unsigned long flags;
2323         struct trace_iterator *iter = tracepoint_print_iter;
2324 
2325         /* We should never get here if iter is NULL */
2326         if (WARN_ON_ONCE(!iter))
2327                 return;
2328 
2329         event_call = fbuffer->trace_file->event_call;
2330         if (!event_call || !event_call->event.funcs ||
2331             !event_call->event.funcs->trace)
2332                 return;
2333 
2334         event = &fbuffer->trace_file->event_call->event;
2335 
2336         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337         trace_seq_init(&iter->seq);
2338         iter->ent = fbuffer->entry;
2339         event_call->event.funcs->trace(iter, 0, event);
2340         trace_seq_putc(&iter->seq, 0);
2341         printk("%s", iter->seq.buffer);
2342 
2343         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344 }
2345 
2346 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347                              void __user *buffer, size_t *lenp,
2348                              loff_t *ppos)
2349 {
2350         int save_tracepoint_printk;
2351         int ret;
2352 
2353         mutex_lock(&tracepoint_printk_mutex);
2354         save_tracepoint_printk = tracepoint_printk;
2355 
2356         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357 
2358         /*
2359          * This will force exiting early, as tracepoint_printk
2360          * is always zero when tracepoint_print_iter is not allocated
2361          */
2362         if (!tracepoint_print_iter)
2363                 tracepoint_printk = 0;
2364 
2365         if (save_tracepoint_printk == tracepoint_printk)
2366                 goto out;
2367 
2368         if (tracepoint_printk)
2369                 static_key_enable(&tracepoint_printk_key.key);
2370         else
2371                 static_key_disable(&tracepoint_printk_key.key);
2372 
2373  out:
2374         mutex_unlock(&tracepoint_printk_mutex);
2375 
2376         return ret;
2377 }
2378 
2379 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380 {
2381         if (static_key_false(&tracepoint_printk_key.key))
2382                 output_printk(fbuffer);
2383 
2384         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385                                     fbuffer->event, fbuffer->entry,
2386                                     fbuffer->flags, fbuffer->pc);
2387 }
2388 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389 
2390 /*
2391  * Skip 3:
2392  *
2393  *   trace_buffer_unlock_commit_regs()
2394  *   trace_event_buffer_commit()
2395  *   trace_event_raw_event_xxx()
2396  */
2397 # define STACK_SKIP 3
2398 
2399 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400                                      struct ring_buffer *buffer,
2401                                      struct ring_buffer_event *event,
2402                                      unsigned long flags, int pc,
2403                                      struct pt_regs *regs)
2404 {
2405         __buffer_unlock_commit(buffer, event);
2406 
2407         /*
2408          * If regs is not set, then skip the necessary functions.
2409          * Note, we can still get here via blktrace, wakeup tracer
2410          * and mmiotrace, but that's ok if they lose a function or
2411          * two. They are not that meaningful.
2412          */
2413         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414         ftrace_trace_userstack(buffer, flags, pc);
2415 }
2416 
2417 /*
2418  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2419  */
2420 void
2421 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422                                    struct ring_buffer_event *event)
2423 {
2424         __buffer_unlock_commit(buffer, event);
2425 }
2426 
2427 static void
2428 trace_process_export(struct trace_export *export,
2429                struct ring_buffer_event *event)
2430 {
2431         struct trace_entry *entry;
2432         unsigned int size = 0;
2433 
2434         entry = ring_buffer_event_data(event);
2435         size = ring_buffer_event_length(event);
2436         export->write(export, entry, size);
2437 }
2438 
2439 static DEFINE_MUTEX(ftrace_export_lock);
2440 
2441 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442 
2443 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444 
2445 static inline void ftrace_exports_enable(void)
2446 {
2447         static_branch_enable(&ftrace_exports_enabled);
2448 }
2449 
2450 static inline void ftrace_exports_disable(void)
2451 {
2452         static_branch_disable(&ftrace_exports_enabled);
2453 }
2454 
2455 void ftrace_exports(struct ring_buffer_event *event)
2456 {
2457         struct trace_export *export;
2458 
2459         preempt_disable_notrace();
2460 
2461         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462         while (export) {
2463                 trace_process_export(export, event);
2464                 export = rcu_dereference_raw_notrace(export->next);
2465         }
2466 
2467         preempt_enable_notrace();
2468 }
2469 
2470 static inline void
2471 add_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473         rcu_assign_pointer(export->next, *list);
2474         /*
2475          * We are entering export into the list but another
2476          * CPU might be walking that list. We need to make sure
2477          * the export->next pointer is valid before another CPU sees
2478          * the export pointer inserted into the list.
2479          */
2480         rcu_assign_pointer(*list, export);
2481 }
2482 
2483 static inline int
2484 rm_trace_export(struct trace_export **list, struct trace_export *export)
2485 {
2486         struct trace_export **p;
2487 
2488         for (p = list; *p != NULL; p = &(*p)->next)
2489                 if (*p == export)
2490                         break;
2491 
2492         if (*p != export)
2493                 return -1;
2494 
2495         rcu_assign_pointer(*p, (*p)->next);
2496 
2497         return 0;
2498 }
2499 
2500 static inline void
2501 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502 {
2503         if (*list == NULL)
2504                 ftrace_exports_enable();
2505 
2506         add_trace_export(list, export);
2507 }
2508 
2509 static inline int
2510 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511 {
2512         int ret;
2513 
2514         ret = rm_trace_export(list, export);
2515         if (*list == NULL)
2516                 ftrace_exports_disable();
2517 
2518         return ret;
2519 }
2520 
2521 int register_ftrace_export(struct trace_export *export)
2522 {
2523         if (WARN_ON_ONCE(!export->write))
2524                 return -1;
2525 
2526         mutex_lock(&ftrace_export_lock);
2527 
2528         add_ftrace_export(&ftrace_exports_list, export);
2529 
2530         mutex_unlock(&ftrace_export_lock);
2531 
2532         return 0;
2533 }
2534 EXPORT_SYMBOL_GPL(register_ftrace_export);
2535 
2536 int unregister_ftrace_export(struct trace_export *export)
2537 {
2538         int ret;
2539 
2540         mutex_lock(&ftrace_export_lock);
2541 
2542         ret = rm_ftrace_export(&ftrace_exports_list, export);
2543 
2544         mutex_unlock(&ftrace_export_lock);
2545 
2546         return ret;
2547 }
2548 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
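/*
 * Hedged sketch of a trace export consumer: struct trace_export only needs
 * a ->write() callback, matching the export->write(export, entry, size)
 * invocation in trace_process_export() above.  The transport behind
 * example_export_write() is left as a placeholder.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward @size bytes of the raw trace entry to some transport. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

/*
 * register_ftrace_export(&example_export) would start feeding function
 * trace events to the callback; unregister_ftrace_export(&example_export)
 * stops them again.
 */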
2549 
2550 void
2551 trace_function(struct trace_array *tr,
2552                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553                int pc)
2554 {
2555         struct trace_event_call *call = &event_function;
2556         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557         struct ring_buffer_event *event;
2558         struct ftrace_entry *entry;
2559 
2560         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561                                             flags, pc);
2562         if (!event)
2563                 return;
2564         entry   = ring_buffer_event_data(event);
2565         entry->ip                       = ip;
2566         entry->parent_ip                = parent_ip;
2567 
2568         if (!call_filter_check_discard(call, entry, buffer, event)) {
2569                 if (static_branch_unlikely(&ftrace_exports_enabled))
2570                         ftrace_exports(event);
2571                 __buffer_unlock_commit(buffer, event);
2572         }
2573 }
2574 
2575 #ifdef CONFIG_STACKTRACE
2576 
2577 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578 struct ftrace_stack {
2579         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2580 };
2581 
2582 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584 
2585 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586                                  unsigned long flags,
2587                                  int skip, int pc, struct pt_regs *regs)
2588 {
2589         struct trace_event_call *call = &event_kernel_stack;
2590         struct ring_buffer_event *event;
2591         struct stack_entry *entry;
2592         struct stack_trace trace;
2593         int use_stack;
2594         int size = FTRACE_STACK_ENTRIES;
2595 
2596         trace.nr_entries        = 0;
2597         trace.skip              = skip;
2598 
2599         /*
2600          * Add one, for this function and the call to save_stack_trace().
2601          * If regs is set, then these functions will not be in the way.
2602          */
2603 #ifndef CONFIG_UNWINDER_ORC
2604         if (!regs)
2605                 trace.skip++;
2606 #endif
2607 
2608         /*
2609          * Since events can happen in NMIs there's no safe way to
2610          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2611          * or NMI comes in, it will just have to use the default
2612          * FTRACE_STACK_ENTRIES (the small stack embedded in the event).
2613          */
2614         preempt_disable_notrace();
2615 
2616         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617         /*
2618          * We don't need any atomic variables, just a barrier.
2619          * If an interrupt comes in, we don't care, because it would
2620          * have exited and put the counter back to what we want.
2621          * We just need a barrier to keep gcc from moving things
2622          * around.
2623          */
2624         barrier();
2625         if (use_stack == 1) {
2626                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2627                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2628 
2629                 if (regs)
2630                         save_stack_trace_regs(regs, &trace);
2631                 else
2632                         save_stack_trace(&trace);
2633 
2634                 if (trace.nr_entries > size)
2635                         size = trace.nr_entries;
2636         } else
2637                 /* From now on, use_stack is a boolean */
2638                 use_stack = 0;
2639 
2640         size *= sizeof(unsigned long);
2641 
2642         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643                                             sizeof(*entry) + size, flags, pc);
2644         if (!event)
2645                 goto out;
2646         entry = ring_buffer_event_data(event);
2647 
2648         memset(&entry->caller, 0, size);
2649 
2650         if (use_stack)
2651                 memcpy(&entry->caller, trace.entries,
2652                        trace.nr_entries * sizeof(unsigned long));
2653         else {
2654                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2655                 trace.entries           = entry->caller;
2656                 if (regs)
2657                         save_stack_trace_regs(regs, &trace);
2658                 else
2659                         save_stack_trace(&trace);
2660         }
2661 
2662         entry->size = trace.nr_entries;
2663 
2664         if (!call_filter_check_discard(call, entry, buffer, event))
2665                 __buffer_unlock_commit(buffer, event);
2666 
2667  out:
2668         /* Again, don't let gcc optimize things here */
2669         barrier();
2670         __this_cpu_dec(ftrace_stack_reserve);
2671         preempt_enable_notrace();
2672 
2673 }
2674 
2675 static inline void ftrace_trace_stack(struct trace_array *tr,
2676                                       struct ring_buffer *buffer,
2677                                       unsigned long flags,
2678                                       int skip, int pc, struct pt_regs *regs)
2679 {
2680         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681                 return;
2682 
2683         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684 }
2685 
2686 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687                    int pc)
2688 {
2689         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690 
2691         if (rcu_is_watching()) {
2692                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693                 return;
2694         }
2695 
2696         /*
2697          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698          * but if the above rcu_is_watching() failed, then the NMI
2699          * triggered someplace critical, and rcu_irq_enter() should
2700          * not be called from NMI.
2701          */
2702         if (unlikely(in_nmi()))
2703                 return;
2704 
2705         rcu_irq_enter_irqson();
2706         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707         rcu_irq_exit_irqson();
2708 }
2709 
2710 /**
2711  * trace_dump_stack - record a stack back trace in the trace buffer
2712  * @skip: Number of functions to skip (helper handlers)
2713  */
2714 void trace_dump_stack(int skip)
2715 {
2716         unsigned long flags;
2717 
2718         if (tracing_disabled || tracing_selftest_running)
2719                 return;
2720 
2721         local_save_flags(flags);
2722 
2723 #ifndef CONFIG_UNWINDER_ORC
2724         /* Skip 1 to skip this function. */
2725         skip++;
2726 #endif
2727         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728                              flags, skip, preempt_count(), NULL);
2729 }
2730 EXPORT_SYMBOL_GPL(trace_dump_stack);
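/*
 * Illustrative only: a debugging hook can record the caller's backtrace in
 * the ring buffer rather than spamming the console; a non-zero @skip drops
 * that many helper frames from the top of the recorded stack.
 */
static void example_report_anomaly(void)
{
	trace_dump_stack(0);
}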
2731 
2732 static DEFINE_PER_CPU(int, user_stack_count);
2733 
2734 void
2735 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2736 {
2737         struct trace_event_call *call = &event_user_stack;
2738         struct ring_buffer_event *event;
2739         struct userstack_entry *entry;
2740         struct stack_trace trace;
2741 
2742         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2743                 return;
2744 
2745         /*
2746          * NMIs cannot handle page faults, even with fixups.
2747          * Saving the user stack can (and often does) fault.
2748          */
2749         if (unlikely(in_nmi()))
2750                 return;
2751 
2752         /*
2753          * prevent recursion, since the user stack tracing may
2754          * trigger other kernel events.
2755          */
2756         preempt_disable();
2757         if (__this_cpu_read(user_stack_count))
2758                 goto out;
2759 
2760         __this_cpu_inc(user_stack_count);
2761 
2762         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2763                                             sizeof(*entry), flags, pc);
2764         if (!event)
2765                 goto out_drop_count;
2766         entry   = ring_buffer_event_data(event);
2767 
2768         entry->tgid             = current->tgid;
2769         memset(&entry->caller, 0, sizeof(entry->caller));
2770 
2771         trace.nr_entries        = 0;
2772         trace.max_entries       = FTRACE_STACK_ENTRIES;
2773         trace.skip              = 0;
2774         trace.entries           = entry->caller;
2775 
2776         save_stack_trace_user(&trace);
2777         if (!call_filter_check_discard(call, entry, buffer, event))
2778                 __buffer_unlock_commit(buffer, event);
2779 
2780  out_drop_count:
2781         __this_cpu_dec(user_stack_count);
2782  out:
2783         preempt_enable();
2784 }
2785 
2786 #ifdef UNUSED
2787 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2788 {
2789         ftrace_trace_userstack(tr, flags, preempt_count());
2790 }
2791 #endif /* UNUSED */
2792 
2793 #endif /* CONFIG_STACKTRACE */
2794 
2795 /* created for use with alloc_percpu */
2796 struct trace_buffer_struct {
2797         int nesting;
2798         char buffer[4][TRACE_BUF_SIZE];
2799 };
2800 
2801 static struct trace_buffer_struct *trace_percpu_buffer;
2802 
2803 /*
2804  * This allows for lockless recording.  If we're nested too deeply, then
2805  * this returns NULL.
2806  */
2807 static char *get_trace_buf(void)
2808 {
2809         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2810 
2811         if (!buffer || buffer->nesting >= 4)
2812                 return NULL;
2813 
2814         buffer->nesting++;
2815 
2816         /* Interrupts must see nesting incremented before we use the buffer */
2817         barrier();
2818         return &buffer->buffer[buffer->nesting][0];
2819 }
2820 
2821 static void put_trace_buf(void)
2822 {
2823         /* Don't let the decrement of nesting leak before this */
2824         barrier();
2825         this_cpu_dec(trace_percpu_buffer->nesting);
2826 }
2827 
2828 static int alloc_percpu_trace_buffer(void)
2829 {
2830         struct trace_buffer_struct *buffers;
2831 
2832         buffers = alloc_percpu(struct trace_buffer_struct);
2833         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2834                 return -ENOMEM;
2835 
2836         trace_percpu_buffer = buffers;
2837         return 0;
2838 }
2839 
2840 static int buffers_allocated;
2841 
2842 void trace_printk_init_buffers(void)
2843 {
2844         if (buffers_allocated)
2845                 return;
2846 
2847         if (alloc_percpu_trace_buffer())
2848                 return;
2849 
2850         /* trace_printk() is for debug use only. Don't use it in production. */
2851 
2852         pr_warn("\n");
2853         pr_warn("**********************************************************\n");
2854         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2855         pr_warn("**                                                      **\n");
2856         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2857         pr_warn("**                                                      **\n");
2858         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2859         pr_warn("** unsafe for production use.                           **\n");
2860         pr_warn("**                                                      **\n");
2861         pr_warn("** If you see this message and you are not debugging    **\n");
2862         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2863         pr_warn("**                                                      **\n");
2864         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2865         pr_warn("**********************************************************\n");
2866 
2867         /* Expand the buffers to set size */
2868         tracing_update_buffers();
2869 
2870         buffers_allocated = 1;
2871 
2872         /*
2873          * trace_printk_init_buffers() can be called by modules.
2874          * If that happens, then we need to start cmdline recording
2875          * directly here. If the global_trace.buffer is already
2876          * allocated here, then this was called by module code.
2877          */
2878         if (global_trace.trace_buffer.buffer)
2879                 tracing_start_cmdline_record();
2880 }
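/*
 * Hedged sketch of the debug-only trace_printk() usage that the banner
 * above warns about: the message is written to the ring buffer and read
 * back through the tracefs "trace" file.  example_debug_value() is a
 * hypothetical caller.
 */
static void example_debug_value(int value)
{
	trace_printk("value=%d in %s\n", value, __func__);
}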
2881 
2882 void trace_printk_start_comm(void)
2883 {
2884         /* Start tracing comms if trace printk is set */
2885         if (!buffers_allocated)
2886                 return;
2887         tracing_start_cmdline_record();
2888 }
2889 
2890 static void trace_printk_start_stop_comm(int enabled)
2891 {
2892         if (!buffers_allocated)
2893                 return;
2894 
2895         if (enabled)
2896                 tracing_start_cmdline_record();
2897         else
2898                 tracing_stop_cmdline_record();
2899 }
2900 
2901 /**
2902  * trace_vbprintk - write a binary message to the tracing buffer
2903  *
2904  */
2905 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2906 {
2907         struct trace_event_call *call = &event_bprint;
2908         struct ring_buffer_event *event;
2909         struct ring_buffer *buffer;
2910         struct trace_array *tr = &global_trace;
2911         struct bprint_entry *entry;
2912         unsigned long flags;
2913         char *tbuffer;
2914         int len = 0, size, pc;
2915 
2916         if (unlikely(tracing_selftest_running || tracing_disabled))
2917                 return 0;
2918 
2919         /* Don't pollute graph traces with trace_vprintk internals */
2920         pause_graph_tracing();
2921 
2922         pc = preempt_count();
2923         preempt_disable_notrace();
2924 
2925         tbuffer = get_trace_buf();
2926         if (!tbuffer) {
2927                 len = 0;
2928                 goto out_nobuffer;
2929         }
2930 
2931         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2932 
2933         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2934                 goto out;
2935 
2936         local_save_flags(flags);
2937         size = sizeof(*entry) + sizeof(u32) * len;
2938         buffer = tr->trace_buffer.buffer;
2939         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2940                                             flags, pc);
2941         if (!event)
2942                 goto out;
2943         entry = ring_buffer_event_data(event);
2944         entry->ip                       = ip;
2945         entry->fmt                      = fmt;
2946 
2947         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2948         if (!call_filter_check_discard(call, entry, buffer, event)) {
2949                 __buffer_unlock_commit(buffer, event);
2950                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2951         }
2952 
2953 out:
2954         put_trace_buf();
2955 
2956 out_nobuffer:
2957         preempt_enable_notrace();
2958         unpause_graph_tracing();
2959 
2960         return len;
2961 }
2962 EXPORT_SYMBOL_GPL(trace_vbprintk);
2963 
2964 __printf(3, 0)
2965 static int
2966 __trace_array_vprintk(struct ring_buffer *buffer,
2967                       unsigned long ip, const char *fmt, va_list args)
2968 {
2969         struct trace_event_call *call = &event_print;
2970         struct ring_buffer_event *event;
2971         int len = 0, size, pc;
2972         struct print_entry *entry;
2973         unsigned long flags;
2974         char *tbuffer;
2975 
2976         if (tracing_disabled || tracing_selftest_running)
2977                 return 0;
2978 
2979         /* Don't pollute graph traces with trace_vprintk internals */
2980         pause_graph_tracing();
2981 
2982         pc = preempt_count();
2983         preempt_disable_notrace();
2984 
2985 
2986         tbuffer = get_trace_buf();
2987         if (!tbuffer) {
2988                 len = 0;
2989                 goto out_nobuffer;
2990         }
2991 
2992         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2993 
2994         local_save_flags(flags);
2995         size = sizeof(*entry) + len + 1;
2996         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2997                                             flags, pc);
2998         if (!event)
2999                 goto out;
3000         entry = ring_buffer_event_data(event);
3001         entry->ip = ip;
3002 
3003         memcpy(&entry->buf, tbuffer, len + 1);
3004         if (!call_filter_check_discard(call, entry, buffer, event)) {
3005                 __buffer_unlock_commit(buffer, event);
3006                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3007         }
3008 
3009 out:
3010         put_trace_buf();
3011 
3012 out_nobuffer:
3013         preempt_enable_notrace();
3014         unpause_graph_tracing();
3015 
3016         return len;
3017 }
3018 
3019 __printf(3, 0)
3020 int trace_array_vprintk(struct trace_array *tr,
3021                         unsigned long ip, const char *fmt, va_list args)
3022 {
3023         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3024 }
3025 
3026 __printf(3, 0)
3027 int trace_array_printk(struct trace_array *tr,
3028                        unsigned long ip, const char *fmt, ...)
3029 {
3030         int ret;
3031         va_list ap;
3032 
3033         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3034                 return 0;
3035 
3036         va_start(ap, fmt);
3037         ret = trace_array_vprintk(tr, ip, fmt, ap);
3038         va_end(ap);
3039         return ret;
3040 }
3041 
3042 __printf(3, 4)
3043 int trace_array_printk_buf(struct ring_buffer *buffer,
3044                            unsigned long ip, const char *fmt, ...)
3045 {
3046         int ret;
3047         va_list ap;
3048 
3049         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3050                 return 0;
3051 
3052         va_start(ap, fmt);
3053         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3054         va_end(ap);
3055         return ret;
3056 }
3057 
3058 __printf(2, 0)
3059 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3060 {
3061         return trace_array_vprintk(&global_trace, ip, fmt, args);
3062 }
3063 EXPORT_SYMBOL_GPL(trace_vprintk);
3064 
3065 static void trace_iterator_increment(struct trace_iterator *iter)
3066 {
3067         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3068 
3069         iter->idx++;
3070         if (buf_iter)
3071                 ring_buffer_read(buf_iter, NULL);
3072 }
3073 
3074 static struct trace_entry *
3075 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3076                 unsigned long *lost_events)
3077 {
3078         struct ring_buffer_event *event;
3079         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3080 
3081         if (buf_iter)
3082                 event = ring_buffer_iter_peek(buf_iter, ts);
3083         else
3084                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3085                                          lost_events);
3086 
3087         if (event) {
3088                 iter->ent_size = ring_buffer_event_length(event);
3089                 return ring_buffer_event_data(event);
3090         }
3091         iter->ent_size = 0;
3092         return NULL;
3093 }
3094 
3095 static struct trace_entry *
3096 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3097                   unsigned long *missing_events, u64 *ent_ts)
3098 {
3099         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3100         struct trace_entry *ent, *next = NULL;
3101         unsigned long lost_events = 0, next_lost = 0;
3102         int cpu_file = iter->cpu_file;
3103         u64 next_ts = 0, ts;
3104         int next_cpu = -1;
3105         int next_size = 0;
3106         int cpu;
3107 
3108         /*
3109          * If we are in a per_cpu trace file, don't bother iterating over
3110          * all CPUs; just peek at the requested CPU directly.
3111          */
3112         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3113                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3114                         return NULL;
3115                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3116                 if (ent_cpu)
3117                         *ent_cpu = cpu_file;
3118 
3119                 return ent;
3120         }
3121 
3122         for_each_tracing_cpu(cpu) {
3123 
3124                 if (ring_buffer_empty_cpu(buffer, cpu))
3125                         continue;
3126 
3127                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3128 
3129                 /*
3130                  * Pick the entry with the smallest timestamp:
3131                  */
3132                 if (ent && (!next || ts < next_ts)) {
3133                         next = ent;
3134                         next_cpu = cpu;
3135                         next_ts = ts;
3136                         next_lost = lost_events;
3137                         next_size = iter->ent_size;
3138                 }
3139         }
3140 
3141         iter->ent_size = next_size;
3142 
3143         if (ent_cpu)
3144                 *ent_cpu = next_cpu;
3145 
3146         if (ent_ts)
3147                 *ent_ts = next_ts;
3148 
3149         if (missing_events)
3150                 *missing_events = next_lost;
3151 
3152         return next;
3153 }
3154 
3155 /* Find the next real entry, without updating the iterator itself */
3156 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3157                                           int *ent_cpu, u64 *ent_ts)
3158 {
3159         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3160 }
3161 
3162 /* Find the next real entry, and increment the iterator to the next entry */
3163 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3164 {
3165         iter->ent = __find_next_entry(iter, &iter->cpu,
3166                                       &iter->lost_events, &iter->ts);
3167 
3168         if (iter->ent)
3169                 trace_iterator_increment(iter);
3170 
3171         return iter->ent ? iter : NULL;
3172 }
3173 
3174 static void trace_consume(struct trace_iterator *iter)
3175 {
3176         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3177                             &iter->lost_events);
3178 }
3179 
3180 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3181 {
3182         struct trace_iterator *iter = m->private;
3183         int i = (int)*pos;
3184         void *ent;
3185 
3186         WARN_ON_ONCE(iter->leftover);
3187 
3188         (*pos)++;
3189 
3190         /* can't go backwards */
3191         if (iter->idx > i)
3192                 return NULL;
3193 
3194         if (iter->idx < 0)
3195                 ent = trace_find_next_entry_inc(iter);
3196         else
3197                 ent = iter;
3198 
3199         while (ent && iter->idx < i)
3200                 ent = trace_find_next_entry_inc(iter);
3201 
3202         iter->pos = *pos;
3203 
3204         return ent;
3205 }
3206 
3207 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3208 {
3209         struct ring_buffer_event *event;
3210         struct ring_buffer_iter *buf_iter;
3211         unsigned long entries = 0;
3212         u64 ts;
3213 
3214         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3215 
3216         buf_iter = trace_buffer_iter(iter, cpu);
3217         if (!buf_iter)
3218                 return;
3219 
3220         ring_buffer_iter_reset(buf_iter);
3221 
3222         /*
3223          * We could have the case with the max latency tracers
3224          * that a reset never took place on a cpu. This is evident
3225          * by the timestamp being before the start of the buffer.
3226          */
3227         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3228                 if (ts >= iter->trace_buffer->time_start)
3229                         break;
3230                 entries++;
3231                 ring_buffer_read(buf_iter, NULL);
3232         }
3233 
3234         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3235 }
3236 
3237 /*
3238  * The current tracer is copied to avoid taking the global lock
3239  * all around.
3240  */
3241 static void *s_start(struct seq_file *m, loff_t *pos)
3242 {
3243         struct trace_iterator *iter = m->private;
3244         struct trace_array *tr = iter->tr;
3245         int cpu_file = iter->cpu_file;
3246         void *p = NULL;
3247         loff_t l = 0;
3248         int cpu;
3249 
3250         /*
3251          * copy the tracer to avoid using a global lock all around.
3252          * iter->trace is a copy of current_trace, the pointer to the
3253          * name may be used instead of a strcmp(), as iter->trace->name
3254          * will point to the same string as current_trace->name.
3255          */
3256         mutex_lock(&trace_types_lock);
3257         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3258                 *iter->trace = *tr->current_trace;
3259         mutex_unlock(&trace_types_lock);
3260 
3261 #ifdef CONFIG_TRACER_MAX_TRACE
3262         if (iter->snapshot && iter->trace->use_max_tr)
3263                 return ERR_PTR(-EBUSY);
3264 #endif
3265 
3266         if (!iter->snapshot)
3267                 atomic_inc(&trace_record_taskinfo_disabled);
3268 
3269         if (*pos != iter->pos) {
3270                 iter->ent = NULL;
3271                 iter->cpu = 0;
3272                 iter->idx = -1;
3273 
3274                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3275                         for_each_tracing_cpu(cpu)
3276                                 tracing_iter_reset(iter, cpu);
3277                 } else
3278                         tracing_iter_reset(iter, cpu_file);
3279 
3280                 iter->leftover = 0;
3281                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3282                         ;
3283 
3284         } else {
3285                 /*
3286                  * If we overflowed the seq_file before, then we want
3287                  * to just reuse the trace_seq buffer again.
3288                  */
3289                 if (iter->leftover)
3290                         p = iter;
3291                 else {
3292                         l = *pos - 1;
3293                         p = s_next(m, p, &l);
3294                 }
3295         }
3296 
3297         trace_event_read_lock();
3298         trace_access_lock(cpu_file);
3299         return p;
3300 }
3301 
3302 static void s_stop(struct seq_file *m, void *p)
3303 {
3304         struct trace_iterator *iter = m->private;
3305 
3306 #ifdef CONFIG_TRACER_MAX_TRACE
3307         if (iter->snapshot && iter->trace->use_max_tr)
3308                 return;
3309 #endif
3310 
3311         if (!iter->snapshot)
3312                 atomic_dec(&trace_record_taskinfo_disabled);
3313 
3314         trace_access_unlock(iter->cpu_file);
3315         trace_event_read_unlock();
3316 }
3317 
3318 static void
3319 get_total_entries(struct trace_buffer *buf,
3320                   unsigned long *total, unsigned long *entries)
3321 {
3322         unsigned long count;
3323         int cpu;
3324 
3325         *total = 0;
3326         *entries = 0;
3327 
3328         for_each_tracing_cpu(cpu) {
3329                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3330                 /*
3331                  * If this buffer has skipped entries, then we hold all
3332                  * entries for the trace and we need to ignore the
3333                  * ones before the time stamp.
3334                  */
3335                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3336                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3337                         /* total is the same as the entries */
3338                         *total += count;
3339                 } else
3340                         *total += count +
3341                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3342                 *entries += count;
3343         }
3344 }
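
/*
 * In short: "entries" is what is still present in the per-cpu buffers,
 * while "total" additionally counts events lost to ring buffer overruns.
 * When tracing_iter_reset() deliberately skipped early entries (tracked
 * in skipped_entries), both counters drop by the same amount and the
 * overruns are not added, so total equals entries for that CPU.
 */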
3345 
3346 static void print_lat_help_header(struct seq_file *m)
3347 {
3348         seq_puts(m, "#                  _------=> CPU#            \n"
3349                     "#                 / _-----=> irqs-off        \n"
3350                     "#                | / _----=> need-resched    \n"
3351                     "#                || / _---=> hardirq/softirq \n"
3352                     "#                ||| / _--=> preempt-depth   \n"
3353                     "#                |||| /     delay            \n"
3354                     "#  cmd     pid   ||||| time  |   caller      \n"
3355                     "#     \\   /      |||||  \\    |   /         \n");
3356 }
3357 
3358 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3359 {
3360         unsigned long total;
3361         unsigned long entries;
3362 
3363         get_total_entries(buf, &total, &entries);
3364         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3365                    entries, total, num_online_cpus());
3366         seq_puts(m, "#\n");
3367 }
3368 
3369 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3370                                    unsigned int flags)
3371 {
3372         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3373 
3374         print_event_info(buf, m);
3375 
3376         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3377         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3378 }
3379 
3380 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3381                                        unsigned int flags)
3382 {
3383         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3384         const char tgid_space[] = "          ";
3385         const char space[] = "  ";
3386 
3387         print_event_info(buf, m);
3388 
3389         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3390                    tgid ? tgid_space : space);
3391         seq_printf(m, "#                          %s / _----=> need-resched\n",
3392                    tgid ? tgid_space : space);
3393         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3394                    tgid ? tgid_space : space);
3395         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3396                    tgid ? tgid_space : space);
3397         seq_printf(m, "#                          %s||| /     delay\n",
3398                    tgid ? tgid_space : space);
3399         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3400                    tgid ? "   TGID   " : space);
3401         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3402                    tgid ? "     |    " : space);
3403 }
3404 
3405 void
3406 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3407 {
3408         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3409         struct trace_buffer *buf = iter->trace_buffer;
3410         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3411         struct tracer *type = iter->trace;
3412         unsigned long entries;
3413         unsigned long total;
3414         const char *name = type->name;
3417 
3418         get_total_entries(buf, &total, &entries);
3419 
3420         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3421                    name, UTS_RELEASE);
3422         seq_puts(m, "# -----------------------------------"
3423                  "---------------------------------\n");
3424         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3425                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3426                    nsecs_to_usecs(data->saved_latency),
3427                    entries,
3428                    total,
3429                    buf->cpu,
3430 #if defined(CONFIG_PREEMPT_NONE)
3431                    "server",
3432 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3433                    "desktop",
3434 #elif defined(CONFIG_PREEMPT)
3435                    "preempt",
3436 #else
3437                    "unknown",
3438 #endif
3439                    /* These are reserved for later use */
3440                    0, 0, 0, 0);
3441 #ifdef CONFIG_SMP
3442         seq_printf(m, " #P:%d)\n", num_online_cpus());
3443 #else
3444         seq_puts(m, ")\n");
3445 #endif
3446         seq_puts(m, "#    -----------------\n");
3447         seq_printf(m, "#    | task: %.16s-%d "
3448                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3449                    data->comm, data->pid,
3450                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3451                    data->policy, data->rt_priority);
3452         seq_puts(m, "#    -----------------\n");
3453 
3454         if (data->critical_start) {
3455                 seq_puts(m, "#  => started at: ");
3456                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3457                 trace_print_seq(m, &iter->seq);
3458                 seq_puts(m, "\n#  => ended at:   ");
3459                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3460                 trace_print_seq(m, &iter->seq);
3461                 seq_puts(m, "\n#\n");
3462         }
3463 
3464         seq_puts(m, "#\n");
3465 }
3466 
3467 static void test_cpu_buff_start(struct trace_iterator *iter)
3468 {
3469         struct trace_seq *s = &iter->seq;
3470         struct trace_array *tr = iter->tr;
3471 
3472         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3473                 return;
3474 
3475         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3476                 return;
3477 
3478         if (cpumask_available(iter->started) &&
3479             cpumask_test_cpu(iter->cpu, iter->started))
3480                 return;
3481 
3482         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3483                 return;
3484 
3485         if (cpumask_available(iter->started))
3486                 cpumask_set_cpu(iter->cpu, iter->started);
3487 
3488         /* Don't print started cpu buffer for the first entry of the trace */
3489         if (iter->idx > 1)
3490                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3491                                 iter->cpu);
3492 }
3493 
3494 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3495 {
3496         struct trace_array *tr = iter->tr;
3497         struct trace_seq *s = &iter->seq;
3498         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3499         struct trace_entry *entry;
3500         struct trace_event *event;
3501 
3502         entry = iter->ent;
3503 
3504         test_cpu_buff_start(iter);
3505 
3506         event = ftrace_find_event(entry->type);
3507 
3508         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3509                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3510                         trace_print_lat_context(iter);
3511                 else
3512                         trace_print_context(iter);
3513         }
3514 
3515         if (trace_seq_has_overflowed(s))
3516                 return TRACE_TYPE_PARTIAL_LINE;
3517 
3518         if (event)
3519                 return event->funcs->trace(iter, sym_flags, event);
3520 
3521         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3522 
3523         return trace_handle_return(s);
3524 }
3525 
3526 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3527 {
3528         struct trace_array *tr = iter->tr;
3529         struct trace_seq *s = &iter->seq;
3530         struct trace_entry *entry;
3531         struct trace_event *event;
3532 
3533         entry = iter->ent;
3534 
3535         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3536                 trace_seq_printf(s, "%d %d %llu ",
3537                                  entry->pid, iter->cpu, iter->ts);
3538 
3539         if (trace_seq_has_overflowed(s))
3540                 return TRACE_TYPE_PARTIAL_LINE;
3541 
3542         event = ftrace_find_event(entry->type);
3543         if (event)
3544                 return event->funcs->raw(iter, 0, event);
3545 
3546         trace_seq_printf(s, "%d ?\n", entry->type);
3547 
3548         return trace_handle_return(s);
3549 }
3550 
3551 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3552 {
3553         struct trace_array *tr = iter->tr;
3554         struct trace_seq *s = &iter->seq;
3555         unsigned char newline = '\n';
3556         struct trace_entry *entry;
3557         struct trace_event *event;
3558 
3559         entry = iter->ent;
3560 
3561         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3562                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3563                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3564                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3565                 if (trace_seq_has_overflowed(s))
3566                         return TRACE_TYPE_PARTIAL_LINE;
3567         }
3568 
3569         event = ftrace_find_event(entry->type);
3570         if (event) {
3571                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3572                 if (ret != TRACE_TYPE_HANDLED)
3573                         return ret;
3574         }
3575 
3576         SEQ_PUT_FIELD(s, newline);
3577 
3578         return trace_handle_return(s);
3579 }
3580 
3581 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3582 {
3583         struct trace_array *tr = iter->tr;
3584         struct trace_seq *s = &iter->seq;
3585         struct trace_entry *entry;
3586         struct trace_event *event;
3587 
3588         entry = iter->ent;
3589 
3590         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3591                 SEQ_PUT_FIELD(s, entry->pid);
3592                 SEQ_PUT_FIELD(s, iter->cpu);
3593                 SEQ_PUT_FIELD(s, iter->ts);
3594                 if (trace_seq_has_overflowed(s))
3595                         return TRACE_TYPE_PARTIAL_LINE;
3596         }
3597 
3598         event = ftrace_find_event(entry->type);
3599         return event ? event->funcs->binary(iter, 0, event) :
3600                 TRACE_TYPE_HANDLED;
3601 }
3602 
3603 int trace_empty(struct trace_iterator *iter)
3604 {
3605         struct ring_buffer_iter *buf_iter;
3606         int cpu;
3607 
3608         /* If we are looking at one CPU buffer, only check that one */
3609         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3610                 cpu = iter->cpu_file;
3611                 buf_iter = trace_buffer_iter(iter, cpu);
3612                 if (buf_iter) {
3613                         if (!ring_buffer_iter_empty(buf_iter))
3614                                 return 0;
3615                 } else {
3616                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617                                 return 0;
3618                 }
3619                 return 1;
3620         }
3621 
3622         for_each_tracing_cpu(cpu) {
3623                 buf_iter = trace_buffer_iter(iter, cpu);
3624                 if (buf_iter) {
3625                         if (!ring_buffer_iter_empty(buf_iter))
3626                                 return 0;
3627                 } else {
3628                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3629                                 return 0;
3630                 }
3631         }
3632 
3633         return 1;
3634 }
3635 
3636 /*  Called with trace_event_read_lock() held. */
3637 enum print_line_t print_trace_line(struct trace_iterator *iter)
3638 {
3639         struct trace_array *tr = iter->tr;
3640         unsigned long trace_flags = tr->trace_flags;
3641         enum print_line_t ret;
3642 
3643         if (iter->lost_events) {
3644                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3645                                  iter->cpu, iter->lost_events);
3646                 if (trace_seq_has_overflowed(&iter->seq))
3647                         return TRACE_TYPE_PARTIAL_LINE;
3648         }
3649 
3650         if (iter->trace && iter->trace->print_line) {
3651                 ret = iter->trace->print_line(iter);
3652                 if (ret != TRACE_TYPE_UNHANDLED)
3653                         return ret;
3654         }
3655 
3656         if (iter->ent->type == TRACE_BPUTS &&
3657                         trace_flags & TRACE_ITER_PRINTK &&
3658                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659                 return trace_print_bputs_msg_only(iter);
3660 
3661         if (iter->ent->type == TRACE_BPRINT &&
3662                         trace_flags & TRACE_ITER_PRINTK &&
3663                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3664                 return trace_print_bprintk_msg_only(iter);
3665 
3666         if (iter->ent->type == TRACE_PRINT &&
3667                         trace_flags & TRACE_ITER_PRINTK &&
3668                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3669                 return trace_print_printk_msg_only(iter);
3670 
3671         if (trace_flags & TRACE_ITER_BIN)
3672                 return print_bin_fmt(iter);
3673 
3674         if (trace_flags & TRACE_ITER_HEX)
3675                 return print_hex_fmt(iter);
3676 
3677         if (trace_flags & TRACE_ITER_RAW)
3678                 return print_raw_fmt(iter);
3679 
3680         return print_trace_fmt(iter);
3681 }
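
/*
 * Note on the dispatch order above: the lost-events banner and the
 * tracer's own ->print_line() hook get first crack at the entry, the
 * printk msg-only special cases come next, and only then do the global
 * output-format flags apply, tried in the order bin, hex, raw, with the
 * default human-readable format as the fallback.
 */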
3682 
3683 void trace_latency_header(struct seq_file *m)
3684 {
3685         struct trace_iterator *iter = m->private;
3686         struct trace_array *tr = iter->tr;
3687 
3688         /* print nothing if the buffers are empty */
3689         if (trace_empty(iter))
3690                 return;
3691 
3692         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3693                 print_trace_header(m, iter);
3694 
3695         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3696                 print_lat_help_header(m);
3697 }
3698 
3699 void trace_default_header(struct seq_file *m)
3700 {
3701         struct trace_iterator *iter = m->private;
3702         struct trace_array *tr = iter->tr;
3703         unsigned long trace_flags = tr->trace_flags;
3704 
3705         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3706                 return;
3707 
3708         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3709                 /* print nothing if the buffers are empty */
3710                 if (trace_empty(iter))
3711                         return;
3712                 print_trace_header(m, iter);
3713                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3714                         print_lat_help_header(m);
3715         } else {
3716                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3717                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3718                                 print_func_help_header_irq(iter->trace_buffer,
3719                                                            m, trace_flags);
3720                         else
3721                                 print_func_help_header(iter->trace_buffer, m,
3722                                                        trace_flags);
3723                 }
3724         }
3725 }
3726 
3727 static void test_ftrace_alive(struct seq_file *m)
3728 {
3729         if (!ftrace_is_dead())
3730                 return;
3731         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3732                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3733 }
3734 
3735 #ifdef CONFIG_TRACER_MAX_TRACE
3736 static void show_snapshot_main_help(struct seq_file *m)
3737 {
3738         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3739                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740                     "#                      Takes a snapshot of the main buffer.\n"
3741                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3742                     "#                      (Doesn't have to be '2'; works with any number that\n"
3743                     "#                       is not a '0' or '1')\n");
3744 }
3745 
3746 static void show_snapshot_percpu_help(struct seq_file *m)
3747 {
3748         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3749 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3750         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3751                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3752 #else
3753         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3754                     "#                     Must use main snapshot file to allocate.\n");
3755 #endif
3756         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3757                     "#                      (Doesn't have to be '2'; works with any number that\n"
3758                     "#                       is not a '0' or '1')\n");
3759 }
3760 
3761 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3762 {
3763         if (iter->tr->allocated_snapshot)
3764                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3765         else
3766                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3767 
3768         seq_puts(m, "# Snapshot commands:\n");
3769         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3770                 show_snapshot_main_help(m);
3771         else
3772                 show_snapshot_percpu_help(m);
3773 }
3774 #else
3775 /* Should never be called */
3776 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3777 #endif
3778 
3779 static int s_show(struct seq_file *m, void *v)
3780 {
3781         struct trace_iterator *iter = v;
3782         int ret;
3783 
3784         if (iter->ent == NULL) {
3785                 if (iter->tr) {
3786                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3787                         seq_puts(m, "#\n");
3788                         test_ftrace_alive(m);
3789                 }
3790                 if (iter->snapshot && trace_empty(iter))
3791                         print_snapshot_help(m, iter);
3792                 else if (iter->trace && iter->trace->print_header)
3793                         iter->trace->print_header(m);
3794                 else
3795                         trace_default_header(m);
3796 
3797         } else if (iter->leftover) {
3798                 /*
3799                  * If we filled the seq_file buffer earlier, we
3800                  * want to just show it now.
3801                  */
3802                 ret = trace_print_seq(m, &iter->seq);
3803 
3804                 /* ret should this time be zero, but you never know */
3805                 iter->leftover = ret;
3806 
3807         } else {
3808                 print_trace_line(iter);
3809                 ret = trace_print_seq(m, &iter->seq);
3810                 /*
3811                  * If we overflow the seq_file buffer, then it will
3812                  * ask us for this data again at start up.
3813                  * Use that instead.
3814                  *  ret is 0 if seq_file write succeeded.
3815                  *        -1 otherwise.
3816                  */
3817                 iter->leftover = ret;
3818         }
3819 
3820         return 0;
3821 }
3822 
3823 /*
3824  * Should be used after trace_array_get(), trace_types_lock
3825  * ensures that i_cdev was already initialized.
3826  */
3827 static inline int tracing_get_cpu(struct inode *inode)
3828 {
3829         if (inode->i_cdev) /* See trace_create_cpu_file() */
3830                 return (long)inode->i_cdev - 1;
3831         return RING_BUFFER_ALL_CPUS;
3832 }
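
/*
 * This decode works because trace_create_cpu_file() stores (cpu + 1) in
 * i_cdev for the per_cpu/cpu<N> files; a NULL i_cdev therefore means
 * "not a per-cpu file" and maps to RING_BUFFER_ALL_CPUS instead of being
 * mistaken for CPU 0.
 */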
3833 
3834 static const struct seq_operations tracer_seq_ops = {
3835         .start          = s_start,
3836         .next           = s_next,
3837         .stop           = s_stop,
3838         .show           = s_show,
3839 };
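
/*
 * These four callbacks back seq_file reads of the "trace" file:
 * s_start() takes the locks and positions the iterator, s_show() prints
 * the headers (when iter->ent is NULL) or one formatted entry, s_next()
 * advances, and s_stop() drops the locks.  When s_show() overflows the
 * seq_file buffer, iter->leftover makes the next pass re-emit the
 * already-formatted trace_seq instead of walking the ring buffer again.
 */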
3840 
3841 static struct trace_iterator *
3842 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3843 {
3844         struct trace_array *tr = inode->i_private;
3845         struct trace_iterator *iter;
3846         int cpu;
3847 
3848         if (tracing_disabled)
3849                 return ERR_PTR(-ENODEV);
3850 
3851         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3852         if (!iter)
3853                 return ERR_PTR(-ENOMEM);
3854 
3855         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3856                                     GFP_KERNEL);
3857         if (!iter->buffer_iter)
3858                 goto release;
3859 
3860         /*
3861          * We make a copy of the current tracer to avoid concurrent
3862          * changes on it while we are reading.
3863          */
3864         mutex_lock(&trace_types_lock);
3865         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3866         if (!iter->trace)
3867                 goto fail;
3868 
3869         *iter->trace = *tr->current_trace;
3870 
3871         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3872                 goto fail;
3873 
3874         iter->tr = tr;
3875 
3876 #ifdef CONFIG_TRACER_MAX_TRACE
3877         /* Currently only the top directory has a snapshot */
3878         if (tr->current_trace->print_max || snapshot)
3879                 iter->trace_buffer = &tr->max_buffer;
3880         else
3881 #endif
3882                 iter->trace_buffer = &tr->trace_buffer;
3883         iter->snapshot = snapshot;
3884         iter->pos = -1;
3885         iter->cpu_file = tracing_get_cpu(inode);
3886         mutex_init(&iter->mutex);
3887 
3888         /* Notify the tracer early; before we stop tracing. */
3889         if (iter->trace && iter->trace->open)
3890                 iter->trace->open(iter);
3891 
3892         /* Annotate start of buffers if we had overruns */
3893         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3894                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3895 
3896         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3897         if (trace_clocks[tr->clock_id].in_ns)
3898                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3899 
3900         /* stop the trace while dumping if we are not opening "snapshot" */
3901         if (!iter->snapshot)
3902                 tracing_stop_tr(tr);
3903 
3904         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3905                 for_each_tracing_cpu(cpu) {
3906                         iter->buffer_iter[cpu] =
3907                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908                 }
3909                 ring_buffer_read_prepare_sync();
3910                 for_each_tracing_cpu(cpu) {
3911                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3912                         tracing_iter_reset(iter, cpu);
3913                 }
3914         } else {
3915                 cpu = iter->cpu_file;
3916                 iter->buffer_iter[cpu] =
3917                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3918                 ring_buffer_read_prepare_sync();
3919                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3920                 tracing_iter_reset(iter, cpu);
3921         }
3922 
3923         mutex_unlock(&trace_types_lock);
3924 
3925         return iter;
3926 
3927  fail:
3928         mutex_unlock(&trace_types_lock);
3929         kfree(iter->trace);
3930         kfree(iter->buffer_iter);
3931 release:
3932         seq_release_private(inode, file);
3933         return ERR_PTR(-ENOMEM);
3934 }
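
/*
 * Lifecycle note: unless the "snapshot" file is being opened,
 * __tracing_open() stops tracing on this array while it is read and
 * prepares one ring_buffer_iter per CPU (or a single one for a per_cpu
 * file); tracing_release() below finishes those iterators and restarts
 * tracing.
 */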
3935 
3936 int tracing_open_generic(struct inode *inode, struct file *filp)
3937 {
3938         if (tracing_disabled)
3939                 return -ENODEV;
3940 
3941         filp->private_data = inode->i_private;
3942         return 0;
3943 }
3944 
3945 bool tracing_is_disabled(void)
3946 {
3947         return tracing_disabled ? true : false;
3948 }
3949 
3950 /*
3951  * Open and update trace_array ref count.
3952  * Must have the current trace_array passed to it.
3953  */
3954 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3955 {
3956         struct trace_array *tr = inode->i_private;
3957 
3958         if (tracing_disabled)
3959                 return -ENODEV;
3960 
3961         if (trace_array_get(tr) < 0)
3962                 return -ENODEV;
3963 
3964         filp->private_data = inode->i_private;
3965 
3966         return 0;
3967 }
3968 
3969 static int tracing_release(struct inode *inode, struct file *file)
3970 {
3971         struct trace_array *tr = inode->i_private;
3972         struct seq_file *m = file->private_data;
3973         struct trace_iterator *iter;
3974         int cpu;
3975 
3976         if (!(file->f_mode & FMODE_READ)) {
3977                 trace_array_put(tr);
3978                 return 0;
3979         }
3980 
3981         /* Writes do not use seq_file */
3982         iter = m->private;
3983         mutex_lock(&trace_types_lock);
3984 
3985         for_each_tracing_cpu(cpu) {
3986                 if (iter->buffer_iter[cpu])
3987                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3988         }
3989 
3990         if (iter->trace && iter->trace->close)
3991                 iter->trace->close(iter);
3992 
3993         if (!iter->snapshot)
3994                 /* reenable tracing if it was previously enabled */
3995                 tracing_start_tr(tr);
3996 
3997         __trace_array_put(tr);
3998 
3999         mutex_unlock(&trace_types_lock);
4000 
4001         mutex_destroy(&iter->mutex);
4002         free_cpumask_var(iter->started);
4003         kfree(iter->trace);
4004         kfree(iter->buffer_iter);
4005         seq_release_private(inode, file);
4006 
4007         return 0;
4008 }
4009 
4010 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4011 {
4012         struct trace_array *tr = inode->i_private;
4013 
4014         trace_array_put(tr);
4015         return 0;
4016 }
4017 
4018 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4019 {
4020         struct trace_array *tr = inode->i_private;
4021 
4022         trace_array_put(tr);
4023 
4024         return single_release(inode, file);
4025 }
4026 
4027 static int tracing_open(struct inode *inode, struct file *file)
4028 {
4029         struct trace_array *tr = inode->i_private;
4030         struct trace_iterator *iter;
4031         int ret = 0;
4032 
4033         if (trace_array_get(tr) < 0)
4034                 return -ENODEV;
4035 
4036         /* If this file was open for write, then erase contents */
4037         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4038                 int cpu = tracing_get_cpu(inode);
4039                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4040 
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042                 if (tr->current_trace->print_max)
4043                         trace_buf = &tr->max_buffer;
4044 #endif
4045 
4046                 if (cpu == RING_BUFFER_ALL_CPUS)
4047                         tracing_reset_online_cpus(trace_buf);
4048                 else
4049                         tracing_reset(trace_buf, cpu);
4050         }
4051 
4052         if (file->f_mode & FMODE_READ) {
4053                 iter = __tracing_open(inode, file, false);
4054                 if (IS_ERR(iter))
4055                         ret = PTR_ERR(iter);
4056                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4057                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4058         }
4059 
4060         if (ret < 0)
4061                 trace_array_put(tr);
4062 
4063         return ret;
4064 }
4065 
4066 /*
4067  * Some tracers are not suitable for instance buffers.
4068  * A tracer is always available for the global (top level) array,
4069  * and for an instance only if it explicitly allows it.
4070  */
4071 static bool
4072 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4073 {
4074         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4075 }
4076 
4077 /* Find the next tracer that this trace array may use */
4078 static struct tracer *
4079 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4080 {
4081         while (t && !trace_ok_for_array(t, tr))
4082                 t = t->next;
4083 
4084         return t;
4085 }
4086 
4087 static void *
4088 t_next(struct seq_file *m, void *v, loff_t *pos)
4089 {
4090         struct trace_array *tr = m->private;
4091         struct tracer *t = v;
4092 
4093         (*pos)++;
4094 
4095         if (t)
4096                 t = get_tracer_for_array(tr, t->next);
4097 
4098         return t;
4099 }
4100 
4101 static void *t_start(struct seq_file *m, loff_t *pos)
4102 {
4103         struct trace_array *tr = m->private;
4104         struct tracer *t;
4105         loff_t l = 0;
4106 
4107         mutex_lock(&trace_types_lock);
4108 
4109         t = get_tracer_for_array(tr, trace_types);
4110         for (; t && l < *pos; t = t_next(m, t, &l))
4111                 ;
4112 
4113         return t;
4114 }
4115 
4116 static void t_stop(struct seq_file *m, void *p)
4117 {
4118         mutex_unlock(&trace_types_lock);
4119 }
4120 
4121 static int t_show(struct seq_file *m, void *v)
4122 {
4123         struct tracer *t = v;
4124 
4125         if (!t)
4126                 return 0;
4127 
4128         seq_puts(m, t->name);
4129         if (t->next)
4130                 seq_putc(m, ' ');
4131         else
4132                 seq_putc(m, '\n');
4133 
4134         return 0;
4135 }
4136 
4137 static const struct seq_operations show_traces_seq_ops = {
4138         .start          = t_start,
4139         .next           = t_next,
4140         .stop           = t_stop,
4141         .show           = t_show,
4142 };
4143 
4144 static int show_traces_open(struct inode *inode, struct file *file)
4145 {
4146         struct trace_array *tr = inode->i_private;
4147         struct seq_file *m;
4148         int ret;
4149 
4150         if (tracing_disabled)
4151                 return -ENODEV;
4152 
4153         ret = seq_open(file, &show_traces_seq_ops);
4154         if (ret)
4155                 return ret;
4156 
4157         m = file->private_data;
4158         m->private = tr;
4159 
4160         return 0;
4161 }
4162 
4163 static ssize_t
4164 tracing_write_stub(struct file *filp, const char __user *ubuf,
4165                    size_t count, loff_t *ppos)
4166 {
4167         return count;
4168 }
4169 
4170 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4171 {
4172         int ret;
4173 
4174         if (file->f_mode & FMODE_READ)
4175                 ret = seq_lseek(file, offset, whence);
4176         else
4177                 file->f_pos = ret = 0;
4178 
4179         return ret;
4180 }
4181 
4182 static const struct file_operations tracing_fops = {
4183         .open           = tracing_open,
4184         .read           = seq_read,
4185         .write          = tracing_write_stub,
4186         .llseek         = tracing_lseek,
4187         .release        = tracing_release,
4188 };
4189 
4190 static const struct file_operations show_traces_fops = {
4191         .open           = show_traces_open,
4192         .read           = seq_read,
4193         .release        = seq_release,
4194         .llseek         = seq_lseek,
4195 };
4196 
4197 static ssize_t
4198 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4199                      size_t count, loff_t *ppos)
4200 {
4201         struct trace_array *tr = file_inode(filp)->i_private;
4202         char *mask_str;
4203         int len;
4204 
4205         len = snprintf(NULL, 0, "%*pb\n",
4206                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4207         mask_str = kmalloc(len, GFP_KERNEL);
4208         if (!mask_str)
4209                 return -ENOMEM;
4210 
4211         len = snprintf(mask_str, len, "%*pb\n",
4212                        cpumask_pr_args(tr->tracing_cpumask));
4213         if (len >= count) {
4214                 count = -EINVAL;
4215                 goto out_err;
4216         }
4217         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4218 
4219 out_err:
4220         kfree(mask_str);
4221 
4222         return count;
4223 }
4224 
4225 static ssize_t
4226 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4227                       size_t count, loff_t *ppos)
4228 {
4229         struct trace_array *tr = file_inode(filp)->i_private;
4230         cpumask_var_t tracing_cpumask_new;
4231         int err, cpu;
4232 
4233         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4234                 return -ENOMEM;
4235 
4236         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4237         if (err)
4238                 goto err_unlock;
4239 
4240         local_irq_disable();
4241         arch_spin_lock(&tr->max_lock);
4242         for_each_tracing_cpu(cpu) {
4243                 /*
4244                  * Increase/decrease the disabled counter if we are
4245                  * about to flip a bit in the cpumask:
4246                  */
4247                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4248                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4249                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4250                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4251                 }
4252                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4253                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4254                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4255                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4256                 }
4257         }
4258         arch_spin_unlock(&tr->max_lock);
4259         local_irq_enable();
4260 
4261         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4262         free_cpumask_var(tracing_cpumask_new);
4263 
4264         return count;
4265 
4266 err_unlock:
4267         free_cpumask_var(tracing_cpumask_new);
4268 
4269         return err;
4270 }
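
/*
 * A minimal usage sketch, assuming the usual tracefs mount point
 * /sys/kernel/tracing:
 *
 *   # cat tracing_cpumask        : current mask in hex, e.g. "f"
 *   # echo 3 > tracing_cpumask   : restrict tracing to CPUs 0 and 1
 *
 * The written value is parsed by cpumask_parse_user(), i.e. the usual
 * comma-separated hex cpumask format.
 */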
4271 
4272 static const struct file_operations tracing_cpumask_fops = {
4273         .open           = tracing_open_generic_tr,
4274         .read           = tracing_cpumask_read,
4275         .write          = tracing_cpumask_write,
4276         .release        = tracing_release_generic_tr,
4277         .llseek         = generic_file_llseek,
4278 };
4279 
4280 static int tracing_trace_options_show(struct seq_file *m, void *v)
4281 {
4282         struct tracer_opt *trace_opts;
4283         struct trace_array *tr = m->private;
4284         u32 tracer_flags;
4285         int i;
4286 
4287         mutex_lock(&trace_types_lock);
4288         tracer_flags = tr->current_trace->flags->val;
4289         trace_opts = tr->current_trace->flags->opts;
4290 
4291         for (i = 0; trace_options[i]; i++) {
4292                 if (tr->trace_flags & (1 << i))
4293                         seq_printf(m, "%s\n", trace_options[i]);
4294                 else
4295                         seq_printf(m, "no%s\n", trace_options[i]);
4296         }
4297 
4298         for (i = 0; trace_opts[i].name; i++) {
4299                 if (tracer_flags & trace_opts[i].bit)
4300                         seq_printf(m, "%s\n", trace_opts[i].name);
4301                 else
4302                         seq_printf(m, "no%s\n", trace_opts[i].name);
4303         }
4304         mutex_unlock(&trace_types_lock);
4305 
4306         return 0;
4307 }
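
/*
 * Reading "trace_options" therefore lists one flag per line, either by
 * name or prefixed with "no" when the flag is clear, and the same
 * strings can be written back to toggle a flag, e.g.:
 *
 *   # cat trace_options                  : "print-parent", "nosym-offset", ...
 *   # echo sym-offset > trace_options    : set a flag
 *   # echo nosym-offset > trace_options  : clear it again
 */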
4308 
4309 static int __set_tracer_option(struct trace_array *tr,
4310                                struct tracer_flags *tracer_flags,
4311                                struct tracer_opt *opts, int neg)
4312 {
4313         struct tracer *trace = tracer_flags->trace;
4314         int ret;
4315 
4316         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4317         if (ret)
4318                 return ret;
4319 
4320         if (neg)
4321                 tracer_flags->val &= ~opts->bit;
4322         else
4323                 tracer_flags->val |= opts->bit;
4324         return 0;
4325 }
4326 
4327 /* Try to assign a tracer specific option */
4328 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4329 {
4330         struct tracer *trace = tr->current_trace;
4331         struct tracer_flags *tracer_flags = trace->flags;
4332         struct tracer_opt *opts = NULL;
4333         int i;
4334 
4335         for (i = 0; tracer_flags->opts[i].name; i++) {
4336                 opts = &tracer_flags->opts[i];
4337 
4338                 if (strcmp(cmp, opts->name) == 0)
4339                         return __set_tracer_option(tr, trace->flags, opts, neg);
4340         }
4341 
4342         return -EINVAL;
4343 }
4344 
4345 /* Some tracers require overwrite to stay enabled */
4346 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4347 {
4348         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4349                 return -1;
4350 
4351         return 0;
4352 }
4353 
4354 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4355 {
4356         /* do nothing if the flag is already in the requested state */
4357         if (!!(tr->trace_flags & mask) == !!enabled)
4358                 return 0;
4359 
4360         /* Give the tracer a chance to approve the change */
4361         if (tr->current_trace->flag_changed)
4362                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4363                         return -EINVAL;
4364 
4365         if (enabled)
4366                 tr->trace_flags |= mask;
4367         else
4368                 tr->trace_flags &= ~mask;
4369 
4370         if (mask == TRACE_ITER_RECORD_CMD)
4371                 trace_event_enable_cmd_record(enabled);
4372 
4373         if (mask == TRACE_ITER_RECORD_TGID) {
4374                 if (!tgid_map)
4375                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4376                                            sizeof(*tgid_map),
4377                                            GFP_KERNEL);
4378                 if (!tgid_map) {
4379                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4380                         return -ENOMEM;
4381                 }
4382 
4383                 trace_event_enable_tgid_record(enabled);
4384         }
4385 
4386         if (mask == TRACE_ITER_EVENT_FORK)
4387                 trace_event_follow_fork(tr, enabled);
4388 
4389         if (mask == TRACE_ITER_FUNC_FORK)
4390                 ftrace_pid_follow_fork(tr, enabled);
4391 
4392         if (mask == TRACE_ITER_OVERWRITE) {
4393                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4394 #ifdef CONFIG_TRACER_MAX_TRACE
4395                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4396 #endif
4397         }
4398 
4399         if (mask == TRACE_ITER_PRINTK) {
4400                 trace_printk_start_stop_comm(enabled);
4401                 trace_printk_control(enabled);
4402         }
4403 
4404         return 0;
4405 }
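
/*
 * Besides flipping the bit, several masks above have side effects:
 * TRACE_ITER_RECORD_CMD/RECORD_TGID toggle saving of comm/tgid mappings
 * (allocating tgid_map on first use), TRACE_ITER_EVENT_FORK and
 * TRACE_ITER_FUNC_FORK make children follow their parent's PID filters,
 * TRACE_ITER_OVERWRITE chooses between overwriting old events and
 * stopping writes when the ring buffer is full, and TRACE_ITER_PRINTK
 * gates trace_printk() output.
 */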
4406 
4407 static int trace_set_options(struct trace_array *tr, char *option)
4408 {
4409         char *cmp;
4410         int neg = 0;
4411         int ret;
4412         size_t orig_len = strlen(option);
4413 
4414         cmp = strstrip(option);
4415 
4416         if (strncmp(cmp, "no", 2) == 0) {
4417                 neg = 1;
4418                 cmp += 2;
4419         }
4420 
4421         mutex_lock(&trace_types_lock);
4422 
4423         ret = match_string(trace_options, -1, cmp);
4424         /* If no option could be set, test the specific tracer options */
4425         if (ret < 0)
4426                 ret = set_tracer_option(tr, cmp, neg);
4427         else
4428                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4429 
4430         mutex_unlock(&trace_types_lock);
4431 
4432         /*
4433          * If the first trailing whitespace is replaced with '\0' by strstrip,
4434          * turn it back into a space.
4435          */
4436         if (orig_len > strlen(option))
4437                 option[strlen(option)] = ' ';
4438 
4439         return ret;
4440 }
4441 
4442 static void __init apply_trace_boot_options(void)
4443 {
4444         char *buf = trace_boot_options_buf;
4445         char *option;
4446 
4447         while (true) {
4448                 option = strsep(&buf, ",");
4449 
4450                 if (!option)
4451                         break;
4452 
4453                 if (*option)
4454                         trace_set_options(&global_trace, option);
4455 
4456                 /* Put back the comma to allow this to be called again */
4457                 if (buf)
4458                         *(buf - 1) = ',';
4459         }
4460 }
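
/*
 * trace_boot_options_buf is normally filled from the "trace_options="
 * kernel command line parameter (parsed earlier in this file), so for
 * example booting with trace_options=sym-offset,noirq-info applies those
 * flags before user space comes up.  Restoring the comma above keeps the
 * buffer intact in case it has to be walked again.
 */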
4461 
4462 static ssize_t
4463 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4464                         size_t cnt, loff_t *ppos)
4465 {
4466         struct seq_file *m = filp->private_data;
4467         struct trace_array *tr = m->private;
4468         char buf[64];
4469         int ret;
4470 
4471         if (cnt >= sizeof(buf))
4472                 return -EINVAL;
4473 
4474         if (copy_from_user(buf, ubuf, cnt))
4475                 return -EFAULT;
4476 
4477         buf[cnt] = 0;
4478 
4479         ret = trace_set_options(tr, buf);
4480         if (ret < 0)
4481                 return ret;
4482 
4483         *ppos += cnt;
4484 
4485         return cnt;
4486 }
4487 
4488 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4489 {
4490         struct trace_array *tr = inode->i_private;
4491         int ret;
4492 
4493         if (tracing_disabled)
4494                 return -ENODEV;
4495 
4496         if (trace_array_get(tr) < 0)
4497                 return -ENODEV;
4498 
4499         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4500         if (ret < 0)
4501                 trace_array_put(tr);
4502 
4503         return ret;
4504 }
4505 
4506 static const struct file_operations tracing_iter_fops = {
4507         .open           = tracing_trace_options_open,
4508         .read           = seq_read,
4509         .llseek         = seq_lseek,
4510         .release        = tracing_single_release_tr,
4511         .write          = tracing_trace_options_write,
4512 };
4513 
4514 static const char readme_msg[] =
4515         "tracing mini-HOWTO:\n\n"
4516         "# echo 0 > tracing_on : quick way to disable tracing\n"
4517         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4518         " Important files:\n"
4519         "  trace\t\t\t- The static contents of the buffer\n"
4520         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4521         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4522         "  current_tracer\t- function and latency tracers\n"
4523         "  available_tracers\t- list of configured tracers for current_tracer\n"
4524         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4525         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4526         "  trace_clock\t\t- change the clock used to order events\n"
4527         "       local:   Per cpu clock but may not be synced across CPUs\n"
4528         "      global:   Synced across CPUs but slows tracing down.\n"
4529         "     counter:   Not a clock, but just an increment\n"
4530         "      uptime:   Jiffy counter from time of boot\n"
4531         "        perf:   Same clock that perf events use\n"
4532 #ifdef CONFIG_X86_64
4533         "     x86-tsc:   TSC cycle counter\n"
4534 #endif
4535         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4536         "       delta:   Delta difference against a buffer-wide timestamp\n"
4537         "    absolute:   Absolute (standalone) timestamp\n"
4538         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4539         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4540         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4541         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4542         "\t\t\t  Remove sub-buffer with rmdir\n"
4543         "  trace_options\t\t- Set format or modify how tracing happens\n"
4544         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
4545         "\t\t\t  option name\n"
4546         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4547 #ifdef CONFIG_DYNAMIC_FTRACE
4548         "\n  available_filter_functions - list of functions that can be filtered on\n"
4549         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4550         "\t\t\t  functions\n"
4551         "\t     accepts: func_full_name or glob-matching-pattern\n"
4552         "\t     modules: Can select a group via module\n"
4553         "\t      Format: :mod:<module-name>\n"
4554         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4555         "\t    triggers: a command to perform when function is hit\n"
4556         "\t      Format: <function>:<trigger>[:count]\n"
4557         "\t     trigger: traceon, traceoff\n"
4558         "\t\t      enable_event:<system>:<event>\n"
4559         "\t\t      disable_event:<system>:<event>\n"
4560 #ifdef CONFIG_STACKTRACE
4561         "\t\t      stacktrace\n"
4562 #endif
4563 #ifdef CONFIG_TRACER_SNAPSHOT
4564         "\t\t      snapshot\n"
4565 #endif
4566         "\t\t      dump\n"
4567         "\t\t      cpudump\n"
4568         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4569         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4570         "\t     The first one will disable tracing every time do_fault is hit\n"
4571         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4572         "\t       The first time do_trap is hit and it disables tracing, the\n"
4573         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4574         "\t       the counter will not decrement. It only decrements when the\n"
4575         "\t       trigger did work\n"
4576         "\t     To remove trigger without count:\n"
4577         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4578         "\t     To remove trigger with a count:\n"
4579         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4580         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4581         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4582         "\t    modules: Can select a group via module command :mod:\n"
4583         "\t    Does not accept triggers\n"
4584 #endif /* CONFIG_DYNAMIC_FTRACE */
4585 #ifdef CONFIG_FUNCTION_TRACER
4586         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4587         "\t\t    (function)\n"
4588 #endif
4589 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4590         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4591         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4592         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4593 #endif
4594 #ifdef CONFIG_TRACER_SNAPSHOT
4595         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4596         "\t\t\t  snapshot buffer. Read the contents for more\n"
4597         "\t\t\t  information\n"
4598 #endif
4599 #ifdef CONFIG_STACK_TRACER
4600         "  stack_trace\t\t- Shows the max stack trace when active\n"
4601         "  stack_max_size\t- Shows current max stack size that was traced\n"
4602         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4603         "\t\t\t  new trace)\n"
4604 #ifdef CONFIG_DYNAMIC_FTRACE
4605         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4606         "\t\t\t  traces\n"
4607 #endif
4608 #endif /* CONFIG_STACK_TRACER */
4609 #ifdef CONFIG_KPROBE_EVENTS
4610         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4611         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4612 #endif
4613 #ifdef CONFIG_UPROBE_EVENTS
4614         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4615         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4616 #endif
4617 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4618         "\t  accepts: event-definitions (one definition per line)\n"
4619         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4620         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4621         "\t           -:[<group>/]<event>\n"
4622 #ifdef CONFIG_KPROBE_EVENTS
4623         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4624   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4625 #endif
4626 #ifdef CONFIG_UPROBE_EVENTS
4627   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4628 #endif
4629         "\t     args: <name>=fetcharg[:type]\n"
4630         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4631 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4632         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4633 #else
4634         "\t           $stack<index>, $stack, $retval, $comm\n"
4635 #endif
4636         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4637         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4638         "\t           <type>\\[<array-size>\\]\n"
4639 #endif
4640         "  events/\t\t- Directory containing all trace event subsystems:\n"
4641         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4642         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4643         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4644         "\t\t\t  events\n"
4645         "      filter\t\t- If set, only events passing filter are traced\n"
4646         "  events/<system>/<event>/\t- Directory containing control files for\n"
4647         "\t\t\t  <event>:\n"
4648         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4649         "      filter\t\t- If set, only events passing filter are traced\n"
4650         "      trigger\t\t- If set, a command to perform when event is hit\n"
4651         "\t    Format: <trigger>[:count][if <filter>]\n"
4652         "\t   trigger: traceon, traceoff\n"
4653         "\t            enable_event:<system>:<event>\n"
4654         "\t            disable_event:<system>:<event>\n"
4655 #ifdef CONFIG_HIST_TRIGGERS
4656         "\t            enable_hist:<system>:<event>\n"
4657         "\t            disable_hist:<system>:<event>\n"
4658 #endif
4659 #ifdef CONFIG_STACKTRACE
4660         "\t\t    stacktrace\n"
4661 #endif
4662 #ifdef CONFIG_TRACER_SNAPSHOT
4663         "\t\t    snapshot\n"
4664 #endif
4665 #ifdef CONFIG_HIST_TRIGGERS
4666         "\t\t    hist (see below)\n"
4667 #endif
4668         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4669         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4670         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4671         "\t                  events/block/block_unplug/trigger\n"
4672         "\t   The first disables tracing every time block_unplug is hit.\n"
4673         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4674         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4675         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
4676         "\t   Like function triggers, the counter is only decremented if it\n"
4677         "\t    enabled or disabled tracing.\n"
4678         "\t   To remove a trigger without a count:\n"
4679         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4680         "\t   To remove a trigger with a count:\n"
4681         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4682         "\t   Filters can be ignored when removing a trigger.\n"
4683 #ifdef CONFIG_HIST_TRIGGERS
4684         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4685         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4686         "\t            [:values=<field1[,field2,...]>]\n"
4687         "\t            [:sort=<field1[,field2,...]>]\n"
4688         "\t            [:size=#entries]\n"
4689         "\t            [:pause][:continue][:clear]\n"
4690         "\t            [:name=histname1]\n"
4691         "\t            [if <filter>]\n\n"
4692         "\t    When a matching event is hit, an entry is added to a hash\n"
4693         "\t    table using the key(s) and value(s) named, and the value of a\n"
4694         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4695         "\t    correspond to fields in the event's format description.  Keys\n"
4696         "\t    can be any field, or the special string 'stacktrace'.\n"
4697         "\t    Compound keys consisting of up to two fields can be specified\n"
4698         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4699         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4700         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4701         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4702         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4703         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4704         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4705         "\t    its histogram data will be shared with other triggers of the\n"
4706         "\t    same name, and trigger hits will update this common data.\n\n"
4707         "\t    Reading the 'hist' file for the event will dump the hash\n"
4708         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4709         "\t    triggers attached to an event, there will be a table for each\n"
4710         "\t    trigger in the output.  The table displayed for a named\n"
4711         "\t    trigger will be the same as any other instance having the\n"
4712         "\t    same name.  The default format used to display a given field\n"
4713         "\t    can be modified by appending any of the following modifiers\n"
4714         "\t    to the field name, as applicable:\n\n"
4715         "\t            .hex        display a number as a hex value\n"
4716         "\t            .sym        display an address as a symbol\n"
4717         "\t            .sym-offset display an address as a symbol and offset\n"
4718         "\t            .execname   display a common_pid as a program name\n"
4719         "\t            .syscall    display a syscall id as a syscall name\n"
4720         "\t            .log2       display log2 value rather than raw number\n"
4721         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4722         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4723         "\t    trigger or to start a hist trigger but not log any events\n"
4724         "\t    until told to do so.  'continue' can be used to start or\n"
4725         "\t    restart a paused hist trigger.\n\n"
4726         "\t    The 'clear' parameter will clear the contents of a running\n"
4727         "\t    hist trigger and leave its current paused/active state\n"
4728         "\t    unchanged.\n\n"
4729         "\t    The enable_hist and disable_hist triggers can be used to\n"
4730         "\t    have one event conditionally start and stop another event's\n"
4731         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4732         "\t    the enable_event and disable_event triggers.\n"
4733 #endif
4734 ;
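/*
 * For illustration only (not part of the help text above): a sketch of the
 * hist trigger syntax documented in readme_msg, assuming tracefs is mounted
 * at /sys/kernel/tracing (or /sys/kernel/debug/tracing) and that the
 * kmem:kmalloc event provides the numeric bytes_req field used here:
 *
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 *
 * The trigger is removed by echoing the same string with a '!' prepended,
 * as described above.
 */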
4735 
4736 static ssize_t
4737 tracing_readme_read(struct file *filp, char __user *ubuf,
4738                        size_t cnt, loff_t *ppos)
4739 {
4740         return simple_read_from_buffer(ubuf, cnt, ppos,
4741                                         readme_msg, strlen(readme_msg));
4742 }
4743 
4744 static const struct file_operations tracing_readme_fops = {
4745         .open           = tracing_open_generic,
4746         .read           = tracing_readme_read,
4747         .llseek         = generic_file_llseek,
4748 };
4749 
4750 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4751 {
4752         int *ptr = v;
4753 
4754         if (*pos || m->count)
4755                 ptr++;
4756 
4757         (*pos)++;
4758 
4759         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4760                 if (trace_find_tgid(*ptr))
4761                         return ptr;
4762         }
4763 
4764         return NULL;
4765 }
4766 
4767 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4768 {
4769         void *v;
4770         loff_t l = 0;
4771 
4772         if (!tgid_map)
4773                 return NULL;
4774 
4775         v = &tgid_map[0];
4776         while (l <= *pos) {
4777                 v = saved_tgids_next(m, v, &l);
4778                 if (!v)
4779                         return NULL;
4780         }
4781 
4782         return v;
4783 }
4784 
4785 static void saved_tgids_stop(struct seq_file *m, void *v)
4786 {
4787 }
4788 
4789 static int saved_tgids_show(struct seq_file *m, void *v)
4790 {
4791         int pid = (int *)v - tgid_map;
4792 
4793         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4794         return 0;
4795 }
4796 
4797 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4798         .start          = saved_tgids_start,
4799         .stop           = saved_tgids_stop,
4800         .next           = saved_tgids_next,
4801         .show           = saved_tgids_show,
4802 };
4803 
4804 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4805 {
4806         if (tracing_disabled)
4807                 return -ENODEV;
4808 
4809         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4810 }
4811 
4812 
4813 static const struct file_operations tracing_saved_tgids_fops = {
4814         .open           = tracing_saved_tgids_open,
4815         .read           = seq_read,
4816         .llseek         = seq_lseek,
4817         .release        = seq_release,
4818 };
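/*
 * A usage sketch for the saved_tgids file implemented above (assumptions:
 * tracefs mounted at /sys/kernel/tracing and the "record-tgid" trace option
 * enabled, which is what populates tgid_map):
 *
 *   echo 1 > options/record-tgid
 *   cat saved_tgids      # one "<pid> <tgid>" pair per line, per saved_tgids_show()
 */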
4819 
4820 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4821 {
4822         unsigned int *ptr = v;
4823 
4824         if (*pos || m->count)
4825                 ptr++;
4826 
4827         (*pos)++;
4828 
4829         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4830              ptr++) {
4831                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4832                         continue;
4833 
4834                 return ptr;
4835         }
4836 
4837         return NULL;
4838 }
4839 
4840 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4841 {
4842         void *v;
4843         loff_t l = 0;
4844 
4845         preempt_disable();
4846         arch_spin_lock(&trace_cmdline_lock);
4847 
4848         v = &savedcmd->map_cmdline_to_pid[0];
4849         while (l <= *pos) {
4850                 v = saved_cmdlines_next(m, v, &l);
4851                 if (!v)
4852                         return NULL;
4853         }
4854 
4855         return v;
4856 }
4857 
4858 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4859 {
4860         arch_spin_unlock(&trace_cmdline_lock);
4861         preempt_enable();
4862 }
4863 
4864 static int saved_cmdlines_show(struct seq_file *m, void *v)
4865 {
4866         char buf[TASK_COMM_LEN];
4867         unsigned int *pid = v;
4868 
4869         __trace_find_cmdline(*pid, buf);
4870         seq_printf(m, "%d %s\n", *pid, buf);
4871         return 0;
4872 }
4873 
4874 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4875         .start          = saved_cmdlines_start,
4876         .next           = saved_cmdlines_next,
4877         .stop           = saved_cmdlines_stop,
4878         .show           = saved_cmdlines_show,
4879 };
4880 
4881 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4882 {
4883         if (tracing_disabled)
4884                 return -ENODEV;
4885 
4886         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4887 }
4888 
4889 static const struct file_operations tracing_saved_cmdlines_fops = {
4890         .open           = tracing_saved_cmdlines_open,
4891         .read           = seq_read,
4892         .llseek         = seq_lseek,
4893         .release        = seq_release,
4894 };
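/*
 * A usage sketch for the saved_cmdlines file implemented above (assuming the
 * usual tracefs mount point); each line is "<pid> <comm>" as printed by
 * saved_cmdlines_show():
 *
 *   cat saved_cmdlines
 */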
4895 
4896 static ssize_t
4897 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4898                                  size_t cnt, loff_t *ppos)
4899 {
4900         char buf[64];
4901         int r;
4902 
4903         arch_spin_lock(&trace_cmdline_lock);
4904         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4905         arch_spin_unlock(&trace_cmdline_lock);
4906 
4907         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4908 }
4909 
4910 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4911 {
4912         kfree(s->saved_cmdlines);
4913         kfree(s->map_cmdline_to_pid);
4914         kfree(s);
4915 }
4916 
4917 static int tracing_resize_saved_cmdlines(unsigned int val)
4918 {
4919         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4920 
4921         s = kmalloc(sizeof(*s), GFP_KERNEL);
4922         if (!s)
4923                 return -ENOMEM;
4924 
4925         if (allocate_cmdlines_buffer(val, s) < 0) {
4926                 kfree(s);
4927                 return -ENOMEM;
4928         }
4929 
4930         arch_spin_lock(&trace_cmdline_lock);
4931         savedcmd_temp = savedcmd;
4932         savedcmd = s;
4933         arch_spin_unlock(&trace_cmdline_lock);
4934         free_saved_cmdlines_buffer(savedcmd_temp);
4935 
4936         return 0;
4937 }
4938 
4939 static ssize_t
4940 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4941                                   size_t cnt, loff_t *ppos)
4942 {
4943         unsigned long val;
4944         int ret;
4945 
4946         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4947         if (ret)
4948                 return ret;
4949 
4950         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4951         if (!val || val > PID_MAX_DEFAULT)
4952                 return -EINVAL;
4953 
4954         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4955         if (ret < 0)
4956                 return ret;
4957 
4958         *ppos += cnt;
4959 
4960         return cnt;
4961 }
4962 
4963 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4964         .open           = tracing_open_generic,
4965         .read           = tracing_saved_cmdlines_size_read,
4966         .write          = tracing_saved_cmdlines_size_write,
4967 };
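/*
 * A usage sketch for the saved_cmdlines_size file above (assuming the usual
 * tracefs mount point). Writes of 0 or of values above PID_MAX_DEFAULT are
 * rejected with -EINVAL by tracing_saved_cmdlines_size_write():
 *
 *   cat saved_cmdlines_size            # current number of saved entries
 *   echo 1024 > saved_cmdlines_size    # resize the cmdline cache
 */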
4968 
4969 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4970 static union trace_eval_map_item *
4971 update_eval_map(union trace_eval_map_item *ptr)
4972 {
4973         if (!ptr->map.eval_string) {
4974                 if (ptr->tail.next) {
4975                         ptr = ptr->tail.next;
4976                         /* Set ptr to the next real item (skip head) */
4977                         ptr++;
4978                 } else
4979                         return NULL;
4980         }
4981         return ptr;
4982 }
4983 
4984 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4985 {
4986         union trace_eval_map_item *ptr = v;
4987 
4988         /*
4989          * Paranoid! If ptr points to end, we don't want to increment past it.
4990          * This really should never happen.
4991          */
4992         ptr = update_eval_map(ptr);
4993         if (WARN_ON_ONCE(!ptr))
4994                 return NULL;
4995 
4996         ptr++;
4997 
4998         (*pos)++;
4999 
5000         ptr = update_eval_map(ptr);
5001 
5002         return ptr;
5003 }
5004 
5005 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5006 {
5007         union trace_eval_map_item *v;
5008         loff_t l = 0;
5009 
5010         mutex_lock(&trace_eval_mutex);
5011 
5012         v = trace_eval_maps;
5013         if (v)
5014                 v++;
5015 
5016         while (v && l < *pos) {
5017                 v = eval_map_next(m, v, &l);
5018         }
5019 
5020         return v;
5021 }
5022 
5023 static void eval_map_stop(struct seq_file *m, void *v)
5024 {
5025         mutex_unlock(&trace_eval_mutex);
5026 }
5027 
5028 static int eval_map_show(struct seq_file *m, void *v)
5029 {
5030         union trace_eval_map_item *ptr = v;
5031 
5032         seq_printf(m, "%s %ld (%s)\n",
5033                    ptr->map.eval_string, ptr->map.eval_value,
5034                    ptr->map.system);
5035 
5036         return 0;
5037 }
5038 
5039 static const struct seq_operations tracing_eval_map_seq_ops = {
5040         .start          = eval_map_start,
5041         .next           = eval_map_next,
5042         .stop           = eval_map_stop,
5043         .show           = eval_map_show,
5044 };
5045 
5046 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5047 {
5048         if (tracing_disabled)
5049                 return -ENODEV;
5050 
5051         return seq_open(filp, &tracing_eval_map_seq_ops);
5052 }
5053 
5054 static const struct file_operations tracing_eval_map_fops = {
5055         .open           = tracing_eval_map_open,
5056         .read           = seq_read,
5057         .llseek         = seq_lseek,
5058         .release        = seq_release,
5059 };
5060 
5061 static inline union trace_eval_map_item *
5062 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5063 {
5064         /* Return tail of array given the head */
5065         return ptr + ptr->head.length + 1;
5066 }
5067 
5068 static void
5069 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5070                            int len)
5071 {
5072         struct trace_eval_map **stop;
5073         struct trace_eval_map **map;
5074         union trace_eval_map_item *map_array;
5075         union trace_eval_map_item *ptr;
5076 
5077         stop = start + len;
5078 
5079         /*
5080          * The trace_eval_maps contains the map plus a head and tail item,
5081          * where the head holds the module and length of array, and the
5082          * tail holds a pointer to the next list.
5083          */
5084         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5085         if (!map_array) {
5086                 pr_warn("Unable to allocate trace eval mapping\n");
5087                 return;
5088         }
5089 
5090         mutex_lock(&trace_eval_mutex);
5091 
5092         if (!trace_eval_maps)
5093                 trace_eval_maps = map_array;
5094         else {
5095                 ptr = trace_eval_maps;
5096                 for (;;) {
5097                         ptr = trace_eval_jmp_to_tail(ptr);
5098                         if (!ptr->tail.next)
5099                                 break;
5100                         ptr = ptr->tail.next;
5101 
5102                 }
5103                 ptr->tail.next = map_array;
5104         }
5105         map_array->head.mod = mod;
5106         map_array->head.length = len;
5107         map_array++;
5108 
5109         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5110                 map_array->map = **map;
5111                 map_array++;
5112         }
5113         memset(map_array, 0, sizeof(*map_array));
5114 
5115         mutex_unlock(&trace_eval_mutex);
5116 }
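/*
 * Layout built by trace_insert_eval_map_file() above, shown here as a sketch
 * (indices are into the kmalloc_array() of len + 2 items):
 *
 *   map_array[0]           head: .mod and .length = len
 *   map_array[1 .. len]    one trace_eval_map copied per entry
 *   map_array[len + 1]     zeroed tail; .tail.next chains the next block
 *
 * trace_eval_jmp_to_tail() depends on this: head + head.length + 1 == tail.
 */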
5117 
5118 static void trace_create_eval_file(struct dentry *d_tracer)
5119 {
5120         trace_create_file("eval_map", 0444, d_tracer,
5121                           NULL, &tracing_eval_map_fops);
5122 }
5123 
5124 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5125 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5126 static inline void trace_insert_eval_map_file(struct module *mod,
5127                               struct trace_eval_map **start, int len) { }
5128 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5129 
5130 static void trace_insert_eval_map(struct module *mod,
5131                                   struct trace_eval_map **start, int len)
5132 {
5133         struct trace_eval_map **map;
5134 
5135         if (len <= 0)
5136                 return;
5137 
5138         map = start;
5139 
5140         trace_event_eval_update(map, len);
5141 
5142         trace_insert_eval_map_file(mod, start, len);
5143 }
5144 
5145 static ssize_t
5146 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5147                        size_t cnt, loff_t *ppos)
5148 {
5149         struct trace_array *tr = filp->private_data;
5150         char buf[MAX_TRACER_SIZE+2];
5151         int r;
5152 
5153         mutex_lock(&trace_types_lock);
5154         r = sprintf(buf, "%s\n", tr->current_trace->name);
5155         mutex_unlock(&trace_types_lock);
5156 
5157         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5158 }
5159 
5160 int tracer_init(struct tracer *t, struct trace_array *tr)
5161 {
5162         tracing_reset_online_cpus(&tr->trace_buffer);
5163         return t->init(tr);
5164 }
5165 
5166 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5167 {
5168         int cpu;
5169 
5170         for_each_tracing_cpu(cpu)
5171                 per_cpu_ptr(buf->data, cpu)->entries = val;
5172 }
5173 
5174 #ifdef CONFIG_TRACER_MAX_TRACE
5175 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5176 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5177                                         struct trace_buffer *size_buf, int cpu_id)
5178 {
5179         int cpu, ret = 0;
5180 
5181         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5182                 for_each_tracing_cpu(cpu) {
5183                         ret = ring_buffer_resize(trace_buf->buffer,
5184                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5185                         if (ret < 0)
5186                                 break;
5187                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5188                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5189                 }
5190         } else {
5191                 ret = ring_buffer_resize(trace_buf->buffer,
5192                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5193                 if (ret == 0)
5194                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5195                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5196         }
5197 
5198         return ret;
5199 }
5200 #endif /* CONFIG_TRACER_MAX_TRACE */
5201 
5202 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5203                                         unsigned long size, int cpu)
5204 {
5205         int ret;
5206 
5207         /*
5208          * If kernel or user changes the size of the ring buffer
5209          * we use the size that was given, and we can forget about
5210          * expanding it later.
5211          */
5212         ring_buffer_expanded = true;
5213 
5214         /* May be called before buffers are initialized */
5215         if (!tr->trace_buffer.buffer)
5216                 return 0;
5217 
5218         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5219         if (ret < 0)
5220                 return ret;
5221 
5222 #ifdef CONFIG_TRACER_MAX_TRACE
5223         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5224             !tr->current_trace->use_max_tr)
5225                 goto out;
5226 
5227         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5228         if (ret < 0) {
5229                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5230                                                      &tr->trace_buffer, cpu);
5231                 if (r < 0) {
5232                         /*
5233                          * AARGH! We are left with a different-sized
5234                          * max buffer!!!!
5235                          * The max buffer is our "snapshot" buffer.
5236                          * When a tracer needs a snapshot (one of the
5237                          * latency tracers), it swaps the max buffer
5238                          * with the saved snapshot. We succeeded in
5239                          * updating the size of the main buffer, but failed
5240                          * to update the size of the max buffer. And when we
5241                          * tried to reset the main buffer to its original
5242                          * size, we failed there too. This is very unlikely to
5243                          * happen, but if it does, warn and kill all
5244                          * tracing.
5245                          */
5246                         WARN_ON(1);
5247                         tracing_disabled = 1;
5248                 }
5249                 return ret;
5250         }
5251 
5252         if (cpu == RING_BUFFER_ALL_CPUS)
5253                 set_buffer_entries(&tr->max_buffer, size);
5254         else
5255                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5256 
5257  out:
5258 #endif /* CONFIG_TRACER_MAX_TRACE */
5259 
5260         if (cpu == RING_BUFFER_ALL_CPUS)
5261                 set_buffer_entries(&tr->trace_buffer, size);
5262         else
5263                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5264 
5265         return ret;
5266 }
5267 
5268 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5269                                           unsigned long size, int cpu_id)
5270 {
5271         int ret = size;
5272 
5273         mutex_lock(&trace_types_lock);
5274 
5275         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5276                 /* make sure this cpu is enabled in the mask */
5277                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5278                         ret = -EINVAL;
5279                         goto out;
5280                 }
5281         }
5282 
5283         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5284         if (ret < 0)
5285                 ret = -ENOMEM;
5286 
5287 out:
5288         mutex_unlock(&trace_types_lock);
5289 
5290         return ret;
5291 }
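/*
 * A user-visible sketch of the resize path above, assuming it is reached via
 * the buffer_size_kb files created elsewhere in this file (values are given
 * in KiB there and converted to bytes before tracing_resize_ring_buffer()
 * is called):
 *
 *   echo 8192 > buffer_size_kb               # all CPUs (RING_BUFFER_ALL_CPUS)
 *   echo 4096 > per_cpu/cpu0/buffer_size_kb  # a single CPU
 */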
5292 
5293 
5294 /**
5295  * tracing_update_buffers - used by tracing facility to expand ring buffers
5296  *
5297  * To save memory on systems where tracing is configured in but never
5298  * used, the ring buffers are initially set to a minimum size. Once a
5299  * user starts to use the tracing facility, they need to grow to their
5300  * default size.
5301  *
5302  * This function is to be called when a tracer is about to be used.
5303  */
5304 int tracing_update_buffers(void)
5305 {
5306         int ret = 0;
5307 
5308         mutex_lock(&trace_types_lock);
5309         if (!ring_buffer_expanded)
5310                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5311                                                 RING_BUFFER_ALL_CPUS);
5312         mutex_unlock(&trace_types_lock);
5313 
5314         return ret;
5315 }
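/*
 * Expected call pattern for tracing_update_buffers(), as a sketch; the
 * concrete callers live in the tracer and event enabling paths and are not
 * shown here:
 *
 *      ret = tracing_update_buffers();
 *      if (ret < 0)
 *              return ret;
 *      // ... the ring buffers are now expanded to their full size ...
 */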
5316 
5317 struct trace_option_dentry;
5318 
5319 static void
5320 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5321 
5322 /*
5323  * Used to clear out the tracer before deletion of an instance.
5324  * Must have trace_types_lock held.
5325  */
5326 static void tracing_set_nop(struct trace_array *tr)
5327 {
5328         if (tr->current_trace == &nop_trace)
5329                 return;
5330
5331         tr->current_trace->enabled--;
5332 
5333         if (tr->current_trace->reset)
5334                 tr->current_trace->reset(tr);
5335 
5336         tr->current_trace = &nop_trace;
5337 }
5338 
5339 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5340 {
5341         /* Only enable if the directory has been created already. */
5342         if (!tr->dir)
5343                 return;
5344 
5345         create_trace_option_files(tr, t);
5346 }
5347 
5348 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5349 {
5350         struct tracer *t;
5351 #ifdef CONFIG_TRACER_MAX_TRACE
5352         bool had_max_tr;
5353 #endif
5354         int ret = 0;
5355 
5356         mutex_lock(&trace_types_lock);
5357 
5358         if (!ring_buffer_expanded) {
5359                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5360                                                 RING_BUFFER_ALL_CPUS);
5361                 if (ret < 0)
5362                         goto out;
5363                 ret = 0;
5364         }
5365 
5366         for (t = trace_types; t; t = t->next) {
5367                 if (strcmp(t->name, buf) == 0)
5368                         break;
5369         }
5370         if (!t) {
5371                 ret = -EINVAL;
5372                 goto out;
5373         }
5374         if (t == tr->current_trace)
5375                 goto out;
5376 
5377         /* Some tracers won't work on kernel command line */
5378         if (system_state < SYSTEM_RUNNING && t->noboot) {
5379                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5380                         t->name);
5381                 goto out;
5382         }
5383 
5384         /* Some tracers are only allowed for the top level buffer */
5385         if (!trace_ok_for_array(t, tr)) {
5386                 ret = -EINVAL;
5387                 goto out;
5388         }
5389 
5390         /* If trace pipe files are being read, we can't change the tracer */
5391         if (tr->current_trace->ref) {
5392                 ret = -EBUSY;
5393                 goto out;
5394         }
5395 
5396         trace_branch_disable();
5397 
5398         tr->current_trace->enabled--;
5399 
5400         if (tr->current_trace->reset)
5401                 tr->current_trace->reset(tr);
5402 
5403         /* Current trace needs to be nop_trace before synchronize_sched */
5404         tr->current_trace = &nop_trace;
5405 
5406 #ifdef CONFIG_TRACER_MAX_TRACE
5407         had_max_tr = tr->allocated_snapshot;
5408 
5409         if (had_max_tr && !t->use_max_tr) {
5410                 /*
5411                  * We need to make sure that the update_max_tr sees that
5412                  * current_trace changed to nop_trace to keep it from
5413                  * swapping the buffers after we resize it.
5414                  * update_max_tr() is called with interrupts disabled,
5415                  * so a synchronize_sched() is sufficient.
5416                  */
5417                 synchronize_sched();
5418                 free_snapshot(tr);
5419         }
5420 #endif
5421 
5422 #ifdef CONFIG_TRACER_MAX_TRACE
5423         if (t->use_max_tr && !had_max_tr) {
5424                 ret = tracing_alloc_snapshot_instance(tr);
5425                 if (ret < 0)
5426                         goto out;
5427         }
5428 #endif
5429 
5430         if (t->init) {
5431                 ret = tracer_init(t, tr);
5432                 if (ret)
5433                         goto out;
5434         }
5435 
5436         tr->current_trace = t;
5437         tr->current_trace->enabled++;
5438         trace_branch_enable(tr);
5439  out:
5440         mutex_unlock(&trace_types_lock);
5441 
5442         return ret;
5443 }
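/*
 * A usage sketch for the tracer selection implemented above (assuming the
 * write arrives through the "current_tracer" tracefs file, which is wired to
 * tracing_set_trace_write() below):
 *
 *   cat available_tracers
 *   echo function_graph > current_tracer
 *   echo nop > current_tracer        # back to the no-op tracer
 */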
5444 
5445 static ssize_t
5446 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5447                         size_t cnt, loff_t *ppos)
5448 {
5449         struct trace_array *tr = filp->private_data;
5450         char buf[MAX_TRACER_SIZE+1];
5451         int i;
5452         size_t ret;
5453         int err;
5454 
5455         ret = cnt;
5456 
5457         if (cnt > MAX_TRACER_SIZE)
5458                 cnt = MAX_TRACER_SIZE;
5459 
5460         if (copy_from_user(buf, ubuf, cnt))
5461                 return -EFAULT;
5462 
5463         buf[cnt] = 0;
5464 
5465         /* strip ending whitespace. */
5466         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5467                 buf[i] = 0;
5468 
5469         err = tracing_set_tracer(tr, buf);
5470         if (err)
5471                 return err;
5472 
5473         *ppos += ret;
5474 
5475         return ret;
5476 }
5477 
5478 static ssize_t
5479 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5480                    size_t cnt, loff_t *ppos)
5481 {
5482         char buf[64];
5483         int r;
5484 
5485         r = snprintf(buf, sizeof(buf), "%ld\n",
5486                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5487         if (r > sizeof(buf))
5488                 r = sizeof(buf);
5489         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5490 }
5491 
5492 static ssize_t
5493 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5494                     size_t cnt, loff_t *ppos)
5495 {
5496         unsigned long val;
5497         int ret;
5498 
5499         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5500         if (ret)
5501                 return ret;
5502 
5503         *ptr = val * 1000;
5504 
5505         return cnt;
5506 }
5507 
5508 static ssize_t
5509 tracing_thresh_read(struct file *filp, char __user *ubuf,
5510                     size_t cnt, loff_t *ppos)
5511 {
5512         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5513 }
5514 
5515 static ssize_t
5516 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5517                      size_t cnt, loff_t *ppos)
5518 {
5519         struct trace_array *tr = filp->private_data;
5520         int ret;
5521 
5522         mutex_lock(&trace_types_lock);
5523         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5524         if (ret < 0)
5525                 goto out;
5526 
5527         if (tr->current_trace->update_thresh) {
5528                 ret = tr->current_trace->update_thresh(tr);
5529                 if (ret < 0)
5530                         goto out;
5531         }
5532 
5533         ret = cnt;
5534 out:
5535         mutex_unlock(&trace_types_lock);
5536 
5537         return ret;
5538 }
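/*
 * A usage sketch for the tracing_thresh file above (assuming the usual
 * tracefs mount point). Values are written and read in microseconds;
 * tracing_nsecs_write() stores them internally in nanoseconds:
 *
 *   echo 100 > tracing_thresh   # latency tracers only record latencies > 100 usecs
 *   echo 0 > tracing_thresh     # disable the threshold
 */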
5539 
5540 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5541 
5542 static ssize_t
5543 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5544                      size_t cnt, loff_t *ppos)
5545 {
5546         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5547 }
5548 
5549 static ssize_t
5550 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5551                       size_t cnt, loff_t *ppos)
5552 {
5553         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5554 }
5555 
5556 #endif
5557 
5558 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5559 {
5560         struct trace_array *tr = inode->i_private;
5561         struct trace_iterator *iter;
5562         int ret = 0;
5563 
5564         if (tracing_disabled)
5565                 return -ENODEV;
5566 
5567         if (trace_array_get(tr) < 0)
5568                 return -ENODEV;
5569 
5570         mutex_lock(&trace_types_lock);
5571 
5572         /* create a buffer to store the information to pass to userspace */
5573         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5574         if (!iter) {
5575                 ret = -ENOMEM;
5576                 __trace_array_put(tr);
5577                 goto out;
5578         }
5579 
5580         trace_seq_init(&iter->seq);
5581         iter->trace = tr->current_trace;
5582 
5583         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5584                 ret = -ENOMEM;
5585                 goto fail;
5586         }
5587 
5588         /* trace pipe does not show start of buffer */
5589         cpumask_setall(iter->started);
5590 
5591         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5592                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5593 
5594         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5595         if (trace_clocks[tr->clock_id].in_ns)
5596                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5597 
5598         iter->tr = tr;
5599         iter->trace_buffer = &tr->trace_buffer;
5600         iter->cpu_file = tracing_get_cpu(inode);
5601         mutex_init(&iter->mutex);
5602         filp->private_data = iter;
5603 
5604         if (iter->trace->pipe_open)
5605                 iter->trace->pipe_open(iter);
5606 
5607         nonseekable_open(inode, filp);
5608 
5609         tr->current_trace->ref++;
5610 out:
5611         mutex_unlock(&trace_types_lock);
5612         return ret;
5613 
5614 fail:
5615         /* iter->trace points to tr->current_trace; it must not be freed here */
5616         kfree(iter);
5617         __trace_array_put(tr);
5618         mutex_unlock(&trace_types_lock);
5619         return ret;
5620 }
5621 
5622 static int tracing_release_pipe(struct inode *inode, struct file *file)
5623 {
5624         struct trace_iterator *iter = file->private_data;
5625         struct trace_array *tr = inode->i_private;
5626 
5627         mutex_lock(&trace_types_lock);
5628 
5629         tr->current_trace->ref--;
5630 
5631         if (iter->trace->pipe_close)
5632                 iter->trace->pipe_close(iter);
5633 
5634         mutex_unlock(&trace_types_lock);
5635 
5636         free_cpumask_var(iter->started);
5637         mutex_destroy(&iter->mutex);
5638         kfree(iter);
5639 
5640         trace_array_put(tr);
5641 
5642         return 0;
5643 }
5644 
5645 static __poll_t
5646 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5647 {
5648         struct trace_array *tr = iter->tr;
5649 
5650         /* Iterators are static, they should be filled or empty */
5651         if (trace_buffer_iter(iter, iter->cpu_file))
5652                 return EPOLLIN | EPOLLRDNORM;
5653 
5654         if (tr->trace_flags & TRACE_ITER_BLOCK)
5655                 /*
5656                  * Always select as readable when in blocking mode
5657                  */
5658                 return EPOLLIN | EPOLLRDNORM;
5659         else
5660                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5661                                              filp, poll_table);
5662 }
5663 
5664 static __poll_t
5665 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5666 {
5667         struct trace_iterator *iter = filp->private_data;
5668 
5669         return trace_poll(iter, filp, poll_table);
5670 }
5671 
5672 /* Must be called with iter->mutex held. */
5673 static int tracing_wait_pipe(struct file *filp)
5674 {
5675         struct trace_iterator *iter = filp->private_data;
5676         int ret;
5677 
5678         while (trace_empty(iter)) {
5679 
5680                 if (filp->f_flags & O_NONBLOCK) {
5681                         return -EAGAIN;
5682                 }
5683 
5684                 /*
5685                  * We block until we have read something and tracing has
5686                  * been disabled. We keep blocking while tracing is
5687                  * disabled if nothing has been read yet; this lets a user
5688                  * cat this file and then enable tracing. Once something
5689                  * has been read, we return EOF when tracing is disabled.
5690                  *
5691                  * iter->pos will be 0 if we haven't read anything.
5692                  */
5693                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5694                         break;
5695 
5696                 mutex_unlock(&iter->mutex);
5697 
5698                 ret = wait_on_pipe(iter, false);
5699 
5700                 mutex_lock(&iter->mutex);
5701 
5702                 if (ret)
5703                         return ret;
5704         }
5705 
5706         return 1;
5707 }
5708 
5709 /*
5710  * Consumer reader.
5711  */
5712 static ssize_t
5713 tracing_read_pipe(struct file *filp, char __user *ubuf,
5714                   size_t cnt, loff_t *ppos)
5715 {
5716         struct trace_iterator *iter = filp->private_data;
5717         ssize_t sret;
5718 
5719         /*
5720          * Avoid more than one consumer on a single file descriptor.
5721          * This is just a matter of trace coherency; the ring buffer itself
5722          * is protected.
5723          */
5724         mutex_lock(&iter->mutex);
5725 
5726         /* return any leftover data */
5727         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5728         if (sret != -EBUSY)
5729                 goto out;
5730 
5731         trace_seq_init(&iter->seq);
5732 
5733         if (iter->trace->read) {
5734                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5735                 if (sret)
5736                         goto out;
5737         }
5738 
5739 waitagain:
5740         sret = tracing_wait_pipe(filp);
5741         if (sret <= 0)
5742                 goto out;
5743 
5744         /* stop when tracing is finished */
5745         if (trace_empty(iter)) {
5746                 sret = 0;
5747                 goto out;
5748         }
5749 
5750         if (cnt >= PAGE_SIZE)
5751                 cnt = PAGE_SIZE - 1;
5752 
5753         /* reset all but tr, trace, and overruns */
5754         memset(&iter->seq, 0,
5755                sizeof(struct trace_iterator) -
5756                offsetof(struct trace_iterator, seq));
5757         cpumask_clear(iter->started);
5758         iter->pos = -1;
5759 
5760         trace_event_read_lock();
5761         trace_access_lock(iter->cpu_file);
5762         while (trace_find_next_entry_inc(iter) != NULL) {
5763                 enum print_line_t ret;
5764                 int save_len = iter->seq.seq.len;
5765 
5766                 ret = print_trace_line(iter);
5767                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5768                         /* don't print partial lines */
5769                         iter->seq.seq.len = save_len;
5770                         break;
5771                 }
5772                 if (ret != TRACE_TYPE_NO_CONSUME)
5773                         trace_consume(iter);
5774 
5775                 if (trace_seq_used(&iter->seq) >= cnt)
5776                         break;
5777 
5778                 /*
5779                  * Setting the full flag means we reached the trace_seq buffer
5780                  * size and we should leave by partial output condition above.
5781                  * One of the trace_seq_* functions is not used properly.
5782                  */
5783                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5784                           iter->ent->type);
5785         }
5786         trace_access_unlock(iter->cpu_file);
5787         trace_event_read_unlock();
5788 
5789         /* Now copy what we have to the user */
5790         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5791         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5792                 trace_seq_init(&iter->seq);
5793 
5794         /*
5795          * If there was nothing to send to user, in spite of consuming trace
5796          * entries, go back to wait for more entries.
5797          */
5798         if (sret == -EBUSY)
5799                 goto waitagain;
5800 
5801 out:
5802         mutex_unlock(&iter->mutex);
5803 
5804         return sret;
5805 }
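/*
 * A user-space sketch (not kernel code) of the consumer behaviour implemented
 * by tracing_read_pipe() above; the read blocks in tracing_wait_pipe() until
 * data is available and consumes entries as it goes. The tracefs path is an
 * assumption about where the instance is mounted:
 *
 *      int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *      char buf[4096];
 *      ssize_t n;
 *
 *      while ((n = read(fd, buf, sizeof(buf))) > 0)
 *              write(STDOUT_FILENO, buf, n);
 *      close(fd);
 */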
5806 
5807 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5808                                      unsigned int idx)
5809 {
5810         __free_page(spd->pages[idx]);
5811 }
5812 
5813 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5814         .can_merge              = 0,
5815         .confirm                = generic_pipe_buf_confirm,
5816         .release                = generic_pipe_buf_release,
5817         .steal                  = generic_pipe_buf_steal,
5818         .get                    = generic_pipe_buf_get,
5819 };
5820 
5821 static size_t
5822 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5823 {
5824         size_t count;
5825         int save_len;
5826         int ret;
5827 
5828         /* Seq buffer is page-sized, exactly what we need. */
5829         for (;;) {
5830                 save_len = iter->seq.seq.len;
5831                 ret = print_trace_line(iter);
5832 
5833                 if (trace_seq_has_overflowed(&iter->seq)) {
5834                         iter->seq.seq.len = save_len;
5835                         break;
5836                 }
5837 
5838                 /*
5839                  * This should not be hit, because it should only
5840                  * be set if the iter->seq overflowed. But check it
5841                  * anyway to be safe.
5842                  */
5843                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5844                         iter->seq.seq.len = save_len;
5845                         break;
5846                 }
5847 
5848                 count = trace_seq_used(&iter->seq) - save_len;
5849                 if (rem < count) {
5850                         rem = 0;
5851                         iter->seq.seq.len = save_len;
5852                         break;
5853                 }
5854 
5855                 if (ret != TRACE_TYPE_NO_CONSUME)
5856                         trace_consume(iter);
5857                 rem -= count;
5858                 if (!trace_find_next_entry_inc(iter))   {
5859                         rem = 0;
5860                         iter->ent = NULL;
5861                         break;
5862                 }
5863         }
5864 
5865         return rem;
5866 }
5867 
5868 static ssize_t tracing_splice_read_pipe(struct file *filp,
5869                                         loff_t *ppos,
5870                                         struct pipe_inode_info *pipe,
5871                                         size_t len,
5872                                         unsigned int flags)
5873 {
5874         struct page *pages_def[PIPE_DEF_BUFFERS];
5875         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5876         struct trace_iterator *iter = filp->private_data;
5877         struct splice_pipe_desc spd = {
5878                 .pages          = pages_def,
5879                 .partial        = partial_def,
5880                 .nr_pages       = 0, /* This gets updated below. */
5881                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5882                 .ops            = &tracing_pipe_buf_ops,
5883                 .spd_release    = tracing_spd_release_pipe,
5884         };
5885         ssize_t ret;
5886         size_t rem;
5887         unsigned int i;
5888 
5889         if (splice_grow_spd(pipe, &spd))
5890                 return -ENOMEM;
5891 
5892         mutex_lock(&iter->mutex);
5893 
5894         if (iter->trace->splice_read) {
5895                 ret = iter->trace->splice_read(iter, filp,
5896                                                ppos, pipe, len, flags);
5897                 if (ret)
5898                         goto out_err;
5899         }
5900 
5901         ret = tracing_wait_pipe(filp);
5902         if (ret <= 0)
5903                 goto out_err;
5904 
5905         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5906                 ret = -EFAULT;
5907                 goto out_err;
5908         }
5909 
5910         trace_event_read_lock();
5911         trace_access_lock(iter->cpu_file);
5912 
5913         /* Fill as many pages as possible. */
5914         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5915                 spd.pages[i] = alloc_page(GFP_KERNEL);
5916                 if (!spd.pages[i])
5917                         break;
5918 
5919                 rem = tracing_fill_pipe_page(rem, iter);
5920 
5921                 /* Copy the data into the page, so we can start over. */
5922                 ret = trace_seq_to_buffer(&iter->seq,
5923                                           page_address(spd.pages[i]),
5924                                           trace_seq_used(&iter->seq));
5925                 if (ret < 0) {
5926                         __free_page(spd.pages[i]);
5927                         break;
5928                 }
5929                 spd.partial[i].offset = 0;