TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * ring buffer based function tracer
  4  *
  5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  7  *
  8  * Originally taken from the RT patch by:
  9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
 10  *
 11  * Based on code from the latency_tracer, that is:
 12  *  Copyright (C) 2004-2006 Ingo Molnar
 13  *  Copyright (C) 2004 Nadia Yvette Chambers
 14  */
 15 #include <linux/ring_buffer.h>
 16 #include <generated/utsrelease.h>
 17 #include <linux/stacktrace.h>
 18 #include <linux/writeback.h>
 19 #include <linux/kallsyms.h>
 20 #include <linux/seq_file.h>
 21 #include <linux/notifier.h>
 22 #include <linux/irqflags.h>
 23 #include <linux/debugfs.h>
 24 #include <linux/tracefs.h>
 25 #include <linux/pagemap.h>
 26 #include <linux/hardirq.h>
 27 #include <linux/linkage.h>
 28 #include <linux/uaccess.h>
 29 #include <linux/vmalloc.h>
 30 #include <linux/ftrace.h>
 31 #include <linux/module.h>
 32 #include <linux/percpu.h>
 33 #include <linux/splice.h>
 34 #include <linux/kdebug.h>
 35 #include <linux/string.h>
 36 #include <linux/mount.h>
 37 #include <linux/rwsem.h>
 38 #include <linux/slab.h>
 39 #include <linux/ctype.h>
 40 #include <linux/init.h>
 41 #include <linux/poll.h>
 42 #include <linux/nmi.h>
 43 #include <linux/fs.h>
 44 #include <linux/trace.h>
 45 #include <linux/sched/clock.h>
 46 #include <linux/sched/rt.h>
 47 
 48 #include "trace.h"
 49 #include "trace_output.h"
 50 
 51 /*
 52  * On boot up, the ring buffer is set to the minimum size, so that
 53  * we do not waste memory on systems that are not using tracing.
 54  */
 55 bool ring_buffer_expanded;
 56 
 57 /*
 58  * We need to change this state when a selftest is running.
 59  * A selftest will look into the ring-buffer to count the
 60  * entries inserted during the selftest, although concurrent
 61  * insertions into the ring-buffer, such as trace_printk, could occur
 62  * at the same time, giving false positive or negative results.
 63  */
 64 static bool __read_mostly tracing_selftest_running;
 65 
 66 /*
 67  * If a tracer is running, we do not want to run SELFTEST.
 68  */
 69 bool __read_mostly tracing_selftest_disabled;
 70 
 71 /* Pipe tracepoints to printk */
 72 struct trace_iterator *tracepoint_print_iter;
 73 int tracepoint_printk;
 74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 75 
 76 /* For tracers that don't implement custom flags */
 77 static struct tracer_opt dummy_tracer_opt[] = {
 78         { }
 79 };
 80 
 81 static int
 82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 83 {
 84         return 0;
 85 }
 86 
 87 /*
 88  * To prevent the comm cache from being overwritten when no
 89  * tracing is active, only save the comm when a trace event
 90  * occurred.
 91  */
 92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 93 
 94 /*
 95  * Kill all tracing for good (never come back).
 96  * It is initialized to 1 but will be set to zero if the
 97  * initialization of the tracer is successful. That is the only
 98  * place that sets it back to zero.
 99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
 107  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 108  * is set, then ftrace_dump is called. This will output the contents
 109  * of the ftrace buffers to the console.  This is very useful for
 110  * capturing traces that lead to crashes and outputting them to a
 111  * serial console.
 112  *
 113  * It is off by default, but you can enable it either by specifying
 114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
 115  * /proc/sys/kernel/ftrace_dump_on_oops.
 116  * Set it to 1 to dump the buffers of all CPUs.
 117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
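
/*
 * Example usage (illustrative, based on the comment above and on the
 * "ftrace_dump_on_oops" __setup() handler below):
 *
 *   ftrace_dump_on_oops                             (boot: dump all CPU buffers)
 *   ftrace_dump_on_oops=orig_cpu                    (boot: dump only the oops CPU)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   (run time: all CPU buffers)
 *   echo 2 > /proc/sys/kernel/ftrace_dump_on_oops   (run time: only the oops CPU)
 */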
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
 137          * from "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
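
/*
 * Illustrative sketch (not part of the original file, compiled out): walking
 * the saved arrays under the layout described above. ptr[0] is the head item,
 * ptr[1]..ptr[len] are the maps, and ptr[len + 1] is the tail item whose
 * tail.next points to the next saved array (or NULL). The callback is
 * hypothetical.
 */
#if 0
static void eval_map_walk_example(void (*cb)(struct trace_eval_map *map))
{
        union trace_eval_map_item *ptr = trace_eval_maps;

        while (ptr) {
                unsigned long len = ptr->head.length;
                unsigned long i;

                /* skip the head item, visit the real maps */
                for (i = 1; i <= len; i++)
                        cb(&ptr[i].map);

                /* the tail item links to the next saved array */
                ptr = ptr[len + 1].tail.next;
        }
}
#endif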
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185 
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190 
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
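
/*
 * Worked example of the rounding above: 1500 ns -> (1500 + 500) / 1000 = 2 us,
 * while 1499 ns -> 1999 / 1000 = 1 us, i.e. rounding to the nearest microsecond.
 */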
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278 
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288 
289         return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314 
315         return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
 329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340 
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362 
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
 367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384 
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390 
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394 
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
 405  * @v: The last pid that was shown (the actual pid plus one, so zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417 
418         (*pos)++;
419 
 420         /* pid is already +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426 
427         return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445 
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449 
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468 
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
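
/*
 * Illustrative sketch (not part of the original file, compiled out): how the
 * three helpers above are meant to back a seq_file interface. The wrapper
 * names and fetching the pid list from m->private are hypothetical; real
 * users (the ftrace and event pid filters) look the list up from their
 * trace_array under the appropriate locking.
 */
#if 0
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
        .start  = example_pid_seq_start,
        .next   = example_pid_seq_next,
        .stop   = example_pid_seq_stop,
        .show   = trace_pid_show,
};
#endif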
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488 
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491 
492         /*
 493          * Always create a new array; the write is an all-or-nothing
 494          * operation. A new array is created whenever the user adds
 495          * pids, so that if the operation fails, the current list is
 496          * not modified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list) {
500                 trace_parser_put(&parser);
501                 return -ENOMEM;
502         }
503 
504         pid_list->pid_max = READ_ONCE(pid_max);
505 
506         /* Only truncating will shrink pid_max */
507         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508                 pid_list->pid_max = filtered_pids->pid_max;
509 
510         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511         if (!pid_list->pids) {
512                 trace_parser_put(&parser);
513                 kfree(pid_list);
514                 return -ENOMEM;
515         }
516 
517         if (filtered_pids) {
518                 /* copy the current bits to the new max */
519                 for_each_set_bit(pid, filtered_pids->pids,
520                                  filtered_pids->pid_max) {
521                         set_bit(pid, pid_list->pids);
522                         nr_pids++;
523                 }
524         }
525 
526         while (cnt > 0) {
527 
528                 pos = 0;
529 
530                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
531                 if (ret < 0 || !trace_parser_loaded(&parser))
532                         break;
533 
534                 read += ret;
535                 ubuf += ret;
536                 cnt -= ret;
537 
538                 ret = -EINVAL;
539                 if (kstrtoul(parser.buffer, 0, &val))
540                         break;
541                 if (val >= pid_list->pid_max)
542                         break;
543 
544                 pid = (pid_t)val;
545 
546                 set_bit(pid, pid_list->pids);
547                 nr_pids++;
548 
549                 trace_parser_clear(&parser);
550                 ret = 0;
551         }
552         trace_parser_put(&parser);
553 
554         if (ret < 0) {
555                 trace_free_pid_list(pid_list);
556                 return ret;
557         }
558 
559         if (!nr_pids) {
560                 /* Cleared the list of pids */
561                 trace_free_pid_list(pid_list);
562                 read = ret;
563                 pid_list = NULL;
564         }
565 
566         *new_pid_list = pid_list;
567 
568         return read;
569 }
570 
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573         u64 ts;
574 
575         /* Early boot up does not have a buffer yet */
576         if (!buf->buffer)
577                 return trace_clock_local();
578 
579         ts = ring_buffer_time_stamp(buf->buffer, cpu);
580         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581 
582         return ts;
583 }
584 
585 u64 ftrace_now(int cpu)
586 {
587         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589 
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
 593  * Shows if the global trace has been enabled or not. It uses the
 594  * mirror flag "buffer_disabled" so it can be used in fast paths such
 595  * as by the irqsoff tracer. But it may be inaccurate due to races. If
 596  * you need to know the accurate state, use tracing_is_on(), which is a
 597  * little slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601         /*
602          * For quick access (irqsoff uses this in fast path), just
603          * return the mirror variable of the state of the ring buffer.
604          * It's a little racy, but we don't really care.
605          */
606         smp_rmb();
607         return !global_trace.buffer_disabled;
608 }
609 
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
 615  * This number is purposely set to a low value of 16384.
 616  * If a dump on oops happens, it is much appreciated not to
 617  * have to wait for all that output. In any case, this is
 618  * configurable at both boot time and run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
621 
622 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623 
 624 /* trace_types holds a linked list of available tracers. */
625 static struct tracer            *trace_types __read_mostly;
626 
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631 
632 /*
 633  * serialize the access to the ring buffer
 634  *
 635  * The ring buffer serializes readers, but that is only low-level protection.
 636  * The validity of the events (returned by ring_buffer_peek() etc.)
 637  * is not protected by the ring buffer.
 638  *
 639  * The content of events may become garbage if we allow another process to
 640  * consume these events concurrently:
 641  *   A) the page of the consumed events may become a normal page
 642  *      (not a reader page) in the ring buffer, and this page will be
 643  *      rewritten by the event producer.
 644  *   B) the page of the consumed events may become a page for splice_read,
 645  *      and this page will be returned to the system.
 646  *
 647  * These primitives allow multiple processes to access different per-CPU
 648  * ring buffers concurrently.
 649  *
 650  * These primitives don't distinguish read-only from read-consume access.
 651  * Multiple read-only accesses are also serialized.
652  */
653 
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657 
658 static inline void trace_access_lock(int cpu)
659 {
660         if (cpu == RING_BUFFER_ALL_CPUS) {
661                 /* gain it for accessing the whole ring buffer. */
662                 down_write(&all_cpu_access_lock);
663         } else {
664                 /* gain it for accessing a cpu ring buffer. */
665 
666                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667                 down_read(&all_cpu_access_lock);
668 
669                 /* Secondly block other access to this @cpu ring buffer. */
670                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671         }
672 }
673 
674 static inline void trace_access_unlock(int cpu)
675 {
676         if (cpu == RING_BUFFER_ALL_CPUS) {
677                 up_write(&all_cpu_access_lock);
678         } else {
679                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680                 up_read(&all_cpu_access_lock);
681         }
682 }
683 
684 static inline void trace_access_lock_init(void)
685 {
686         int cpu;
687 
688         for_each_possible_cpu(cpu)
689                 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691 
692 #else
693 
694 static DEFINE_MUTEX(access_lock);
695 
696 static inline void trace_access_lock(int cpu)
697 {
698         (void)cpu;
699         mutex_lock(&access_lock);
700 }
701 
702 static inline void trace_access_unlock(int cpu)
703 {
704         (void)cpu;
705         mutex_unlock(&access_lock);
706 }
707 
708 static inline void trace_access_lock_init(void)
709 {
710 }
711 
712 #endif
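
/*
 * Illustrative usage sketch (not part of the original file, compiled out):
 * a reader of a single per-CPU buffer pairs the calls as below; passing
 * RING_BUFFER_ALL_CPUS instead serializes against all per-CPU readers.
 * The consuming step is only a placeholder.
 */
#if 0
static void consume_cpu_buffer_example(struct trace_buffer *buf, int cpu)
{
        trace_access_lock(cpu);
        /* ... consume events from buf->buffer for @cpu here ... */
        trace_access_unlock(cpu);
}
#endif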
713 
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716                                  unsigned long flags,
717                                  int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719                                       struct ring_buffer *buffer,
720                                       unsigned long flags,
721                                       int skip, int pc, struct pt_regs *regs);
722 
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725                                         unsigned long flags,
726                                         int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730                                       struct ring_buffer *buffer,
731                                       unsigned long flags,
732                                       int skip, int pc, struct pt_regs *regs)
733 {
734 }
735 
736 #endif
737 
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740                   int type, unsigned long flags, int pc)
741 {
742         struct trace_entry *ent = ring_buffer_event_data(event);
743 
744         tracing_generic_entry_update(ent, flags, pc);
745         ent->type = type;
746 }
747 
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750                           int type,
751                           unsigned long len,
752                           unsigned long flags, int pc)
753 {
754         struct ring_buffer_event *event;
755 
756         event = ring_buffer_lock_reserve(buffer, len);
757         if (event != NULL)
758                 trace_event_setup(event, type, flags, pc);
759 
760         return event;
761 }
762 
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765         if (tr->trace_buffer.buffer)
766                 ring_buffer_record_on(tr->trace_buffer.buffer);
767         /*
768          * This flag is looked at when buffers haven't been allocated
 769          * yet, or by some tracers (like irqsoff) that just want to
 770          * know if the ring buffer has been disabled, but can handle
 771          * races where it gets disabled while we still do a record.
772          * As the check is in the fast path of the tracers, it is more
773          * important to be fast than accurate.
774          */
775         tr->buffer_disabled = 0;
776         /* Make the flag seen by readers */
777         smp_wmb();
778 }
779 
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788         tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791 
792 
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796         __this_cpu_write(trace_taskinfo_save, true);
797 
798         /* If this is the temp buffer, we need to commit fully */
799         if (this_cpu_read(trace_buffered_event) == event) {
800                 /* Length is in event->array[0] */
801                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802                 /* Release the temp buffer */
803                 this_cpu_dec(trace_buffered_event_cnt);
804         } else
805                 ring_buffer_unlock_commit(buffer, event);
806 }
807 
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:    The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816         struct ring_buffer_event *event;
817         struct ring_buffer *buffer;
818         struct print_entry *entry;
819         unsigned long irq_flags;
820         int alloc;
821         int pc;
822 
823         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824                 return 0;
825 
826         pc = preempt_count();
827 
828         if (unlikely(tracing_selftest_running || tracing_disabled))
829                 return 0;
830 
831         alloc = sizeof(*entry) + size + 2; /* possible \n added */
832 
833         local_save_flags(irq_flags);
834         buffer = global_trace.trace_buffer.buffer;
835         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
836                                             irq_flags, pc);
837         if (!event)
838                 return 0;
839 
840         entry = ring_buffer_event_data(event);
841         entry->ip = ip;
842 
843         memcpy(&entry->buf, str, size);
844 
845         /* Add a newline if necessary */
846         if (entry->buf[size - 1] != '\n') {
847                 entry->buf[size] = '\n';
848                 entry->buf[size + 1] = '\0';
849         } else
850                 entry->buf[size] = '\0';
851 
852         __buffer_unlock_commit(buffer, event);
853         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854 
855         return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858 
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:    The address of the caller
 862  * @str:   The constant string to write to the buffer
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866         struct ring_buffer_event *event;
867         struct ring_buffer *buffer;
868         struct bputs_entry *entry;
869         unsigned long irq_flags;
870         int size = sizeof(struct bputs_entry);
871         int pc;
872 
873         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874                 return 0;
875 
876         pc = preempt_count();
877 
878         if (unlikely(tracing_selftest_running || tracing_disabled))
879                 return 0;
880 
881         local_save_flags(irq_flags);
882         buffer = global_trace.trace_buffer.buffer;
883         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884                                             irq_flags, pc);
885         if (!event)
886                 return 0;
887 
888         entry = ring_buffer_event_data(event);
889         entry->ip                       = ip;
890         entry->str                      = str;
891 
892         __buffer_unlock_commit(buffer, event);
893         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894 
895         return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898 
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902         struct tracer *tracer = tr->current_trace;
903         unsigned long flags;
904 
905         if (in_nmi()) {
906                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907                 internal_trace_puts("*** snapshot is being ignored        ***\n");
908                 return;
909         }
910 
911         if (!tr->allocated_snapshot) {
912                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913                 internal_trace_puts("*** stopping trace here!   ***\n");
914                 tracing_off();
915                 return;
916         }
917 
918         /* Note, snapshot can not be used when the tracer uses it */
919         if (tracer->use_max_tr) {
920                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922                 return;
923         }
924 
925         local_irq_save(flags);
926         update_max_tr(tr, current, smp_processor_id());
927         local_irq_restore(flags);
928 }
929 
930 /**
931  * tracing_snapshot - take a snapshot of the current buffer.
932  *
933  * This causes a swap between the snapshot buffer and the current live
934  * tracing buffer. You can use this to take snapshots of the live
935  * trace when some condition is triggered, but continue to trace.
936  *
 937  * Note, make sure to allocate the snapshot, either by calling
 938  * tracing_snapshot_alloc(), or by doing it manually
 939  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 940  *
 941  * If the snapshot buffer is not allocated, this will stop tracing,
 942  * basically making a permanent snapshot.
943  */
944 void tracing_snapshot(void)
945 {
946         struct trace_array *tr = &global_trace;
947 
948         tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
951 
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953                                         struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955 
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958         int ret;
959 
960         if (!tr->allocated_snapshot) {
961 
962                 /* allocate spare buffer */
963                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
964                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965                 if (ret < 0)
966                         return ret;
967 
968                 tr->allocated_snapshot = true;
969         }
970 
971         return 0;
972 }
973 
974 static void free_snapshot(struct trace_array *tr)
975 {
976         /*
 977          * We don't free the ring buffer; instead, we resize it because
 978          * the max_tr ring buffer has some state (e.g. ring->clock) and
 979          * we want to preserve it.
980          */
981         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982         set_buffer_entries(&tr->max_buffer, 1);
983         tracing_reset_online_cpus(&tr->max_buffer);
984         tr->allocated_snapshot = false;
985 }
986 
987 /**
988  * tracing_alloc_snapshot - allocate snapshot buffer.
989  *
990  * This only allocates the snapshot buffer if it isn't already
991  * allocated - it doesn't also take a snapshot.
992  *
993  * This is meant to be used in cases where the snapshot buffer needs
994  * to be set up for events that can't sleep but need to be able to
995  * trigger a snapshot.
996  */
997 int tracing_alloc_snapshot(void)
998 {
999         struct trace_array *tr = &global_trace;
1000         int ret;
1001 
1002         ret = tracing_alloc_snapshot_instance(tr);
1003         WARN_ON(ret < 0);
1004 
1005         return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008 
1009 /**
1010  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011  *
1012  * This is similar to tracing_snapshot(), but it will allocate the
1013  * snapshot buffer if it isn't already allocated. Use this only
1014  * where it is safe to sleep, as the allocation may sleep.
1015  *
1016  * This causes a swap between the snapshot buffer and the current live
1017  * tracing buffer. You can use this to take snapshots of the live
1018  * trace when some condition is triggered, but continue to trace.
1019  */
1020 void tracing_snapshot_alloc(void)
1021 {
1022         int ret;
1023 
1024         ret = tracing_alloc_snapshot();
1025         if (ret < 0)
1026                 return;
1027 
1028         tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040         return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045         /* Give warning */
1046         tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050 
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053         if (tr->trace_buffer.buffer)
1054                 ring_buffer_record_off(tr->trace_buffer.buffer);
1055         /*
1056          * This flag is looked at when buffers haven't been allocated
1057          * yet, or by some tracers (like irqsoff) that just want to
1058          * know if the ring buffer has been disabled, but can handle
1059          * races where it gets disabled while we still do a record.
1060          * As the check is in the fast path of the tracers, it is more
1061          * important to be fast than accurate.
1062          */
1063         tr->buffer_disabled = 1;
1064         /* Make the flag seen by readers */
1065         smp_wmb();
1066 }
1067 
1068 /**
1069  * tracing_off - turn off tracing buffers
1070  *
1071  * This function stops the tracing buffers from recording data.
1072  * It does not disable any overhead the tracers themselves may
1073  * be causing. This function simply causes all recording to
1074  * the ring buffers to fail.
1075  */
1076 void tracing_off(void)
1077 {
1078         tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081 
1082 void disable_trace_on_warning(void)
1083 {
1084         if (__disable_trace_on_warning)
1085                 tracing_off();
1086 }
1087 
1088 /**
1089  * tracer_tracing_is_on - show real state of ring buffer enabled
1090  * @tr : the trace array to know if ring buffer is enabled
1091  *
1092  * Shows real state of the ring buffer if it is enabled or not.
1093  */
1094 bool tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096         if (tr->trace_buffer.buffer)
1097                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098         return !tr->buffer_disabled;
1099 }
1100 
1101 /**
1102  * tracing_is_on - show state of ring buffers enabled
1103  */
1104 int tracing_is_on(void)
1105 {
1106         return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109 
1110 static int __init set_buf_size(char *str)
1111 {
1112         unsigned long buf_size;
1113 
1114         if (!str)
1115                 return 0;
1116         buf_size = memparse(str, &str);
1117         /* nr_entries can not be zero */
1118         if (buf_size == 0)
1119                 return 0;
1120         trace_buf_size = buf_size;
1121         return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124 
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127         unsigned long threshold;
1128         int ret;
1129 
1130         if (!str)
1131                 return 0;
1132         ret = kstrtoul(str, 0, &threshold);
1133         if (ret < 0)
1134                 return 0;
1135         tracing_thresh = threshold * 1000;
1136         return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139 
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142         return nsecs / 1000;
1143 }
1144 
1145 /*
1146  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149  * of strings in the order that the evals (enum) were defined.
1150  */
1151 #undef C
1152 #define C(a, b) b
1153 
1154 /* These must match the bit positions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156         TRACE_FLAGS
1157         NULL
1158 };
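
/*
 * Illustrative expansion of the C() trick above, using a made-up flag list
 * (the real list is TRACE_FLAGS from trace.h):
 *
 *      #define EXAMPLE_FLAGS           \
 *              C(FOO, "foo"),          \
 *              C(BAR, "bar"),
 *
 *      #define C(a, b) b
 *      static const char *example_options[] = { EXAMPLE_FLAGS NULL };
 *
 * which expands to { "foo", "bar", NULL }.
 */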
1159 
1160 static struct {
1161         u64 (*func)(void);
1162         const char *name;
1163         int in_ns;              /* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165         { trace_clock_local,            "local",        1 },
1166         { trace_clock_global,           "global",       1 },
1167         { trace_clock_counter,          "counter",      0 },
1168         { trace_clock_jiffies,          "uptime",       0 },
1169         { trace_clock,                  "perf",         1 },
1170         { ktime_get_mono_fast_ns,       "mono",         1 },
1171         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1172         { ktime_get_boot_fast_ns,       "boot",         1 },
1173         ARCH_TRACE_CLOCKS
1174 };
1175 
1176 bool trace_clock_in_ns(struct trace_array *tr)
1177 {
1178         if (trace_clocks[tr->clock_id].in_ns)
1179                 return true;
1180 
1181         return false;
1182 }
1183 
1184 /*
1185  * trace_parser_get_init - gets the buffer for trace parser
1186  */
1187 int trace_parser_get_init(struct trace_parser *parser, int size)
1188 {
1189         memset(parser, 0, sizeof(*parser));
1190 
1191         parser->buffer = kmalloc(size, GFP_KERNEL);
1192         if (!parser->buffer)
1193                 return 1;
1194 
1195         parser->size = size;
1196         return 0;
1197 }
1198 
1199 /*
1200  * trace_parser_put - frees the buffer for trace parser
1201  */
1202 void trace_parser_put(struct trace_parser *parser)
1203 {
1204         kfree(parser->buffer);
1205         parser->buffer = NULL;
1206 }
1207 
1208 /*
1209  * trace_get_user - reads the user input string separated by space
1210  * (matched by isspace(ch))
1211  *
1212  * For each string found, the 'struct trace_parser' is updated,
1213  * and the function returns.
1214  *
1215  * Returns number of bytes read.
1216  *
1217  * See kernel/trace/trace.h for 'struct trace_parser' details.
1218  */
1219 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1220         size_t cnt, loff_t *ppos)
1221 {
1222         char ch;
1223         size_t read = 0;
1224         ssize_t ret;
1225 
1226         if (!*ppos)
1227                 trace_parser_clear(parser);
1228 
1229         ret = get_user(ch, ubuf++);
1230         if (ret)
1231                 goto out;
1232 
1233         read++;
1234         cnt--;
1235 
1236         /*
1237          * If the parser is not finished with the last write,
1238          * continue reading the user input without skipping spaces.
1239          */
1240         if (!parser->cont) {
1241                 /* skip white space */
1242                 while (cnt && isspace(ch)) {
1243                         ret = get_user(ch, ubuf++);
1244                         if (ret)
1245                                 goto out;
1246                         read++;
1247                         cnt--;
1248                 }
1249 
1250                 parser->idx = 0;
1251 
1252                 /* only spaces were written */
1253                 if (isspace(ch) || !ch) {
1254                         *ppos += read;
1255                         ret = read;
1256                         goto out;
1257                 }
1258         }
1259 
1260         /* read the non-space input */
1261         while (cnt && !isspace(ch) && ch) {
1262                 if (parser->idx < parser->size - 1)
1263                         parser->buffer[parser->idx++] = ch;
1264                 else {
1265                         ret = -EINVAL;
1266                         goto out;
1267                 }
1268                 ret = get_user(ch, ubuf++);
1269                 if (ret)
1270                         goto out;
1271                 read++;
1272                 cnt--;
1273         }
1274 
1275         /* We either got finished input or we have to wait for another call. */
1276         if (isspace(ch) || !ch) {
1277                 parser->buffer[parser->idx] = 0;
1278                 parser->cont = false;
1279         } else if (parser->idx < parser->size - 1) {
1280                 parser->cont = true;
1281                 parser->buffer[parser->idx++] = ch;
1282                 /* Make sure the parsed string always terminates with '\0'. */
1283                 parser->buffer[parser->idx] = 0;
1284         } else {
1285                 ret = -EINVAL;
1286                 goto out;
1287         }
1288 
1289         *ppos += read;
1290         ret = read;
1291 
1292 out:
1293         return ret;
1294 }
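
/*
 * Worked example (illustrative): for user input "foo bar\n", the first call
 * consumes "foo " (four bytes), stores "foo\0" in parser->buffer, clears
 * parser->cont and returns 4; the next call does the same for "bar\n". If a
 * token is cut off because @cnt runs out, parser->cont is set so a later
 * call continues the same token.
 */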
1295 
1296 /* TODO add a seq_buf_to_buffer() */
1297 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1298 {
1299         int len;
1300 
1301         if (trace_seq_used(s) <= s->seq.readpos)
1302                 return -EBUSY;
1303 
1304         len = trace_seq_used(s) - s->seq.readpos;
1305         if (cnt > len)
1306                 cnt = len;
1307         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1308 
1309         s->seq.readpos += cnt;
1310         return cnt;
1311 }
1312 
1313 unsigned long __read_mostly     tracing_thresh;
1314 
1315 #ifdef CONFIG_TRACER_MAX_TRACE
1316 /*
1317  * Copy the new maximum trace into the separate maximum-trace
1318  * structure. (this way the maximum trace is permanently saved,
1319  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1320  */
1321 static void
1322 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1323 {
1324         struct trace_buffer *trace_buf = &tr->trace_buffer;
1325         struct trace_buffer *max_buf = &tr->max_buffer;
1326         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1327         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1328 
1329         max_buf->cpu = cpu;
1330         max_buf->time_start = data->preempt_timestamp;
1331 
1332         max_data->saved_latency = tr->max_latency;
1333         max_data->critical_start = data->critical_start;
1334         max_data->critical_end = data->critical_end;
1335 
1336         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1337         max_data->pid = tsk->pid;
1338         /*
1339          * If tsk == current, then use current_uid(), as that does not use
1340          * RCU. The irq tracer can be called out of RCU scope.
1341          */
1342         if (tsk == current)
1343                 max_data->uid = current_uid();
1344         else
1345                 max_data->uid = task_uid(tsk);
1346 
1347         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1348         max_data->policy = tsk->policy;
1349         max_data->rt_priority = tsk->rt_priority;
1350 
1351         /* record this task's comm */
1352         tracing_record_cmdline(tsk);
1353 }
1354 
1355 /**
1356  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1357  * @tr: tracer
1358  * @tsk: the task with the latency
1359  * @cpu: The cpu that initiated the trace.
1360  *
1361  * Flip the buffers between the @tr and the max_tr and record information
1362  * about which task was the cause of this latency.
1363  */
1364 void
1365 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1366 {
1367         if (tr->stop_count)
1368                 return;
1369 
1370         WARN_ON_ONCE(!irqs_disabled());
1371 
1372         if (!tr->allocated_snapshot) {
1373                 /* Only the nop tracer should hit this when disabling */
1374                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1375                 return;
1376         }
1377 
1378         arch_spin_lock(&tr->max_lock);
1379 
1380         /* Inherit the recordable setting from trace_buffer */
1381         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1382                 ring_buffer_record_on(tr->max_buffer.buffer);
1383         else
1384                 ring_buffer_record_off(tr->max_buffer.buffer);
1385 
1386         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1387 
1388         __update_max_tr(tr, tsk, cpu);
1389         arch_spin_unlock(&tr->max_lock);
1390 }
1391 
1392 /**
1393  * update_max_tr_single - only copy one trace over, and reset the rest
1394  * @tr: tracer
1395  * @tsk: task with the latency
1396  * @cpu: the cpu of the buffer to copy.
1397  *
1398  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1399  */
1400 void
1401 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1402 {
1403         int ret;
1404 
1405         if (tr->stop_count)
1406                 return;
1407 
1408         WARN_ON_ONCE(!irqs_disabled());
1409         if (!tr->allocated_snapshot) {
1410                 /* Only the nop tracer should hit this when disabling */
1411                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1412                 return;
1413         }
1414 
1415         arch_spin_lock(&tr->max_lock);
1416 
1417         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1418 
1419         if (ret == -EBUSY) {
1420                 /*
1421                  * We failed to swap the buffer due to a commit taking
1422                  * place on this CPU. We fail to record, but we reset
1423                  * the max trace buffer (no one writes directly to it)
1424                  * and flag that it failed.
1425                  */
1426                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1427                         "Failed to swap buffers due to commit in progress\n");
1428         }
1429 
1430         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1431 
1432         __update_max_tr(tr, tsk, cpu);
1433         arch_spin_unlock(&tr->max_lock);
1434 }
1435 #endif /* CONFIG_TRACER_MAX_TRACE */
1436 
1437 static int wait_on_pipe(struct trace_iterator *iter, int full)
1438 {
1439         /* Iterators are static, they should be filled or empty */
1440         if (trace_buffer_iter(iter, iter->cpu_file))
1441                 return 0;
1442 
1443         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1444                                 full);
1445 }
1446 
1447 #ifdef CONFIG_FTRACE_STARTUP_TEST
1448 static bool selftests_can_run;
1449 
1450 struct trace_selftests {
1451         struct list_head                list;
1452         struct tracer                   *type;
1453 };
1454 
1455 static LIST_HEAD(postponed_selftests);
1456 
1457 static int save_selftest(struct tracer *type)
1458 {
1459         struct trace_selftests *selftest;
1460 
1461         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1462         if (!selftest)
1463                 return -ENOMEM;
1464 
1465         selftest->type = type;
1466         list_add(&selftest->list, &postponed_selftests);
1467         return 0;
1468 }
1469 
1470 static int run_tracer_selftest(struct tracer *type)
1471 {
1472         struct trace_array *tr = &global_trace;
1473         struct tracer *saved_tracer = tr->current_trace;
1474         int ret;
1475 
1476         if (!type->selftest || tracing_selftest_disabled)
1477                 return 0;
1478 
1479         /*
1480          * If a tracer registers early in boot up (before scheduling is
1481          * initialized and such), then do not run its selftests yet.
1482          * Instead, run it a little later in the boot process.
1483          */
1484         if (!selftests_can_run)
1485                 return save_selftest(type);
1486 
1487         /*
1488          * Run a selftest on this tracer.
1489          * Here we reset the trace buffer, and set the current
1490          * tracer to be this tracer. The tracer can then run some
1491          * internal tracing to verify that everything is in order.
1492          * If we fail, we do not register this tracer.
1493          */
1494         tracing_reset_online_cpus(&tr->trace_buffer);
1495 
1496         tr->current_trace = type;
1497 
1498 #ifdef CONFIG_TRACER_MAX_TRACE
1499         if (type->use_max_tr) {
1500                 /* If we expanded the buffers, make sure the max is expanded too */
1501                 if (ring_buffer_expanded)
1502                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1503                                            RING_BUFFER_ALL_CPUS);
1504                 tr->allocated_snapshot = true;
1505         }
1506 #endif
1507 
1508         /* the test is responsible for initializing and enabling */
1509         pr_info("Testing tracer %s: ", type->name);
1510         ret = type->selftest(type, tr);
1511         /* the test is responsible for resetting too */
1512         tr->current_trace = saved_tracer;
1513         if (ret) {
1514                 printk(KERN_CONT "FAILED!\n");
1515                 /* Add the warning after printing 'FAILED' */
1516                 WARN_ON(1);
1517                 return -1;
1518         }
1519         /* Only reset on passing, to avoid touching corrupted buffers */
1520         tracing_reset_online_cpus(&tr->trace_buffer);
1521 
1522 #ifdef CONFIG_TRACER_MAX_TRACE
1523         if (type->use_max_tr) {
1524                 tr->allocated_snapshot = false;
1525 
1526                 /* Shrink the max buffer again */
1527                 if (ring_buffer_expanded)
1528                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1529                                            RING_BUFFER_ALL_CPUS);
1530         }
1531 #endif
1532 
1533         printk(KERN_CONT "PASSED\n");
1534         return 0;
1535 }
1536 
1537 static __init int init_trace_selftests(void)
1538 {
1539         struct trace_selftests *p, *n;
1540         struct tracer *t, **last;
1541         int ret;
1542 
1543         selftests_can_run = true;
1544 
1545         mutex_lock(&trace_types_lock);
1546 
1547         if (list_empty(&postponed_selftests))
1548                 goto out;
1549 
1550         pr_info("Running postponed tracer tests:\n");
1551 
1552         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1553                 ret = run_tracer_selftest(p->type);
1554                 /* If the test fails, then warn and remove from available_tracers */
1555                 if (ret < 0) {
1556                         WARN(1, "tracer: %s failed selftest, disabling\n",
1557                              p->type->name);
1558                         last = &trace_types;
1559                         for (t = trace_types; t; t = t->next) {
1560                                 if (t == p->type) {
1561                                         *last = t->next;
1562                                         break;
1563                                 }
1564                                 last = &t->next;
1565                         }
1566                 }
1567                 list_del(&p->list);
1568                 kfree(p);
1569         }
1570 
1571  out:
1572         mutex_unlock(&trace_types_lock);
1573 
1574         return 0;
1575 }
1576 core_initcall(init_trace_selftests);
1577 #else
1578 static inline int run_tracer_selftest(struct tracer *type)
1579 {
1580         return 0;
1581 }
1582 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1583 
1584 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1585 
1586 static void __init apply_trace_boot_options(void);
1587 
1588 /**
1589  * register_tracer - register a tracer with the ftrace system.
1590  * @type: the plugin for the tracer
1591  *
1592  * Register a new plugin tracer.
1593  */
1594 int __init register_tracer(struct tracer *type)
1595 {
1596         struct tracer *t;
1597         int ret = 0;
1598 
1599         if (!type->name) {
1600                 pr_info("Tracer must have a name\n");
1601                 return -1;
1602         }
1603 
1604         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1605                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1606                 return -1;
1607         }
1608 
1609         mutex_lock(&trace_types_lock);
1610 
1611         tracing_selftest_running = true;
1612 
1613         for (t = trace_types; t; t = t->next) {
1614                 if (strcmp(type->name, t->name) == 0) {
1615                         /* already found */
1616                         pr_info("Tracer %s already registered\n",
1617                                 type->name);
1618                         ret = -1;
1619                         goto out;
1620                 }
1621         }
1622 
1623         if (!type->set_flag)
1624                 type->set_flag = &dummy_set_flag;
1625         if (!type->flags) {
1626                 /* allocate a dummy tracer_flags */
1627                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1628                 if (!type->flags) {
1629                         ret = -ENOMEM;
1630                         goto out;
1631                 }
1632                 type->flags->val = 0;
1633                 type->flags->opts = dummy_tracer_opt;
1634         } else
1635                 if (!type->flags->opts)
1636                         type->flags->opts = dummy_tracer_opt;
1637 
1638         /* store the tracer for __set_tracer_option */
1639         type->flags->trace = type;
1640 
1641         ret = run_tracer_selftest(type);
1642         if (ret < 0)
1643                 goto out;
1644 
1645         type->next = trace_types;
1646         trace_types = type;
1647         add_tracer_options(&global_trace, type);
1648 
1649  out:
1650         tracing_selftest_running = false;
1651         mutex_unlock(&trace_types_lock);
1652 
1653         if (ret || !default_bootup_tracer)
1654                 goto out_unlock;
1655 
1656         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1657                 goto out_unlock;
1658 
1659         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1660         /* Do we want this tracer to start on bootup? */
1661         tracing_set_tracer(&global_trace, type->name);
1662         default_bootup_tracer = NULL;
1663 
1664         apply_trace_boot_options();
1665 
1666         /* disable other selftests, since this will break it. */
1667         tracing_selftest_disabled = true;
1668 #ifdef CONFIG_FTRACE_STARTUP_TEST
1669         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1670                type->name);
1671 #endif
1672 
1673  out_unlock:
1674         return ret;
1675 }
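
/*
 * Illustrative sketch (not part of this file): the usual way a built-in
 * tracer plugin registers itself, modeled on trace_nop.c.  The field
 * names (.name, .init, .reset) come from struct tracer in trace.h; the
 * "example" identifiers below are made up.  Note that register_tracer()
 * is __init, so this only works for built-in code, not loadable modules.
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */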
1676 
1677 void tracing_reset(struct trace_buffer *buf, int cpu)
1678 {
1679         struct ring_buffer *buffer = buf->buffer;
1680 
1681         if (!buffer)
1682                 return;
1683 
1684         ring_buffer_record_disable(buffer);
1685 
1686         /* Make sure all commits have finished */
1687         synchronize_rcu();
1688         ring_buffer_reset_cpu(buffer, cpu);
1689 
1690         ring_buffer_record_enable(buffer);
1691 }
1692 
1693 void tracing_reset_online_cpus(struct trace_buffer *buf)
1694 {
1695         struct ring_buffer *buffer = buf->buffer;
1696         int cpu;
1697 
1698         if (!buffer)
1699                 return;
1700 
1701         ring_buffer_record_disable(buffer);
1702 
1703         /* Make sure all commits have finished */
1704         synchronize_rcu();
1705 
1706         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1707 
1708         for_each_online_cpu(cpu)
1709                 ring_buffer_reset_cpu(buffer, cpu);
1710 
1711         ring_buffer_record_enable(buffer);
1712 }
1713 
1714 /* Must have trace_types_lock held */
1715 void tracing_reset_all_online_cpus(void)
1716 {
1717         struct trace_array *tr;
1718 
1719         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1720                 if (!tr->clear_trace)
1721                         continue;
1722                 tr->clear_trace = false;
1723                 tracing_reset_online_cpus(&tr->trace_buffer);
1724 #ifdef CONFIG_TRACER_MAX_TRACE
1725                 tracing_reset_online_cpus(&tr->max_buffer);
1726 #endif
1727         }
1728 }
1729 
1730 static int *tgid_map;
1731 
1732 #define SAVED_CMDLINES_DEFAULT 128
1733 #define NO_CMDLINE_MAP UINT_MAX
1734 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1735 struct saved_cmdlines_buffer {
1736         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1737         unsigned *map_cmdline_to_pid;
1738         unsigned cmdline_num;
1739         int cmdline_idx;
1740         char *saved_cmdlines;
1741 };
1742 static struct saved_cmdlines_buffer *savedcmd;
1743 
1744 /* temporarily disable recording */
1745 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1746 
1747 static inline char *get_saved_cmdlines(int idx)
1748 {
1749         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1750 }
1751 
1752 static inline void set_cmdline(int idx, const char *cmdline)
1753 {
1754         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1755 }
1756 
1757 static int allocate_cmdlines_buffer(unsigned int val,
1758                                     struct saved_cmdlines_buffer *s)
1759 {
1760         s->map_cmdline_to_pid = kmalloc_array(val,
1761                                               sizeof(*s->map_cmdline_to_pid),
1762                                               GFP_KERNEL);
1763         if (!s->map_cmdline_to_pid)
1764                 return -ENOMEM;
1765 
1766         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1767         if (!s->saved_cmdlines) {
1768                 kfree(s->map_cmdline_to_pid);
1769                 return -ENOMEM;
1770         }
1771 
1772         s->cmdline_idx = 0;
1773         s->cmdline_num = val;
1774         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1775                sizeof(s->map_pid_to_cmdline));
1776         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1777                val * sizeof(*s->map_cmdline_to_pid));
1778 
1779         return 0;
1780 }
1781 
1782 static int trace_create_savedcmd(void)
1783 {
1784         int ret;
1785 
1786         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1787         if (!savedcmd)
1788                 return -ENOMEM;
1789 
1790         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1791         if (ret < 0) {
1792                 kfree(savedcmd);
1793                 savedcmd = NULL;
1794                 return -ENOMEM;
1795         }
1796 
1797         return 0;
1798 }
1799 
1800 int is_tracing_stopped(void)
1801 {
1802         return global_trace.stop_count;
1803 }
1804 
1805 /**
1806  * tracing_start - quick start of the tracer
1807  *
1808  * If tracing is enabled but was stopped by tracing_stop,
1809  * this will start the tracer back up.
1810  */
1811 void tracing_start(void)
1812 {
1813         struct ring_buffer *buffer;
1814         unsigned long flags;
1815 
1816         if (tracing_disabled)
1817                 return;
1818 
1819         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1820         if (--global_trace.stop_count) {
1821                 if (global_trace.stop_count < 0) {
1822                         /* Someone screwed up their debugging */
1823                         WARN_ON_ONCE(1);
1824                         global_trace.stop_count = 0;
1825                 }
1826                 goto out;
1827         }
1828 
1829         /* Prevent the buffers from switching */
1830         arch_spin_lock(&global_trace.max_lock);
1831 
1832         buffer = global_trace.trace_buffer.buffer;
1833         if (buffer)
1834                 ring_buffer_record_enable(buffer);
1835 
1836 #ifdef CONFIG_TRACER_MAX_TRACE
1837         buffer = global_trace.max_buffer.buffer;
1838         if (buffer)
1839                 ring_buffer_record_enable(buffer);
1840 #endif
1841 
1842         arch_spin_unlock(&global_trace.max_lock);
1843 
1844  out:
1845         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1846 }
1847 
1848 static void tracing_start_tr(struct trace_array *tr)
1849 {
1850         struct ring_buffer *buffer;
1851         unsigned long flags;
1852 
1853         if (tracing_disabled)
1854                 return;
1855 
1856         /* If global, we need to also start the max tracer */
1857         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1858                 return tracing_start();
1859 
1860         raw_spin_lock_irqsave(&tr->start_lock, flags);
1861 
1862         if (--tr->stop_count) {
1863                 if (tr->stop_count < 0) {
1864                         /* Someone screwed up their debugging */
1865                         WARN_ON_ONCE(1);
1866                         tr->stop_count = 0;
1867                 }
1868                 goto out;
1869         }
1870 
1871         buffer = tr->trace_buffer.buffer;
1872         if (buffer)
1873                 ring_buffer_record_enable(buffer);
1874 
1875  out:
1876         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1877 }
1878 
1879 /**
1880  * tracing_stop - quick stop of the tracer
1881  *
1882  * Light weight way to stop tracing. Use in conjunction with
1883  * tracing_start.
1884  */
1885 void tracing_stop(void)
1886 {
1887         struct ring_buffer *buffer;
1888         unsigned long flags;
1889 
1890         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1891         if (global_trace.stop_count++)
1892                 goto out;
1893 
1894         /* Prevent the buffers from switching */
1895         arch_spin_lock(&global_trace.max_lock);
1896 
1897         buffer = global_trace.trace_buffer.buffer;
1898         if (buffer)
1899                 ring_buffer_record_disable(buffer);
1900 
1901 #ifdef CONFIG_TRACER_MAX_TRACE
1902         buffer = global_trace.max_buffer.buffer;
1903         if (buffer)
1904                 ring_buffer_record_disable(buffer);
1905 #endif
1906 
1907         arch_spin_unlock(&global_trace.max_lock);
1908 
1909  out:
1910         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1911 }
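
/*
 * Usage sketch (illustrative only): tracing_stop() and tracing_start()
 * are meant to be paired and nest via stop_count, e.g. around a section
 * whose events should not be recorded:
 *
 *	tracing_stop();
 *	do_something_noisy();		// hypothetical helper
 *	tracing_start();
 *
 * Only the outermost stop/start pair actually disables and re-enables
 * recording in the ring buffer; inner pairs just adjust the count.
 */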
1912 
1913 static void tracing_stop_tr(struct trace_array *tr)
1914 {
1915         struct ring_buffer *buffer;
1916         unsigned long flags;
1917 
1918         /* If global, we need to also stop the max tracer */
1919         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1920                 return tracing_stop();
1921 
1922         raw_spin_lock_irqsave(&tr->start_lock, flags);
1923         if (tr->stop_count++)
1924                 goto out;
1925 
1926         buffer = tr->trace_buffer.buffer;
1927         if (buffer)
1928                 ring_buffer_record_disable(buffer);
1929 
1930  out:
1931         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1932 }
1933 
1934 static int trace_save_cmdline(struct task_struct *tsk)
1935 {
1936         unsigned pid, idx;
1937 
1938         /* treat recording of idle task as a success */
1939         if (!tsk->pid)
1940                 return 1;
1941 
1942         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1943                 return 0;
1944 
1945         /*
1946          * It's not the end of the world if we don't get
1947          * the lock, but we also don't want to spin
1948          * nor do we want to disable interrupts,
1949          * so if we miss here, then better luck next time.
1950          */
1951         if (!arch_spin_trylock(&trace_cmdline_lock))
1952                 return 0;
1953 
1954         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1955         if (idx == NO_CMDLINE_MAP) {
1956                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1957 
1958                 /*
1959                  * Check whether the cmdline buffer at idx has a pid
1960                  * mapped. We are going to overwrite that entry so we
1961                  * need to clear the map_pid_to_cmdline. Otherwise we
1962                  * would read the new comm for the old pid.
1963                  */
1964                 pid = savedcmd->map_cmdline_to_pid[idx];
1965                 if (pid != NO_CMDLINE_MAP)
1966                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1967 
1968                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1969                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1970 
1971                 savedcmd->cmdline_idx = idx;
1972         }
1973 
1974         set_cmdline(idx, tsk->comm);
1975 
1976         arch_spin_unlock(&trace_cmdline_lock);
1977 
1978         return 1;
1979 }
1980 
1981 static void __trace_find_cmdline(int pid, char comm[])
1982 {
1983         unsigned map;
1984 
1985         if (!pid) {
1986                 strcpy(comm, "<idle>");
1987                 return;
1988         }
1989 
1990         if (WARN_ON_ONCE(pid < 0)) {
1991                 strcpy(comm, "<XXX>");
1992                 return;
1993         }
1994 
1995         if (pid > PID_MAX_DEFAULT) {
1996                 strcpy(comm, "<...>");
1997                 return;
1998         }
1999 
2000         map = savedcmd->map_pid_to_cmdline[pid];
2001         if (map != NO_CMDLINE_MAP)
2002                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2003         else
2004                 strcpy(comm, "<...>");
2005 }
2006 
2007 void trace_find_cmdline(int pid, char comm[])
2008 {
2009         preempt_disable();
2010         arch_spin_lock(&trace_cmdline_lock);
2011 
2012         __trace_find_cmdline(pid, comm);
2013 
2014         arch_spin_unlock(&trace_cmdline_lock);
2015         preempt_enable();
2016 }
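
/*
 * Usage sketch (illustrative): callers pass a buffer of at least
 * TASK_COMM_LEN bytes, the way the trace output code resolves a pid
 * recorded in an event back to a command name.  The format string here
 * is only an example:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%8.8s-%-5d", comm, entry->pid);
 */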
2017 
2018 int trace_find_tgid(int pid)
2019 {
2020         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2021                 return 0;
2022 
2023         return tgid_map[pid];
2024 }
2025 
2026 static int trace_save_tgid(struct task_struct *tsk)
2027 {
2028         /* treat recording of idle task as a success */
2029         if (!tsk->pid)
2030                 return 1;
2031 
2032         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2033                 return 0;
2034 
2035         tgid_map[tsk->pid] = tsk->tgid;
2036         return 1;
2037 }
2038 
2039 static bool tracing_record_taskinfo_skip(int flags)
2040 {
2041         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2042                 return true;
2043         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2044                 return true;
2045         if (!__this_cpu_read(trace_taskinfo_save))
2046                 return true;
2047         return false;
2048 }
2049 
2050 /**
2051  * tracing_record_taskinfo - record the task info of a task
2052  *
2053  * @task:  task to record
2054  * @flags: TRACE_RECORD_CMDLINE for recording comm
2055  *         TRACE_RECORD_TGID for recording tgid
2056  */
2057 void tracing_record_taskinfo(struct task_struct *task, int flags)
2058 {
2059         bool done;
2060 
2061         if (tracing_record_taskinfo_skip(flags))
2062                 return;
2063 
2064         /*
2065          * Record as much task information as possible. If some fail, continue
2066          * to try to record the others.
2067          */
2068         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2069         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2070 
2071         /* If recording any information failed, retry again soon. */
2072         if (!done)
2073                 return;
2074 
2075         __this_cpu_write(trace_taskinfo_save, false);
2076 }
2077 
2078 /**
2079  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2080  *
2081  * @prev:  previous task during sched_switch
2082  * @next:  next task during sched_switch
2083  * @flags: TRACE_RECORD_CMDLINE for recording comm
2084  *         TRACE_RECORD_TGID for recording tgid
2085  */
2086 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2087                                           struct task_struct *next, int flags)
2088 {
2089         bool done;
2090 
2091         if (tracing_record_taskinfo_skip(flags))
2092                 return;
2093 
2094         /*
2095          * Record as much task information as possible. If some fail, continue
2096          * to try to record the others.
2097          */
2098         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2099         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2100         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2101         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2102 
2103         /* If recording any information failed, retry again soon. */
2104         if (!done)
2105                 return;
2106 
2107         __this_cpu_write(trace_taskinfo_save, false);
2108 }
2109 
2110 /* Helpers to record specific task information */
2111 void tracing_record_cmdline(struct task_struct *task)
2112 {
2113         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2114 }
2115 
2116 void tracing_record_tgid(struct task_struct *task)
2117 {
2118         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2119 }
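
/*
 * Usage sketch (illustrative): a hook that wants both pieces of task
 * info saved for the current task can combine the flags, mirroring how
 * tracing_record_taskinfo_sched_switch() above handles both tasks when
 * both flags are requested:
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 */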
2120 
2121 /*
2122  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2123  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2124  * simplifies those functions and keeps them in sync.
2125  */
2126 enum print_line_t trace_handle_return(struct trace_seq *s)
2127 {
2128         return trace_seq_has_overflowed(s) ?
2129                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2130 }
2131 EXPORT_SYMBOL_GPL(trace_handle_return);
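
/*
 * Usage sketch (illustrative): a trace_event print handler writes into
 * iter->seq and lets this helper report overflow.  The function and
 * event are made up; the pattern matches the handlers in trace_output.c:
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example event\n");
 *		return trace_handle_return(s);
 *	}
 */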
2132 
2133 void
2134 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2135                              int pc)
2136 {
2137         struct task_struct *tsk = current;
2138 
2139         entry->preempt_count            = pc & 0xff;
2140         entry->pid                      = (tsk) ? tsk->pid : 0;
2141         entry->flags =
2142 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2143                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2144 #else
2145                 TRACE_FLAG_IRQS_NOSUPPORT |
2146 #endif
2147                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2148                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2149                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2150                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2151                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2152 }
2153 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2154 
2155 struct ring_buffer_event *
2156 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2157                           int type,
2158                           unsigned long len,
2159                           unsigned long flags, int pc)
2160 {
2161         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2162 }
2163 
2164 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2165 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2166 static int trace_buffered_event_ref;
2167 
2168 /**
2169  * trace_buffered_event_enable - enable buffering events
2170  *
2171  * When events are being filtered, it is quicker to use a temporary
2172  * buffer to write the event data into if there's a likely chance
2173  * that it will not be committed. The discard of the ring buffer
2174  * is not as fast as committing, and is much slower than copying
2175  * a commit.
2176  *
2177  * When an event is to be filtered, allocate per cpu buffers to
2178  * write the event data into, and if the event is filtered and discarded
2179  * it is simply dropped, otherwise, the entire data is to be committed
2180  * in one shot.
2181  */
2182 void trace_buffered_event_enable(void)
2183 {
2184         struct ring_buffer_event *event;
2185         struct page *page;
2186         int cpu;
2187 
2188         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2189 
2190         if (trace_buffered_event_ref++)
2191                 return;
2192 
2193         for_each_tracing_cpu(cpu) {
2194                 page = alloc_pages_node(cpu_to_node(cpu),
2195                                         GFP_KERNEL | __GFP_NORETRY, 0);
2196                 if (!page)
2197                         goto failed;
2198 
2199                 event = page_address(page);
2200                 memset(event, 0, sizeof(*event));
2201 
2202                 per_cpu(trace_buffered_event, cpu) = event;
2203 
2204                 preempt_disable();
2205                 if (cpu == smp_processor_id() &&
2206                     this_cpu_read(trace_buffered_event) !=
2207                     per_cpu(trace_buffered_event, cpu))
2208                         WARN_ON_ONCE(1);
2209                 preempt_enable();
2210         }
2211 
2212         return;
2213  failed:
2214         trace_buffered_event_disable();
2215 }
2216 
2217 static void enable_trace_buffered_event(void *data)
2218 {
2219         /* Probably not needed, but do it anyway */
2220         smp_rmb();
2221         this_cpu_dec(trace_buffered_event_cnt);
2222 }
2223 
2224 static void disable_trace_buffered_event(void *data)
2225 {
2226         this_cpu_inc(trace_buffered_event_cnt);
2227 }
2228 
2229 /**
2230  * trace_buffered_event_disable - disable buffering events
2231  *
2232  * When a filter is removed, it is faster to not use the buffered
2233  * events, and to commit directly into the ring buffer. Free up
2234  * the temp buffers when there are no more users. This requires
2235  * special synchronization with current events.
2236  */
2237 void trace_buffered_event_disable(void)
2238 {
2239         int cpu;
2240 
2241         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2242 
2243         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2244                 return;
2245 
2246         if (--trace_buffered_event_ref)
2247                 return;
2248 
2249         preempt_disable();
2250         /* For each CPU, set the buffer as used. */
2251         smp_call_function_many(tracing_buffer_mask,
2252                                disable_trace_buffered_event, NULL, 1);
2253         preempt_enable();
2254 
2255         /* Wait for all current users to finish */
2256         synchronize_rcu();
2257 
2258         for_each_tracing_cpu(cpu) {
2259                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2260                 per_cpu(trace_buffered_event, cpu) = NULL;
2261         }
2262         /*
2263          * Make sure trace_buffered_event is NULL before clearing
2264          * trace_buffered_event_cnt.
2265          */
2266         smp_wmb();
2267 
2268         preempt_disable();
2269         /* Do the work on each cpu */
2270         smp_call_function_many(tracing_buffer_mask,
2271                                enable_trace_buffered_event, NULL, 1);
2272         preempt_enable();
2273 }
2274 
2275 static struct ring_buffer *temp_buffer;
2276 
2277 struct ring_buffer_event *
2278 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2279                           struct trace_event_file *trace_file,
2280                           int type, unsigned long len,
2281                           unsigned long flags, int pc)
2282 {
2283         struct ring_buffer_event *entry;
2284         int val;
2285 
2286         *current_rb = trace_file->tr->trace_buffer.buffer;
2287 
2288         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2289              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2290             (entry = this_cpu_read(trace_buffered_event))) {
2291                 /* Try to use the per cpu buffer first */
2292                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2293                 if (val == 1) {
2294                         trace_event_setup(entry, type, flags, pc);
2295                         entry->array[0] = len;
2296                         return entry;
2297                 }
2298                 this_cpu_dec(trace_buffered_event_cnt);
2299         }
2300 
2301         entry = __trace_buffer_lock_reserve(*current_rb,
2302                                             type, len, flags, pc);
2303         /*
2304          * If tracing is off, but we have triggers enabled
2305          * we still need to look at the event data. Use the temp_buffer
2306          * to store the trace event for the trigger to use. It's recursion
2307          * safe and will not be recorded anywhere.
2308          */
2309         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2310                 *current_rb = temp_buffer;
2311                 entry = __trace_buffer_lock_reserve(*current_rb,
2312                                                     type, len, flags, pc);
2313         }
2314         return entry;
2315 }
2316 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2317 
2318 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2319 static DEFINE_MUTEX(tracepoint_printk_mutex);
2320 
2321 static void output_printk(struct trace_event_buffer *fbuffer)
2322 {
2323         struct trace_event_call *event_call;
2324         struct trace_event *event;
2325         unsigned long flags;
2326         struct trace_iterator *iter = tracepoint_print_iter;
2327 
2328         /* We should never get here if iter is NULL */
2329         if (WARN_ON_ONCE(!iter))
2330                 return;
2331 
2332         event_call = fbuffer->trace_file->event_call;
2333         if (!event_call || !event_call->event.funcs ||
2334             !event_call->event.funcs->trace)
2335                 return;
2336 
2337         event = &fbuffer->trace_file->event_call->event;
2338 
2339         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2340         trace_seq_init(&iter->seq);
2341         iter->ent = fbuffer->entry;
2342         event_call->event.funcs->trace(iter, 0, event);
2343         trace_seq_putc(&iter->seq, 0);
2344         printk("%s", iter->seq.buffer);
2345 
2346         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2347 }
2348 
2349 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2350                              void __user *buffer, size_t *lenp,
2351                              loff_t *ppos)
2352 {
2353         int save_tracepoint_printk;
2354         int ret;
2355 
2356         mutex_lock(&tracepoint_printk_mutex);
2357         save_tracepoint_printk = tracepoint_printk;
2358 
2359         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2360 
2361         /*
2362          * This will force exiting early, as tracepoint_printk
2363          * is always zero when tracepoint_print_iter is not allocated.
2364          */
2365         if (!tracepoint_print_iter)
2366                 tracepoint_printk = 0;
2367 
2368         if (save_tracepoint_printk == tracepoint_printk)
2369                 goto out;
2370 
2371         if (tracepoint_printk)
2372                 static_key_enable(&tracepoint_printk_key.key);
2373         else
2374                 static_key_disable(&tracepoint_printk_key.key);
2375 
2376  out:
2377         mutex_unlock(&tracepoint_printk_mutex);
2378 
2379         return ret;
2380 }
2381 
2382 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2383 {
2384         if (static_key_false(&tracepoint_printk_key.key))
2385                 output_printk(fbuffer);
2386 
2387         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2388                                     fbuffer->event, fbuffer->entry,
2389                                     fbuffer->flags, fbuffer->pc);
2390 }
2391 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2392 
2393 /*
2394  * Skip 3:
2395  *
2396  *   trace_buffer_unlock_commit_regs()
2397  *   trace_event_buffer_commit()
2398  *   trace_event_raw_event_xxx()
2399  */
2400 # define STACK_SKIP 3
2401 
2402 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2403                                      struct ring_buffer *buffer,
2404                                      struct ring_buffer_event *event,
2405                                      unsigned long flags, int pc,
2406                                      struct pt_regs *regs)
2407 {
2408         __buffer_unlock_commit(buffer, event);
2409 
2410         /*
2411          * If regs is not set, then skip the necessary functions.
2412          * Note, we can still get here via blktrace, wakeup tracer
2413          * and mmiotrace, but that's ok if they lose a function or
2414          * two. They are not that meaningful.
2415          */
2416         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2417         ftrace_trace_userstack(buffer, flags, pc);
2418 }
2419 
2420 /*
2421  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2422  */
2423 void
2424 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2425                                    struct ring_buffer_event *event)
2426 {
2427         __buffer_unlock_commit(buffer, event);
2428 }
2429 
2430 static void
2431 trace_process_export(struct trace_export *export,
2432                struct ring_buffer_event *event)
2433 {
2434         struct trace_entry *entry;
2435         unsigned int size = 0;
2436 
2437         entry = ring_buffer_event_data(event);
2438         size = ring_buffer_event_length(event);
2439         export->write(export, entry, size);
2440 }
2441 
2442 static DEFINE_MUTEX(ftrace_export_lock);
2443 
2444 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2445 
2446 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2447 
2448 static inline void ftrace_exports_enable(void)
2449 {
2450         static_branch_enable(&ftrace_exports_enabled);
2451 }
2452 
2453 static inline void ftrace_exports_disable(void)
2454 {
2455         static_branch_disable(&ftrace_exports_enabled);
2456 }
2457 
2458 static void ftrace_exports(struct ring_buffer_event *event)
2459 {
2460         struct trace_export *export;
2461 
2462         preempt_disable_notrace();
2463 
2464         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2465         while (export) {
2466                 trace_process_export(export, event);
2467                 export = rcu_dereference_raw_notrace(export->next);
2468         }
2469 
2470         preempt_enable_notrace();
2471 }
2472 
2473 static inline void
2474 add_trace_export(struct trace_export **list, struct trace_export *export)
2475 {
2476         rcu_assign_pointer(export->next, *list);
2477         /*
2478          * We are entering export into the list but another
2479          * CPU might be walking that list. We need to make sure
2480          * the export->next pointer is valid before another CPU sees
2481          * the export pointer included into the list.
2482          */
2483         rcu_assign_pointer(*list, export);
2484 }
2485 
2486 static inline int
2487 rm_trace_export(struct trace_export **list, struct trace_export *export)
2488 {
2489         struct trace_export **p;
2490 
2491         for (p = list; *p != NULL; p = &(*p)->next)
2492                 if (*p == export)
2493                         break;
2494 
2495         if (*p != export)
2496                 return -1;
2497 
2498         rcu_assign_pointer(*p, (*p)->next);
2499 
2500         return 0;
2501 }
2502 
2503 static inline void
2504 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2505 {
2506         if (*list == NULL)
2507                 ftrace_exports_enable();
2508 
2509         add_trace_export(list, export);
2510 }
2511 
2512 static inline int
2513 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2514 {
2515         int ret;
2516 
2517         ret = rm_trace_export(list, export);
2518         if (*list == NULL)
2519                 ftrace_exports_disable();
2520 
2521         return ret;
2522 }
2523 
2524 int register_ftrace_export(struct trace_export *export)
2525 {
2526         if (WARN_ON_ONCE(!export->write))
2527                 return -1;
2528 
2529         mutex_lock(&ftrace_export_lock);
2530 
2531         add_ftrace_export(&ftrace_exports_list, export);
2532 
2533         mutex_unlock(&ftrace_export_lock);
2534 
2535         return 0;
2536 }
2537 EXPORT_SYMBOL_GPL(register_ftrace_export);
2538 
2539 int unregister_ftrace_export(struct trace_export *export)
2540 {
2541         int ret;
2542 
2543         mutex_lock(&ftrace_export_lock);
2544 
2545         ret = rm_ftrace_export(&ftrace_exports_list, export);
2546 
2547         mutex_unlock(&ftrace_export_lock);
2548 
2549         return ret;
2550 }
2551 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
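
/*
 * Usage sketch (illustrative): an exporter supplies a write() callback
 * that receives each exported function-trace entry and registers it.
 * The prototype follows struct trace_export in include/linux/trace.h;
 * the "example" names are made up:
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry,
 *					 unsigned int size)
 *	{
 *		pr_debug("exported %u bytes\n", size);
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */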
2552 
2553 void
2554 trace_function(struct trace_array *tr,
2555                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2556                int pc)
2557 {
2558         struct trace_event_call *call = &event_function;
2559         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2560         struct ring_buffer_event *event;
2561         struct ftrace_entry *entry;
2562 
2563         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2564                                             flags, pc);
2565         if (!event)
2566                 return;
2567         entry   = ring_buffer_event_data(event);
2568         entry->ip                       = ip;
2569         entry->parent_ip                = parent_ip;
2570 
2571         if (!call_filter_check_discard(call, entry, buffer, event)) {
2572                 if (static_branch_unlikely(&ftrace_exports_enabled))
2573                         ftrace_exports(event);
2574                 __buffer_unlock_commit(buffer, event);
2575         }
2576 }
2577 
2578 #ifdef CONFIG_STACKTRACE
2579 
2580 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2581 struct ftrace_stack {
2582         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2583 };
2584 
2585 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2586 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2587 
2588 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2589                                  unsigned long flags,
2590                                  int skip, int pc, struct pt_regs *regs)
2591 {
2592         struct trace_event_call *call = &event_kernel_stack;
2593         struct ring_buffer_event *event;
2594         struct stack_entry *entry;
2595         struct stack_trace trace;
2596         int use_stack;
2597         int size = FTRACE_STACK_ENTRIES;
2598 
2599         trace.nr_entries        = 0;
2600         trace.skip              = skip;
2601 
2602         /*
2603          * Add one, for this function and the call to save_stack_trace().
2604          * If regs is set, then these functions will not be in the way.
2605          */
2606 #ifndef CONFIG_UNWINDER_ORC
2607         if (!regs)
2608                 trace.skip++;
2609 #endif
2610 
2611         /*
2612          * Since events can happen in NMIs there's no safe way to
2613          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2614          * or NMI comes in, it will just have to use the default
2615          * FTRACE_STACK_ENTRIES.
2616          */
2617         preempt_disable_notrace();
2618 
2619         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2620         /*
2621          * We don't need any atomic variables, just a barrier.
2622          * If an interrupt comes in, we don't care, because it would
2623          * have exited and put the counter back to what we want.
2624          * We just need a barrier to keep gcc from moving things
2625          * around.
2626          */
2627         barrier();
2628         if (use_stack == 1) {
2629                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2630                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2631 
2632                 if (regs)
2633                         save_stack_trace_regs(regs, &trace);
2634                 else
2635                         save_stack_trace(&trace);
2636 
2637                 if (trace.nr_entries > size)
2638                         size = trace.nr_entries;
2639         } else
2640                 /* From now on, use_stack is a boolean */
2641                 use_stack = 0;
2642 
2643         size *= sizeof(unsigned long);
2644 
2645         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2646                                             sizeof(*entry) + size, flags, pc);
2647         if (!event)
2648                 goto out;
2649         entry = ring_buffer_event_data(event);
2650 
2651         memset(&entry->caller, 0, size);
2652 
2653         if (use_stack)
2654                 memcpy(&entry->caller, trace.entries,
2655                        trace.nr_entries * sizeof(unsigned long));
2656         else {
2657                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2658                 trace.entries           = entry->caller;
2659                 if (regs)
2660                         save_stack_trace_regs(regs, &trace);
2661                 else
2662                         save_stack_trace(&trace);
2663         }
2664 
2665         entry->size = trace.nr_entries;
2666 
2667         if (!call_filter_check_discard(call, entry, buffer, event))
2668                 __buffer_unlock_commit(buffer, event);
2669 
2670  out:
2671         /* Again, don't let gcc optimize things here */
2672         barrier();
2673         __this_cpu_dec(ftrace_stack_reserve);
2674         preempt_enable_notrace();
2675 
2676 }
2677 
2678 static inline void ftrace_trace_stack(struct trace_array *tr,
2679                                       struct ring_buffer *buffer,
2680                                       unsigned long flags,
2681                                       int skip, int pc, struct pt_regs *regs)
2682 {
2683         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2684                 return;
2685 
2686         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2687 }
2688 
2689 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2690                    int pc)
2691 {
2692         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2693 
2694         if (rcu_is_watching()) {
2695                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2696                 return;
2697         }
2698 
2699         /*
2700          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2701          * but if the above rcu_is_watching() failed, then the NMI
2702          * triggered someplace critical, and rcu_irq_enter() should
2703          * not be called from NMI.
2704          */
2705         if (unlikely(in_nmi()))
2706                 return;
2707 
2708         rcu_irq_enter_irqson();
2709         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2710         rcu_irq_exit_irqson();
2711 }
2712 
2713 /**
2714  * trace_dump_stack - record a stack back trace in the trace buffer
2715  * @skip: Number of functions to skip (helper handlers)
2716  */
2717 void trace_dump_stack(int skip)
2718 {
2719         unsigned long flags;
2720 
2721         if (tracing_disabled || tracing_selftest_running)
2722                 return;
2723 
2724         local_save_flags(flags);
2725 
2726 #ifndef CONFIG_UNWINDER_ORC
2727         /* Skip 1 to skip this function. */
2728         skip++;
2729 #endif
2730         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2731                              flags, skip, preempt_count(), NULL);
2732 }
2733 EXPORT_SYMBOL_GPL(trace_dump_stack);
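
/*
 * Usage sketch (illustrative): from code that wants a backtrace in the
 * trace buffer rather than in dmesg:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero @skip drops that many callers from the top of the recorded
 * stack, which is useful when calling from a helper function.
 */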
2734 
2735 static DEFINE_PER_CPU(int, user_stack_count);
2736 
2737 void
2738 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2739 {
2740         struct trace_event_call *call = &event_user_stack;
2741         struct ring_buffer_event *event;
2742         struct userstack_entry *entry;
2743         struct stack_trace trace;
2744 
2745         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2746                 return;
2747 
2748         /*
2749          * NMIs cannot handle page faults, even with fixups.
2750          * Saving the user stack can (and often does) fault.
2751          */
2752         if (unlikely(in_nmi()))
2753                 return;
2754 
2755         /*
2756          * prevent recursion, since the user stack tracing may
2757          * trigger other kernel events.
2758          */
2759         preempt_disable();
2760         if (__this_cpu_read(user_stack_count))
2761                 goto out;
2762 
2763         __this_cpu_inc(user_stack_count);
2764 
2765         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2766                                             sizeof(*entry), flags, pc);
2767         if (!event)
2768                 goto out_drop_count;
2769         entry   = ring_buffer_event_data(event);
2770 
2771         entry->tgid             = current->tgid;
2772         memset(&entry->caller, 0, sizeof(entry->caller));
2773 
2774         trace.nr_entries        = 0;
2775         trace.max_entries       = FTRACE_STACK_ENTRIES;
2776         trace.skip              = 0;
2777         trace.entries           = entry->caller;
2778 
2779         save_stack_trace_user(&trace);
2780         if (!call_filter_check_discard(call, entry, buffer, event))
2781                 __buffer_unlock_commit(buffer, event);
2782 
2783  out_drop_count:
2784         __this_cpu_dec(user_stack_count);
2785  out:
2786         preempt_enable();
2787 }
2788 
2789 #ifdef UNUSED
2790 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2791 {
2792         ftrace_trace_userstack(tr, flags, preempt_count());
2793 }
2794 #endif /* UNUSED */
2795 
2796 #endif /* CONFIG_STACKTRACE */
2797 
2798 /* created for use with alloc_percpu */
2799 struct trace_buffer_struct {
2800         int nesting;
2801         char buffer[4][TRACE_BUF_SIZE];
2802 };
2803 
2804 static struct trace_buffer_struct *trace_percpu_buffer;
2805 
2806 /*
2807  * This allows for lockless recording.  If we're nested too deeply, then
2808  * this returns NULL.
2809  */
2810 static char *get_trace_buf(void)
2811 {
2812         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2813 
2814         if (!buffer || buffer->nesting >= 4)
2815                 return NULL;
2816 
2817         buffer->nesting++;
2818 
2819         /* Interrupts must see nesting incremented before we use the buffer */
2820         barrier();
2821         return &buffer->buffer[buffer->nesting][0];
2822 }
2823 
2824 static void put_trace_buf(void)
2825 {
2826         /* Don't let the decrement of nesting leak before this */
2827         barrier();
2828         this_cpu_dec(trace_percpu_buffer->nesting);
2829 }
2830 
2831 static int alloc_percpu_trace_buffer(void)
2832 {
2833         struct trace_buffer_struct *buffers;
2834 
2835         buffers = alloc_percpu(struct trace_buffer_struct);
2836         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2837                 return -ENOMEM;
2838 
2839         trace_percpu_buffer = buffers;
2840         return 0;
2841 }
2842 
2843 static int buffers_allocated;
2844 
2845 void trace_printk_init_buffers(void)
2846 {
2847         if (buffers_allocated)
2848                 return;
2849 
2850         if (alloc_percpu_trace_buffer())
2851                 return;
2852 
2853         /* trace_printk() is for debug use only. Don't use it in production. */
2854 
2855         pr_warn("\n");
2856         pr_warn("**********************************************************\n");
2857         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2858         pr_warn("**                                                      **\n");
2859         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2860         pr_warn("**                                                      **\n");
2861         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2862         pr_warn("** unsafe for production use.                           **\n");
2863         pr_warn("**                                                      **\n");
2864         pr_warn("** If you see this message and you are not debugging    **\n");
2865         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2866         pr_warn("**                                                      **\n");
2867         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2868         pr_warn("**********************************************************\n");
2869 
2870         /* Expand the buffers to set size */
2871         tracing_update_buffers();
2872 
2873         buffers_allocated = 1;
2874 
2875         /*
2876          * trace_printk_init_buffers() can be called by modules.
2877          * If that happens, then we need to start cmdline recording
2878          * directly here. If the global_trace.buffer is already
2879          * allocated here, then this was called by module code.
2880          */
2881         if (global_trace.trace_buffer.buffer)
2882                 tracing_start_cmdline_record();
2883 }
2884 
2885 void trace_printk_start_comm(void)
2886 {
2887         /* Start tracing comms if trace printk is set */
2888         if (!buffers_allocated)
2889                 return;
2890         tracing_start_cmdline_record();
2891 }
2892 
2893 static void trace_printk_start_stop_comm(int enabled)
2894 {
2895         if (!buffers_allocated)
2896                 return;
2897 
2898         if (enabled)
2899                 tracing_start_cmdline_record();
2900         else
2901                 tracing_stop_cmdline_record();
2902 }
2903 
2904 /**
2905  * trace_vbprintk - write binary msg to tracing buffer
2906  *
2907  */
2908 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2909 {
2910         struct trace_event_call *call = &event_bprint;
2911         struct ring_buffer_event *event;
2912         struct ring_buffer *buffer;
2913         struct trace_array *tr = &global_trace;
2914         struct bprint_entry *entry;
2915         unsigned long flags;
2916         char *tbuffer;
2917         int len = 0, size, pc;
2918 
2919         if (unlikely(tracing_selftest_running || tracing_disabled))
2920                 return 0;
2921 
2922         /* Don't pollute graph traces with trace_vprintk internals */
2923         pause_graph_tracing();
2924 
2925         pc = preempt_count();
2926         preempt_disable_notrace();
2927 
2928         tbuffer = get_trace_buf();
2929         if (!tbuffer) {
2930                 len = 0;
2931                 goto out_nobuffer;
2932         }
2933 
2934         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2935 
2936         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2937                 goto out;
2938 
2939         local_save_flags(flags);
2940         size = sizeof(*entry) + sizeof(u32) * len;
2941         buffer = tr->trace_buffer.buffer;
2942         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2943                                             flags, pc);
2944         if (!event)
2945                 goto out;
2946         entry = ring_buffer_event_data(event);
2947         entry->ip                       = ip;
2948         entry->fmt                      = fmt;
2949 
2950         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2951         if (!call_filter_check_discard(call, entry, buffer, event)) {
2952                 __buffer_unlock_commit(buffer, event);
2953                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2954         }
2955 
2956 out:
2957         put_trace_buf();
2958 
2959 out_nobuffer:
2960         preempt_enable_notrace();
2961         unpause_graph_tracing();
2962 
2963         return len;
2964 }
2965 EXPORT_SYMBOL_GPL(trace_vbprintk);
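
/*
 * Usage sketch (illustrative): trace_vbprintk() is normally reached via
 * the trace_printk() macro (include/linux/kernel.h), which records the
 * constant format string in a special section and passes the va_list
 * down to here, roughly:
 *
 *	trace_printk("processing %s: count=%d\n", name, count);
 *
 * The variables are placeholders.  See the "NOTICE" banner emitted by
 * trace_printk_init_buffers() below: trace_printk() is for debugging
 * only and should not ship in production code.
 */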
2966 
2967 __printf(3, 0)
2968 static int
2969 __trace_array_vprintk(struct ring_buffer *buffer,
2970                       unsigned long ip, const char *fmt, va_list args)
2971 {
2972         struct trace_event_call *call = &event_print;
2973         struct ring_buffer_event *event;
2974         int len = 0, size, pc;
2975         struct print_entry *entry;
2976         unsigned long flags;
2977         char *tbuffer;
2978 
2979         if (tracing_disabled || tracing_selftest_running)
2980                 return 0;
2981 
2982         /* Don't pollute graph traces with trace_vprintk internals */
2983         pause_graph_tracing();
2984 
2985         pc = preempt_count();
2986         preempt_disable_notrace();
2987 
2988 
2989         tbuffer = get_trace_buf();
2990         if (!tbuffer) {
2991                 len = 0;
2992                 goto out_nobuffer;
2993         }
2994 
2995         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2996 
2997         local_save_flags(flags);
2998         size = sizeof(*entry) + len + 1;
2999         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3000                                             flags, pc);
3001         if (!event)
3002                 goto out;
3003         entry = ring_buffer_event_data(event);
3004         entry->ip = ip;
3005 
3006         memcpy(&entry->buf, tbuffer, len + 1);
3007         if (!call_filter_check_discard(call, entry, buffer, event)) {
3008                 __buffer_unlock_commit(buffer, event);
3009                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3010         }
3011 
3012 out:
3013         put_trace_buf();
3014 
3015 out_nobuffer:
3016         preempt_enable_notrace();
3017         unpause_graph_tracing();
3018 
3019         return len;
3020 }
3021 
3022 __printf(3, 0)
3023 int trace_array_vprintk(struct trace_array *tr,
3024                         unsigned long ip, const char *fmt, va_list args)
3025 {
3026         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3027 }
3028 
3029 __printf(3, 0)
3030 int trace_array_printk(struct trace_array *tr,
3031                        unsigned long ip, const char *fmt, ...)
3032 {
3033         int ret;
3034         va_list ap;
3035 
3036         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3037                 return 0;
3038 
3039         va_start(ap, fmt);
3040         ret = trace_array_vprintk(tr, ip, fmt, ap);
3041         va_end(ap);
3042         return ret;
3043 }
3044 
3045 __printf(3, 4)
3046 int trace_array_printk_buf(struct ring_buffer *buffer,
3047                            unsigned long ip, const char *fmt, ...)
3048 {
3049         int ret;
3050         va_list ap;
3051 
3052         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3053                 return 0;
3054 
3055         va_start(ap, fmt);
3056         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3057         va_end(ap);
3058         return ret;
3059 }
3060 
3061 __printf(2, 0)
3062 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3063 {
3064         return trace_array_vprintk(&global_trace, ip, fmt, args);
3065 }
3066 EXPORT_SYMBOL_GPL(trace_vprintk);
3067 
3068 static void trace_iterator_increment(struct trace_iterator *iter)
3069 {
3070         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3071 
3072         iter->idx++;
3073         if (buf_iter)
3074                 ring_buffer_read(buf_iter, NULL);
3075 }
3076 
3077 static struct trace_entry *
3078 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3079                 unsigned long *lost_events)
3080 {
3081         struct ring_buffer_event *event;
3082         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3083 
3084         if (buf_iter)
3085                 event = ring_buffer_iter_peek(buf_iter, ts);
3086         else
3087                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3088                                          lost_events);
3089 
3090         if (event) {
3091                 iter->ent_size = ring_buffer_event_length(event);
3092                 return ring_buffer_event_data(event);
3093         }
3094         iter->ent_size = 0;
3095         return NULL;
3096 }
3097 
3098 static struct trace_entry *
3099 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3100                   unsigned long *missing_events, u64 *ent_ts)
3101 {
3102         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3103         struct trace_entry *ent, *next = NULL;
3104         unsigned long lost_events = 0, next_lost = 0;
3105         int cpu_file = iter->cpu_file;
3106         u64 next_ts = 0, ts;
3107         int next_cpu = -1;
3108         int next_size = 0;
3109         int cpu;
3110 
3111         /*
3112          * If we are in a per_cpu trace file, don't bother iterating over
3113          * all CPUs; just peek at that CPU directly.
3114          */
3115         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3116                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3117                         return NULL;
3118                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3119                 if (ent_cpu)
3120                         *ent_cpu = cpu_file;
3121 
3122                 return ent;
3123         }
3124 
3125         for_each_tracing_cpu(cpu) {
3126 
3127                 if (ring_buffer_empty_cpu(buffer, cpu))
3128                         continue;
3129 
3130                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3131 
3132                 /*
3133                  * Pick the entry with the smallest timestamp:
3134                  */
3135                 if (ent && (!next || ts < next_ts)) {
3136                         next = ent;
3137                         next_cpu = cpu;
3138                         next_ts = ts;
3139                         next_lost = lost_events;
3140                         next_size = iter->ent_size;
3141                 }
3142         }
3143 
3144         iter->ent_size = next_size;
3145 
3146         if (ent_cpu)
3147                 *ent_cpu = next_cpu;
3148 
3149         if (ent_ts)
3150                 *ent_ts = next_ts;
3151 
3152         if (missing_events)
3153                 *missing_events = next_lost;
3154 
3155         return next;
3156 }
3157 
3158 /* Find the next real entry, without updating the iterator itself */
3159 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3160                                           int *ent_cpu, u64 *ent_ts)
3161 {
3162         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3163 }
3164 
3165 /* Find the next real entry, and increment the iterator to the next entry */
3166 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3167 {
3168         iter->ent = __find_next_entry(iter, &iter->cpu,
3169                                       &iter->lost_events, &iter->ts);
3170 
3171         if (iter->ent)
3172                 trace_iterator_increment(iter);
3173 
3174         return iter->ent ? iter : NULL;
3175 }
3176 
3177 static void trace_consume(struct trace_iterator *iter)
3178 {
3179         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3180                             &iter->lost_events);
3181 }
3182 
3183 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3184 {
3185         struct trace_iterator *iter = m->private;
3186         int i = (int)*pos;
3187         void *ent;
3188 
3189         WARN_ON_ONCE(iter->leftover);
3190 
3191         (*pos)++;
3192 
3193         /* can't go backwards */
3194         if (iter->idx > i)
3195                 return NULL;
3196 
3197         if (iter->idx < 0)
3198                 ent = trace_find_next_entry_inc(iter);
3199         else
3200                 ent = iter;
3201 
3202         while (ent && iter->idx < i)
3203                 ent = trace_find_next_entry_inc(iter);
3204 
3205         iter->pos = *pos;
3206 
3207         return ent;
3208 }
3209 
3210 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3211 {
3212         struct ring_buffer_event *event;
3213         struct ring_buffer_iter *buf_iter;
3214         unsigned long entries = 0;
3215         u64 ts;
3216 
3217         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3218 
3219         buf_iter = trace_buffer_iter(iter, cpu);
3220         if (!buf_iter)
3221                 return;
3222 
3223         ring_buffer_iter_reset(buf_iter);
3224 
3225         /*
3226          * With the max latency tracers, it is possible that a reset
3227          * never took place on a cpu. This is evident when the
3228          * timestamp is before the start of the buffer.
3229          */
3230         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3231                 if (ts >= iter->trace_buffer->time_start)
3232                         break;
3233                 entries++;
3234                 ring_buffer_read(buf_iter, NULL);
3235         }
3236 
3237         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3238 }
3239 
3240 /*
3241  * The current tracer is copied to avoid global locking
3242  * all around.
3243  */
3244 static void *s_start(struct seq_file *m, loff_t *pos)
3245 {
3246         struct trace_iterator *iter = m->private;
3247         struct trace_array *tr = iter->tr;
3248         int cpu_file = iter->cpu_file;
3249         void *p = NULL;
3250         loff_t l = 0;
3251         int cpu;
3252 
3253         /*
3254          * copy the tracer to avoid using a global lock all around.
3255          * iter->trace is a copy of current_trace, the pointer to the
3256          * name may be used instead of a strcmp(), as iter->trace->name
3257          * will point to the same string as current_trace->name.
3258          */
3259         mutex_lock(&trace_types_lock);
3260         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3261                 *iter->trace = *tr->current_trace;
3262         mutex_unlock(&trace_types_lock);
3263 
3264 #ifdef CONFIG_TRACER_MAX_TRACE
3265         if (iter->snapshot && iter->trace->use_max_tr)
3266                 return ERR_PTR(-EBUSY);
3267 #endif
3268 
3269         if (!iter->snapshot)
3270                 atomic_inc(&trace_record_taskinfo_disabled);
3271 
3272         if (*pos != iter->pos) {
3273                 iter->ent = NULL;
3274                 iter->cpu = 0;
3275                 iter->idx = -1;
3276 
3277                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3278                         for_each_tracing_cpu(cpu)
3279                                 tracing_iter_reset(iter, cpu);
3280                 } else
3281                         tracing_iter_reset(iter, cpu_file);
3282 
3283                 iter->leftover = 0;
3284                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3285                         ;
3286 
3287         } else {
3288                 /*
3289                  * If we overflowed the seq_file before, then we want
3290                  * to just reuse the trace_seq buffer again.
3291                  */
3292                 if (iter->leftover)
3293                         p = iter;
3294                 else {
3295                         l = *pos - 1;
3296                         p = s_next(m, p, &l);
3297                 }
3298         }
3299 
3300         trace_event_read_lock();
3301         trace_access_lock(cpu_file);
3302         return p;
3303 }
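
     /*
      * Editor's sketch of the surrounding seq_file protocol (assumed from
      * generic seq_file behaviour, not spelled out here): seq_read() drives
      * these operations roughly as
      *
      *     p = s_start(m, &pos);
      *     while (p) {
      *             s_show(m, p);
      *             p = s_next(m, p, &pos);
      *     }
      *     s_stop(m, p);
      *
      * which is why s_start() must handle both a fresh position (*pos moved,
      * so rewind and walk forward) and a continuation of the previous cycle,
      * including the iter->leftover case where the last line did not fit in
      * the seq_file buffer and has to be emitted again.
      */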
3304 
3305 static void s_stop(struct seq_file *m, void *p)
3306 {
3307         struct trace_iterator *iter = m->private;
3308 
3309 #ifdef CONFIG_TRACER_MAX_TRACE
3310         if (iter->snapshot && iter->trace->use_max_tr)
3311                 return;
3312 #endif
3313 
3314         if (!iter->snapshot)
3315                 atomic_dec(&trace_record_taskinfo_disabled);
3316 
3317         trace_access_unlock(iter->cpu_file);
3318         trace_event_read_unlock();
3319 }
3320 
3321 static void
3322 get_total_entries(struct trace_buffer *buf,
3323                   unsigned long *total, unsigned long *entries)
3324 {
3325         unsigned long count;
3326         int cpu;
3327 
3328         *total = 0;
3329         *entries = 0;
3330 
3331         for_each_tracing_cpu(cpu) {
3332                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3333                 /*
3334                  * If this buffer has skipped entries, then we hold all
3335                  * entries for the trace and we need to ignore the
3336                  * ones before the time stamp.
3337                  */
3338                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3339                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3340                         /* total is the same as the entries */
3341                         *total += count;
3342                 } else
3343                         *total += count +
3344                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3345                 *entries += count;
3346         }
3347 }
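
     /*
      * Editor's note: "entries" is what can still be read from the buffer
      * (minus anything skipped by tracing_iter_reset()), while "total" also
      * counts events that were overwritten (the per-cpu overrun).
      * print_event_info() below reports the pair as
      * "entries-in-buffer/entries-written".
      */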
3348 
3349 static void print_lat_help_header(struct seq_file *m)
3350 {
3351         seq_puts(m, "#                  _------=> CPU#            \n"
3352                     "#                 / _-----=> irqs-off        \n"
3353                     "#                | / _----=> need-resched    \n"
3354                     "#                || / _---=> hardirq/softirq \n"
3355                     "#                ||| / _--=> preempt-depth   \n"
3356                     "#                |||| /     delay            \n"
3357                     "#  cmd     pid   ||||| time  |   caller      \n"
3358                     "#     \\   /      |||||  \\    |   /         \n");
3359 }
3360 
3361 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3362 {
3363         unsigned long total;
3364         unsigned long entries;
3365 
3366         get_total_entries(buf, &total, &entries);
3367         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3368                    entries, total, num_online_cpus());
3369         seq_puts(m, "#\n");
3370 }
3371 
3372 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3373                                    unsigned int flags)
3374 {
3375         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3376 
3377         print_event_info(buf, m);
3378 
3379         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3380         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3381 }
3382 
3383 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3384                                        unsigned int flags)
3385 {
3386         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3387         const char tgid_space[] = "          ";
3388         const char space[] = "  ";
3389 
3390         print_event_info(buf, m);
3391 
3392         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3393                    tgid ? tgid_space : space);
3394         seq_printf(m, "#                          %s / _----=> need-resched\n",
3395                    tgid ? tgid_space : space);
3396         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3397                    tgid ? tgid_space : space);
3398         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3399                    tgid ? tgid_space : space);
3400         seq_printf(m, "#                          %s||| /     delay\n",
3401                    tgid ? tgid_space : space);
3402         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3403                    tgid ? "   TGID   " : space);
3404         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3405                    tgid ? "     |    " : space);
3406 }
3407 
3408 void
3409 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3410 {
3411         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3412         struct trace_buffer *buf = iter->trace_buffer;
3413         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3414         struct tracer *type = iter->trace;
3415         unsigned long entries;
3416         unsigned long total;
3417         const char *name = "preemption";
3418 
3419         name = type->name;
3420 
3421         get_total_entries(buf, &total, &entries);
3422 
3423         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3424                    name, UTS_RELEASE);
3425         seq_puts(m, "# -----------------------------------"
3426                  "---------------------------------\n");
3427         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3428                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3429                    nsecs_to_usecs(data->saved_latency),
3430                    entries,
3431                    total,
3432                    buf->cpu,
3433 #if defined(CONFIG_PREEMPT_NONE)
3434                    "server",
3435 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3436                    "desktop",
3437 #elif defined(CONFIG_PREEMPT)
3438                    "preempt",
3439 #else
3440                    "unknown",
3441 #endif
3442                    /* These are reserved for later use */
3443                    0, 0, 0, 0);
3444 #ifdef CONFIG_SMP
3445         seq_printf(m, " #P:%d)\n", num_online_cpus());
3446 #else
3447         seq_puts(m, ")\n");
3448 #endif
3449         seq_puts(m, "#    -----------------\n");
3450         seq_printf(m, "#    | task: %.16s-%d "
3451                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3452                    data->comm, data->pid,
3453                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3454                    data->policy, data->rt_priority);
3455         seq_puts(m, "#    -----------------\n");
3456 
3457         if (data->critical_start) {
3458                 seq_puts(m, "#  => started at: ");
3459                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3460                 trace_print_seq(m, &iter->seq);
3461                 seq_puts(m, "\n#  => ended at:   ");
3462                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3463                 trace_print_seq(m, &iter->seq);
3464                 seq_puts(m, "\n#\n");
3465         }
3466 
3467         seq_puts(m, "#\n");
3468 }
3469 
3470 static void test_cpu_buff_start(struct trace_iterator *iter)
3471 {
3472         struct trace_seq *s = &iter->seq;
3473         struct trace_array *tr = iter->tr;
3474 
3475         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3476                 return;
3477 
3478         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3479                 return;
3480 
3481         if (cpumask_available(iter->started) &&
3482             cpumask_test_cpu(iter->cpu, iter->started))
3483                 return;
3484 
3485         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3486                 return;
3487 
3488         if (cpumask_available(iter->started))
3489                 cpumask_set_cpu(iter->cpu, iter->started);
3490 
3491         /* Don't print started cpu buffer for the first entry of the trace */
3492         if (iter->idx > 1)
3493                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3494                                 iter->cpu);
3495 }
3496 
3497 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3498 {
3499         struct trace_array *tr = iter->tr;
3500         struct trace_seq *s = &iter->seq;
3501         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3502         struct trace_entry *entry;
3503         struct trace_event *event;
3504 
3505         entry = iter->ent;
3506 
3507         test_cpu_buff_start(iter);
3508 
3509         event = ftrace_find_event(entry->type);
3510 
3511         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3512                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3513                         trace_print_lat_context(iter);
3514                 else
3515                         trace_print_context(iter);
3516         }
3517 
3518         if (trace_seq_has_overflowed(s))
3519                 return TRACE_TYPE_PARTIAL_LINE;
3520 
3521         if (event)
3522                 return event->funcs->trace(iter, sym_flags, event);
3523 
3524         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3525 
3526         return trace_handle_return(s);
3527 }
3528 
3529 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3530 {
3531         struct trace_array *tr = iter->tr;
3532         struct trace_seq *s = &iter->seq;
3533         struct trace_entry *entry;
3534         struct trace_event *event;
3535 
3536         entry = iter->ent;
3537 
3538         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3539                 trace_seq_printf(s, "%d %d %llu ",
3540                                  entry->pid, iter->cpu, iter->ts);
3541 
3542         if (trace_seq_has_overflowed(s))
3543                 return TRACE_TYPE_PARTIAL_LINE;
3544 
3545         event = ftrace_find_event(entry->type);
3546         if (event)
3547                 return event->funcs->raw(iter, 0, event);
3548 
3549         trace_seq_printf(s, "%d ?\n", entry->type);
3550 
3551         return trace_handle_return(s);
3552 }
3553 
3554 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3555 {
3556         struct trace_array *tr = iter->tr;
3557         struct trace_seq *s = &iter->seq;
3558         unsigned char newline = '\n';
3559         struct trace_entry *entry;
3560         struct trace_event *event;
3561 
3562         entry = iter->ent;
3563 
3564         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3565                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3566                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3567                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3568                 if (trace_seq_has_overflowed(s))
3569                         return TRACE_TYPE_PARTIAL_LINE;
3570         }
3571 
3572         event = ftrace_find_event(entry->type);
3573         if (event) {
3574                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3575                 if (ret != TRACE_TYPE_HANDLED)
3576                         return ret;
3577         }
3578 
3579         SEQ_PUT_FIELD(s, newline);
3580 
3581         return trace_handle_return(s);
3582 }
3583 
3584 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3585 {
3586         struct trace_array *tr = iter->tr;
3587         struct trace_seq *s = &iter->seq;
3588         struct trace_entry *entry;
3589         struct trace_event *event;
3590 
3591         entry = iter->ent;
3592 
3593         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3594                 SEQ_PUT_FIELD(s, entry->pid);
3595                 SEQ_PUT_FIELD(s, iter->cpu);
3596                 SEQ_PUT_FIELD(s, iter->ts);
3597                 if (trace_seq_has_overflowed(s))
3598                         return TRACE_TYPE_PARTIAL_LINE;
3599         }
3600 
3601         event = ftrace_find_event(entry->type);
3602         return event ? event->funcs->binary(iter, 0, event) :
3603                 TRACE_TYPE_HANDLED;
3604 }
3605 
3606 int trace_empty(struct trace_iterator *iter)
3607 {
3608         struct ring_buffer_iter *buf_iter;
3609         int cpu;
3610 
3611         /* If we are looking at one CPU buffer, only check that one */
3612         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3613                 cpu = iter->cpu_file;
3614                 buf_iter = trace_buffer_iter(iter, cpu);
3615                 if (buf_iter) {
3616                         if (!ring_buffer_iter_empty(buf_iter))
3617                                 return 0;
3618                 } else {
3619                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3620                                 return 0;
3621                 }
3622                 return 1;
3623         }
3624 
3625         for_each_tracing_cpu(cpu) {
3626                 buf_iter = trace_buffer_iter(iter, cpu);
3627                 if (buf_iter) {
3628                         if (!ring_buffer_iter_empty(buf_iter))
3629                                 return 0;
3630                 } else {
3631                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3632                                 return 0;
3633                 }
3634         }
3635 
3636         return 1;
3637 }
3638 
3639 /*  Called with trace_event_read_lock() held. */
3640 enum print_line_t print_trace_line(struct trace_iterator *iter)
3641 {
3642         struct trace_array *tr = iter->tr;
3643         unsigned long trace_flags = tr->trace_flags;
3644         enum print_line_t ret;
3645 
3646         if (iter->lost_events) {
3647                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3648                                  iter->cpu, iter->lost_events);
3649                 if (trace_seq_has_overflowed(&iter->seq))
3650                         return TRACE_TYPE_PARTIAL_LINE;
3651         }
3652 
3653         if (iter->trace && iter->trace->print_line) {
3654                 ret = iter->trace->print_line(iter);
3655                 if (ret != TRACE_TYPE_UNHANDLED)
3656                         return ret;
3657         }
3658 
3659         if (iter->ent->type == TRACE_BPUTS &&
3660                         trace_flags & TRACE_ITER_PRINTK &&
3661                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3662                 return trace_print_bputs_msg_only(iter);
3663 
3664         if (iter->ent->type == TRACE_BPRINT &&
3665                         trace_flags & TRACE_ITER_PRINTK &&
3666                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3667                 return trace_print_bprintk_msg_only(iter);
3668 
3669         if (iter->ent->type == TRACE_PRINT &&
3670                         trace_flags & TRACE_ITER_PRINTK &&
3671                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3672                 return trace_print_printk_msg_only(iter);
3673 
3674         if (trace_flags & TRACE_ITER_BIN)
3675                 return print_bin_fmt(iter);
3676 
3677         if (trace_flags & TRACE_ITER_HEX)
3678                 return print_hex_fmt(iter);
3679 
3680         if (trace_flags & TRACE_ITER_RAW)
3681                 return print_raw_fmt(iter);
3682 
3683         return print_trace_fmt(iter);
3684 }
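
     /*
      * Editor's note on the dispatch order above: a tracer's own
      * ->print_line() gets the first chance; the printk-msgonly shortcuts
      * come next for bputs/bprint/print events; only then do the bin, hex
      * and raw output modes apply, with print_trace_fmt() as the default
      * human-readable form.
      */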
3685 
3686 void trace_latency_header(struct seq_file *m)
3687 {
3688         struct trace_iterator *iter = m->private;
3689         struct trace_array *tr = iter->tr;
3690 
3691         /* print nothing if the buffers are empty */
3692         if (trace_empty(iter))
3693                 return;
3694 
3695         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3696                 print_trace_header(m, iter);
3697 
3698         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3699                 print_lat_help_header(m);
3700 }
3701 
3702 void trace_default_header(struct seq_file *m)
3703 {
3704         struct trace_iterator *iter = m->private;
3705         struct trace_array *tr = iter->tr;
3706         unsigned long trace_flags = tr->trace_flags;
3707 
3708         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3709                 return;
3710 
3711         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3712                 /* print nothing if the buffers are empty */
3713                 if (trace_empty(iter))
3714                         return;
3715                 print_trace_header(m, iter);
3716                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3717                         print_lat_help_header(m);
3718         } else {
3719                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3720                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3721                                 print_func_help_header_irq(iter->trace_buffer,
3722                                                            m, trace_flags);
3723                         else
3724                                 print_func_help_header(iter->trace_buffer, m,
3725                                                        trace_flags);
3726                 }
3727         }
3728 }
3729 
3730 static void test_ftrace_alive(struct seq_file *m)
3731 {
3732         if (!ftrace_is_dead())
3733                 return;
3734         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3735                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3736 }
3737 
3738 #ifdef CONFIG_TRACER_MAX_TRACE
3739 static void show_snapshot_main_help(struct seq_file *m)
3740 {
3741         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3742                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3743                     "#                      Takes a snapshot of the main buffer.\n"
3744                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3745                     "#                      (Doesn't have to be '2', works with any number that\n"
3746                     "#                       is not a '0' or '1')\n");
3747 }
3748 
3749 static void show_snapshot_percpu_help(struct seq_file *m)
3750 {
3751         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3752 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3753         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3754                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3755 #else
3756         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3757                     "#                     Must use main snapshot file to allocate.\n");
3758 #endif
3759         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3760                     "#                      (Doesn't have to be '2', works with any number that\n"
3761                     "#                       is not a '0' or '1')\n");
3762 }
3763 
3764 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3765 {
3766         if (iter->tr->allocated_snapshot)
3767                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3768         else
3769                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3770 
3771         seq_puts(m, "# Snapshot commands:\n");
3772         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3773                 show_snapshot_main_help(m);
3774         else
3775                 show_snapshot_percpu_help(m);
3776 }
3777 #else
3778 /* Should never be called */
3779 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3780 #endif
3781 
3782 static int s_show(struct seq_file *m, void *v)
3783 {
3784         struct trace_iterator *iter = v;
3785         int ret;
3786 
3787         if (iter->ent == NULL) {
3788                 if (iter->tr) {
3789                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3790                         seq_puts(m, "#\n");
3791                         test_ftrace_alive(m);
3792                 }
3793                 if (iter->snapshot && trace_empty(iter))
3794                         print_snapshot_help(m, iter);
3795                 else if (iter->trace && iter->trace->print_header)
3796                         iter->trace->print_header(m);
3797                 else
3798                         trace_default_header(m);
3799 
3800         } else if (iter->leftover) {
3801                 /*
3802                  * If we filled the seq_file buffer earlier, we
3803                  * want to just show it now.
3804                  */
3805                 ret = trace_print_seq(m, &iter->seq);
3806 
3807                 /* ret should this time be zero, but you never know */
3808                 iter->leftover = ret;
3809 
3810         } else {
3811                 print_trace_line(iter);
3812                 ret = trace_print_seq(m, &iter->seq);
3813                 /*
3814                  * If we overflow the seq_file buffer, then it will
3815                  * ask us for this data again at start up.
3816                  * Use that instead.
3817                  *  ret is 0 if seq_file write succeeded.
3818                  *        -1 otherwise.
3819                  */
3820                 iter->leftover = ret;
3821         }
3822 
3823         return 0;
3824 }
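
     /*
      * Editor's note: iter->leftover implements a small retry protocol with
      * seq_file.  trace_print_seq() returns 0 when the formatted line fit
      * into the seq_file buffer and non-zero when it did not; a non-zero
      * value keeps the already-formatted iter->seq around so the next
      * s_start()/s_show() round can emit it again without advancing the
      * underlying ring-buffer iterator.
      */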
3825 
3826 /*
3827  * Should be used after trace_array_get(), trace_types_lock
3828  * ensures that i_cdev was already initialized.
3829  */
3830 static inline int tracing_get_cpu(struct inode *inode)
3831 {
3832         if (inode->i_cdev) /* See trace_create_cpu_file() */
3833                 return (long)inode->i_cdev - 1;
3834         return RING_BUFFER_ALL_CPUS;
3835 }
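
     /*
      * Editor's sketch of the encoding referenced above (assumed from the
      * "See trace_create_cpu_file()" hint, illustrative only): the per-cpu
      * files stash cpu + 1 in inode->i_cdev when they are created, so a
      * NULL i_cdev means "no specific CPU":
      *
      *     inode->i_cdev = (void *)(cpu + 1);     // at file creation, cpu is a long
      *     cpu = (long)inode->i_cdev - 1;         // decoded here
      */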
3836 
3837 static const struct seq_operations tracer_seq_ops = {
3838         .start          = s_start,
3839         .next           = s_next,
3840         .stop           = s_stop,
3841         .show           = s_show,
3842 };
3843 
3844 static struct trace_iterator *
3845 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3846 {
3847         struct trace_array *tr = inode->i_private;
3848         struct trace_iterator *iter;
3849         int cpu;
3850 
3851         if (tracing_disabled)
3852                 return ERR_PTR(-ENODEV);
3853 
3854         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3855         if (!iter)
3856                 return ERR_PTR(-ENOMEM);
3857 
3858         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3859                                     GFP_KERNEL);
3860         if (!iter->buffer_iter)
3861                 goto release;
3862 
3863         /*
3864          * We make a copy of the current tracer to avoid concurrent
3865          * changes on it while we are reading.
3866          */
3867         mutex_lock(&trace_types_lock);
3868         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3869         if (!iter->trace)
3870                 goto fail;
3871 
3872         *iter->trace = *tr->current_trace;
3873 
3874         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3875                 goto fail;
3876 
3877         iter->tr = tr;
3878 
3879 #ifdef CONFIG_TRACER_MAX_TRACE
3880         /* Currently only the top directory has a snapshot */
3881         if (tr->current_trace->print_max || snapshot)
3882                 iter->trace_buffer = &tr->max_buffer;
3883         else
3884 #endif
3885                 iter->trace_buffer = &tr->trace_buffer;
3886         iter->snapshot = snapshot;
3887         iter->pos = -1;
3888         iter->cpu_file = tracing_get_cpu(inode);
3889         mutex_init(&iter->mutex);
3890 
3891         /* Notify the tracer early; before we stop tracing. */
3892         if (iter->trace && iter->trace->open)
3893                 iter->trace->open(iter);
3894 
3895         /* Annotate start of buffers if we had overruns */
3896         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3897                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3898 
3899         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3900         if (trace_clocks[tr->clock_id].in_ns)
3901                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3902 
3903         /* stop the trace while dumping if we are not opening "snapshot" */
3904         if (!iter->snapshot)
3905                 tracing_stop_tr(tr);
3906 
3907         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3908                 for_each_tracing_cpu(cpu) {
3909                         iter->buffer_iter[cpu] =
3910                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3911                                                          cpu, GFP_KERNEL);
3912                 }
3913                 ring_buffer_read_prepare_sync();
3914                 for_each_tracing_cpu(cpu) {
3915                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3916                         tracing_iter_reset(iter, cpu);
3917                 }
3918         } else {
3919                 cpu = iter->cpu_file;
3920                 iter->buffer_iter[cpu] =
3921                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3922                                                  cpu, GFP_KERNEL);
3923                 ring_buffer_read_prepare_sync();
3924                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3925                 tracing_iter_reset(iter, cpu);
3926         }
3927 
3928         mutex_unlock(&trace_types_lock);
3929 
3930         return iter;
3931 
3932  fail:
3933         mutex_unlock(&trace_types_lock);
3934         kfree(iter->trace);
3935         kfree(iter->buffer_iter);
3936 release:
3937         seq_release_private(inode, file);
3938         return ERR_PTR(-ENOMEM);
3939 }
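
     /*
      * Editor's note: everything allocated here is released in
      * tracing_release() further down -- the per-cpu buffer iterators, the
      * copied tracer, the started cpumask and the iterator mutex -- and,
      * unless the snapshot file was opened, tracing_start_tr() there undoes
      * the tracing_stop_tr() issued above while the buffer is being read.
      */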
3940 
3941 int tracing_open_generic(struct inode *inode, struct file *filp)
3942 {
3943         if (tracing_disabled)
3944                 return -ENODEV;
3945 
3946         filp->private_data = inode->i_private;
3947         return 0;
3948 }
3949 
3950 bool tracing_is_disabled(void)
3951 {
3952         return tracing_disabled ? true : false;
3953 }
3954 
3955 /*
3956  * Open and update trace_array ref count.
3957  * Must have the current trace_array passed to it.
3958  */
3959 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3960 {
3961         struct trace_array *tr = inode->i_private;
3962 
3963         if (tracing_disabled)
3964                 return -ENODEV;
3965 
3966         if (trace_array_get(tr) < 0)
3967                 return -ENODEV;
3968 
3969         filp->private_data = inode->i_private;
3970 
3971         return 0;
3972 }
3973 
3974 static int tracing_release(struct inode *inode, struct file *file)
3975 {
3976         struct trace_array *tr = inode->i_private;
3977         struct seq_file *m = file->private_data;
3978         struct trace_iterator *iter;
3979         int cpu;
3980 
3981         if (!(file->f_mode & FMODE_READ)) {
3982                 trace_array_put(tr);
3983                 return 0;
3984         }
3985 
3986         /* Writes do not use seq_file */
3987         iter = m->private;
3988         mutex_lock(&trace_types_lock);
3989 
3990         for_each_tracing_cpu(cpu) {
3991                 if (iter->buffer_iter[cpu])
3992                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3993         }
3994 
3995         if (iter->trace && iter->trace->close)
3996                 iter->trace->close(iter);
3997 
3998         if (!iter->snapshot)
3999                 /* reenable tracing if it was previously enabled */
4000                 tracing_start_tr(tr);
4001 
4002         __trace_array_put(tr);
4003 
4004         mutex_unlock(&trace_types_lock);
4005 
4006         mutex_destroy(&iter->mutex);
4007         free_cpumask_var(iter->started);
4008         kfree(iter->trace);
4009         kfree(iter->buffer_iter);
4010         seq_release_private(inode, file);
4011 
4012         return 0;
4013 }
4014 
4015 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4016 {
4017         struct trace_array *tr = inode->i_private;
4018 
4019         trace_array_put(tr);
4020         return 0;
4021 }
4022 
4023 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4024 {
4025         struct trace_array *tr = inode->i_private;
4026 
4027         trace_array_put(tr);
4028 
4029         return single_release(inode, file);
4030 }
4031 
4032 static int tracing_open(struct inode *inode, struct file *file)
4033 {
4034         struct trace_array *tr = inode->i_private;
4035         struct trace_iterator *iter;
4036         int ret = 0;
4037 
4038         if (trace_array_get(tr) < 0)
4039                 return -ENODEV;
4040 
4041         /* If this file was open for write, then erase contents */
4042         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4043                 int cpu = tracing_get_cpu(inode);
4044                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4045 
4046 #ifdef CONFIG_TRACER_MAX_TRACE
4047                 if (tr->current_trace->print_max)
4048                         trace_buf = &tr->max_buffer;
4049 #endif
4050 
4051                 if (cpu == RING_BUFFER_ALL_CPUS)
4052                         tracing_reset_online_cpus(trace_buf);
4053                 else
4054                         tracing_reset(trace_buf, cpu);
4055         }
4056 
4057         if (file->f_mode & FMODE_READ) {
4058                 iter = __tracing_open(inode, file, false);
4059                 if (IS_ERR(iter))
4060                         ret = PTR_ERR(iter);
4061                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4062                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4063         }
4064 
4065         if (ret < 0)
4066                 trace_array_put(tr);
4067 
4068         return ret;
4069 }
4070 
4071 /*
4072  * Some tracers are not suitable for instance buffers.
4073  * A tracer is always available for the global array (toplevel)
4074  * or if it explicitly states that it is.
4075  */
4076 static bool
4077 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4078 {
4079         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4080 }
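
     /*
      * Editor's sketch (illustrative, the name is made up): a tracer opts in
      * to instance buffers created under instances/ by setting
      * .allow_instances in its struct tracer, e.g.
      *
      *     static struct tracer example_tracer __read_mostly = {
      *             .name            = "example",
      *             .allow_instances = true,
      *     };
      *
      * Without that flag the tracer is offered only on the top-level
      * (global) trace array.
      */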
4081 
4082 /* Find the next tracer that this trace array may use */
4083 static struct tracer *
4084 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4085 {
4086         while (t && !trace_ok_for_array(t, tr))
4087                 t = t->next;
4088 
4089         return t;
4090 }
4091 
4092 static void *
4093 t_next(struct seq_file *m, void *v, loff_t *pos)
4094 {
4095         struct trace_array *tr = m->private;
4096         struct tracer *t = v;
4097 
4098         (*pos)++;
4099 
4100         if (t)
4101                 t = get_tracer_for_array(tr, t->next);
4102 
4103         return t;
4104 }
4105 
4106 static void *t_start(struct seq_file *m, loff_t *pos)
4107 {
4108         struct trace_array *tr = m->private;
4109         struct tracer *t;
4110         loff_t l = 0;
4111 
4112         mutex_lock(&trace_types_lock);
4113 
4114         t = get_tracer_for_array(tr, trace_types);
4115         for (; t && l < *pos; t = t_next(m, t, &l))
4116                         ;
4117 
4118         return t;
4119 }
4120 
4121 static void t_stop(struct seq_file *m, void *p)
4122 {
4123         mutex_unlock(&trace_types_lock);
4124 }
4125 
4126 static int t_show(struct seq_file *m, void *v)
4127 {
4128         struct tracer *t = v;
4129 
4130         if (!t)
4131                 return 0;
4132 
4133         seq_puts(m, t->name);
4134         if (t->next)
4135                 seq_putc(m, ' ');
4136         else
4137                 seq_putc(m, '\n');
4138 
4139         return 0;
4140 }
4141 
4142 static const struct seq_operations show_traces_seq_ops = {
4143         .start          = t_start,
4144         .next           = t_next,
4145         .stop           = t_stop,
4146         .show           = t_show,
4147 };
4148 
4149 static int show_traces_open(struct inode *inode, struct file *file)
4150 {
4151         struct trace_array *tr = inode->i_private;
4152         struct seq_file *m;
4153         int ret;
4154 
4155         if (tracing_disabled)
4156                 return -ENODEV;
4157 
4158         ret = seq_open(file, &show_traces_seq_ops);
4159         if (ret)
4160                 return ret;
4161 
4162         m = file->private_data;
4163         m->private = tr;
4164 
4165         return 0;
4166 }
4167 
4168 static ssize_t
4169 tracing_write_stub(struct file *filp, const char __user *ubuf,
4170                    size_t count, loff_t *ppos)
4171 {
4172         return count;
4173 }
4174 
4175 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4176 {
4177         int ret;
4178 
4179         if (file->f_mode & FMODE_READ)
4180                 ret = seq_lseek(file, offset, whence);
4181         else
4182                 file->f_pos = ret = 0;
4183 
4184         return ret;
4185 }
4186 
4187 static const struct file_operations tracing_fops = {
4188         .open           = tracing_open,
4189         .read           = seq_read,
4190         .write          = tracing_write_stub,
4191         .llseek         = tracing_lseek,
4192         .release        = tracing_release,
4193 };
4194 
4195 static const struct file_operations show_traces_fops = {
4196         .open           = show_traces_open,
4197         .read           = seq_read,
4198         .release        = seq_release,
4199         .llseek         = seq_lseek,
4200 };
4201 
4202 static ssize_t
4203 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4204                      size_t count, loff_t *ppos)
4205 {
4206         struct trace_array *tr = file_inode(filp)->i_private;
4207         char *mask_str;
4208         int len;
4209 
4210         len = snprintf(NULL, 0, "%*pb\n",
4211                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4212         mask_str = kmalloc(len, GFP_KERNEL);
4213         if (!mask_str)
4214                 return -ENOMEM;
4215 
4216         len = snprintf(mask_str, len, "%*pb\n",
4217                        cpumask_pr_args(tr->tracing_cpumask));
4218         if (len >= count) {
4219                 count = -EINVAL;
4220                 goto out_err;
4221         }
4222         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4223 
4224 out_err:
4225         kfree(mask_str);
4226 
4227         return count;
4228 }
4229 
4230 static ssize_t
4231 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4232                       size_t count, loff_t *ppos)
4233 {
4234         struct trace_array *tr = file_inode(filp)->i_private;
4235         cpumask_var_t tracing_cpumask_new;
4236         int err, cpu;
4237 
4238         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4239                 return -ENOMEM;
4240 
4241         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4242         if (err)
4243                 goto err_unlock;
4244 
4245         local_irq_disable();
4246         arch_spin_lock(&tr->max_lock);
4247         for_each_tracing_cpu(cpu) {
4248                 /*
4249                  * Increase/decrease the disabled counter if we are
4250                  * about to flip a bit in the cpumask:
4251                  */
4252                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4253                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4254                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4255                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4256                 }
4257                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4258                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4259                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4260                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4261                 }
4262         }
4263         arch_spin_unlock(&tr->max_lock);
4264         local_irq_enable();
4265 
4266         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4267         free_cpumask_var(tracing_cpumask_new);
4268 
4269         return count;
4270 
4271 err_unlock:
4272         free_cpumask_var(tracing_cpumask_new);
4273 
4274         return err;
4275 }
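
     /*
      * Editor's note (usage is illustrative): from user space this file
      * takes an ordinary hex cpumask, e.g.
      *
      *     # echo 3 > tracing_cpumask     (trace CPUs 0 and 1 only)
      *
      * Bits being cleared bump the per-cpu "disabled" counter and switch
      * recording off with ring_buffer_record_disable_cpu(); bits being set
      * do the reverse.  No buffer is resized or freed, events are simply
      * not recorded on the excluded CPUs.
      */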
4276 
4277 static const struct file_operations tracing_cpumask_fops = {
4278         .open           = tracing_open_generic_tr,
4279         .read           = tracing_cpumask_read,
4280         .write          = tracing_cpumask_write,
4281         .release        = tracing_release_generic_tr,
4282         .llseek         = generic_file_llseek,
4283 };
4284 
4285 static int tracing_trace_options_show(struct seq_file *m, void *v)
4286 {
4287         struct tracer_opt *trace_opts;
4288         struct trace_array *tr = m->private;
4289         u32 tracer_flags;
4290         int i;
4291 
4292         mutex_lock(&trace_types_lock);
4293         tracer_flags = tr->current_trace->flags->val;
4294         trace_opts = tr->current_trace->flags->opts;
4295 
4296         for (i = 0; trace_options[i]; i++) {
4297                 if (tr->trace_flags & (1 << i))
4298                         seq_printf(m, "%s\n", trace_options[i]);
4299                 else
4300                         seq_printf(m, "no%s\n", trace_options[i]);
4301         }
4302 
4303         for (i = 0; trace_opts[i].name; i++) {
4304                 if (tracer_flags & trace_opts[i].bit)
4305                         seq_printf(m, "%s\n", trace_opts[i].name);
4306                 else
4307                         seq_printf(m, "no%s\n", trace_opts[i].name);
4308         }
4309         mutex_unlock(&trace_types_lock);
4310 
4311         return 0;
4312 }
4313 
4314 static int __set_tracer_option(struct trace_array *tr,
4315                                struct tracer_flags *tracer_flags,
4316                                struct tracer_opt *opts, int neg)
4317 {
4318         struct tracer *trace = tracer_flags->trace;
4319         int ret;
4320 
4321         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4322         if (ret)
4323                 return ret;
4324 
4325         if (neg)
4326                 tracer_flags->val &= ~opts->bit;
4327         else
4328                 tracer_flags->val |= opts->bit;
4329         return 0;
4330 }
4331 
4332 /* Try to assign a tracer specific option */
4333 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4334 {
4335         struct tracer *trace = tr->current_trace;
4336         struct tracer_flags *tracer_flags = trace->flags;
4337         struct tracer_opt *opts = NULL;
4338         int i;
4339 
4340         for (i = 0; tracer_flags->opts[i].name; i++) {
4341                 opts = &tracer_flags->opts[i];
4342 
4343                 if (strcmp(cmp, opts->name) == 0)
4344                         return __set_tracer_option(tr, trace->flags, opts, neg);
4345         }
4346 
4347         return -EINVAL;
4348 }
4349 
4350 /* Some tracers require overwrite to stay enabled */
4351 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4352 {
4353         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4354                 return -1;
4355 
4356         return 0;
4357 }
4358 
4359 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4360 {
4361         /* do nothing if flag is already set */
4362         if (!!(tr->trace_flags & mask) == !!enabled)
4363                 return 0;
4364 
4365         /* Give the tracer a chance to approve the change */
4366         if (tr->current_trace->flag_changed)
4367                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4368                         return -EINVAL;
4369 
4370         if (enabled)
4371                 tr->trace_flags |= mask;
4372         else
4373                 tr->trace_flags &= ~mask;
4374 
4375         if (mask == TRACE_ITER_RECORD_CMD)
4376                 trace_event_enable_cmd_record(enabled);
4377 
4378         if (mask == TRACE_ITER_RECORD_TGID) {
4379                 if (!tgid_map)
4380                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4381                                            sizeof(*tgid_map),
4382                                            GFP_KERNEL);
4383                 if (!tgid_map) {
4384                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4385                         return -ENOMEM;
4386                 }
4387 
4388                 trace_event_enable_tgid_record(enabled);
4389         }
4390 
4391         if (mask == TRACE_ITER_EVENT_FORK)
4392                 trace_event_follow_fork(tr, enabled);
4393 
4394         if (mask == TRACE_ITER_FUNC_FORK)
4395                 ftrace_pid_follow_fork(tr, enabled);
4396 
4397         if (mask == TRACE_ITER_OVERWRITE) {
4398                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4399 #ifdef CONFIG_TRACER_MAX_TRACE
4400                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4401 #endif
4402         }
4403 
4404         if (mask == TRACE_ITER_PRINTK) {
4405                 trace_printk_start_stop_comm(enabled);
4406                 trace_printk_control(enabled);
4407         }
4408 
4409         return 0;
4410 }
4411 
4412 static int trace_set_options(struct trace_array *tr, char *option)
4413 {
4414         char *cmp;
4415         int neg = 0;
4416         int ret;
4417         size_t orig_len = strlen(option);
4418         int len;
4419 
4420         cmp = strstrip(option);
4421 
4422         len = str_has_prefix(cmp, "no");
4423         if (len)
4424                 neg = 1;
4425 
4426         cmp += len;
4427 
4428         mutex_lock(&trace_types_lock);
4429 
4430         ret = match_string(trace_options, -1, cmp);
4431         /* If no option could be set, test the specific tracer options */
4432         if (ret < 0)
4433                 ret = set_tracer_option(tr, cmp, neg);
4434         else
4435                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4436 
4437         mutex_unlock(&trace_types_lock);
4438 
4439         /*
4440          * If the first trailing whitespace is replaced with '\0' by strstrip,
4441          * turn it back into a space.
4442          */
4443         if (orig_len > strlen(option))
4444                 option[strlen(option)] = ' ';
4445 
4446         return ret;
4447 }
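
     /*
      * Editor's sketch of the accepted syntax (the option names are examples
      * and depend on the kernel configuration and the current tracer):
      *
      *     # echo print-parent     > trace_options    (set a core flag)
      *     # echo noprint-parent   > trace_options    (clear it again)
      *     # echo nofuncgraph-irqs > trace_options    (tracer-specific option)
      *
      * A leading "no" clears the named flag; names that are not core options
      * are tried against the current tracer's private options before
      * -EINVAL is returned.
      */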
4448 
4449 static void __init apply_trace_boot_options(void)
4450 {
4451         char *buf = trace_boot_options_buf;
4452         char *option;
4453 
4454         while (true) {
4455                 option = strsep(&buf, ",");
4456 
4457                 if (!option)
4458                         break;
4459 
4460                 if (*option)
4461                         trace_set_options(&global_trace, option);
4462 
4463                 /* Put back the comma to allow this to be called again */
4464                 if (buf)
4465                         *(buf - 1) = ',';
4466         }
4467 }
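
     /*
      * Editor's note: trace_boot_options_buf is filled from the
      * trace_options= kernel command line parameter, so something like
      * (illustrative)
      *
      *     trace_options=sym-addr,noprint-parent
      *
      * is applied flag by flag here during early boot, through the same
      * trace_set_options() path used by writes to the trace_options file.
      */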
4468 
4469 static ssize_t
4470 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4471                         size_t cnt, loff_t *ppos)
4472 {
4473         struct seq_file *m = filp->private_data;
4474         struct trace_array *tr = m->private;
4475         char buf[64];
4476         int ret;
4477 
4478         if (cnt >= sizeof(buf))
4479                 return -EINVAL;
4480 
4481         if (copy_from_user(buf, ubuf, cnt))
4482                 return -EFAULT;
4483 
4484         buf[cnt] = 0;
4485 
4486         ret = trace_set_options(tr, buf);
4487         if (ret < 0)
4488                 return ret;
4489 
4490         *ppos += cnt;
4491 
4492         return cnt;
4493 }
4494 
4495 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4496 {
4497         struct trace_array *tr = inode->i_private;
4498         int ret;
4499 
4500         if (tracing_disabled)
4501                 return -ENODEV;
4502 
4503         if (trace_array_get(tr) < 0)
4504                 return -ENODEV;
4505 
4506         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4507         if (ret < 0)
4508                 trace_array_put(tr);
4509 
4510         return ret;
4511 }
4512 
4513 static const struct file_operations tracing_iter_fops = {
4514         .open           = tracing_trace_options_open,
4515         .read           = seq_read,
4516         .llseek         = seq_lseek,
4517         .release        = tracing_single_release_tr,
4518         .write          = tracing_trace_options_write,
4519 };
4520 
4521 static const char readme_msg[] =
4522         "tracing mini-HOWTO:\n\n"
4523         "# echo 0 > tracing_on : quick way to disable tracing\n"
4524         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4525         " Important files:\n"
4526         "  trace\t\t\t- The static contents of the buffer\n"
4527         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4528         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4529         "  current_tracer\t- function and latency tracers\n"
4530         "  available_tracers\t- list of configured tracers for current_tracer\n"
4531         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4532         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4533         "  trace_clock\t\t- change the clock used to order events\n"
4534         "       local:   Per cpu clock but may not be synced across CPUs\n"
4535         "      global:   Synced across CPUs but slows tracing down.\n"
4536         "     counter:   Not a clock, but just an increment\n"
4537         "      uptime:   Jiffy counter from time of boot\n"
4538         "        perf:   Same clock that perf events use\n"
4539 #ifdef CONFIG_X86_64
4540         "     x86-tsc:   TSC cycle counter\n"
4541 #endif
4542         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4543         "       delta:   Delta difference against a buffer-wide timestamp\n"
4544         "    absolute:   Absolute (standalone) timestamp\n"
4545         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4546         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4547         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4548         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4549         "\t\t\t  Remove sub-buffer with rmdir\n"
4550         "  trace_options\t\t- Set format or modify how tracing happens\n"
4551         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
4552         "\t\t\t  option name\n"
4553         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4554 #ifdef CONFIG_DYNAMIC_FTRACE
4555         "\n  available_filter_functions - list of functions that can be filtered on\n"
4556         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4557         "\t\t\t  functions\n"
4558         "\t     accepts: func_full_name or glob-matching-pattern\n"
4559         "\t     modules: Can select a group via module\n"
4560         "\t      Format: :mod:<module-name>\n"
4561         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4562         "\t    triggers: a command to perform when function is hit\n"
4563         "\t      Format: <function>:<trigger>[:count]\n"
4564         "\t     trigger: traceon, traceoff\n"
4565         "\t\t      enable_event:<system>:<event>\n"
4566         "\t\t      disable_event:<system>:<event>\n"
4567 #ifdef CONFIG_STACKTRACE
4568         "\t\t      stacktrace\n"
4569 #endif
4570 #ifdef CONFIG_TRACER_SNAPSHOT
4571         "\t\t      snapshot\n"
4572 #endif
4573         "\t\t      dump\n"
4574         "\t\t      cpudump\n"
4575         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4576         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4577         "\t     The first one will disable tracing every time do_fault is hit\n"
4578         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4579         "\t       The first time do trap is hit and it disables tracing, the\n"
4580         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4581         "\t       the counter will not decrement. It only decrements when the\n"
4582         "\t       trigger did work\n"
4583         "\t     To remove trigger without count:\n"
4584         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4585         "\t     To remove trigger with a count:\n"
4586         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4587         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4588         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4589         "\t    modules: Can select a group via module command :mod:\n"
4590         "\t    Does not accept triggers\n"
4591 #endif /* CONFIG_DYNAMIC_FTRACE */
4592 #ifdef CONFIG_FUNCTION_TRACER
4593         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4594         "\t\t    (function)\n"
4595 #endif
4596 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4597         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4598         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4599         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4600 #endif
4601 #ifdef CONFIG_TRACER_SNAPSHOT
4602         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4603         "\t\t\t  snapshot buffer. Read the contents for more\n"
4604         "\t\t\t  information\n"
4605 #endif
4606 #ifdef CONFIG_STACK_TRACER
4607         "  stack_trace\t\t- Shows the max stack trace when active\n"
4608         "  stack_max_size\t- Shows current max stack size that was traced\n"
4609         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4610         "\t\t\t  new trace)\n"
4611 #ifdef CONFIG_DYNAMIC_FTRACE
4612         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4613         "\t\t\t  traces\n"
4614 #endif
4615 #endif /* CONFIG_STACK_TRACER */
4616 #ifdef CONFIG_DYNAMIC_EVENTS
4617         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4618         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4619 #endif
4620 #ifdef CONFIG_KPROBE_EVENTS
4621         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4622         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4623 #endif
4624 #ifdef CONFIG_UPROBE_EVENTS
4625         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4626         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4627 #endif
4628 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4629         "\t  accepts: event-definitions (one definition per line)\n"
4630         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4631         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4632 #ifdef CONFIG_HIST_TRIGGERS
4633         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4634 #endif
4635         "\t           -:[<group>/]<event>\n"
4636 #ifdef CONFIG_KPROBE_EVENTS
4637         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4638   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4639 #endif
4640 #ifdef CONFIG_UPROBE_EVENTS
4641   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4642 #endif
4643         "\t     args: <name>=fetcharg[:type]\n"
4644         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4645 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4646         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4647 #else
4648         "\t           $stack<index>, $stack, $retval, $comm\n"
4649 #endif
4650         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4651         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4652         "\t           <type>\\[<array-size>\\]\n"
4653 #ifdef CONFIG_HIST_TRIGGERS
4654         "\t    field: <stype> <name>;\n"
4655         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4656         "\t           [unsigned] char/int/long\n"
4657 #endif
4658 #endif
4659         "  events/\t\t- Directory containing all trace event subsystems:\n"
4660         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4661         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4662         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4663         "\t\t\t  events\n"
4664         "      filter\t\t- If set, only events passing filter are traced\n"
4665         "  events/<system>/<event>/\t- Directory containing control files for\n"
4666         "\t\t\t  <event>:\n"
4667         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4668         "      filter\t\t- If set, only events passing filter are traced\n"
4669         "      trigger\t\t- If set, a command to perform when event is hit\n"
4670         "\t    Format: <trigger>[:count][if <filter>]\n"
4671         "\t   trigger: traceon, traceoff\n"
4672         "\t            enable_event:<system>:<event>\n"
4673         "\t            disable_event:<system>:<event>\n"
4674 #ifdef CONFIG_HIST_TRIGGERS
4675         "\t            enable_hist:<system>:<event>\n"
4676         "\t            disable_hist:<system>:<event>\n"
4677 #endif
4678 #ifdef CONFIG_STACKTRACE
4679         "\t\t    stacktrace\n"
4680 #endif
4681 #ifdef CONFIG_TRACER_SNAPSHOT
4682         "\t\t    snapshot\n"
4683 #endif
4684 #ifdef CONFIG_HIST_TRIGGERS
4685         "\t\t    hist (see below)\n"
4686 #endif
4687         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4688         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4689         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4690         "\t                  events/block/block_unplug/trigger\n"
4691         "\t   The first disables tracing every time block_unplug is hit.\n"
4692         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4693         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4694         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4695         "\t   Like function triggers, the counter is only decremented if it\n"
4696         "\t    enabled or disabled tracing.\n"
4697         "\t   To remove a trigger without a count:\n"
4698         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4699         "\t   To remove a trigger with a count:\n"
4700         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4701         "\t   Filters can be ignored when removing a trigger.\n"
4702 #ifdef CONFIG_HIST_TRIGGERS
4703         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4704         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4705         "\t            [:values=<field1[,field2,...]>]\n"
4706         "\t            [:sort=<field1[,field2,...]>]\n"
4707         "\t            [:size=#entries]\n"
4708         "\t            [:pause][:continue][:clear]\n"
4709         "\t            [:name=histname1]\n"
4710         "\t            [if <filter>]\n\n"
4711         "\t    When a matching event is hit, an entry is added to a hash\n"
4712         "\t    table using the key(s) and value(s) named, and the value of a\n"
4713         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4714         "\t    correspond to fields in the event's format description.  Keys\n"
4715         "\t    can be any field, or the special string 'stacktrace'.\n"
4716         "\t    Compound keys consisting of up to two fields can be specified\n"
4717         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4718         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4719         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4720         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4721         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4722         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4723         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4724         "\t    its histogram data will be shared with other triggers of the\n"
4725         "\t    same name, and trigger hits will update this common data.\n\n"
4726         "\t    Reading the 'hist' file for the event will dump the hash\n"
4727         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4728         "\t    triggers attached to an event, there will be a table for each\n"
4729         "\t    trigger in the output.  The table displayed for a named\n"
4730         "\t    trigger will be the same as any other instance having the\n"
4731         "\t    same name.  The default format used to display a given field\n"
4732         "\t    can be modified by appending any of the following modifiers\n"
4733         "\t    to the field name, as applicable:\n\n"
4734         "\t            .hex        display a number as a hex value\n"
4735         "\t            .sym        display an address as a symbol\n"
4736         "\t            .sym-offset display an address as a symbol and offset\n"
4737         "\t            .execname   display a common_pid as a program name\n"
4738         "\t            .syscall    display a syscall id as a syscall name\n"
4739         "\t            .log2       display log2 value rather than raw number\n"
4740         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4741         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4742         "\t    trigger or to start a hist trigger but not log any events\n"
4743         "\t    until told to do so.  'continue' can be used to start or\n"
4744         "\t    restart a paused hist trigger.\n\n"
4745         "\t    The 'clear' parameter will clear the contents of a running\n"
4746         "\t    hist trigger and leave its current paused/active state\n"
4747         "\t    unchanged.\n\n"
4748         "\t    The enable_hist and disable_hist triggers can be used to\n"
4749         "\t    have one event conditionally start and stop another event's\n"
4750         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4751         "\t    the enable_event and disable_event triggers.\n"
4752 #endif
4753 ;
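/*
 * Editorial example (not part of the original source): a concrete hist
 * trigger matching the syntax documented in readme_msg above, assuming a
 * CONFIG_HIST_TRIGGERS kernel, the usual tracefs mount point, and the
 * kmem:kmalloc event with its bytes_req field (illustrative only):
 *
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending if bytes_req > 256' \
 *       > /sys/kernel/tracing/events/kmem/kmalloc/trigger
 *   cat /sys/kernel/tracing/events/kmem/kmalloc/hist
 *
 * Reading the 'hist' file dumps the aggregated table as described above.
 */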
4754 
4755 static ssize_t
4756 tracing_readme_read(struct file *filp, char __user *ubuf,
4757                        size_t cnt, loff_t *ppos)
4758 {
4759         return simple_read_from_buffer(ubuf, cnt, ppos,
4760                                         readme_msg, strlen(readme_msg));
4761 }
4762 
4763 static const struct file_operations tracing_readme_fops = {
4764         .open           = tracing_open_generic,
4765         .read           = tracing_readme_read,
4766         .llseek         = generic_file_llseek,
4767 };
4768 
4769 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4770 {
4771         int *ptr = v;
4772 
4773         if (*pos || m->count)
4774                 ptr++;
4775 
4776         (*pos)++;
4777 
4778         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4779                 if (trace_find_tgid(*ptr))
4780                         return ptr;
4781         }
4782 
4783         return NULL;
4784 }
4785 
4786 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4787 {
4788         void *v;
4789         loff_t l = 0;
4790 
4791         if (!tgid_map)
4792                 return NULL;
4793 
4794         v = &tgid_map[0];
4795         while (l <= *pos) {
4796                 v = saved_tgids_next(m, v, &l);
4797                 if (!v)
4798                         return NULL;
4799         }
4800 
4801         return v;
4802 }
4803 
4804 static void saved_tgids_stop(struct seq_file *m, void *v)
4805 {
4806 }
4807 
4808 static int saved_tgids_show(struct seq_file *m, void *v)
4809 {
4810         int pid = (int *)v - tgid_map;
4811 
4812         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4813         return 0;
4814 }
4815 
4816 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4817         .start          = saved_tgids_start,
4818         .stop           = saved_tgids_stop,
4819         .next           = saved_tgids_next,
4820         .show           = saved_tgids_show,
4821 };
4822 
4823 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4824 {
4825         if (tracing_disabled)
4826                 return -ENODEV;
4827 
4828         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4829 }
4830 
4831 
4832 static const struct file_operations tracing_saved_tgids_fops = {
4833         .open           = tracing_saved_tgids_open,
4834         .read           = seq_read,
4835         .llseek         = seq_lseek,
4836         .release        = seq_release,
4837 };
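/*
 * Editorial note (not part of the original source): saved_tgids_start/
 * next/stop/show above, and the saved_cmdlines_* callbacks below, follow
 * the usual seq_file contract. Roughly, the seq_file core does:
 *
 *   v = op->start(m, &pos);
 *   while (v) {
 *           op->show(m, v);
 *           v = op->next(m, v, &pos);
 *   }
 *   op->stop(m, v);
 *
 * and ->start() may be called again at the saved position on the next
 * read() of the file.
 */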
4838 
4839 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4840 {
4841         unsigned int *ptr = v;
4842 
4843         if (*pos || m->count)
4844                 ptr++;
4845 
4846         (*pos)++;
4847 
4848         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4849              ptr++) {
4850                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4851                         continue;
4852 
4853                 return ptr;
4854         }
4855 
4856         return NULL;
4857 }
4858 
4859 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4860 {
4861         void *v;
4862         loff_t l = 0;
4863 
4864         preempt_disable();
4865         arch_spin_lock(&trace_cmdline_lock);
4866 
4867         v = &savedcmd->map_cmdline_to_pid[0];
4868         while (l <= *pos) {
4869                 v = saved_cmdlines_next(m, v, &l);
4870                 if (!v)
4871                         return NULL;
4872         }
4873 
4874         return v;
4875 }
4876 
4877 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4878 {
4879         arch_spin_unlock(&trace_cmdline_lock);
4880         preempt_enable();
4881 }
4882 
4883 static int saved_cmdlines_show(struct seq_file *m, void *v)
4884 {
4885         char buf[TASK_COMM_LEN];
4886         unsigned int *pid = v;
4887 
4888         __trace_find_cmdline(*pid, buf);
4889         seq_printf(m, "%d %s\n", *pid, buf);
4890         return 0;
4891 }
4892 
4893 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4894         .start          = saved_cmdlines_start,
4895         .next           = saved_cmdlines_next,
4896         .stop           = saved_cmdlines_stop,
4897         .show           = saved_cmdlines_show,
4898 };
4899 
4900 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4901 {
4902         if (tracing_disabled)
4903                 return -ENODEV;
4904 
4905         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4906 }
4907 
4908 static const struct file_operations tracing_saved_cmdlines_fops = {
4909         .open           = tracing_saved_cmdlines_open,
4910         .read           = seq_read,
4911         .llseek         = seq_lseek,
4912         .release        = seq_release,
4913 };
4914 
4915 static ssize_t
4916 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4917                                  size_t cnt, loff_t *ppos)
4918 {
4919         char buf[64];
4920         int r;
4921 
4922         arch_spin_lock(&trace_cmdline_lock);
4923         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4924         arch_spin_unlock(&trace_cmdline_lock);
4925 
4926         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4927 }
4928 
4929 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4930 {
4931         kfree(s->saved_cmdlines);
4932         kfree(s->map_cmdline_to_pid);
4933         kfree(s);
4934 }
4935 
4936 static int tracing_resize_saved_cmdlines(unsigned int val)
4937 {
4938         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4939 
4940         s = kmalloc(sizeof(*s), GFP_KERNEL);
4941         if (!s)
4942                 return -ENOMEM;
4943 
4944         if (allocate_cmdlines_buffer(val, s) < 0) {
4945                 kfree(s);
4946                 return -ENOMEM;
4947         }
4948 
4949         arch_spin_lock(&trace_cmdline_lock);
4950         savedcmd_temp = savedcmd;
4951         savedcmd = s;
4952         arch_spin_unlock(&trace_cmdline_lock);
4953         free_saved_cmdlines_buffer(savedcmd_temp);
4954 
4955         return 0;
4956 }
4957 
4958 static ssize_t
4959 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4960                                   size_t cnt, loff_t *ppos)
4961 {
4962         unsigned long val;
4963         int ret;
4964 
4965         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4966         if (ret)
4967                 return ret;
4968 
4969         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4970         if (!val || val > PID_MAX_DEFAULT)
4971                 return -EINVAL;
4972 
4973         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4974         if (ret < 0)
4975                 return ret;
4976 
4977         *ppos += cnt;
4978 
4979         return cnt;
4980 }
4981 
4982 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4983         .open           = tracing_open_generic,
4984         .read           = tracing_saved_cmdlines_size_read,
4985         .write          = tracing_saved_cmdlines_size_write,
4986 };
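/*
 * Editorial note (not part of the original source): the two handlers above
 * back the tracefs "saved_cmdlines_size" file (created elsewhere in this
 * file); the value is a plain decimal entry count, e.g., assuming the
 * usual tracefs mount point:
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL; a valid
 * write reallocates savedcmd via tracing_resize_saved_cmdlines().
 */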
4987 
4988 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4989 static union trace_eval_map_item *
4990 update_eval_map(union trace_eval_map_item *ptr)
4991 {
4992         if (!ptr->map.eval_string) {
4993                 if (ptr->tail.next) {
4994                         ptr = ptr->tail.next;
4995                         /* Set ptr to the next real item (skip head) */
4996                         ptr++;
4997                 } else
4998                         return NULL;
4999         }
5000         return ptr;
5001 }
5002 
5003 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5004 {
5005         union trace_eval_map_item *ptr = v;
5006 
5007         /*
5008          * Paranoid! If ptr points to end, we don't want to increment past it.
5009          * This really should never happen.
5010          */
5011         ptr = update_eval_map(ptr);
5012         if (WARN_ON_ONCE(!ptr))
5013                 return NULL;
5014 
5015         ptr++;
5016 
5017         (*pos)++;
5018 
5019         ptr = update_eval_map(ptr);
5020 
5021         return ptr;
5022 }
5023 
5024 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5025 {
5026         union trace_eval_map_item *v;
5027         loff_t l = 0;
5028 
5029         mutex_lock(&trace_eval_mutex);
5030 
5031         v = trace_eval_maps;
5032         if (v)
5033                 v++;
5034 
5035         while (v && l < *pos) {
5036                 v = eval_map_next(m, v, &l);
5037         }
5038 
5039         return v;
5040 }
5041 
5042 static void eval_map_stop(struct seq_file *m, void *v)
5043 {
5044         mutex_unlock(&trace_eval_mutex);
5045 }
5046 
5047 static int eval_map_show(struct seq_file *m, void *v)
5048 {
5049         union trace_eval_map_item *ptr = v;
5050 
5051         seq_printf(m, "%s %ld (%s)\n",
5052                    ptr->map.eval_string, ptr->map.eval_value,
5053                    ptr->map.system);
5054 
5055         return 0;
5056 }
5057 
5058 static const struct seq_operations tracing_eval_map_seq_ops = {
5059         .start          = eval_map_start,
5060         .next           = eval_map_next,
5061         .stop           = eval_map_stop,
5062         .show           = eval_map_show,
5063 };
5064 
5065 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5066 {
5067         if (tracing_disabled)
5068                 return -ENODEV;
5069 
5070         return seq_open(filp, &tracing_eval_map_seq_ops);
5071 }
5072 
5073 static const struct file_operations tracing_eval_map_fops = {
5074         .open           = tracing_eval_map_open,
5075         .read           = seq_read,
5076         .llseek         = seq_lseek,
5077         .release        = seq_release,
5078 };
5079 
5080 static inline union trace_eval_map_item *
5081 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5082 {
5083         /* Return tail of array given the head */
5084         return ptr + ptr->head.length + 1;
5085 }
5086 
5087 static void
5088 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5089                            int len)
5090 {
5091         struct trace_eval_map **stop;
5092         struct trace_eval_map **map;
5093         union trace_eval_map_item *map_array;
5094         union trace_eval_map_item *ptr;
5095 
5096         stop = start + len;
5097 
5098         /*
5099          * The trace_eval_maps contains the map plus a head and tail item,
5100          * where the head holds the module and length of array, and the
5101          * tail holds a pointer to the next list.
5102          */
5103         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5104         if (!map_array) {
5105                 pr_warn("Unable to allocate trace eval mapping\n");
5106                 return;
5107         }
5108 
5109         mutex_lock(&trace_eval_mutex);
5110 
5111         if (!trace_eval_maps)
5112                 trace_eval_maps = map_array;
5113         else {
5114                 ptr = trace_eval_maps;
5115                 for (;;) {
5116                         ptr = trace_eval_jmp_to_tail(ptr);
5117                         if (!ptr->tail.next)
5118                                 break;
5119                         ptr = ptr->tail.next;
5120 
5121                 }
5122                 ptr->tail.next = map_array;
5123         }
5124         map_array->head.mod = mod;
5125         map_array->head.length = len;
5126         map_array++;
5127 
5128         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5129                 map_array->map = **map;
5130                 map_array++;
5131         }
5132         memset(map_array, 0, sizeof(*map_array));
5133 
5134         mutex_unlock(&trace_eval_mutex);
5135 }
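/*
 * Editorial sketch (not part of the original source): the block built by
 * trace_insert_eval_map_file() above is laid out as
 *
 *   [ head ][ map 0 ][ map 1 ] ... [ map len-1 ][ tail ]
 *
 * where head.mod/head.length describe the block, the final memset() zeroes
 * the tail item whose tail.next later links to the next module's block,
 * and trace_eval_jmp_to_tail() jumps from the head over the map entries
 * (ptr + head.length + 1) to reach that tail.
 */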
5136 
5137 static void trace_create_eval_file(struct dentry *d_tracer)
5138 {
5139         trace_create_file("eval_map", 0444, d_tracer,
5140                           NULL, &tracing_eval_map_fops);
5141 }
5142 
5143 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5144 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5145 static inline void trace_insert_eval_map_file(struct module *mod,
5146                               struct trace_eval_map **start, int len) { }
5147 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5148 
5149 static void trace_insert_eval_map(struct module *mod,
5150                                   struct trace_eval_map **start, int len)
5151 {
5152         struct trace_eval_map **map;
5153 
5154         if (len <= 0)
5155                 return;
5156 
5157         map = start;
5158 
5159         trace_event_eval_update(map, len);
5160 
5161         trace_insert_eval_map_file(mod, start, len);
5162 }
5163 
5164 static ssize_t
5165 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5166                        size_t cnt, loff_t *ppos)
5167 {
5168         struct trace_array *tr = filp->private_data;
5169         char buf[MAX_TRACER_SIZE+2];
5170         int r;
5171 
5172         mutex_lock(&trace_types_lock);
5173         r = sprintf(buf, "%s\n", tr->current_trace->name);
5174         mutex_unlock(&trace_types_lock);
5175 
5176         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5177 }
5178 
5179 int tracer_init(struct tracer *t, struct trace_array *tr)
5180 {
5181         tracing_reset_online_cpus(&tr->trace_buffer);
5182         return t->init(tr);
5183 }
5184 
5185 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5186 {
5187         int cpu;
5188 
5189         for_each_tracing_cpu(cpu)
5190                 per_cpu_ptr(buf->data, cpu)->entries = val;
5191 }
5192 
5193 #ifdef CONFIG_TRACER_MAX_TRACE
5194 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5195 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5196                                         struct trace_buffer *size_buf, int cpu_id)
5197 {
5198         int cpu, ret = 0;
5199 
5200         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5201                 for_each_tracing_cpu(cpu) {
5202                         ret = ring_buffer_resize(trace_buf->buffer,
5203                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5204                         if (ret < 0)
5205                                 break;
5206                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5207                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5208                 }
5209         } else {
5210                 ret = ring_buffer_resize(trace_buf->buffer,
5211                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5212                 if (ret == 0)
5213                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5214                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5215         }
5216 
5217         return ret;
5218 }
5219 #endif /* CONFIG_TRACER_MAX_TRACE */
5220 
5221 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5222                                         unsigned long size, int cpu)
5223 {
5224         int ret;
5225 
5226         /*
5227          * If kernel or user changes the size of the ring buffer
5228          * we use the size that was given, and we can forget about
5229          * expanding it later.
5230          */
5231         ring_buffer_expanded = true;
5232 
5233         /* May be called before buffers are initialized */
5234         if (!tr->trace_buffer.buffer)
5235                 return 0;
5236 
5237         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5238         if (ret < 0)
5239                 return ret;
5240 
5241 #ifdef CONFIG_TRACER_MAX_TRACE
5242         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5243             !tr->current_trace->use_max_tr)
5244                 goto out;
5245 
5246         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5247         if (ret < 0) {
5248                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5249                                                      &tr->trace_buffer, cpu);
5250                 if (r < 0) {
5251                         /*
5252                         /*
5253                          * AARGH! We are left with a max buffer of a different size!!!!
5254                          * The max buffer is our "snapshot" buffer.
5255                          * When a tracer needs a snapshot (one of the
5256                          * latency tracers), it swaps the max buffer
5257                          * with the saved snapshot. We succeeded in updating
5258                          * the size of the main buffer, but failed to
5259                          * update the size of the max buffer. But when we tried
5260                          * to reset the main buffer to the original size, we
5261                          * failed there too. This is very unlikely to
5262                          * happen, but if it does, warn and kill all
5263                          * tracing.
5264                          */
5265                         WARN_ON(1);
5266                         tracing_disabled = 1;
5267                 }
5268                 return ret;
5269         }
5270 
5271         if (cpu == RING_BUFFER_ALL_CPUS)
5272                 set_buffer_entries(&tr->max_buffer, size);
5273         else
5274                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5275 
5276  out:
5277 #endif /* CONFIG_TRACER_MAX_TRACE */
5278 
5279         if (cpu == RING_BUFFER_ALL_CPUS)
5280                 set_buffer_entries(&tr->trace_buffer, size);
5281         else
5282                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5283 
5284         return ret;
5285 }
5286 
5287 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5288                                           unsigned long size, int cpu_id)
5289 {
5290         int ret = size;
5291 
5292         mutex_lock(&trace_types_lock);
5293 
5294         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5295                 /* make sure this cpu is enabled in the mask */
5296                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5297                         ret = -EINVAL;
5298                         goto out;
5299                 }
5300         }
5301 
5302         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5303         if (ret < 0)
5304                 ret = -ENOMEM;
5305 
5306 out:
5307         mutex_unlock(&trace_types_lock);
5308 
5309         return ret;
5310 }
5311 
5312 
5313 /**
5314  * tracing_update_buffers - used by tracing facility to expand ring buffers
5315  *
5316  * To save memory when tracing is configured into the kernel but never
5317  * used, the ring buffers are initially set to a minimum size. Once a
5318  * user starts to use the tracing facility, the buffers need to grow to
5319  * their default size.
5320  *
5321  * This function is to be called when a tracer is about to be used.
5322  */
5323 int tracing_update_buffers(void)
5324 {
5325         int ret = 0;
5326 
5327         mutex_lock(&trace_types_lock);
5328         if (!ring_buffer_expanded)
5329                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5330                                                 RING_BUFFER_ALL_CPUS);
5331         mutex_unlock(&trace_types_lock);
5332 
5333         return ret;
5334 }
5335 
5336 struct trace_option_dentry;
5337 
5338 static void
5339 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5340 
5341 /*
5342  * Used to clear out the tracer before deletion of an instance.
5343  * Must have trace_types_lock held.
5344  */
5345 static void tracing_set_nop(struct trace_array *tr)
5346 {
5347         if (tr->current_trace == &nop_trace)
5348                 return;
5349
5350         tr->current_trace->enabled--;
5351 
5352         if (tr->current_trace->reset)
5353                 tr->current_trace->reset(tr);
5354 
5355         tr->current_trace = &nop_trace;
5356 }
5357 
5358 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5359 {
5360         /* Only enable if the directory has been created already. */
5361         if (!tr->dir)
5362                 return;
5363 
5364         create_trace_option_files(tr, t);
5365 }
5366 
5367 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5368 {
5369         struct tracer *t;
5370 #ifdef CONFIG_TRACER_MAX_TRACE
5371         bool had_max_tr;
5372 #endif
5373         int ret = 0;
5374 
5375         mutex_lock(&trace_types_lock);
5376 
5377         if (!ring_buffer_expanded) {
5378                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5379                                                 RING_BUFFER_ALL_CPUS);
5380                 if (ret < 0)
5381                         goto out;
5382                 ret = 0;
5383         }
5384 
5385         for (t = trace_types; t; t = t->next) {
5386                 if (strcmp(t->name, buf) == 0)
5387                         break;
5388         }
5389         if (!t) {
5390                 ret = -EINVAL;
5391                 goto out;
5392         }
5393         if (t == tr->current_trace)
5394                 goto out;
5395 
5396         /* Some tracers won't work on kernel command line */
5397         if (system_state < SYSTEM_RUNNING && t->noboot) {
5398                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5399                         t->name);
5400                 goto out;
5401         }
5402 
5403         /* Some tracers are only allowed for the top level buffer */
5404         if (!trace_ok_for_array(t, tr)) {
5405                 ret = -EINVAL;
5406                 goto out;
5407         }
5408 
5409         /* If trace pipe files are being read, we can't change the tracer */
5410         if (tr->current_trace->ref) {
5411                 ret = -EBUSY;
5412                 goto out;
5413         }
5414 
5415         trace_branch_disable();
5416 
5417         tr->current_trace->enabled--;
5418 
5419         if (tr->current_trace->reset)
5420                 tr->current_trace->reset(tr);
5421 
5422         /* Current trace needs to be nop_trace before synchronize_rcu */
5423         tr->current_trace = &nop_trace;
5424 
5425 #ifdef CONFIG_TRACER_MAX_TRACE
5426         had_max_tr = tr->allocated_snapshot;
5427 
5428         if (had_max_tr && !t->use_max_tr) {
5429                 /*
5430                  * We need to make sure that the update_max_tr sees that
5431                  * current_trace changed to nop_trace to keep it from
5432                  * swapping the buffers after we resize it.
5433                  * The update_max_tr is called with interrupts disabled
5434                  * so a synchronize_rcu() is sufficient.
5435                  */
5436                 synchronize_rcu();
5437                 free_snapshot(tr);
5438         }
5439 #endif
5440 
5441 #ifdef CONFIG_TRACER_MAX_TRACE
5442         if (t->use_max_tr && !had_max_tr) {
5443                 ret = tracing_alloc_snapshot_instance(tr);
5444                 if (ret < 0)
5445                         goto out;
5446         }
5447 #endif
5448 
5449         if (t->init) {
5450                 ret = tracer_init(t, tr);
5451                 if (ret)
5452                         goto out;
5453         }
5454 
5455         tr->current_trace = t;
5456         tr->current_trace->enabled++;
5457         trace_branch_enable(tr);
5458  out:
5459         mutex_unlock(&trace_types_lock);
5460 
5461         return ret;
5462 }
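/*
 * Editorial note (not part of the original source): tracing_set_tracer()
 * is what a write to the "current_tracer" file ends up calling (via
 * tracing_set_trace_write() below), e.g., assuming the usual tracefs
 * mount point and a kernel with the function tracer built in:
 *
 *   echo function > /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 *
 * Unknown tracer names fail with -EINVAL, and the switch is refused with
 * -EBUSY while trace_pipe readers still hold a reference (->ref above).
 */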
5463 
5464 static ssize_t
5465 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5466                         size_t cnt, loff_t *ppos)
5467 {
5468         struct trace_array *tr = filp->private_data;
5469         char buf[MAX_TRACER_SIZE+1];
5470         int i;
5471         size_t ret;
5472         int err;
5473 
5474         ret = cnt;
5475 
5476         if (cnt > MAX_TRACER_SIZE)
5477                 cnt = MAX_TRACER_SIZE;
5478 
5479         if (copy_from_user(buf, ubuf, cnt))
5480                 return -EFAULT;
5481 
5482         buf[cnt] = 0;
5483 
5484         /* strip ending whitespace. */
5485         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5486                 buf[i] = 0;
5487 
5488         err = tracing_set_tracer(tr, buf);
5489         if (err)
5490                 return err;
5491 
5492         *ppos += ret;
5493 
5494         return ret;
5495 }
5496 
5497 static ssize_t
5498 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5499                    size_t cnt, loff_t *ppos)
5500 {
5501         char buf[64];
5502         int r;
5503 
5504         r = snprintf(buf, sizeof(buf), "%ld\n",
5505                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5506         if (r > sizeof(buf))
5507                 r = sizeof(buf);
5508         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5509 }
5510 
5511 static ssize_t
5512 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5513                     size_t cnt, loff_t *ppos)
5514 {
5515         unsigned long val;
5516         int ret;
5517 
5518         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5519         if (ret)
5520                 return ret;
5521 
5522         *ptr = val * 1000;
5523 
5524         return cnt;
5525 }
5526 
5527 static ssize_t
5528 tracing_thresh_read(struct file *filp, char __user *ubuf,
5529                     size_t cnt, loff_t *ppos)
5530 {
5531         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5532 }
5533 
5534 static ssize_t
5535 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5536                      size_t cnt, loff_t *ppos)
5537 {
5538         struct trace_array *tr = filp->private_data;
5539         int ret;
5540 
5541         mutex_lock(&trace_types_lock);
5542         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5543         if (ret < 0)
5544                 goto out;
5545 
5546         if (tr->current_trace->update_thresh) {
5547                 ret = tr->current_trace->update_thresh(tr);
5548                 if (ret < 0)
5549                         goto out;
5550         }
5551 
5552         ret = cnt;
5553 out:
5554         mutex_unlock(&trace_types_lock);
5555 
5556         return ret;
5557 }
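/*
 * Editorial note (not part of the original source): tracing_nsecs_write()
 * takes the user's value in microseconds and stores it in nanoseconds
 * (val * 1000); tracing_nsecs_read() converts back with nsecs_to_usecs().
 * So, assuming the usual tracefs mount point,
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets a 100 usec threshold (100000 ns internally) and notifies the
 * current tracer through ->update_thresh() when it provides one.
 */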
5558 
5559 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5560 
5561 static ssize_t
5562 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5563                      size_t cnt, loff_t *ppos)
5564 {
5565         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5566 }
5567 
5568 static ssize_t
5569 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5570                       size_t cnt, loff_t *ppos)
5571 {
5572         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5573 }
5574 
5575 #endif
5576 
5577 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5578 {
5579         struct trace_array *tr = inode->i_private;
5580         struct trace_iterator *iter;
5581         int ret = 0;
5582 
5583         if (tracing_disabled)
5584                 return -ENODEV;
5585 
5586         if (trace_array_get(tr) < 0)
5587                 return -ENODEV;
5588 
5589         mutex_lock(&trace_types_lock);
5590 
5591         /* create a buffer to store the information to pass to userspace */
5592         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5593         if (!iter) {
5594                 ret = -ENOMEM;
5595                 __trace_array_put(tr);
5596                 goto out;
5597         }
5598 
5599         trace_seq_init(&iter->seq);
5600         iter->trace = tr->current_trace;
5601 
5602         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5603                 ret = -ENOMEM;
5604                 goto fail;
5605         }
5606 
5607         /* trace pipe does not show start of buffer */
5608         cpumask_setall(iter->started);
5609 
5610         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5611                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5612 
5613         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5614         if (trace_clocks[tr->clock_id].in_ns)
5615                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5616 
5617         iter->tr = tr;
5618         iter->trace_buffer = &tr->trace_buffer;
5619         iter->cpu_file = tracing_get_cpu(inode);
5620         mutex_init(&iter->mutex);
5621         filp->private_data = iter;
5622 
5623         if (iter->trace->pipe_open)
5624                 iter->trace->pipe_open(iter);
5625 
5626         nonseekable_open(inode, filp);
5627 
5628         tr->current_trace->ref++;
5629 out:
5630         mutex_unlock(&trace_types_lock);
5631         return ret;
5632 
5633 fail:
5634         kfree(iter);
5635         __trace_array_put(tr);
5636         mutex_unlock(&trace_types_lock);
5637         return ret;
5638 }
5639 
5640 static int tracing_release_pipe(struct inode *inode, struct file *file)
5641 {
5642         struct trace_iterator *iter = file->private_data;
5643         struct trace_array *tr = inode->i_private;
5644 
5645         mutex_lock(&trace_types_lock);
5646 
5647         tr->current_trace->ref--;
5648 
5649         if (iter->trace->pipe_close)
5650                 iter->trace->pipe_close(iter);
5651 
5652         mutex_unlock(&trace_types_lock);
5653 
5654         free_cpumask_var(iter->started);
5655         mutex_destroy(&iter->mutex);
5656         kfree(iter);
5657 
5658         trace_array_put(tr);
5659 
5660         return 0;
5661 }
5662 
5663 static __poll_t
5664 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5665 {
5666         struct trace_array *tr = iter->tr;
5667 
5668         /* Iterators are static; they should be either filled or empty */
5669         if (trace_buffer_iter(iter, iter->cpu_file))
5670                 return EPOLLIN | EPOLLRDNORM;
5671 
5672         if (tr->trace_flags & TRACE_ITER_BLOCK)
5673                 /*
5674                  * Always select as readable when in blocking mode
5675                  */
5676                 return EPOLLIN | EPOLLRDNORM;
5677         else
5678                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5679                                              filp, poll_table);
5680 }
5681 
5682 static __poll_t
5683 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5684 {
5685         struct trace_iterator *iter = filp->private_data;
5686 
5687         return trace_poll(iter, filp, poll_table);
5688 }
5689 
5690 /* Must be called with iter->mutex held. */
5691 static int tracing_wait_pipe(struct file *filp)
5692 {
5693         struct trace_iterator *iter = filp->private_data;
5694         int ret;
5695 
5696         while (trace_empty(iter)) {
5697 
5698                 if ((filp->f_flags & O_NONBLOCK)) {
5699                         return -EAGAIN;
5700                 }
5701 
5702                 /*
5703                  * We keep blocking until we have read something and tracing
5704                  * is then disabled. We still block if tracing is disabled
5705                  * but we have never read anything. This allows a user to
5706                  * cat this file, and then enable tracing. But after we have
5707                  * read something, we give an EOF when tracing is again disabled.
5708                  *
5709                  * iter->pos will be 0 if we haven't read anything.
5710                  */
5711                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5712                         break;
5713 
5714                 mutex_unlock(&iter->mutex);
5715 
5716                 ret = wait_on_pipe(iter, 0);
5717 
5718                 mutex_lock(&iter->mutex);
5719 
5720                 if (ret)
5721                         return ret;
5722         }
5723 
5724         return 1;
5725 }
5726 
5727 /*
5728  * Consumer reader.
5729  */
5730 static ssize_t
5731 tracing_read_pipe(struct file *filp, char __user *ubuf,
5732                   size_t cnt, loff_t *ppos)
5733 {
5734         struct trace_iterator *iter = filp->private_data;
5735         ssize_t sret;
5736 
5737         /*
5738          * Avoid more than one consumer on a single file descriptor.
5739          * This is just a matter of trace coherency; the ring buffer itself
5740          * is protected.
5741          */
5742         mutex_lock(&iter->mutex);
5743 
5744         /* return any leftover data */
5745         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5746         if (sret != -EBUSY)
5747                 goto out;
5748 
5749         trace_seq_init(&iter->seq);
5750 
5751         if (iter->trace->read) {
5752                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5753                 if (sret)
5754                         goto out;
5755         }
5756 
5757 waitagain:
5758         sret = tracing_wait_pipe(filp);
5759         if (sret <= 0)
5760                 goto out;
5761 
5762         /* stop when tracing is finished */
5763         if (trace_empty(iter)) {
5764                 sret = 0;
5765                 goto out;
5766         }
5767 
5768         if (cnt >= PAGE_SIZE)
5769                 cnt = PAGE_SIZE - 1;
5770 
5771         /* reset all but tr, trace, and overruns */
5772         memset(&iter->seq, 0,
5773                sizeof(struct trace_iterator) -
5774                offsetof(struct trace_iterator, seq));
5775         cpumask_clear(iter->started);
5776         iter->pos = -1;
5777 
5778         trace_event_read_lock();
5779         trace_access_lock(iter->cpu_file);
5780         while (trace_find_next_entry_inc(iter) != NULL) {
5781                 enum print_line_t ret;
5782                 int save_len = iter->seq.seq.len;
5783 
5784                 ret = print_trace_line(iter);
5785                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5786                         /* don't print partial lines */
5787                         iter->seq.seq.len = save_len;
5788                         break;
5789                 }
5790                 if (ret != TRACE_TYPE_NO_CONSUME)
5791                         trace_consume(iter);
5792 
5793                 if (trace_seq_used(&iter->seq) >= cnt)
5794                         break;
5795 
5796                 /*
5797                  * Setting the full flag means we reached the trace_seq buffer
5798                  * size and should have left via the partial output condition above.
5799                  * One of the trace_seq_* functions is not used properly.
5800                  */
5801                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5802                           iter->ent->type);
5803         }
5804         trace_access_unlock(iter->cpu_file);
5805         trace_event_read_unlock();
5806 
5807         /* Now copy what we have to the user */
5808         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5809         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5810                 trace_seq_init(&iter->seq);
5811 
5812         /*
5813          * If there was nothing to send to user, in spite of consuming trace
5814          * entries, go back to wait for more entries.
5815          */
5816         if (sret == -EBUSY)
5817                 goto waitagain;
5818 
5819 out:
5820         mutex_unlock(&iter->mutex);
5821 
5822         return sret;
5823 }
5824 
5825 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5826                                      unsigned int idx)
5827 {
5828         __free_page(spd->pages[idx]);
5829 }
5830 
5831 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5832         .can_merge              = 0,
5833         .confirm                = generic_pipe_buf_confirm,
5834         .release                = generic_pipe_buf_release,
5835         .steal                  = generic_pipe_buf_steal,
5836         .get                    = generic_pipe_buf_get,
5837 };
5838 
5839 static size_t
5840 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5841 {
5842         size_t count;
5843         int save_len;
5844         int ret;
5845 
5846         /* Seq buffer is page-sized, exactly what we need. */
5847         for (;;) {
5848                 save_len = iter->seq.seq.len;
5849                 ret = print_trace_line(iter);
5850 
5851                 if (trace_seq_has_overflowed(&iter->seq)) {
5852                         iter->seq.seq.len = save_len;
5853                         break;
5854                 }
5855 
5856                 /*
5857                  * This should not be hit, because it should only
5858                  * be set if the iter->seq overflowed. But check it
5859                  * anyway to be safe.
5860                  */
5861                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5862                         iter->seq.seq.len = save_len;
5863                         break;
5864                 }
5865 
5866                 count = trace_seq_used(&iter->seq) - save_len;
5867                 if (rem < count) {
5868                         rem = 0;
5869                         iter->seq.seq.len = save_len;
5870                         break;
5871                 }
5872 
5873                 if (ret != TRACE_TYPE_NO_CONSUME)
5874                         trace_consume(iter);
5875                 rem -= count;
5876                 if (!trace_find_next_entry_inc(iter))   {
5877                         rem = 0;
5878                         iter->ent = NULL;
5879                         break;
5880                 }
5881         }
5882 
5883         return rem;
5884 }
5885 
5886 static ssize_t tracing_splice_read_pipe(struct file *filp,
5887                                         loff_t *ppos,
5888                                         struct pipe_inode_info *pipe,
5889                                         size_t len,
5890                                         unsigned int flags)
5891 {
5892         struct page *pages_def[PIPE_DEF_BUFFERS];
5893         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5894         struct trace_iterator *iter = filp->private_data;
5895         struct splice_pipe_desc spd = {
5896                 .pages          = pages_def,
5897                 .partial        = partial_def,
5898                 .nr_pages       = 0, /* This gets updated below. */
5899                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5900                 .ops            = &tracing_pipe_buf_ops,
5901                 .spd_release    = tracing_spd_release_pipe,
5902         };
5903         ssize_t ret;
5904         size_t rem;
5905         unsigned int i;
5906 
5907         if (splice_grow_spd(pipe, &spd))
5908                 return -ENOMEM;
5909 
5910         mutex_lock(&iter->mutex);
5911 
5912         if (iter->trace->splice_read) {
5913                 ret = iter->trace->splice_read(iter, filp,
5914                                                ppos, pipe, len, flags);
5915                 if (ret)
5916                         goto out_err;
5917         }
5918 
5919         ret = tracing_wait_pipe(filp);
5920         if (ret <= 0)
5921                 goto out_err;
5922 
5923         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5924                 ret = -EFAULT;
5925                 goto out_err;
5926         }
5927 
5928         trace_event_read_lock();
5929         trace_access_lock(iter->cpu_file);
5930 
5931         /* Fill as many pages as possible. */
5932         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5933                 spd.pages[i] = alloc_page(GFP_KERNEL);
5934                 if (!spd.pages[i])
5935                         break;
5936 
5937                 rem = tracing_fill_pipe_page(rem, iter);
5938 
5939                 /* Copy the data into the page, so we can start over. */
5940                 ret = trace_seq_to_buffer(&iter->seq,
5941                                           page_address(spd.pages[i]),
5942                                           trace_seq_used(&iter->seq));
5943                 if (ret < 0) {
5944                         __free_page(spd.pages[i]);
5945                         break;
5946                 }
5947                 spd.partial[i].offset = 0;
5948                 spd.partial[i].len = trace_seq_used(&iter->seq);
5949 
5950                 trace_seq_init(&iter->seq);
5951         }
5952 
5953         trace_access_unlock(iter->cpu_file);
5954         trace_event_read_unlock();
5955         mutex_unlock(&iter->mutex);
5956 
5957         spd.nr_pages = i;
5958 
5959         if (i)
5960                 ret = splice_to_pipe(pipe, &spd);
5961         else
5962                 ret = 0;
5963 out:
5964         splice_shrink_spd(&spd);
5965         return ret;
5966 
5967 out_err:
5968         mutex_unlock(&iter->mutex);
5969         goto out;
5970 }
5971 
5972 static ssize_t
5973 tracing_entries_read(struct file *filp, char __user *ubuf,
5974                      size_t cnt, loff_t *ppos)
5975 {
5976         struct inode *inode = file_inode(filp);
5977         struct trace_array *tr = inode->i_private;
5978         int cpu = tracing_get_cpu(inode);
5979         char buf[64];
5980         int r = 0;
5981         ssize_t ret;
5982 
5983         mutex_lock(&trace_types_lock);
5984 
5985         if (cpu == RING_BUFFER_ALL_CPUS) {
5986                 int cpu, buf_size_same;
5987                 unsigned long size;
5988 
5989                 size = 0;
5990                 buf_size_same = 1;
5991                 /* check if all cpu sizes are same */
5992                 for_each_tracing_cpu(cpu) {
5993                         /* fill in the size from first enabled cpu */
5994                         if (size == 0)
5995                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5996                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5997                                 buf_size_same = 0;
5998                                 break;
5999                         }
6000                 }
6001 
6002                 if (buf_size_same) {
6003                         if (!ring_buffer_expanded)
6004                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6005                                             size >> 10,
6006                                             trace_buf_size >> 10);
6007                         else
6008                                 r = sprintf(buf, "%lu\n", size >> 10);
6009                 } else
6010                         r = sprintf(buf, "X\n");
6011         } else
6012                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6013 
6014         mutex_unlock(&trace_types_lock);
6015 
6016         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6017         return ret;
6018 }
6019 
6020 static ssize_t
6021 tracing_entries_write(struct file *filp, const char __user *ubuf,
6022                       size_t cnt, loff_t *ppos)
6023 {
6024         struct inode *inode = file_inode(filp);
6025         struct trace_array *tr = inode->i_private;
6026         unsigned long val;
6027         int ret;
6028 
6029         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6030         if (ret)
6031                 return ret;
6032 
6033         /* must have at least 1 entry */
6034         if (!val)
6035                 return -EINVAL;
6036 
6037         /* value is in KB */
6038         val <<= 10;
6039         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6040         if (ret < 0)
6041                 return ret;
6042 
6043         *ppos += cnt;
6044 
6045         return cnt;
6046 }
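/*
 * Editorial note (not part of the original source): tracing_entries_write()
 * interprets the written value in kilobytes (val <<= 10) before resizing.
 * Assuming the usual tracefs mount point,
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * asks for roughly 4 MB of ring buffer per CPU, while writing to a
 * per_cpu/cpuN/buffer_size_kb file resizes only that CPU's buffer
 * (tracing_get_cpu() pulls the cpu id out of the inode).
 */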
6047 
6048 static ssize_t
6049 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6050                                 size_t cnt, loff_t *ppos)
6051 {
6052         struct trace_array *tr = filp->private_data;
6053         char buf[64];
6054         int r, cpu;
6055         unsigned long size = 0, expanded_size = 0;
6056 
6057         mutex_lock(&trace_types_lock);
6058         for_each_tracing_cpu(cpu) {
6059                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6060                 if (!ring_buffer_expanded)
6061                         expanded_size += trace_buf_size >> 10;
6062         }
6063         if (ring_buffer_expanded)
6064                 r = sprintf(buf, "%lu\n", size);
6065         else
6066                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6067         mutex_unlock(&trace_types_lock);
6068 
6069         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6070 }
6071 
6072 static ssize_t
6073 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6074                           size_t cnt, loff_t *ppos)
6075 {
6076         /*
6077          * There is no need to read what the user has written; this function
6078          * exists just to make sure that there is no error when "echo" is used.
6079          */
6080 
6081         *ppos += cnt;
6082 
6083         return cnt;
6084 }
6085 
6086 static int
6087 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6088 {
6089         struct trace_array *tr = inode->i_private;
6090 
6091         /* disable tracing ? */
6092         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6093                 tracer_tracing_off(tr);
6094         /* resize the ring buffer to 0 */
6095         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6096 
6097         trace_array_put(tr);
6098 
6099         return 0;
6100 }
6101 
6102 static ssize_t
6103 tracing_mark_write(struct file *filp, const char __user *ubuf,
6104                                         size_t cnt, loff_t *fpos)
6105 {
6106         struct trace_array *tr = filp->private_data;
6107         struct ring_buffer_event *event;
6108         enum event_trigger_type tt = ETT_NONE;
6109         struct ring_buffer *buffer;
6110         struct print_entry *entry;
6111         unsigned long irq_flags;
6112         const char faulted[] = "<faulted>";
6113         ssize_t written;
6114         int size;
6115         int len;
6116 
6117 /* Used in tracing_mark_raw_write() as well */
6118 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6119 
6120         if (tracing_disabled)
6121                 return -EINVAL;
6122 
6123         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6124                 return -EINVAL;
6125 
6126         if (cnt > TRACE_BUF_SIZE)
6127                 cnt = TRACE_BUF_SIZE;
6128 
6129         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6130 
6131         local_save_flags(irq_flags);
6132         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6133 
6134         /* If less than "<faulted>", then make sure we can still add that */
6135         if (cnt < FAULTED_SIZE)
6136                 size += FAULTED_SIZE - cnt;
6137 
6138         buffer = tr->trace_buffer.buffer;
6139         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6140                                             irq_flags, preempt_count());
6141         if (unlikely(!event))
6142                 /* Ring buffer disabled, return as if not open for write */
6143                 return -EBADF;
6144 
6145         entry = ring_buffer_event_data(event);
6146         entry->ip = _THIS_IP_;
6147 
6148         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6149         if (len) {
6150                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6151                 cnt = FAULTED_SIZE;
6152                 written = -EFAULT;
6153         } else
6154                 written = cnt;
6155         len = cnt;
6156 
6157         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6158                 /* do not add \n before testing triggers, but add \0 */
6159                 entry->buf[cnt] = '\0';
6160                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6161         }
6162 
6163         if (entry->buf[cnt - 1] != '\n') {
6164                 entry->buf[cnt] = '\n';
6165                 entry->buf[cnt + 1] = '\0';
6166         } else
6167                 entry->buf[cnt] = '\0';
6168 
6169         __buffer_unlock_commit(buffer, event);
6170 
6171         if (tt)
6172                 event_triggers_post_call(tr->trace_marker_file, tt);
6173 
6174         if (written > 0)
6175                 *fpos += written;
6176 
6177         return written;
6178 }
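/*
 * Editorial note (not part of the original source): tracing_mark_write()
 * is the handler behind the "trace_marker" file, which lets user space
 * annotate the trace with plain text, e.g., assuming the usual tracefs
 * mount point:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * Writes are clamped to TRACE_BUF_SIZE, a trailing newline is added when
 * missing, and a faulting user buffer is recorded as "<faulted>".
 */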
6179 
6180 /* Limit it for now to 3K (including tag) */
6181 #define RAW_DATA_MAX_SIZE (1024*3)
6182 
6183 static ssize_t
6184 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6185                                         size_t cnt, loff_t *fpos)
6186 {
6187         struct trace_array *tr = filp->private_data;
6188         struct ring_buffer_event *event;
6189         struct ring_buffer *buffer;
6190         struct raw_data_entry *entry;
6191         const char faulted[] = "<faulted>";
6192         unsigned long irq_flags;
6193         ssize_t written;
6194         int size;
6195         int len;
6196 
6197 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6198 
6199         if (tracing_disabled)
6200                 return -EINVAL;
6201 
6202         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6203                 return -EINVAL;
6204 
6205         /* The marker must at least have a tag id */
6206         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6207                 return -EINVAL;
6208 
6209         if (cnt > TRACE_BUF_SIZE)
6210                 cnt = TRACE_BUF_SIZE;
6211 
6212         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6213 
6214         local_save_flags(irq_flags);
6215         size = sizeof(*entry) + cnt;
6216         if (cnt < FAULT_SIZE_ID)
6217                 size += FAULT_SIZE_ID - cnt;
6218 
6219         buffer = tr->trace_buffer.buffer;
6220         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6221                                             irq_flags, preempt_count());
6222         if (!event)
6223                 /* Ring buffer disabled, return as if not open for write */
6224                 return -EBADF;
6225 
6226         entry = ring_buffer_event_data(event);
6227 
6228         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6229         if (len) {
6230                 entry->id = -1;
6231                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6232                 written = -EFAULT;
6233         } else
6234                 written = cnt;
6235 
6236         __buffer_unlock_commit(buffer, event);
6237 
6238         if (written > 0)
6239                 *fpos += written;
6240 
6241         return written;
6242 }
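/*
 * Editorial sketch (not part of the original source): tracing_mark_raw_write()
 * backs the "trace_marker_raw" file and copies the payload starting at
 * entry->id, so the first sizeof(int) bytes of a write are taken as the tag
 * id and the rest is opaque data. A minimal user-space writer could look
 * like this (path, buffer size and error handling are illustrative only):
 *
 *   #include <fcntl.h>
 *   #include <string.h>
 *   #include <unistd.h>
 *
 *   static int write_raw_marker(int id, const void *data, size_t len)
 *   {
 *           char buf[256];
 *           ssize_t ret;
 *           int fd;
 *
 *           if (len > sizeof(buf) - sizeof(id))
 *                   return -1;
 *
 *           fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *           if (fd < 0)
 *                   return -1;
 *
 *           memcpy(buf, &id, sizeof(id));            // tag id first
 *           memcpy(buf + sizeof(id), data, len);     // then the raw payload
 *           ret = write(fd, buf, sizeof(id) + len);
 *           close(fd);
 *           return ret < 0 ? -1 : 0;
 *   }
 */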
6243 
6244 static int tracing_clock_show(struct seq_file *m, void *v)
6245 {
6246         struct trace_array *tr = m->private;
6247         int i;
6248 
6249         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6250                 seq_printf(m,
6251                         "%s%s%s%s", i ? " " : "",
6252                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6253                         i == tr->clock_id ? "]" : "");
6254         seq_putc(m, '\n');
6255 
6256         return 0;
6257 }
6258 
6259 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6260 {
6261         int i;
6262 
6263         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6264                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6265                         break;
6266         }
6267         if (i == ARRAY_SIZE(trace_clocks))
6268                 return -EINVAL;
6269 
6270         mutex_lock(&trace_types_lock);
6271 
6272         tr->clock_id = i;
6273 
6274         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6275 
6276         /*
6277          * New clock may not be consistent with the previous clock.
6278          * Reset the buffer so that it doesn't have incomparable timestamps.
6279          */
6280         tracing_reset_online_cpus(&tr->trace_buffer);
6281 
6282 #ifdef CONFIG_TRACER_MAX_TRACE
6283         if (tr->max_buffer.buffer)
6284                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6285         tracing_reset_online_cpus(&tr->max_buffer);
6286 #endif
6287 
6288         mutex_unlock(&trace_types_lock);
6289 
6290         return 0;
6291 }
6292 
6293 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6294                                    size_t cnt, loff_t *fpos)
6295 {
6296         struct seq_file *m = filp->private_data;
6297         struct trace_array *tr = m->private;
6298         char buf[64];
6299         const char *clockstr;
6300         int ret;
6301 
6302         if (cnt >= sizeof(buf))
6303                 return -EINVAL;
6304 
6305         if (copy_from_user(buf, ubuf, cnt))
6306                 return -EFAULT;
6307 
6308         buf[cnt] = 0;
6309 
6310         clockstr = strstrip(buf);
6311 
6312         ret = tracing_set_clock(tr, clockstr);
6313         if (ret)
6314                 return ret;
6315 
6316         *fpos += cnt;
6317 
6318         return cnt;
6319 }
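/*
 * tracing_clock_show(), tracing_set_clock() and tracing_clock_write() above
 * together implement the trace_clock control: reading lists the clocks from
 * trace_clocks[] with the current one in brackets, and writing one of those
 * names switches the ring buffer clock and resets the buffer, since old and
 * new timestamps would not be comparable.  A usage sketch, assuming tracefs
 * is mounted at /sys/kernel/tracing and that "mono" is one of the
 * compiled-in clocks:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	# echo mono > /sys/kernel/tracing/trace_clock
 */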
6320 
6321 static int tracing_clock_open(struct inode *inode, struct file *file)
6322 {
6323         struct trace_array *tr = inode->i_private;
6324         int ret;
6325 
6326         if (tracing_disabled)
6327                 return -ENODEV;
6328 
6329         if (trace_array_get(tr))
6330                 return -ENODEV;
6331 
6332         ret = single_open(file, tracing_clock_show, inode->i_private);
6333         if (ret < 0)
6334                 trace_array_put(tr);
6335 
6336         return ret;
6337 }
6338 
6339 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6340 {
6341         struct trace_array *tr = m->private;
6342 
6343         mutex_lock(&trace_types_lock);
6344 
6345         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6346                 seq_puts(m, "delta [absolute]\n");
6347         else
6348                 seq_puts(m, "[delta] absolute\n");
6349 
6350         mutex_unlock(&trace_types_lock);
6351 
6352         return 0;
6353 }
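/*
 * The show handler above backs the timestamp_mode file: the bracketed word
 * is the mode in effect, i.e. "[delta] absolute" while the ring buffer
 * records delta timestamps (the default) and "delta [absolute]" once
 * absolute timestamps have been requested via tracing_set_time_stamp_abs().
 */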
6354 
6355 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6356 {
6357         struct trace_array *tr = inode->i_private;
6358         int ret;
6359 
6360         if (tracing_disabled)
6361                 return -ENODEV;
6362 
6363         if (trace_array_get(tr))
6364                 return -ENODEV;
6365 
6366         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6367         if (ret < 0)
6368                 trace_array_put(tr);
6369 
6370         return ret;
6371 }
6372 
6373 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6374 {
6375         int ret = 0;
6376 
6377         mutex_lock(&trace_types_lock);
6378 
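        /*
         * time_stamp_abs_ref counts the users that asked for absolute
         * timestamps; only the 0 -> 1 and 1 -> 0 transitions below actually
         * reconfigure the ring buffer(s).
         */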
6379         if (abs && tr->time_stamp_abs_ref++)
6380                 goto out;
6381 
6382         if (!abs) {
6383                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6384                         ret = -EINVAL;
6385                         goto out;
6386                 }
6387 
6388                 if (--tr->time_stamp_abs_ref)
6389                         goto out;
6390         }
6391 
6392         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6393 
6394 #ifdef CONFIG_TRACER_MAX_TRACE
6395         if (tr->max_buffer.buffer)
6396                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6397 #endif
6398  out:
6399         mutex_unlock(&trace_types_lock);
6400 
6401         return ret;
6402 }
6403 
6404 struct ftrace_buffer_info {
6405         struct trace_iterator   iter;           /* iterator state for this open instance */
6406         void                    *spare;         /* spare ring-buffer page used for reads */
6407         unsigned int            spare_cpu;      /* CPU the spare page was allocated for */
6408         unsigned int            read;           /* read offset within the spare page */
6409 };
6410 
6411 #ifdef CONFIG_TRACER_SNAPSHOT
6412 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6413 {
6414         struct trace_array *tr = inode->i_private;
6415         struct trace_iterator *iter;
6416         struct seq_file *m;
6417         int ret = 0;
6418 
6419         if (trace_array_get(tr) < 0)
6420                 return -ENODEV;
6421 
6422         if (file->f_mode & FMODE_READ) {
6423                 iter = __tracing_open(inode, file, true);
6424                 if (IS_ERR(iter))
6425                         ret = PTR_ERR(iter);
6426         } else {
6427                 /* Writes still need the seq_file to hold the private data */
6428                 ret = -ENOMEM;
6429                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6430                 if (!m)
6431                         goto out;
6432                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6433                 if (!iter) {
6434                         kfree(m);
6435                         goto out;
6436                 }
6437                 ret = 0;
6438 
6439                 iter->tr = tr;
6440                 iter->trace_buffer = &tr->max_buffer;
6441                 iter->cpu_file = tracing_get_cpu(inode);
6442                 m->private = iter;
6443                 file->private_data = m;
6444         }
6445 out:
6446         if (ret < 0)
6447                 trace_array_put(tr);
6448 
6449         return ret;
6450 }
6451 
6452 static ssize_t
6453 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6454                        loff_t *ppos)
6455 {
6456         struct seq_file *m = filp->private_data;
6457         struct trace_iterator *iter = m->private;
6458         struct trace_array *tr = iter->tr;
6459         unsigned long val;
6460         int ret;
6461 
6462         ret = tracing_update_buffers();
6463         if (ret < 0)
6464                 return ret;
6465 
6466         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6467         if (ret)
6468                 return ret;
6469 
6470         mutex_lock(&trace_types_lock);
6471 
6472         if (tr->current_trace->use_max_tr) {
6473                 ret = -EBUSY;
6474                 goto out;
6475         }
6476 
6477         switch (val) {
6478         case 0:
6479                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6480                         ret = -EINVAL;
6481                         break;
6482                 }
6483                 if (tr->allocated_snapshot)
6484                         free_snapshot(tr);
6485                 break;
6486         case 1:
6487 /* Only allow per-cpu swap if the ring buffer supports it */
6488 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6489                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6490                         ret = -EINVAL;
6491                         break;
6492                 }
6493 #endif
6494                 if (!tr->allocated_snapshot) {
6495                         ret = tracing_alloc_snapshot_instance(tr);
6496                         if (ret < 0)
6497                                 break;
6498                 }
6499                 local_irq_disable();
6500                 /* Now, we're going to swap */
6501                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6502                         update_max_tr(tr, current, smp_processor_id());
6503                 else
6504                         update_max_tr_single(tr, current, iter->cpu_file);
6505                 local_irq_enable();
6506                 break;
6507         default:
6508                 if (tr->allocated_snapshot) {
6509                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6510                                 tracing_reset_online_cpus(&tr->max_buffer);
6511                         else
6512                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6513                 }
6514                 break;
6515         }
6516 
6517         if (ret >= 0) {
6518                 *ppos += cnt;
6519                 ret = cnt;
6520         }
6521 out:
6522         mutex_unlock(&trace_types_lock);
6523         return ret;
6524 }
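/*
 * Write semantics implemented above for the snapshot file: "0" frees the
 * snapshot buffer (only valid on the all-CPU file), "1" allocates the
 * snapshot buffer if needed and swaps it with the live buffer (per-CPU
 * swaps only when CONFIG_RING_BUFFER_ALLOW_SWAP is set), and any other
 * value just clears the existing snapshot contents.  -EBUSY is returned
 * while the current tracer itself uses the max buffer.  A usage sketch,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot		# take a snapshot
 *	# cat /sys/kernel/tracing/snapshot		# read the frozen copy
 *	# echo 0 > /sys/kernel/tracing/snapshot		# free the snapshot buffer
 */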
6525 
6526 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6527 {
6528         struct seq_file *m = file->private_data;
6529         int ret;
6530 
6531         ret = tracing_release(inode, file);
6532 
6533         if (file->f_mode & FMODE_READ)
6534                 return ret;
6535 
6536         /* If write only, the seq_file is just a stub */
6537         if (m)
6538                 kfree(m->private);
6539         kfree(m);
6540 
6541         return 0;
6542 }
6543 
6544 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6545 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6546                                     size_t count, loff_t *ppos);
6547 static int tracing_buffers_release(struct inode *inode, struct file *file);
6548 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6549                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6550 
6551 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6552 {
6553         struct ftrace_buffer_info *info;
6554         int ret;
6555 
6556         ret = tracing_buffers_open(inode, filp);
6557         if (ret < 0)
6558                 return ret;
6559 
6560         info = filp->private_data;
6561 
6562         if (info->iter.trace->use_max_tr) {
6563                 tracing_buffers_release(inode, filp);
6564                 return -EBUSY;
6565         }
6566 
6567         info->iter.snapshot = true;
6568         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6569 
6570         return ret;
6571 }
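/*
 * snapshot_raw_open() reuses the per-cpu binary buffer machinery
 * (tracing_buffers_*) declared above, but points the iterator at the
 * max/snapshot buffer, so subsequent reads hand out raw ring-buffer pages
 * from the frozen snapshot rather than from the live trace buffer.
 */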
6572 
6573 #endif /* CONFIG_TRACER_SNAPSHOT */
6574 
6575 
6576 static const struct file_operations tracing_thresh_fops = {
6577         .open           = tracing_open_generic,
6578         .read           = tracing_thresh_read,
6579         .write          = tracing_thresh_write,
6580         .llseek         = generic_file_llseek,
6581 };
6582 
6583 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6584 static const struct file_operations tracing_max_lat_fops = {
6585         .open           = tracing_open_generic,
6586         .read           = tracing_max_lat_read,
6587         .write          = tracing_max_lat_write,
6588         .llseek         = generic_file_llseek,
6589 };
6590 #endif
6591 
6592 static const struct file_operations set_tracer_fops = {
6593         .open           = tracing_open_generic,
6594         .read           = tracing_set_trace_read,
6595         .write          = tracing_set_trace_write,
6596         .llseek         = generic_file_llseek,
6597 };
6598 
6599 static const struct file_operations tracing_pipe_fops = {
6600         .open           = tracing_open_pipe,
6601         .poll           = tracing_poll_pipe,
6602         .read           = tracing_read_pipe,
6603         .splice_read    = tracing_splice_read_pipe,
6604         .release        = tracing_release_pipe,
6605         .llseek         = no_llseek,
6606 };
6607 
6608 static const struct file_operations tracing_entries_fops = {
6609         .open           = tracing_open_generic_tr,
6610         .read           = tracing_entries_read,
6611         .write          = tracing_entries_write,
6612         .llseek         = generic_file_llseek,
6613         .release        = tracing_release_generic_tr,
6614 };
6615 
6616 static const struct file_operations tracing_total_entries_fops = {
6617         .open           = tracing_open_generic_tr,
6618         .read           = tracing_total_entries_read,
6619         .llseek         = generic_file_llseek,
6620         .release        = tracing_release_generic_tr,
6621 };
6622 
6623 static const struct file_operations tracing_free_buffer_fops = {
6624         .open           = tracing_open_generic_tr,
6625         .write          = tracing_free_buffer_write,
6626         .release        = tracing_free_buffer_release,
6627 };
6628 
6629 static const struct file_operations tracing_mark_fops = {
6630         .open           = tracing_open_generic_tr,
6631         .write          = tracing_mark_write,
6632         .llseek         = generic_file_llseek,
6633         .release        = tracing_release_generic_tr,
6634 };
6635 
6636 static const struct file_operations tracing_mark_raw_fops = {
6637         .open           = tracing_open_generic_tr,
6638         .write          = tracing_mark_raw_write,
6639         .llseek         = generic_file_llseek,
6640         .release        = tracing_release_generic_tr,
6641 };
6642 
6643 static const struct file_operations trace_clock_fops = {
6644         .open           = tracing_clock_open,
6645         .read           = seq_read,
6646         .llseek         = seq_lseek,
6647         .release        = tracing_single_release_tr,
6648         .write          = tracing_clock_write,
6649 };
6650 
6651 static const struct file_operations trace_time_stamp_mode_fops = {
6652         .open           = tracing_time_stamp_mode_open,
6653         .read           = seq_read,
6654         .llseek         = seq_lseek,
6655         .release        = tracing_single_release_tr,
6656 };
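/*
 * Note that trace_time_stamp_mode_fops has no .write: the mode is only
 * reported here and is switched programmatically through
 * tracing_set_time_stamp_abs() by in-kernel users (the histogram trigger
 * code, for instance).
 */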
6657 
6658 #ifdef CONFIG_TRACER_SNAPSHOT
6659 static const struct file_operations snapshot_fops = {
6660         .open           = tracing_snapshot_open,
6661         .read           = seq_read,
6662         .write          = tracing_snapshot_write,
6663         .llseek         = tracing_lseek,
6664         .release        = tracing_snapshot_release,
6665 };
6666 
6667 static const struct file_operations snapshot_raw_fops = {
6668         .open           = snapshot_raw_open,
6669         .read           = tracing_buffers_read,
6670         .release        = tracing_buffers_release,
6671         .splice_read    = tracing_buffers_splice_read,
6672         .llseek         = no_llseek,
6673 };
6674 
6675 #endif /* CONFIG_TRACER_SNAPSHOT */
6676 
6677 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6678 {
6679         struct trace_array *tr = inode->i_private;
6680         struct ftrace_buffer_info *info;
6681         int ret;
6682 
6683         if (tracing_disabled)
6684                 return -ENODEV;
6685 
6686         if (trace_array_get(tr) < 0)
6687                 return -ENODEV;
6688 
6689         info = kzalloc(sizeof(*info), GFP_KERNEL);
6690         if (!info) {
6691                 trace_array_put(tr);
6692                 return -ENOMEM;
6693         }
6694 
6695         mutex_lock(&trace_types_lock);
6696 
6697         info->iter.tr           = tr;
6698         info->iter.cpu_file     = tracing_get_cpu(inode);
6699         info->iter.trace        = tr->current_trace;
6700         info->iter.trace_buffer = &tr->trace_buffer;
6