~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c

Version: ~ [ linux-4.17-rc2 ] ~ [ linux-4.16.4 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.36 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.96 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.129 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.51 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.106 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.56 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.101 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.39.4 ] ~ [ linux-2.6.38.8 ] ~ [ linux-2.6.37.6 ] ~ [ linux-2.6.36.4 ] ~ [ linux-2.6.35.14 ] ~ [ linux-2.6.34.15 ] ~ [ linux-2.6.33.20 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.27.62 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * ring buffer based function tracer
  3  *
  4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  6  *
  7  * Originally taken from the RT patch by:
  8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
  9  *
 10  * Based on code from the latency_tracer, that is:
 11  *  Copyright (C) 2004-2006 Ingo Molnar
 12  *  Copyright (C) 2004 Nadia Yvette Chambers
 13  */
 14 #include <linux/ring_buffer.h>
 15 #include <generated/utsrelease.h>
 16 #include <linux/stacktrace.h>
 17 #include <linux/writeback.h>
 18 #include <linux/kallsyms.h>
 19 #include <linux/seq_file.h>
 20 #include <linux/notifier.h>
 21 #include <linux/irqflags.h>
 22 #include <linux/debugfs.h>
 23 #include <linux/tracefs.h>
 24 #include <linux/pagemap.h>
 25 #include <linux/hardirq.h>
 26 #include <linux/linkage.h>
 27 #include <linux/uaccess.h>
 28 #include <linux/vmalloc.h>
 29 #include <linux/ftrace.h>
 30 #include <linux/module.h>
 31 #include <linux/percpu.h>
 32 #include <linux/splice.h>
 33 #include <linux/kdebug.h>
 34 #include <linux/string.h>
 35 #include <linux/mount.h>
 36 #include <linux/rwsem.h>
 37 #include <linux/slab.h>
 38 #include <linux/ctype.h>
 39 #include <linux/init.h>
 40 #include <linux/poll.h>
 41 #include <linux/nmi.h>
 42 #include <linux/fs.h>
 43 #include <linux/trace.h>
 44 #include <linux/sched/clock.h>
 45 #include <linux/sched/rt.h>
 46 
 47 #include "trace.h"
 48 #include "trace_output.h"
 49 
 50 /*
 51  * On boot up, the ring buffer is set to the minimum size, so that
 52  * we do not waste memory on systems that are not using tracing.
 53  */
 54 bool ring_buffer_expanded;
 55 
 56 /*
 57  * We need to change this state when a selftest is running.
 58  * A selftest will lurk into the ring-buffer to count the
 59  * entries inserted during the selftest although some concurrent
 60  * insertions into the ring-buffer such as trace_printk could occurred
 61  * at the same time, giving false positive or negative results.
 62  */
 63 static bool __read_mostly tracing_selftest_running;
 64 
 65 /*
 66  * If a tracer is running, we do not want to run SELFTEST.
 67  */
 68 bool __read_mostly tracing_selftest_disabled;
 69 
 70 /* Pipe tracepoints to printk */
 71 struct trace_iterator *tracepoint_print_iter;
 72 int tracepoint_printk;
 73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 74 
 75 /* For tracers that don't implement custom flags */
 76 static struct tracer_opt dummy_tracer_opt[] = {
 77         { }
 78 };
 79 
 80 static int
 81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 82 {
 83         return 0;
 84 }
 85 
 86 /*
 87  * To prevent the comm cache from being overwritten when no
 88  * tracing is active, only save the comm when a trace event
 89  * occurred.
 90  */
 91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 92 
 93 /*
 94  * Kill all tracing for good (never come back).
 95  * It is initialized to 1 but will turn to zero if the initialization
 96  * of the tracer is successful. But that is the only place that sets
 97  * this back to zero.
 98  */
 99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly     tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127         struct module                   *mod;
128         unsigned long                   length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134         /*
135          * "end" is first and points to NULL as it must be different
136          * than "mod" or "eval_string"
137          */
138         union trace_eval_map_item       *next;
139         const char                      *end;   /* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152         struct trace_eval_map           map;
153         struct trace_eval_map_head      head;
154         struct trace_eval_map_tail      tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE         100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
/* "ftrace=<tracer>" boot parameter: record which tracer to start at boot. */
static int __init set_cmdline_ftrace(char *str)
{
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	ring_buffer_expanded = true;
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180         if (*str++ != '=' || !*str) {
181                 ftrace_dump_on_oops = DUMP_ALL;
182                 return 1;
183         }
184 
185         if (!strcmp("orig_cpu", str)) {
186                 ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189 
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197                 __disable_trace_on_warning = 1;
198         return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
/* "alloc_snapshot" boot parameter: allocate the snapshot buffer at boot. */
static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
/* "trace_options=" boot parameter: stash option string to apply during init. */
static int __init set_trace_boot_options(char *str)
{
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 0;
}
__setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
/* "trace_clock=" boot parameter: stash the clock name to select during init. */
static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 0;
}
__setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235                 tracepoint_printk = 1;
236         return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
/* Convert nanoseconds to microseconds, rounding to the nearest usec. */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;		/* round half a microsecond up */
	do_div(nsec, 1000);	/* do_div() divides @nsec in place */
	return nsec;
}
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS                                             \
249         (FUNCTION_DEFAULT_FLAGS |                                       \
250          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
251          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
252          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
253          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
257                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268         .trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275         struct trace_array *tr;
276         int ret = -ENODEV;
277 
278         mutex_lock(&trace_types_lock);
279         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280                 if (tr == this_tr) {
281                         tr->ref++;
282                         ret = 0;
283                         break;
284                 }
285         }
286         mutex_unlock(&trace_types_lock);
287 
288         return ret;
289 }
290 
/*
 * Drop a reference taken by trace_array_get(). Caller must hold
 * trace_types_lock; use trace_array_put() for the locked wrapper.
 */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);	/* zero here means an unbalanced put */
	this_tr->ref--;
}
296 
/* Drop a trace_array_get() reference, taking trace_types_lock itself. */
void trace_array_put(struct trace_array *this_tr)
{
	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305                               struct ring_buffer *buffer,
306                               struct ring_buffer_event *event)
307 {
308         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309             !filter_match_preds(call->filter, rec)) {
310                 __trace_event_discard_commit(buffer, event);
311                 return 1;
312         }
313 
314         return 0;
315 }
316 
/* Free a pid list built by trace_pid_write(): the bitmap, then the head. */
void trace_free_pid_list(struct trace_pid_list *pid_list)
{
	vfree(pid_list->pids);	/* bitmap was vzalloc()ed */
	kfree(pid_list);
}
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333         /*
334          * If pid_max changed after filtered_pids was created, we
335          * by default ignore all pids greater than the previous pid_max.
336          */
337         if (search_pid >= filtered_pids->pid_max)
338                 return false;
339 
340         return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355         /*
356          * Return false, because if filtered_pids does not exist,
357          * all pids are good to trace.
358          */
359         if (!filtered_pids)
360                 return false;
361 
362         return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378                                   struct task_struct *self,
379                                   struct task_struct *task)
380 {
381         if (!pid_list)
382                 return;
383 
384         /* For forks, we only add if the forking task is listed */
385         if (self) {
386                 if (!trace_find_filtered_pid(pid_list, self->pid))
387                         return;
388         }
389 
390         /* Sorry, but we don't support pid_max changing after setting */
391         if (task->pid >= pid_list->pid_max)
392                 return;
393 
394         /* "self" is set for forks, and NULL for exits */
395         if (self)
396                 set_bit(task->pid, pid_list->pids);
397         else
398                 clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	unsigned long pid = (unsigned long)v;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);

	/* Return pid + 1 to allow zero to be represented */
	if (pid < pid_list->pid_max)
		return (void *)(pid + 1);

	/* No more pids set: NULL ends the seq_file iteration */
	return NULL;
}
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	loff_t l = 0;

	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
	if (pid >= pid_list->pid_max)
		return NULL;	/* no pids are set in the list */

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;	/* walk forward until reaching the seq_file position */
	return (void *)pid;
}
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* The iterator hands us pid + 1; undo the offset before printing. */
	seq_printf(m, "%lu\n", (unsigned long)v - 1);
	return 0;
}
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE            127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476                     struct trace_pid_list **new_pid_list,
477                     const char __user *ubuf, size_t cnt)
478 {
479         struct trace_pid_list *pid_list;
480         struct trace_parser parser;
481         unsigned long val;
482         int nr_pids = 0;
483         ssize_t read = 0;
484         ssize_t ret = 0;
485         loff_t pos;
486         pid_t pid;
487 
488         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489                 return -ENOMEM;
490 
491         /*
492          * Always recreate a new array. The write is an all or nothing
493          * operation. Always create a new array when adding new pids by
494          * the user. If the operation fails, then the current list is
495          * not modified.
496          */
497         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498         if (!pid_list)
499                 return -ENOMEM;
500 
501         pid_list->pid_max = READ_ONCE(pid_max);
502 
503         /* Only truncating will shrink pid_max */
504         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505                 pid_list->pid_max = filtered_pids->pid_max;
506 
507         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508         if (!pid_list->pids) {
509                 kfree(pid_list);
510                 return -ENOMEM;
511         }
512 
513         if (filtered_pids) {
514                 /* copy the current bits to the new max */
515                 for_each_set_bit(pid, filtered_pids->pids,
516                                  filtered_pids->pid_max) {
517                         set_bit(pid, pid_list->pids);
518                         nr_pids++;
519                 }
520         }
521 
522         while (cnt > 0) {
523 
524                 pos = 0;
525 
526                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
527                 if (ret < 0 || !trace_parser_loaded(&parser))
528                         break;
529 
530                 read += ret;
531                 ubuf += ret;
532                 cnt -= ret;
533 
534                 ret = -EINVAL;
535                 if (kstrtoul(parser.buffer, 0, &val))
536                         break;
537                 if (val >= pid_list->pid_max)
538                         break;
539 
540                 pid = (pid_t)val;
541 
542                 set_bit(pid, pid_list->pids);
543                 nr_pids++;
544 
545                 trace_parser_clear(&parser);
546                 ret = 0;
547         }
548         trace_parser_put(&parser);
549 
550         if (ret < 0) {
551                 trace_free_pid_list(pid_list);
552                 return ret;
553         }
554 
555         if (!nr_pids) {
556                 /* Cleared the list of pids */
557                 trace_free_pid_list(pid_list);
558                 read = ret;
559                 pid_list = NULL;
560         }
561 
562         *new_pid_list = pid_list;
563 
564         return read;
565 }
566 
/* Return the current trace timestamp for @cpu of buffer @buf. */
static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer, cpu);
	/* Let the ring buffer normalize the raw timestamp for this cpu */
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}
580 
/* Current timestamp of the global (top-level) trace buffer for @cpu. */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
}
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();	/* pairs with smp_wmb() after buffer_disabled updates */
	return !global_trace.buffer_disabled;
}
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. Anyway this can be
614  * boot time and run time configurable.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer            *trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by the events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
/*
 * Serialize reader access: per-cpu access takes the rwsem shared plus
 * that cpu's mutex; RING_BUFFER_ALL_CPUS takes the rwsem exclusive.
 * The ordering below must match trace_access_unlock().
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}
669 
/* Release the locks taken by trace_access_lock(), in reverse order. */
static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}
679 
/* Initialize the per-cpu mutexes used by trace_access_lock(). */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
687 
688 #else
689 
/* On UP, a single mutex serializes all ring buffer reader access. */
static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;	/* every cpu shares the one lock on UP */
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
	/* Statically defined mutex needs no runtime initialization. */
}
707 
708 #endif
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712                                  unsigned long flags,
713                                  int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715                                       struct ring_buffer *buffer,
716                                       unsigned long flags,
717                                       int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                         unsigned long flags,
722                                         int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736                   int type, unsigned long flags, int pc)
737 {
738         struct trace_entry *ent = ring_buffer_event_data(event);
739 
740         tracing_generic_entry_update(ent, flags, pc);
741         ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746                           int type,
747                           unsigned long len,
748                           unsigned long flags, int pc)
749 {
750         struct ring_buffer_event *event;
751 
752         event = ring_buffer_lock_reserve(buffer, len);
753         if (event != NULL)
754                 trace_event_setup(event, type, flags, pc);
755 
756         return event;
757 }
758 
/* Re-enable recording into @tr's ring buffer; see tracing_on(). */
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_on(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();	/* pairs with smp_rmb() in tracing_is_enabled() */
}
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
void tracing_on(void)
{
	/* The top-level instance shares the per-instance enable path */
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
/*
 * Commit an event reserved with __trace_buffer_lock_reserve(). Also
 * handles events staged in the per-cpu temp buffer (trace_buffered_event),
 * which must be written into the ring buffer as a whole.
 */
static __always_inline void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
	/* An event was recorded: allow the task comm to be saved */
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	} else
		ring_buffer_unlock_commit(buffer, event);
}
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:    The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	int alloc;
	int pc;

	/* Honor the runtime "printk" trace option */
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    irq_flags, pc);
	if (!event)
		return 0;	/* reserve failed; nothing was written */

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	/* Also record the call stack; 4 is the stack-entry skip count */
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return size;
}
EXPORT_SYMBOL_GPL(__trace_puts);
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:    The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct bputs_entry *entry;
	unsigned long irq_flags;
	int size = sizeof(struct bputs_entry);
	int pc;

	/* Honor the runtime "printk" trace option */
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    irq_flags, pc);
	if (!event)
		return 0;	/* reserve failed; nothing was written */

	/* Only the pointer is stored; @str must be a constant string */
	entry = ring_buffer_event_data(event);
	entry->ip                       = ip;
	entry->str                      = str;

	__buffer_unlock_commit(buffer, event);
	/* Also record the call stack; 4 is the stack-entry skip count */
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return 1;
}
EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Take a snapshot of @tr unless running in NMI context, the snapshot
 * buffer was never allocated, or the current tracer itself uses the
 * max-trace buffer.
 */
static void tracing_snapshot_instance(struct trace_array *tr)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		internal_trace_puts("*** snapshot is being ignored        ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
		internal_trace_puts("*** stopping trace here!   ***\n");
		tracing_off();
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Swap the live and snapshot buffers with interrupts disabled */
	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id());
	local_irq_restore(flags);
}
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942         struct trace_array *tr = &global_trace;
943 
944         tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949                                         struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 static int alloc_snapshot(struct trace_array *tr)
953 {
954         int ret;
955 
956         if (!tr->allocated_snapshot) {
957 
958                 /* allocate spare buffer */
959                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
960                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961                 if (ret < 0)
962                         return ret;
963 
964                 tr->allocated_snapshot = true;
965         }
966 
967         return 0;
968 }
969 
/* Release the spare (snapshot) buffer of @tr. */
static void free_snapshot(struct trace_array *tr)
{
        /*
         * We don't free the ring buffer. Instead, we resize it because
         * the max_tr ring buffer has some state (e.g. ring->clock) and
         * we want to preserve it.
         */
        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
        set_buffer_entries(&tr->max_buffer, 1);
        tracing_reset_online_cpus(&tr->max_buffer);
        tr->allocated_snapshot = false;
}
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996         int ret;
997 
998         ret = alloc_snapshot(tr);
999         WARN_ON(ret < 0);
1000 
1001         return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but allocates the snapshot buffer first if
 * needed.  Only use where sleeping is safe, as the allocation may sleep.
 *
 * Swaps the snapshot buffer with the current live tracing buffer, so a
 * snapshot of the live trace is captured while tracing continues.
 */
void tracing_snapshot_alloc(void)
{
        if (tracing_alloc_snapshot() < 0)
                return;

        tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
/* Stubs used when CONFIG_TRACER_SNAPSHOT is not enabled. */
void tracing_snapshot(void)
{
        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
int tracing_alloc_snapshot(void)
{
        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
        return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
        /* Give warning */
        tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046 
/* Stop recording into @tr's ring buffer and publish the disabled flag. */
void tracer_tracing_off(struct trace_array *tr)
{
        if (tr->trace_buffer.buffer)
                ring_buffer_record_off(tr->trace_buffer.buffer);
        /*
         * This flag is looked at when buffers haven't been allocated
         * yet, or by some tracers (like irqsoff), that just want to
         * know if the ring buffer has been disabled, but it can handle
         * races of where it gets disabled but we still do a record.
         * As the check is in the fast path of the tracers, it is more
         * important to be fast than accurate.
         */
        tr->buffer_disabled = 1;
        /* Make the flag seen by readers */
        smp_wmb();
}
1063 
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
        tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
1077 
/* Called on WARN(); stops tracing if the traceoff_on_warning option is set. */
void disable_trace_on_warning(void)
{
        if (__disable_trace_on_warning)
                tracing_off();
}
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092         if (tr->trace_buffer.buffer)
1093                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094         return !tr->buffer_disabled;
1095 }
1096 
/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
        return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108         unsigned long buf_size;
1109 
1110         if (!str)
1111                 return 0;
1112         buf_size = memparse(str, &str);
1113         /* nr_entries can not be zero */
1114         if (buf_size == 0)
1115                 return 0;
1116         trace_buf_size = buf_size;
1117         return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
/*
 * Parse the "tracing_thresh=" boot parameter.  The value is given in
 * microseconds; tracing_thresh is stored in nanoseconds.
 */
static int __init set_tracing_thresh(char *str)
{
        unsigned long threshold;
        int ret;

        if (!str)
                return 0;
        ret = kstrtoul(str, 0, &threshold);
        if (ret < 0)
                return 0;
        /* convert usecs -> nsecs */
        tracing_thresh = threshold * 1000;
        return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);
1135 
/* Convert nanoseconds to microseconds (integer division, truncating). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
        unsigned long usecs = nsecs / 1000;

        return usecs;
}
1140 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
        TRACE_FLAGS
        NULL
};
1155 
/*
 * Table of clocks selectable through the "trace_clock" file.
 * in_ns says whether the clock's readings are in nanoseconds.
 */
static struct {
        u64 (*func)(void);
        const char *name;
        int in_ns;              /* is this clock in nanoseconds? */
} trace_clocks[] = {
        { trace_clock_local,            "local",        1 },
        { trace_clock_global,           "global",       1 },
        { trace_clock_counter,          "counter",      0 },
        { trace_clock_jiffies,          "uptime",       0 },
        { trace_clock,                  "perf",         1 },
        { ktime_get_mono_fast_ns,       "mono",         1 },
        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
        /* NOTE(review): "boot" aliases the mono clock here -- confirm intended */
        { ktime_get_mono_fast_ns,       "boot",         1 },
        ARCH_TRACE_CLOCKS
};
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174         if (trace_clocks[tr->clock_id].in_ns)
1175                 return true;
1176 
1177         return false;
1178 }
1179 
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Allocates a @size byte buffer for @parser and zeroes its state.
 * Note: returns 1 (not a negative errno) on allocation failure,
 * 0 on success.
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
        memset(parser, 0, sizeof(*parser));

        parser->buffer = kmalloc(size, GFP_KERNEL);
        if (!parser->buffer)
                return 1;

        parser->size = size;
        return 0;
}
1194 
/*
 * trace_parser_put - frees the buffer for trace parser
 *
 * The pointer is cleared so a double put is harmless.
 */
void trace_parser_put(struct trace_parser *parser)
{
        kfree(parser->buffer);
        parser->buffer = NULL;
}
1203 
/*
 * trace_get_user - reads the user input string separated by  space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 *
 * A token may span multiple write() calls: parser->cont marks a
 * partially read token and parser->idx where to resume.  -EINVAL is
 * returned when the token does not fit in parser->buffer.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
        size_t cnt, loff_t *ppos)
{
        char ch;
        size_t read = 0;
        ssize_t ret;

        /* a new read (offset 0) starts the parser from scratch */
        if (!*ppos)
                trace_parser_clear(parser);

        ret = get_user(ch, ubuf++);
        if (ret)
                goto out;

        read++;
        cnt--;

        /*
         * The parser is not finished with the last write,
         * continue reading the user input without skipping spaces.
         */
        if (!parser->cont) {
                /* skip white space */
                while (cnt && isspace(ch)) {
                        ret = get_user(ch, ubuf++);
                        if (ret)
                                goto out;
                        read++;
                        cnt--;
                }

                parser->idx = 0;

                /* only spaces were written */
                if (isspace(ch) || !ch) {
                        *ppos += read;
                        ret = read;
                        goto out;
                }
        }

        /* read the non-space input */
        while (cnt && !isspace(ch) && ch) {
                if (parser->idx < parser->size - 1)
                        parser->buffer[parser->idx++] = ch;
                else {
                        /* token longer than the parser buffer */
                        ret = -EINVAL;
                        goto out;
                }
                ret = get_user(ch, ubuf++);
                if (ret)
                        goto out;
                read++;
                cnt--;
        }

        /* We either got finished input or we have to wait for another call. */
        if (isspace(ch) || !ch) {
                parser->buffer[parser->idx] = 0;
                parser->cont = false;
        } else if (parser->idx < parser->size - 1) {
                /* ran out of input mid-token; keep state for the next write */
                parser->cont = true;
                parser->buffer[parser->idx++] = ch;
                /* Make sure the parsed string always terminates with '\0'. */
                parser->buffer[parser->idx] = 0;
        } else {
                ret = -EINVAL;
                goto out;
        }

        *ppos += read;
        ret = read;

out:
        return ret;
}
1291 
/* TODO add a seq_buf_to_buffer() */
/*
 * Copy up to @cnt unread bytes from @s into @buf, advancing the seq's
 * read position.  Returns the number of bytes copied, or -EBUSY when
 * everything has already been read.
 */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
        int len;

        if (trace_seq_used(s) <= s->seq.readpos)
                return -EBUSY;

        len = trace_seq_used(s) - s->seq.readpos;
        if (cnt > len)
                cnt = len;
        memcpy(buf, s->buffer + s->seq.readpos, cnt);

        s->seq.readpos += cnt;
        return cnt;
}
1308 
1309 unsigned long __read_mostly     tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 *
 * Called with the buffers already swapped; records which task/cpu
 * caused the new max latency into the max buffer's per-cpu data.
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        struct trace_buffer *trace_buf = &tr->trace_buffer;
        struct trace_buffer *max_buf = &tr->max_buffer;
        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

        max_buf->cpu = cpu;
        max_buf->time_start = data->preempt_timestamp;

        max_data->saved_latency = tr->max_latency;
        max_data->critical_start = data->critical_start;
        max_data->critical_end = data->critical_end;

        memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
        max_data->pid = tsk->pid;
        /*
         * If tsk == current, then use current_uid(), as that does not use
         * RCU. The irq tracer can be called out of RCU scope.
         */
        if (tsk == current)
                max_data->uid = current_uid();
        else
                max_data->uid = task_uid(tsk);

        /* translate static_prio to the userspace nice range */
        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
        max_data->policy = tsk->policy;
        max_data->rt_priority = tsk->rt_priority;

        /* record this tasks comm */
        tracing_record_cmdline(tsk);
}
1350 
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 *
 * Must be called with interrupts disabled (the swap is protected by
 * tr->max_lock, an arch spinlock).
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        struct ring_buffer *buf;

        if (tr->stop_count)
                return;

        WARN_ON_ONCE(!irqs_disabled());

        if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
                WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
        }

        arch_spin_lock(&tr->max_lock);

        /* swap the live buffer with the spare (max) buffer */
        buf = tr->trace_buffer.buffer;
        tr->trace_buffer.buffer = tr->max_buffer.buffer;
        tr->max_buffer.buffer = buf;

        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&tr->max_lock);
}
1385 
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 * Must be called with interrupts disabled.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
        int ret;

        if (tr->stop_count)
                return;

        WARN_ON_ONCE(!irqs_disabled());
        if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
                WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
        }

        arch_spin_lock(&tr->max_lock);

        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);

        if (ret == -EBUSY) {
                /*
                 * We failed to swap the buffer due to a commit taking
                 * place on this CPU. We fail to record, but we reset
                 * the max trace buffer (no one writes directly to it)
                 * and flag that it failed.
                 */
                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
                        "Failed to swap buffers due to commit in progress\n");
        }

        /* -EAGAIN/-EBUSY are expected transient failures; anything else is a bug */
        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&tr->max_lock);
}
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
/*
 * Block until data is available in the pipe reader's ring buffer.
 * If a static iterator exists for this cpu there is nothing to wait
 * for.  @full requests waiting until a full page of data is ready.
 */
static int wait_on_pipe(struct trace_iterator *iter, bool full)
{
        /* Iterators are static, they should be filled or empty */
        if (trace_buffer_iter(iter, iter->cpu_file))
                return 0;

        return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
                                full);
}
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445         struct list_head                list;
1446         struct tracer                   *type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
/*
 * Queue @type's selftest to run later in boot (see init_trace_selftests),
 * used for tracers that register before the scheduler is up.
 */
static int save_selftest(struct tracer *type)
{
        struct trace_selftests *selftest;

        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
        if (!selftest)
                return -ENOMEM;

        selftest->type = type;
        list_add(&selftest->list, &postponed_selftests);
        return 0;
}
1463 
/*
 * Run @type's startup selftest against the global trace array.
 * Returns 0 on pass (or when there is no test / tests are disabled),
 * -1 on failure, or the result of postponing the test during early boot.
 */
static int run_tracer_selftest(struct tracer *type)
{
        struct trace_array *tr = &global_trace;
        struct tracer *saved_tracer = tr->current_trace;
        int ret;

        if (!type->selftest || tracing_selftest_disabled)
                return 0;

        /*
         * If a tracer registers early in boot up (before scheduling is
         * initialized and such), then do not run its selftests yet.
         * Instead, run it a little later in the boot process.
         */
        if (!selftests_can_run)
                return save_selftest(type);

        /*
         * Run a selftest on this tracer.
         * Here we reset the trace buffer, and set the current
         * tracer to be this tracer. The tracer can then run some
         * internal tracing to verify that everything is in order.
         * If we fail, we do not register this tracer.
         */
        tracing_reset_online_cpus(&tr->trace_buffer);

        tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
        if (type->use_max_tr) {
                /* If we expanded the buffers, make sure the max is expanded too */
                if (ring_buffer_expanded)
                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
                                           RING_BUFFER_ALL_CPUS);
                tr->allocated_snapshot = true;
        }
#endif

        /* the test is responsible for initializing and enabling */
        pr_info("Testing tracer %s: ", type->name);
        ret = type->selftest(type, tr);
        /* the test is responsible for resetting too */
        tr->current_trace = saved_tracer;
        if (ret) {
                printk(KERN_CONT "FAILED!\n");
                /* Add the warning after printing 'FAILED' */
                WARN_ON(1);
                return -1;
        }
        /* Only reset on passing, to avoid touching corrupted buffers */
        tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
        if (type->use_max_tr) {
                tr->allocated_snapshot = false;

                /* Shrink the max buffer again */
                if (ring_buffer_expanded)
                        ring_buffer_resize(tr->max_buffer.buffer, 1,
                                           RING_BUFFER_ALL_CPUS);
        }
#endif

        printk(KERN_CONT "PASSED\n");
        return 0;
}
1530 
/*
 * Late-boot initcall: run the selftests that were postponed because
 * their tracer registered too early, unregistering any tracer whose
 * test fails.
 */
static __init int init_trace_selftests(void)
{
        struct trace_selftests *p, *n;
        struct tracer *t, **last;
        int ret;

        selftests_can_run = true;

        mutex_lock(&trace_types_lock);

        if (list_empty(&postponed_selftests))
                goto out;

        pr_info("Running postponed tracer tests:\n");

        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
                ret = run_tracer_selftest(p->type);
                /* If the test fails, then warn and remove from available_tracers */
                if (ret < 0) {
                        WARN(1, "tracer: %s failed selftest, disabling\n",
                             p->type->name);
                        /* unlink the failing tracer from the trace_types list */
                        last = &trace_types;
                        for (t = trace_types; t; t = t->next) {
                                if (t == p->type) {
                                        *last = t->next;
                                        break;
                                }
                                last = &t->next;
                        }
                }
                list_del(&p->list);
                kfree(p);
        }

 out:
        mutex_unlock(&trace_types_lock);

        return 0;
}
core_initcall(init_trace_selftests);
1571 #else
/* Stub when CONFIG_FTRACE_STARTUP_TEST is disabled: always "passes". */
static inline int run_tracer_selftest(struct tracer *type)
{
        return 0;
}
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577 
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579 
1580 static void __init apply_trace_boot_options(void);
1581 
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.  Rejects unnamed, over-long-named and
 * duplicate tracers, fills in default flag handling, runs the tracer's
 * selftest, and — if this tracer was requested on the boot command
 * line — makes it the current tracer.
 */
int __init register_tracer(struct tracer *type)
{
        struct tracer *t;
        int ret = 0;

        if (!type->name) {
                pr_info("Tracer must have a name\n");
                return -1;
        }

        if (strlen(type->name) >= MAX_TRACER_SIZE) {
                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
                return -1;
        }

        mutex_lock(&trace_types_lock);

        /* suppress tracing output generated by the selftest machinery */
        tracing_selftest_running = true;

        for (t = trace_types; t; t = t->next) {
                if (strcmp(type->name, t->name) == 0) {
                        /* already found */
                        pr_info("Tracer %s already registered\n",
                                type->name);
                        ret = -1;
                        goto out;
                }
        }

        if (!type->set_flag)
                type->set_flag = &dummy_set_flag;
        if (!type->flags) {
                /*allocate a dummy tracer_flags*/
                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
                if (!type->flags) {
                        ret = -ENOMEM;
                        goto out;
                }
                type->flags->val = 0;
                type->flags->opts = dummy_tracer_opt;
        } else
                if (!type->flags->opts)
                        type->flags->opts = dummy_tracer_opt;

        /* store the tracer for __set_tracer_option */
        type->flags->trace = type;

        ret = run_tracer_selftest(type);
        if (ret < 0)
                goto out;

        /* link the new tracer at the head of the list */
        type->next = trace_types;
        trace_types = type;
        add_tracer_options(&global_trace, type);

 out:
        tracing_selftest_running = false;
        mutex_unlock(&trace_types_lock);

        if (ret || !default_bootup_tracer)
                goto out_unlock;

        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
                goto out_unlock;

        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
        /* Do we want this tracer to start on bootup? */
        tracing_set_tracer(&global_trace, type->name);
        default_bootup_tracer = NULL;

        apply_trace_boot_options();

        /* disable other selftests, since this will break it. */
        tracing_selftest_disabled = true;
#ifdef CONFIG_FTRACE_STARTUP_TEST
        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
               type->name);
#endif

 out_unlock:
        return ret;
}
1670 
/* Reset a single CPU's ring buffer of @buf after quiescing all writers. */
void tracing_reset(struct trace_buffer *buf, int cpu)
{
        struct ring_buffer *buffer = buf->buffer;

        if (!buffer)
                return;

        ring_buffer_record_disable(buffer);

        /* Make sure all commits have finished */
        synchronize_sched();
        ring_buffer_reset_cpu(buffer, cpu);

        ring_buffer_record_enable(buffer);
}
1686 
/* Reset the ring buffers of @buf on every online CPU. */
void tracing_reset_online_cpus(struct trace_buffer *buf)
{
        struct ring_buffer *buffer = buf->buffer;
        int cpu;

        if (!buffer)
                return;

        ring_buffer_record_disable(buffer);

        /* Make sure all commits have finished */
        synchronize_sched();

        /* restart the time base for latency reporting */
        buf->time_start = buffer_ftrace_now(buf, buf->cpu);

        for_each_online_cpu(cpu)
                ring_buffer_reset_cpu(buffer, cpu);

        ring_buffer_record_enable(buffer);
}
1707 
/* Must have trace_types_lock held */
/* Reset every trace array whose clear_trace flag was set. */
void tracing_reset_all_online_cpus(void)
{
        struct trace_array *tr;

        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
                if (!tr->clear_trace)
                        continue;
                tr->clear_trace = false;
                tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
                tracing_reset_online_cpus(&tr->max_buffer);
#endif
        }
}
1723 
1724 static int *tgid_map;
1725 
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731         unsigned *map_cmdline_to_pid;
1732         unsigned cmdline_num;
1733         int cmdline_idx;
1734         char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737 
1738 /* temporary disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740 
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745 
/* Store @cmdline (TASK_COMM_LEN bytes) into saved-comm slot @idx. */
static inline void set_cmdline(int idx, const char *cmdline)
{
        memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
1750 
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752                                     struct saved_cmdlines_buffer *s)
1753 {
1754         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1755                                         GFP_KERNEL);
1756         if (!s->map_cmdline_to_pid)
1757                 return -ENOMEM;
1758 
1759         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1760         if (!s->saved_cmdlines) {
1761                 kfree(s->map_cmdline_to_pid);
1762                 return -ENOMEM;
1763         }
1764 
1765         s->cmdline_idx = 0;
1766         s->cmdline_num = val;
1767         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1768                sizeof(s->map_pid_to_cmdline));
1769         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1770                val * sizeof(*s->map_cmdline_to_pid));
1771 
1772         return 0;
1773 }
1774 
1775 static int trace_create_savedcmd(void)
1776 {
1777         int ret;
1778 
1779         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1780         if (!savedcmd)
1781                 return -ENOMEM;
1782 
1783         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1784         if (ret < 0) {
1785                 kfree(savedcmd);
1786                 savedcmd = NULL;
1787                 return -ENOMEM;
1788         }
1789 
1790         return 0;
1791 }
1792 
/* Nonzero (the raw stop nesting count) when tracing_stop() is in effect. */
int is_tracing_stopped(void)
{
        return global_trace.stop_count;
}
1797 
/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.  Start/stop calls nest via
 * stop_count; recording resumes only when the count drops to zero.
 */
void tracing_start(void)
{
        struct ring_buffer *buffer;
        unsigned long flags;

        if (tracing_disabled)
                return;

        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
        if (--global_trace.stop_count) {
                if (global_trace.stop_count < 0) {
                        /* Someone screwed up their debugging */
                        WARN_ON_ONCE(1);
                        global_trace.stop_count = 0;
                }
                goto out;
        }

        /* Prevent the buffers from switching */
        arch_spin_lock(&global_trace.max_lock);

        buffer = global_trace.trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
        buffer = global_trace.max_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
#endif

        arch_spin_unlock(&global_trace.max_lock);

 out:
        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}
1840 
/* Per-instance variant of tracing_start(); see tracing_start() */
static void tracing_start_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	/* If global, we need to also start the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_start();

	raw_spin_lock_irqsave(&tr->start_lock, flags);

	/* stop_count nests: only the last matching start enables */
	if (--tr->stop_count) {
		if (tr->stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			tr->stop_count = 0;
		}
		goto out;
	}

	buffer = tr->trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
1871 
1872 /**
1873  * tracing_stop - quick stop of the tracer
1874  *
1875  * Light weight way to stop tracing. Use in conjunction with
1876  * tracing_start.
1877  */
1878 void tracing_stop(void)
1879 {
1880         struct ring_buffer *buffer;
1881         unsigned long flags;
1882 
1883         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1884         if (global_trace.stop_count++)
1885                 goto out;
1886 
1887         /* Prevent the buffers from switching */
1888         arch_spin_lock(&global_trace.max_lock);
1889 
1890         buffer = global_trace.trace_buffer.buffer;
1891         if (buffer)
1892                 ring_buffer_record_disable(buffer);
1893 
1894 #ifdef CONFIG_TRACER_MAX_TRACE
1895         buffer = global_trace.max_buffer.buffer;
1896         if (buffer)
1897                 ring_buffer_record_disable(buffer);
1898 #endif
1899 
1900         arch_spin_unlock(&global_trace.max_lock);
1901 
1902  out:
1903         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1904 }
1905 
/* Per-instance variant of tracing_stop(); see tracing_stop() */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	/* If global, we need to also stop the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_stop();

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	/* stop_count nests: only the first stop disables recording */
	if (tr->stop_count++)
		goto out;

	buffer = tr->trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
1926 
/*
 * Save tsk->comm in the limited saved-cmdlines cache so trace output
 * can later map a recorded pid back to a task name.
 *
 * Returns 1 on success (or when recording may be treated as done),
 * 0 when the caller should try again later.
 */
static int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned pid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	/* pids above PID_MAX_DEFAULT do not fit in map_pid_to_cmdline */
	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
		return 0;

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 */
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
	if (idx == NO_CMDLINE_MAP) {
		/* No slot for this pid yet: claim the next circular slot */
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		/*
		 * Check whether the cmdline buffer at idx has a pid
		 * mapped. We are going to overwrite that entry so we
		 * need to clear the map_pid_to_cmdline. Otherwise we
		 * would read the new comm for the old pid.
		 */
		pid = savedcmd->map_cmdline_to_pid[idx];
		if (pid != NO_CMDLINE_MAP)
			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;

		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;

		savedcmd->cmdline_idx = idx;
	}

	/* Always refresh the comm, even when the pid already had a slot */
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}
1973 
1974 static void __trace_find_cmdline(int pid, char comm[])
1975 {
1976         unsigned map;
1977 
1978         if (!pid) {
1979                 strcpy(comm, "<idle>");
1980                 return;
1981         }
1982 
1983         if (WARN_ON_ONCE(pid < 0)) {
1984                 strcpy(comm, "<XXX>");
1985                 return;
1986         }
1987 
1988         if (pid > PID_MAX_DEFAULT) {
1989                 strcpy(comm, "<...>");
1990                 return;
1991         }
1992 
1993         map = savedcmd->map_pid_to_cmdline[pid];
1994         if (map != NO_CMDLINE_MAP)
1995                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1996         else
1997                 strcpy(comm, "<...>");
1998 }
1999 
/*
 * trace_find_cmdline - copy the saved comm for @pid into @comm
 *
 * Takes trace_cmdline_lock with preemption disabled so the lookup
 * cannot race with trace_save_cmdline() on another CPU.
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
2010 
2011 int trace_find_tgid(int pid)
2012 {
2013         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2014                 return 0;
2015 
2016         return tgid_map[pid];
2017 }
2018 
2019 static int trace_save_tgid(struct task_struct *tsk)
2020 {
2021         /* treat recording of idle task as a success */
2022         if (!tsk->pid)
2023                 return 1;
2024 
2025         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2026                 return 0;
2027 
2028         tgid_map[tsk->pid] = tsk->tgid;
2029         return 1;
2030 }
2031 
2032 static bool tracing_record_taskinfo_skip(int flags)
2033 {
2034         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2035                 return true;
2036         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2037                 return true;
2038         if (!__this_cpu_read(trace_taskinfo_save))
2039                 return true;
2040         return false;
2041 }
2042 
2043 /**
2044  * tracing_record_taskinfo - record the task info of a task
2045  *
2046  * @task  - task to record
2047  * @flags - TRACE_RECORD_CMDLINE for recording comm
2048  *        - TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo(struct task_struct *task, int flags)
2051 {
2052         bool done;
2053 
2054         if (tracing_record_taskinfo_skip(flags))
2055                 return;
2056 
2057         /*
2058          * Record as much task information as possible. If some fail, continue
2059          * to try to record the others.
2060          */
2061         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2062         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2063 
2064         /* If recording any information failed, retry again soon. */
2065         if (!done)
2066                 return;
2067 
2068         __this_cpu_write(trace_taskinfo_save, false);
2069 }
2070 
2071 /**
2072  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2073  *
2074  * @prev - previous task during sched_switch
2075  * @next - next task during sched_switch
2076  * @flags - TRACE_RECORD_CMDLINE for recording comm
2077  *          TRACE_RECORD_TGID for recording tgid
2078  */
2079 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2080                                           struct task_struct *next, int flags)
2081 {
2082         bool done;
2083 
2084         if (tracing_record_taskinfo_skip(flags))
2085                 return;
2086 
2087         /*
2088          * Record as much task information as possible. If some fail, continue
2089          * to try to record the others.
2090          */
2091         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2092         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2093         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2094         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2095 
2096         /* If recording any information failed, retry again soon. */
2097         if (!done)
2098                 return;
2099 
2100         __this_cpu_write(trace_taskinfo_save, false);
2101 }
2102 
/* Helpers to record a specific task information */

/* Record only the comm of @task; see tracing_record_taskinfo() */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2108 
/* Record only the tgid of @task; see tracing_record_taskinfo() */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2113 
2114 /*
2115  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2116  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2117  * simplifies those functions and keeps them in sync.
2118  */
2119 enum print_line_t trace_handle_return(struct trace_seq *s)
2120 {
2121         return trace_seq_has_overflowed(s) ?
2122                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2123 }
2124 EXPORT_SYMBOL_GPL(trace_handle_return);
2125 
/*
 * Fill in the common fields of a trace entry (pid, preempt count and
 * the irq/softirq/NMI/resched status flags) from the current context.
 */
void
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
			     int pc)
{
	struct task_struct *tsk = current;

	/* Only the low byte of the preempt count is recorded */
	entry->preempt_count		= pc & 0xff;
	entry->pid			= (tsk) ? tsk->pid : 0;
	entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
#else
		TRACE_FLAG_IRQS_NOSUPPORT |
#endif
		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2147 
/* Reserve space for an event; thin wrapper around the internal helper */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned long flags, int pc)
{
	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
}
2156 
/* Per CPU temporary event storage used while filters are active */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per CPU nesting count of users of the buffered event */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() callers; under event_mutex */
static int trace_buffered_event_ref;
2160 
2161 /**
2162  * trace_buffered_event_enable - enable buffering events
2163  *
2164  * When events are being filtered, it is quicker to use a temporary
2165  * buffer to write the event data into if there's a likely chance
2166  * that it will not be committed. The discard of the ring buffer
2167  * is not as fast as committing, and is much slower than copying
2168  * a commit.
2169  *
2170  * When an event is to be filtered, allocate per cpu buffers to
2171  * write the event data into, and if the event is filtered and discarded
2172  * it is simply dropped, otherwise, the entire data is to be committed
2173  * in one shot.
2174  */
2175 void trace_buffered_event_enable(void)
2176 {
2177         struct ring_buffer_event *event;
2178         struct page *page;
2179         int cpu;
2180 
2181         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2182 
2183         if (trace_buffered_event_ref++)
2184                 return;
2185 
2186         for_each_tracing_cpu(cpu) {
2187                 page = alloc_pages_node(cpu_to_node(cpu),
2188                                         GFP_KERNEL | __GFP_NORETRY, 0);
2189                 if (!page)
2190                         goto failed;
2191 
2192                 event = page_address(page);
2193                 memset(event, 0, sizeof(*event));
2194 
2195                 per_cpu(trace_buffered_event, cpu) = event;
2196 
2197                 preempt_disable();
2198                 if (cpu == smp_processor_id() &&
2199                     this_cpu_read(trace_buffered_event) !=
2200                     per_cpu(trace_buffered_event, cpu))
2201                         WARN_ON_ONCE(1);
2202                 preempt_enable();
2203         }
2204 
2205         return;
2206  failed:
2207         trace_buffered_event_disable();
2208 }
2209 
/* IPI callback: re-allow use of this CPU's buffered event */
static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}
2216 
/* IPI callback: mark this CPU's buffered event as busy (unusable) */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2221 
2222 /**
2223  * trace_buffered_event_disable - disable buffering events
2224  *
2225  * When a filter is removed, it is faster to not use the buffered
2226  * events, and to commit directly into the ring buffer. Free up
2227  * the temp buffers when there are no more users. This requires
2228  * special synchronization with current events.
2229  */
2230 void trace_buffered_event_disable(void)
2231 {
2232         int cpu;
2233 
2234         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2235 
2236         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2237                 return;
2238 
2239         if (--trace_buffered_event_ref)
2240                 return;
2241 
2242         preempt_disable();
2243         /* For each CPU, set the buffer as used. */
2244         smp_call_function_many(tracing_buffer_mask,
2245                                disable_trace_buffered_event, NULL, 1);
2246         preempt_enable();
2247 
2248         /* Wait for all current users to finish */
2249         synchronize_sched();
2250 
2251         for_each_tracing_cpu(cpu) {
2252                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2253                 per_cpu(trace_buffered_event, cpu) = NULL;
2254         }
2255         /*
2256          * Make sure trace_buffered_event is NULL before clearing
2257          * trace_buffered_event_cnt.
2258          */
2259         smp_wmb();
2260 
2261         preempt_disable();
2262         /* Do the work on each cpu */
2263         smp_call_function_many(tracing_buffer_mask,
2264                                enable_trace_buffered_event, NULL, 1);
2265         preempt_enable();
2266 }
2267 
/* Scratch ring buffer used for trigger evaluation when tracing is off */
static struct ring_buffer *temp_buffer;
2269 
/*
 * Reserve event space for a trace event file, preferring the per-CPU
 * buffered event when the event is soft-disabled or filtered (the
 * data is then likely to be discarded rather than committed).
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned long flags, int pc)
{
	struct ring_buffer_event *entry;
	int val;

	*current_rb = trace_file->tr->trace_buffer.buffer;

	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
	    (entry = this_cpu_read(trace_buffered_event))) {
		/* Try to use the per cpu buffer first */
		val = this_cpu_inc_return(trace_buffered_event_cnt);
		if (val == 1) {
			/* Got exclusive use of this CPU's buffered event */
			trace_event_setup(entry, type, flags, pc);
			entry->array[0] = len;
			return entry;
		}
		/* Nested use: back off and reserve from the ring buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	}

	entry = __trace_buffer_lock_reserve(*current_rb,
					    type, len, flags, pc);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb,
						    type, len, flags, pc);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2310 
/* Serializes use of tracepoint_print_iter in output_printk() */
static DEFINE_SPINLOCK(tracepoint_iter_lock);
/* Serializes updates to the tracepoint_printk sysctl state */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2313 
/*
 * Print a trace event directly to the console via printk(), using
 * the global tracepoint_print_iter to format it. Used when the
 * tracepoint_printk sysctl is enabled.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	/* Need a trace() callback to be able to format the event */
	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	event = &fbuffer->trace_file->event_call->event;

	/* The shared iterator must not be used concurrently */
	spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	/* NUL-terminate before handing the seq buffer to printk */
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
2341 
/*
 * sysctl handler for kernel.tracepoint_printk: toggles the static
 * key that routes trace events to printk() via output_printk().
 */
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Only flip the static key when the value actually changed */
	if (save_tracepoint_printk == tracepoint_printk)
		goto out;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

 out:
	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
2374 
/*
 * Commit a trace event, optionally echoing it to printk() first
 * when the tracepoint_printk static key is enabled.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
				    fbuffer->event, fbuffer->entry,
				    fbuffer->flags, fbuffer->pc);
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2385 
2386 /*
2387  * Skip 3:
2388  *
2389  *   trace_buffer_unlock_commit_regs()
2390  *   trace_event_buffer_commit()
2391  *   trace_event_raw_event_xxx()
2392  */
2393 # define STACK_SKIP 3
2394 
2395 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2396                                      struct ring_buffer *buffer,
2397                                      struct ring_buffer_event *event,
2398                                      unsigned long flags, int pc,
2399                                      struct pt_regs *regs)
2400 {
2401         __buffer_unlock_commit(buffer, event);
2402 
2403         /*
2404          * If regs is not set, then skip the necessary functions.
2405          * Note, we can still get here via blktrace, wakeup tracer
2406          * and mmiotrace, but that's ok if they lose a function or
2407          * two. They are not that meaningful.
2408          */
2409         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2410         ftrace_trace_userstack(buffer, flags, pc);
2411 }
2412 
2413 /*
2414  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2415  */
2416 void
2417 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2418                                    struct ring_buffer_event *event)
2419 {
2420         __buffer_unlock_commit(buffer, event);
2421 }
2422 
2423 static void
2424 trace_process_export(struct trace_export *export,
2425                struct ring_buffer_event *event)
2426 {
2427         struct trace_entry *entry;
2428         unsigned int size = 0;
2429 
2430         entry = ring_buffer_event_data(event);
2431         size = ring_buffer_event_length(event);
2432         export->write(export, entry, size);
2433 }
2434 
/* Protects modifications of ftrace_exports_list */
static DEFINE_MUTEX(ftrace_export_lock);

/* RCU protected singly linked list of registered trace exports */
static struct trace_export __rcu *ftrace_exports_list __read_mostly;

/* Static key: fast-path check for whether any export is registered */
static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2440 
/* Turn on the fast-path static key for ftrace_exports() */
static inline void ftrace_exports_enable(void)
{
	static_branch_enable(&ftrace_exports_enabled);
}
2445 
/* Turn off the fast-path static key for ftrace_exports() */
static inline void ftrace_exports_disable(void)
{
	static_branch_disable(&ftrace_exports_enabled);
}
2450 
/*
 * Pass @event to every registered export. Walks the RCU-protected
 * list with preemption disabled instead of a full read-side lock.
 */
void ftrace_exports(struct ring_buffer_event *event)
{
	struct trace_export *export;

	preempt_disable_notrace();

	export = rcu_dereference_raw_notrace(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event);
		export = rcu_dereference_raw_notrace(export->next);
	}

	preempt_enable_notrace();
}
2465 
/*
 * Insert @export at the head of the RCU list. Caller must hold
 * ftrace_export_lock.
 */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
2478 
/*
 * Unlink @export from the RCU list. Caller must hold
 * ftrace_export_lock. Returns 0 on success, -1 if not found.
 */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	/* Find the link that points at export */
	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	/* Publish the bypassing link so RCU walkers skip export */
	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}
2495 
/*
 * Add an export, enabling the fast-path static key when the list
 * transitions from empty. Caller must hold ftrace_export_lock.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	if (*list == NULL)
		ftrace_exports_enable();

	add_trace_export(list, export);
}
2504 
/*
 * Remove an export, disabling the fast-path static key when the
 * list becomes empty. Caller must hold ftrace_export_lock.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (*list == NULL)
		ftrace_exports_disable();

	return ret;
}
2516 
/*
 * Register a function trace export. The export must provide a
 * write() callback. Returns 0 on success, -1 on invalid export.
 */
int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	mutex_lock(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);
2531 
/*
 * Unregister a previously registered function trace export.
 * Returns 0 on success, -1 if the export was not registered.
 */
int unregister_ftrace_export(struct trace_export *export)
{
	int ret;

	mutex_lock(&ftrace_export_lock);

	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2545 
/*
 * Record a function entry (ip and parent ip) into the trace buffer
 * of @tr, honoring event filters and registered exports.
 */
void
trace_function(struct trace_array *tr,
	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
	       int pc)
{
	struct trace_event_call *call = &event_function;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
					    flags, pc);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

	/* Only commit if the event passes the filter (if any) */
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		if (static_branch_unlikely(&ftrace_exports_enabled))
			ftrace_exports(event);
		__buffer_unlock_commit(buffer, event);
	}
}
2570 
#ifdef CONFIG_STACKTRACE

/* Max stack entries that fit in one page of per-CPU scratch space */
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

/* Per CPU scratch storage for saving deep stack traces */
static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
/* Nesting counter reserving the per CPU ftrace_stack */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2580 
/*
 * Record a kernel stack trace into @buffer. Uses a large per-CPU
 * scratch area when available (first-level users) so deep stacks
 * are not truncated; nested users (interrupt/NMI) fall back to
 * saving at most FTRACE_STACK_ENTRIES directly into the event.
 */
static void __ftrace_trace_stack(struct ring_buffer *buffer,
				 unsigned long flags,
				 int skip, int pc, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	struct stack_entry *entry;
	struct stack_trace trace;
	int use_stack;
	int size = FTRACE_STACK_ENTRIES;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		trace.skip++;
#endif

	/*
	 * Since events can happen in NMIs there's no safe way to
	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to use the default
	 * FTRACE_STACK_SIZE.
	 */
	preempt_disable_notrace();

	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
	/*
	 * We don't need any atomic variables, just a barrier.
	 * If an interrupt comes in, we don't care, because it would
	 * have exited and put the counter back to what we want.
	 * We just need a barrier to keep gcc from moving things
	 * around.
	 */
	barrier();
	if (use_stack == 1) {
		/* First-level user: capture into the big per-CPU scratch */
		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		if (trace.nr_entries > size)
			size = trace.nr_entries;
	} else
		/* From now on, use_stack is a boolean */
		use_stack = 0;

	/* Convert from entry count to byte size for the event payload */
	size *= sizeof(unsigned long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
					    sizeof(*entry) + size, flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	memset(&entry->caller, 0, size);

	if (use_stack)
		/* Copy the already-captured trace out of the scratch area */
		memcpy(&entry->caller, trace.entries,
		       trace.nr_entries * sizeof(unsigned long));
	else {
		/* Nested user: capture directly into the event payload */
		trace.max_entries	= FTRACE_STACK_ENTRIES;
		trace.entries		= entry->caller;
		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);
	}

	entry->size = trace.nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
2670 
2671 static inline void ftrace_trace_stack(struct trace_array *tr,
2672                                       struct ring_buffer *buffer,
2673                                       unsigned long flags,
2674                                       int skip, int pc, struct pt_regs *regs)
2675 {
2676         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2677                 return;
2678 
2679         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2680 }
2681 
/*
 * Record a kernel stack trace into @tr's buffer, taking care of
 * contexts where RCU is not watching (e.g. idle entry paths).
 */
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
		   int pc)
{
	struct ring_buffer *buffer = tr->trace_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
		return;
	}

	/*
	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and rcu_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	/* Temporarily make RCU watch us so the trace code is safe */
	rcu_irq_enter_irqson();
	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
	rcu_irq_exit_irqson();
}
2705 
2706 /**
2707  * trace_dump_stack - record a stack back trace in the trace buffer
2708  * @skip: Number of functions to skip (helper handlers)
2709  */
2710 void trace_dump_stack(int skip)
2711 {
2712         unsigned long flags;
2713 
2714         if (tracing_disabled || tracing_selftest_running)
2715                 return;
2716 
2717         local_save_flags(flags);
2718 
2719 #ifndef CONFIG_UNWINDER_ORC
2720         /* Skip 1 to skip this function. */
2721         skip++;
2722 #endif
2723         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2724                              flags, skip, preempt_count(), NULL);
2725 }
2726 
/* Per CPU recursion guard for user stack tracing */
static DEFINE_PER_CPU(int, user_stack_count);
2728 
/*
 * Record the current task's user-space stack trace into @buffer,
 * if the userstacktrace option is enabled. Guarded against NMI
 * context (page faults) and against recursion on this CPU.
 */
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;
	struct stack_trace trace;

	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	preempt_disable();
	if (__this_cpu_read(user_stack_count))
		goto out;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), flags, pc);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	memset(&entry->caller, 0, sizeof(entry->caller));

	trace.nr_entries	= 0;
	trace.max_entries	= FTRACE_STACK_ENTRIES;
	trace.skip		= 0;
	trace.entries		= entry->caller;

	/* May fault; that's fine here, we're not in NMI context */
	save_stack_trace_user(&trace);
	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
 out:
	preempt_enable();
}
2780 
#ifdef UNUSED
/*
 * Convenience wrapper around ftrace_trace_userstack().  Note:
 * ftrace_trace_userstack() takes a ring buffer, not a trace_array —
 * the old call passed @tr directly and would not have compiled had
 * this block ever been enabled.
 */
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
}
#endif /* UNUSED */
2787 
2788 #endif /* CONFIG_STACKTRACE */
2789 
/* created for use with alloc_percpu */
struct trace_buffer_struct {
	int nesting;	/* current trace_printk() nesting depth on this CPU */
	char buffer[4][TRACE_BUF_SIZE];	/* one scratch buffer per context level */
};

/* Per-CPU trace_printk() scratch buffers; NULL until first allocation */
static struct trace_buffer_struct *trace_percpu_buffer;
2797 
2798 /*
2799  * Thise allows for lockless recording.  If we're nested too deeply, then
2800  * this returns NULL.
2801  */
2802 static char *get_trace_buf(void)
2803 {
2804         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2805 
2806         if (!buffer || buffer->nesting >= 4)
2807                 return NULL;
2808 
2809         buffer->nesting++;
2810 
2811         /* Interrupts must see nesting incremented before we use the buffer */
2812         barrier();
2813         return &buffer->buffer[buffer->nesting][0];
2814 }
2815 
/* Release the scratch buffer taken by get_trace_buf() on this CPU */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
2822 
2823 static int alloc_percpu_trace_buffer(void)
2824 {
2825         struct trace_buffer_struct *buffers;
2826 
2827         buffers = alloc_percpu(struct trace_buffer_struct);
2828         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2829                 return -ENOMEM;
2830 
2831         trace_percpu_buffer = buffers;
2832         return 0;
2833 }
2834 
/* Nonzero once the trace_printk() buffers have been set up */
static int buffers_allocated;

/*
 * trace_printk_init_buffers - one-time setup for trace_printk() use.
 *
 * Allocates the per-CPU scratch buffers, expands the ring buffers to
 * their configured size and loudly warns that a debug facility is in
 * use.  Safe to call multiple times; only the first call does work.
 */
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
	pr_warn("** unsafe for production use.                           **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** If you see this message and you are not debugging    **\n");
	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers();

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.trace_buffer.buffer)
		tracing_start_cmdline_record();
}
2876 
2877 void trace_printk_start_comm(void)
2878 {
2879         /* Start tracing comms if trace printk is set */
2880         if (!buffers_allocated)
2881                 return;
2882         tracing_start_cmdline_record();
2883 }
2884 
2885 static void trace_printk_start_stop_comm(int enabled)
2886 {
2887         if (!buffers_allocated)
2888                 return;
2889 
2890         if (enabled)
2891                 tracing_start_cmdline_record();
2892         else
2893                 tracing_stop_cmdline_record();
2894 }
2895 
/**
 * trace_vbprintk - write binary msg to tracing buffer
 * @ip:   instruction pointer of the trace_printk() call site
 * @fmt:  printf format string; stored by pointer and decoded at read time
 * @args: arguments for @fmt, binary-encoded via vbin_printf()
 *
 * Fast path of trace_printk(): arguments are stored in binary form and
 * only formatted when the trace is read.
 *
 * Returns the vbin_printf() length (u32 words — TODO confirm units), or
 * 0 when tracing is off or no per-CPU scratch buffer is available.
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_bprint;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct trace_array *tr = &global_trace;
	struct bprint_entry *entry;
	unsigned long flags;
	char *tbuffer;
	int len = 0, size, pc;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	pc = preempt_count();
	preempt_disable_notrace();

	/* Per-CPU scratch buffer; NULL means we are nested too deeply */
	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);

	/* Drop the event if encoding failed or would not fit */
	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
		goto out;

	local_save_flags(flags);
	size = sizeof(*entry) + sizeof(u32) * len;
	buffer = tr->trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					    flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->fmt			= fmt;

	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		/* skip=6: hide the tracing internals from the stack record */
		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
	}

out:
	put_trace_buf();

out_nobuffer:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
2958 
/*
 * __trace_array_vprintk - format a message and write it into @buffer.
 * @buffer: ring buffer to write the TRACE_PRINT event into
 * @ip:     instruction pointer of the call site
 * @fmt:    printf format string
 * @args:   arguments for @fmt
 *
 * Unlike trace_vbprintk(), the message is fully formatted (as text)
 * before being stored.  Returns the formatted length, or 0 when
 * tracing is off or no per-CPU scratch buffer is available.
 */
static int
__trace_array_vprintk(struct ring_buffer *buffer,
		      unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_print;
	struct ring_buffer_event *event;
	int len = 0, size, pc;
	struct print_entry *entry;
	unsigned long flags;
	char *tbuffer;

	if (tracing_disabled || tracing_selftest_running)
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	pc = preempt_count();
	preempt_disable_notrace();


	/* Per-CPU scratch buffer; NULL means we are nested too deeply */
	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

	local_save_flags(flags);
	/* +1 for the trailing NUL that gets copied along with the text */
	size = sizeof(*entry) + len + 1;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, tbuffer, len + 1);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		/* skip=6: hide the tracing internals from the stack record */
		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
	}

out:
	put_trace_buf();

out_nobuffer:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}
3012 
3013 int trace_array_vprintk(struct trace_array *tr,
3014                         unsigned long ip, const char *fmt, va_list args)
3015 {
3016         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3017 }
3018 
3019 int trace_array_printk(struct trace_array *tr,
3020                        unsigned long ip, const char *fmt, ...)
3021 {
3022         int ret;
3023         va_list ap;
3024 
3025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3026                 return 0;
3027 
3028         va_start(ap, fmt);
3029         ret = trace_array_vprintk(tr, ip, fmt, ap);
3030         va_end(ap);
3031         return ret;
3032 }
3033 
3034 int trace_array_printk_buf(struct ring_buffer *buffer,
3035                            unsigned long ip, const char *fmt, ...)
3036 {
3037         int ret;
3038         va_list ap;
3039 
3040         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3041                 return 0;
3042 
3043         va_start(ap, fmt);
3044         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3045         va_end(ap);
3046         return ret;
3047 }
3048 
/* trace_printk() backend: route the message into the global trace buffer */
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
3054 
3055 static void trace_iterator_increment(struct trace_iterator *iter)
3056 {
3057         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3058 
3059         iter->idx++;
3060         if (buf_iter)
3061                 ring_buffer_read(buf_iter, NULL);
3062 }
3063 
3064 static struct trace_entry *
3065 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3066                 unsigned long *lost_events)
3067 {
3068         struct ring_buffer_event *event;
3069         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3070 
3071         if (buf_iter)
3072                 event = ring_buffer_iter_peek(buf_iter, ts);
3073         else
3074                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3075                                          lost_events);
3076 
3077         if (event) {
3078                 iter->ent_size = ring_buffer_event_length(event);
3079                 return ring_buffer_event_data(event);
3080         }
3081         iter->ent_size = 0;
3082         return NULL;
3083 }
3084 
/*
 * __find_next_entry - find the oldest pending entry across CPU buffers.
 * @iter:           the trace iterator
 * @ent_cpu:        out (optional): CPU the returned entry came from
 * @missing_events: out (optional): events lost before the returned entry
 * @ent_ts:         out (optional): timestamp of the returned entry
 *
 * Peeks (does not consume); returns NULL when all buffers are empty.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct ring_buffer *buffer = iter->trace_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* peek_next_entry() clobbered ent_size; restore the winner's size */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
3144 
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
}
3151 
3152 /* Find the next real entry, and increment the iterator to the next entry */
3153 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3154 {
3155         iter->ent = __find_next_entry(iter, &iter->cpu,
3156                                       &iter->lost_events, &iter->ts);
3157 
3158         if (iter->ent)
3159                 trace_iterator_increment(iter);
3160 
3161         return iter->ent ? iter : NULL;
3162 }
3163 
/* Consume (remove) the next entry on the iterator's current CPU */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3169 
3170 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3171 {
3172         struct trace_iterator *iter = m->private;
3173         int i = (int)*pos;
3174         void *ent;
3175 
3176         WARN_ON_ONCE(iter->leftover);
3177 
3178         (*pos)++;
3179 
3180         /* can't go backwards */
3181         if (iter->idx > i)
3182                 return NULL;
3183 
3184         if (iter->idx < 0)
3185                 ent = trace_find_next_entry_inc(iter);
3186         else
3187                 ent = iter;
3188 
3189         while (ent && iter->idx < i)
3190                 ent = trace_find_next_entry_inc(iter);
3191 
3192         iter->pos = *pos;
3193 
3194         return ent;
3195 }
3196 
/*
 * tracing_iter_reset - rewind @cpu's buffer iterator to the trace start.
 * @iter: the trace iterator
 * @cpu:  CPU whose buffer iterator to reset
 *
 * Entries stamped before trace_buffer->time_start are skipped and their
 * count is stored in the per-CPU skipped_entries.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
		if (ts >= iter->trace_buffer->time_start)
			break;
		entries++;
		ring_buffer_read(buf_iter, NULL);
	}

	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
}
3226 
/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
/*
 * seq_file .start: position the trace iterator for a read at *pos.
 * Returns the first element to print, or NULL/ERR_PTR on nothing/busy.
 * Takes the event read lock and per-cpu access lock; s_stop() releases
 * them.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/*
	 * copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace, the pointer to the
	 * name may be used instead of a strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
	 */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
		*iter->trace = *tr->current_trace;
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
#endif

	/* Pause comm/tgid recording while a non-snapshot read is in flight */
	if (!iter->snapshot)
		atomic_inc(&trace_record_taskinfo_disabled);

	if (*pos != iter->pos) {
		/* Non-sequential read: rewind and walk forward to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
3291 
/*
 * seq_file .stop: undo s_start() — drop the access and event read
 * locks and re-enable taskinfo recording.  Must mirror s_start()'s
 * early-return conditions so locks stay balanced.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* s_start() returned ERR_PTR(-EBUSY) without locking in this case */
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	if (!iter->snapshot)
		atomic_dec(&trace_record_taskinfo_disabled);

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
3307 
3308 static void
3309 get_total_entries(struct trace_buffer *buf,
3310                   unsigned long *total, unsigned long *entries)
3311 {
3312         unsigned long count;
3313         int cpu;
3314 
3315         *total = 0;
3316         *entries = 0;
3317 
3318         for_each_tracing_cpu(cpu) {
3319                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3320                 /*
3321                  * If this buffer has skipped entries, then we hold all
3322                  * entries for the trace and we need to ignore the
3323                  * ones before the time stamp.
3324                  */
3325                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3326                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3327                         /* total is the same as the entries */
3328                         *total += count;
3329                 } else
3330                         *total += count +
3331                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3332                 *entries += count;
3333         }
3334 }
3335 
/* Emit the column legend used by the latency trace output format */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                  _------=> CPU#            \n"
		    "#                 / _-----=> irqs-off        \n"
		    "#                | / _----=> need-resched    \n"
		    "#                || / _---=> hardirq/softirq \n"
		    "#                ||| / _--=> preempt-depth   \n"
		    "#                |||| /     delay            \n"
		    "#  cmd     pid   ||||| time  |   caller      \n"
		    "#     \\   /      |||||  \\    |   /         \n");
}
3347 
/* Emit the "entries-in-buffer/entries-written" summary line */
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
3358 
/*
 * Emit the basic column header; a TGID column is inserted when the
 * record-tgid option is set in @flags.
 */
static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;

	print_event_info(buf, m);

	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
	seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
}
3369 
/*
 * Emit the extended column header (irq-info variant); spacing widens
 * when the optional TGID column is enabled in @flags.
 */
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;
	const char tgid_space[] = "          ";
	const char space[] = "  ";

	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
		   tgid ? tgid_space : space);
	seq_printf(m, "#                          %s / _----=> need-resched\n",
		   tgid ? tgid_space : space);
	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
		   tgid ? tgid_space : space);
	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
		   tgid ? tgid_space : space);
	seq_printf(m, "#                          %s||| /     delay\n",
		   tgid ? tgid_space : space);
	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
		   tgid ? "   TGID   " : space);
	seq_printf(m, "#              | |       | %s||||       |         |\n",
		   tgid ? "     |    " : space);
}
3392 
3393 void
3394 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3395 {
3396         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3397         struct trace_buffer *buf = iter->trace_buffer;
3398         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3399         struct tracer *type = iter->trace;
3400         unsigned long entries;
3401         unsigned long total;
3402         const char *name = "preemption";
3403 
3404         name = type->name;
3405 
3406         get_total_entries(buf, &total, &entries);
3407 
3408         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3409                    name, UTS_RELEASE);
3410         seq_puts(m, "# -----------------------------------"
3411                  "---------------------------------\n");
3412         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3413                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3414                    nsecs_to_usecs(data->saved_latency),
3415                    entries,
3416                    total,
3417                    buf->cpu,
3418 #if defined(CONFIG_PREEMPT_NONE)
3419                    "server",
3420 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3421                    "desktop",
3422 #elif defined(CONFIG_PREEMPT)
3423                    "preempt",
3424 #else
3425                    "unknown",
3426 #endif
3427                    /* These are reserved for later use */
3428                    0, 0, 0, 0);
3429 #ifdef CONFIG_SMP
3430         seq_printf(m, " #P:%d)\n", num_online_cpus());
3431 #else
3432         seq_puts(m, ")\n");
3433 #endif
3434         seq_puts(m, "#    -----------------\n");
3435         seq_printf(m, "#    | task: %.16s-%d "
3436                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3437                    data->comm, data->pid,
3438                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3439                    data->policy, data->rt_priority);
3440         seq_puts(m, "#    -----------------\n");
3441 
3442         if (data->critical_start) {
3443                 seq_puts(m, "#  => started at: ");
3444                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3445                 trace_print_seq(m, &iter->seq);
3446                 seq_puts(m, "\n#  => ended at:   ");
3447                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3448                 trace_print_seq(m, &iter->seq);
3449                 seq_puts(m, "\n#\n");
3450         }
3451 
3452         seq_puts(m, "#\n");
3453 }
3454 
/*
 * Print a one-time "CPU N buffer started" annotation the first time an
 * entry from a given CPU shows up in the output (annotate option only).
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Already announced this CPU */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* CPUs with skipped entries hold the whole trace; no annotation */
	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
}
3481 
/* Default output format: context columns plus the event's own printer */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	/* Delegate the body of the line to the event's registered printer */
	if (event)
		return event->funcs->trace(iter, sym_flags, event);

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3513 
/* "raw" output format: pid/cpu/timestamp plus the event's raw printer */
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	event = ftrace_find_event(entry->type);
	if (event)
		return event->funcs->raw(iter, 0, event);

	/* Unknown event type */
	trace_seq_printf(s, "%d ?\n", entry->type);

	return trace_handle_return(s);
}
3538 
/* "hex" output format: fields dumped as hex, newline-terminated */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
3568 
/* "bin" output format: fields emitted in raw binary */
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		SEQ_PUT_FIELD(s, entry->pid);
		SEQ_PUT_FIELD(s, iter->cpu);
		SEQ_PUT_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Unknown event types are silently treated as handled */
	event = ftrace_find_event(entry->type);
	return event ? event->funcs->binary(iter, 0, event) :
		TRACE_TYPE_HANDLED;
}
3590 
3591 int trace_empty(struct trace_iterator *iter)
3592 {
3593         struct ring_buffer_iter *buf_iter;
3594         int cpu;
3595 
3596         /* If we are looking at one CPU buffer, only check that one */
3597         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3598                 cpu = iter->cpu_file;
3599                 buf_iter = trace_buffer_iter(iter, cpu);
3600                 if (buf_iter) {
3601                         if (!ring_buffer_iter_empty(buf_iter))
3602                                 return 0;
3603                 } else {
3604                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3605                                 return 0;
3606                 }
3607                 return 1;
3608         }
3609 
3610         for_each_tracing_cpu(cpu) {
3611                 buf_iter = trace_buffer_iter(iter, cpu);
3612                 if (buf_iter) {
3613                         if (!ring_buffer_iter_empty(buf_iter))
3614                                 return 0;
3615                 } else {
3616                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3617                                 return 0;
3618                 }
3619         }
3620 
3621         return 1;
3622 }
3623 
3624 /*  Called with trace_event_read_lock() held. */
3625 enum print_line_t print_trace_line(struct trace_iterator *iter)
3626 {
3627         struct trace_array *tr = iter->tr;
3628         unsigned long trace_flags = tr->trace_flags;
3629         enum print_line_t ret;
3630 
3631         if (iter->lost_events) {
3632                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3633                                  iter->cpu, iter->lost_events);
3634                 if (trace_seq_has_overflowed(&iter->seq))
3635                         return TRACE_TYPE_PARTIAL_LINE;
3636         }
3637 
3638         if (iter->trace && iter->trace->print_line) {
3639                 ret = iter->trace->print_line(iter);
3640                 if (ret != TRACE_TYPE_UNHANDLED)
3641                         return ret;
3642         }
3643 
3644         if (iter->ent->type == TRACE_BPUTS &&
3645                         trace_flags & TRACE_ITER_PRINTK &&
3646                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3647                 return trace_print_bputs_msg_only(iter);
3648 
3649         if (iter->ent->type == TRACE_BPRINT &&
3650                         trace_flags & TRACE_ITER_PRINTK &&
3651                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3652                 return trace_print_bprintk_msg_only(iter);
3653 
3654         if (iter->ent->type == TRACE_PRINT &&
3655                         trace_flags & TRACE_ITER_PRINTK &&
3656                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3657                 return trace_print_printk_msg_only(iter);
3658 
3659         if (trace_flags & TRACE_ITER_BIN)
3660                 return print_bin_fmt(iter);
3661 
3662         if (trace_flags & TRACE_ITER_HEX)
3663                 return print_hex_fmt(iter);
3664 
3665         if (trace_flags & TRACE_ITER_RAW)
3666                 return print_raw_fmt(iter);
3667 
3668         return print_trace_fmt(iter);
3669 }
3670 
3671 void trace_latency_header(struct seq_file *m)
3672 {
3673         struct trace_iterator *iter = m->private;
3674         struct trace_array *tr = iter->tr;
3675 
3676         /* print nothing if the buffers are empty */
3677         if (trace_empty(iter))
3678                 return;
3679 
3680         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3681                 print_trace_header(m, iter);
3682 
3683         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3684                 print_lat_help_header(m);
3685 }
3686 
3687 void trace_default_header(struct seq_file *m)
3688 {
3689         struct trace_iterator *iter = m->private;
3690         struct trace_array *tr = iter->tr;
3691         unsigned long trace_flags = tr->trace_flags;
3692 
3693         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3694                 return;
3695 
3696         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3697                 /* print nothing if the buffers are empty */
3698                 if (trace_empty(iter))
3699                         return;
3700                 print_trace_header(m, iter);
3701                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3702                         print_lat_help_header(m);
3703         } else {
3704                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3705                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3706                                 print_func_help_header_irq(iter->trace_buffer,
3707                                                            m, trace_flags);
3708                         else
3709                                 print_func_help_header(iter->trace_buffer, m,
3710                                                        trace_flags);
3711                 }
3712         }
3713 }
3714 
/* Warn in the output if function tracing was shut down by a bug. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3722 
3723 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Print usage help for the top-level "snapshot" file.
 *
 * Fixed the last line: it read "is not a '' or '1'" but the special
 * values are '0' (free) and '1' (allocate/take), per the lines above.
 */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
3733 
/*
 * Print usage help for a per-cpu "snapshot" file.
 *
 * Fixed the last line: it read "is not a '' or '1'" but the special
 * values are '0' and '1', matching show_snapshot_main_help().
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "#                     Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
3748 
3749 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3750 {
3751         if (iter->tr->allocated_snapshot)
3752                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3753         else
3754                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3755 
3756         seq_puts(m, "# Snapshot commands:\n");
3757         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3758                 show_snapshot_main_help(m);
3759         else
3760                 show_snapshot_percpu_help(m);
3761 }
3762 #else
/* Should never be called: without CONFIG_TRACER_MAX_TRACE there is no snapshot */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3765 #endif
3766 
3767 static int s_show(struct seq_file *m, void *v)
3768 {
3769         struct trace_iterator *iter = v;
3770         int ret;
3771 
3772         if (iter->ent == NULL) {
3773                 if (iter->tr) {
3774                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3775                         seq_puts(m, "#\n");
3776                         test_ftrace_alive(m);
3777                 }
3778                 if (iter->snapshot && trace_empty(iter))
3779                         print_snapshot_help(m, iter);
3780                 else if (iter->trace && iter->trace->print_header)
3781                         iter->trace->print_header(m);
3782                 else
3783                         trace_default_header(m);
3784 
3785         } else if (iter->leftover) {
3786                 /*
3787                  * If we filled the seq_file buffer earlier, we
3788                  * want to just show it now.
3789                  */
3790                 ret = trace_print_seq(m, &iter->seq);
3791 
3792                 /* ret should this time be zero, but you never know */
3793                 iter->leftover = ret;
3794 
3795         } else {
3796                 print_trace_line(iter);
3797                 ret = trace_print_seq(m, &iter->seq);
3798                 /*
3799                  * If we overflow the seq_file buffer, then it will
3800                  * ask us for this data again at start up.
3801                  * Use that instead.
3802                  *  ret is 0 if seq_file write succeeded.
3803                  *        -1 otherwise.
3804                  */
3805                 iter->leftover = ret;
3806         }
3807 
3808         return 0;
3809 }
3810 
3811 /*
3812  * Should be used after trace_array_get(), trace_types_lock
3813  * ensures that i_cdev was already initialized.
3814  */
3815 static inline int tracing_get_cpu(struct inode *inode)
3816 {
3817         if (inode->i_cdev) /* See trace_create_cpu_file() */
3818                 return (long)inode->i_cdev - 1;
3819         return RING_BUFFER_ALL_CPUS;
3820 }
3821 
/* seq_file operations for the "trace" file (iterates buffer entries). */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3828 
/*
 * __tracing_open - build the iterator used to read the "trace" file
 * @inode:    inode of the opened file; i_private is the trace_array
 * @file:     file being opened
 * @snapshot: true when opening the "snapshot" file instead of "trace"
 *
 * Allocates a trace_iterator as the seq_file private data, snapshots
 * the current tracer, prepares a ring buffer iterator per CPU (or just
 * one when a per-cpu file was opened) and, unless this is the snapshot
 * file, stops tracing for the duration of the read.
 *
 * Returns the iterator, or ERR_PTR(-ENODEV/-ENOMEM) on failure.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	/* The iterator lives in the seq_file's private allocation. */
	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU. */
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
	mutex_lock(&trace_types_lock);
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
	if (!iter->trace)
		goto fail;

	*iter->trace = *tr->current_trace;

	/* Tracks which CPUs have already produced output (for annotations). */
	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->trace_buffer = &tr->max_buffer;
	else
#endif
		iter->trace_buffer = &tr->trace_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* stop the trace while dumping if we are not opening "snapshot" */
	if (!iter->snapshot)
		tracing_stop_tr(tr);

	/*
	 * Prepare every needed CPU iterator first, sync once, then start
	 * them, so all CPUs are read from a consistent point in time.
	 */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	kfree(iter->trace);
	kfree(iter->buffer_iter);
release:
	/* Frees both the seq_file and the iterator allocated above. */
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3923 
/*
 * tracing_open_generic - generic open that stashes i_private for read/write
 *
 * Fails with -ENODEV once tracing has been permanently disabled.  Does
 * not take a trace_array reference; see tracing_open_generic_tr() for
 * the ref-counted variant.
 */
int tracing_open_generic(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

	filp->private_data = inode->i_private;
	return 0;
}
3932 
3933 bool tracing_is_disabled(void)
3934 {
3935         return (tracing_disabled) ? true: false;
3936 }
3937 
3938 /*
3939  * Open and update trace_array ref count.
3940  * Must have the current trace_array passed to it.
3941  */
3942 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3943 {
3944         struct trace_array *tr = inode->i_private;
3945 
3946         if (tracing_disabled)
3947                 return -ENODEV;
3948 
3949         if (trace_array_get(tr) < 0)
3950                 return -ENODEV;
3951 
3952         filp->private_data = inode->i_private;
3953 
3954         return 0;
3955 }
3956 
/*
 * tracing_release - release handler for the "trace" file
 *
 * Undoes __tracing_open(): finishes every per-cpu ring buffer iterator,
 * lets the tracer clean up, restarts tracing (unless this was the
 * snapshot file), drops the trace_array reference and frees the
 * iterator via seq_release_private().
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	/* A write-only open never created an iterator; just drop the ref. */
	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (!iter->snapshot)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	/* __trace_array_put: trace_types_lock is already held here. */
	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
	kfree(iter->trace);
	kfree(iter->buffer_iter);
	seq_release_private(inode, file);

	return 0;
}
3997 
/* Release counterpart of tracing_open_generic_tr(): drop the tr ref. */
static int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
4005 
/* Release for single_open() files that also hold a trace_array ref. */
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}
4014 
4015 static int tracing_open(struct inode *inode, struct file *file)
4016 {
4017         struct trace_array *tr = inode->i_private;
4018         struct trace_iterator *iter;
4019         int ret = 0;
4020 
4021         if (trace_array_get(tr) < 0)
4022                 return -ENODEV;
4023 
4024         /* If this file was open for write, then erase contents */
4025         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4026                 int cpu = tracing_get_cpu(inode);
4027                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4028 
4029 #ifdef CONFIG_TRACER_MAX_TRACE
4030                 if (tr->current_trace->print_max)
4031                         trace_buf = &tr->max_buffer;
4032 #endif
4033 
4034                 if (cpu == RING_BUFFER_ALL_CPUS)
4035                         tracing_reset_online_cpus(trace_buf);
4036                 else
4037                         tracing_reset(trace_buf, cpu);
4038         }
4039 
4040         if (file->f_mode & FMODE_READ) {
4041                 iter = __tracing_open(inode, file, false);
4042                 if (IS_ERR(iter))
4043                         ret = PTR_ERR(iter);
4044                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4045                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4046         }
4047 
4048         if (ret < 0)
4049                 trace_array_put(tr);
4050 
4051         return ret;
4052 }
4053 
4054 /*
4055  * Some tracers are not suitable for instance buffers.
4056  * A tracer is always available for the global array (toplevel)
4057  * or if it explicitly states that it is.
4058  */
4059 static bool
4060 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4061 {
4062         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4063 }
4064 
4065 /* Find the next tracer that this trace array may use */
4066 static struct tracer *
4067 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4068 {
4069         while (t && !trace_ok_for_array(t, tr))
4070                 t = t->next;
4071 
4072         return t;
4073 }
4074 
4075 static void *
4076 t_next(struct seq_file *m, void *v, loff_t *pos)
4077 {
4078         struct trace_array *tr = m->private;
4079         struct tracer *t = v;
4080 
4081         (*pos)++;
4082 
4083         if (t)
4084                 t = get_tracer_for_array(tr, t->next);
4085 
4086         return t;
4087 }
4088 
4089 static void *t_start(struct seq_file *m, loff_t *pos)
4090 {
4091         struct trace_array *tr = m->private;
4092         struct tracer *t;
4093         loff_t l = 0;
4094 
4095         mutex_lock(&trace_types_lock);
4096 
4097         t = get_tracer_for_array(tr, trace_types);
4098         for (; t && l < *pos; t = t_next(m, t, &l))
4099                         ;
4100 
4101         return t;
4102 }
4103 
/* seq_file ->stop: drop the lock taken in t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4108 
4109 static int t_show(struct seq_file *m, void *v)
4110 {
4111         struct tracer *t = v;
4112 
4113         if (!t)
4114                 return 0;
4115 
4116         seq_puts(m, t->name);
4117         if (t->next)
4118                 seq_putc(m, ' ');
4119         else
4120                 seq_putc(m, '\n');
4121 
4122         return 0;
4123 }
4124 
/* seq_file operations for the "available_tracers" file. */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
4131 
4132 static int show_traces_open(struct inode *inode, struct file *file)
4133 {
4134         struct trace_array *tr = inode->i_private;
4135         struct seq_file *m;
4136         int ret;
4137 
4138         if (tracing_disabled)
4139                 return -ENODEV;
4140 
4141         ret = seq_open(file, &show_traces_seq_ops);
4142         if (ret)
4143                 return ret;
4144 
4145         m = file->private_data;
4146         m->private = tr;
4147 
4148         return 0;
4149 }
4150 
/*
 * Writes to the "trace" file are accepted and discarded so that
 * "echo > trace" succeeds; the actual clearing happens at open time
 * via O_TRUNC (see tracing_open()).
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
4157 
4158 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4159 {
4160         int ret;
4161 
4162         if (file->f_mode & FMODE_READ)
4163                 ret = seq_lseek(file, offset, whence);
4164         else
4165                 file->f_pos = ret = 0;
4166 
4167         return ret;
4168 }
4169 
/* File operations for the "trace" file. */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
4177 
/* File operations for the "available_tracers" file. */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.release	= seq_release,
	.llseek		= seq_lseek,
};
4184 
4185 static ssize_t
4186 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4187                      size_t count, loff_t *ppos)
4188 {
4189         struct trace_array *tr = file_inode(filp)->i_private;
4190         char *mask_str;
4191         int len;
4192 
4193         len = snprintf(NULL, 0, "%*pb\n",
4194                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4195         mask_str = kmalloc(len, GFP_KERNEL);
4196         if (!mask_str)
4197                 return -ENOMEM;
4198 
4199         len = snprintf(mask_str, len, "%*pb\n",
4200                        cpumask_pr_args(tr->tracing_cpumask));
4201         if (len >= count) {
4202                 count = -EINVAL;
4203                 goto out_err;
4204         }
4205         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4206 
4207 out_err:
4208         kfree(mask_str);
4209 
4210         return count;
4211 }
4212 
/*
 * Write handler for "tracing_cpumask": parse a new CPU mask from user
 * space and enable/disable per-cpu recording for every CPU whose bit
 * flipped.  The update runs with IRQs off under tr->max_lock so it
 * cannot race with a max-latency buffer swap.
 */
static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
		      size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	cpumask_var_t tracing_cpumask_new;
	int err, cpu;

	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
	if (err)
		goto err_unlock;	/* label only frees; name is historical */

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
	free_cpumask_var(tracing_cpumask_new);

	return count;

err_unlock:
	free_cpumask_var(tracing_cpumask_new);

	return err;
}
4259 
/* File operations for the "tracing_cpumask" file. */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
4267 
4268 static int tracing_trace_options_show(struct seq_file *m, void *v)
4269 {
4270         struct tracer_opt *trace_opts;
4271         struct trace_array *tr = m->private;
4272         u32 tracer_flags;
4273         int i;
4274 
4275         mutex_lock(&trace_types_lock);
4276         tracer_flags = tr->current_trace->flags->val;
4277         trace_opts = tr->current_trace->flags->opts;
4278 
4279         for (i = 0; trace_options[i]; i++) {
4280                 if (tr->trace_flags & (1 << i))
4281                         seq_printf(m, "%s\n", trace_options[i]);
4282                 else
4283                         seq_printf(m, "no%s\n", trace_options[i]);
4284         }
4285 
4286         for (i = 0; trace_opts[i].name; i++) {
4287                 if (tracer_flags & trace_opts[i].bit)
4288                         seq_printf(m, "%s\n", trace_opts[i].name);
4289                 else
4290                         seq_printf(m, "no%s\n", trace_opts[i].name);
4291         }
4292         mutex_unlock(&trace_types_lock);
4293 
4294         return 0;
4295 }
4296 
4297 static int __set_tracer_option(struct trace_array *tr,
4298                                struct tracer_flags *tracer_flags,
4299                                struct tracer_opt *opts, int neg)
4300 {
4301         struct tracer *trace = tracer_flags->trace;
4302         int ret;
4303 
4304         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4305         if (ret)
4306                 return ret;
4307 
4308         if (neg)
4309                 tracer_flags->val &= ~opts->bit;
4310         else
4311                 tracer_flags->val |= opts->bit;
4312         return 0;
4313 }
4314 
4315 /* Try to assign a tracer specific option */
4316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4317 {
4318         struct tracer *trace = tr->current_trace;
4319         struct tracer_flags *tracer_flags = trace->flags;
4320         struct tracer_opt *opts = NULL;
4321         int i;
4322 
4323         for (i = 0; tracer_flags->opts[i].name; i++) {
4324                 opts = &tracer_flags->opts[i];
4325 
4326                 if (strcmp(cmp, opts->name) == 0)
4327                         return __set_tracer_option(tr, trace->flags, opts, neg);
4328         }
4329 
4330         return -EINVAL;
4331 }
4332 
4333 /* Some tracers require overwrite to stay enabled */
4334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4335 {
4336         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4337                 return -1;
4338 
4339         return 0;
4340 }
4341 
4342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4343 {
4344         /* do nothing if flag is already set */
4345         if (!!(tr->trace_flags & mask) == !!enabled)
4346                 return 0;
4347 
4348         /* Give the tracer a chance to approve the change */
4349         if (tr->current_trace->flag_changed)
4350                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4351                         return -EINVAL;
4352 
4353         if (enabled)
4354                 tr->trace_flags |= mask;
4355         else
4356                 tr->trace_flags &= ~mask;
4357 
4358         if (mask == TRACE_ITER_RECORD_CMD)
4359                 trace_event_enable_cmd_record(enabled);
4360 
4361         if (mask == TRACE_ITER_RECORD_TGID) {
4362                 if (!tgid_map)
4363                         tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4364                                            GFP_KERNEL);
4365                 if (!tgid_map) {
4366                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4367                         return -ENOMEM;
4368                 }
4369 
4370                 trace_event_enable_tgid_record(enabled);
4371         }
4372 
4373         if (mask == TRACE_ITER_EVENT_FORK)
4374                 trace_event_follow_fork(tr, enabled);
4375 
4376         if (mask == TRACE_ITER_FUNC_FORK)
4377                 ftrace_pid_follow_fork(tr, enabled);
4378 
4379         if (mask == TRACE_ITER_OVERWRITE) {
4380                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4381 #ifdef CONFIG_TRACER_MAX_TRACE
4382                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4383 #endif
4384         }
4385 
4386         if (mask == TRACE_ITER_PRINTK) {
4387                 trace_printk_start_stop_comm(enabled);
4388                 trace_printk_control(enabled);
4389         }
4390 
4391         return 0;
4392 }
4393 
/*
 * trace_set_options - apply one option string ("opt" or "noopt")
 *
 * Tries the core trace_options[] flags first; if no core option
 * matches, falls back to the current tracer's private options.
 * Returns the result of the flag setter, or -ENODEV if nothing
 * matched at all.
 */
static int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret = -ENODEV;
	int i;
	/* Remember the length before strstrip() may shorten it. */
	size_t orig_len = strlen(option);

	cmp = strstrip(option);

	/* A "no" prefix clears the option instead of setting it. */
	if (strncmp(cmp, "no", 2) == 0) {
		neg = 1;
		cmp += 2;
	}

	mutex_lock(&trace_types_lock);

	for (i = 0; trace_options[i]; i++) {
		if (strcmp(cmp, trace_options[i]) == 0) {
			ret = set_tracer_flag(tr, 1 << i, !neg);
			break;
		}
	}

	/* If no option could be set, test the specific tracer options */
	if (!trace_options[i])
		ret = set_tracer_option(tr, cmp, neg);

	mutex_unlock(&trace_types_lock);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4433 
4434 static void __init apply_trace_boot_options(void)
4435 {
4436         char *buf = trace_boot_options_buf;
4437         char *option;
4438 
4439         while (true) {
4440                 option = strsep(&buf, ",");
4441 
4442                 if (!option)
4443                         break;
4444 
4445                 if (*option)
4446                         trace_set_options(&global_trace, option);
4447 
4448                 /* Put back the comma to allow this to be called again */
4449                 if (buf)
4450                         *(buf - 1) = ',';
4451         }
4452 }
4453 
4454 static ssize_t
4455 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4456                         size_t cnt, loff_t *ppos)
4457 {
4458         struct seq_file *m = filp->private_data;
4459         struct trace_array *tr = m->private;
4460         char buf[64];
4461         int ret;
4462 
4463         if (cnt >= sizeof(buf))
4464                 return -EINVAL;
4465 
4466         if (copy_from_user(buf, ubuf, cnt))
4467                 return -EFAULT;
4468 
4469         buf[cnt] = 0;
4470 
4471         ret = trace_set_options(tr, buf);
4472         if (ret < 0)
4473                 return ret;
4474 
4475         *ppos += cnt;
4476 
4477         return cnt;
4478 }
4479 
4480 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4481 {
4482         struct trace_array *tr = inode->i_private;
4483         int ret;
4484 
4485         if (tracing_disabled)
4486                 return -ENODEV;
4487 
4488         if (trace_array_get(tr) < 0)
4489                 return -ENODEV;
4490 
4491         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4492         if (ret < 0)
4493                 trace_array_put(tr);
4494 
4495         return ret;
4496 }
4497 
/* File operations for the "trace_options" file. */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4505 
/* Help text shown by the tracefs "README" file (see tracing_readme_read()). */
static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
        "# echo 0 > tracing_on : quick way to disable tracing\n"
        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
        " Important files:\n"
        "  trace\t\t\t- The static contents of the buffer\n"
        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
        "  current_tracer\t- function and latency tracers\n"
        "  available_tracers\t- list of configured tracers for current_tracer\n"
        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
        "  trace_clock\t\t- change the clock used to order events\n"
        "       local:   Per cpu clock but may not be synced across CPUs\n"
        "      global:   Synced across CPUs but slows tracing down.\n"
        "     counter:   Not a clock, but just an increment\n"
        "      uptime:   Jiffy counter from time of boot\n"
        "        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
        "     x86-tsc:   TSC cycle counter\n"
#endif
        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
        "       delta:   Delta difference against a buffer-wide timestamp\n"
        "    absolute:   Absolute (standalone) timestamp\n"
        "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
        "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
        "  tracing_cpumask\t- Limit which CPUs to trace\n"
        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
        "\t\t\t  Remove sub-buffer with rmdir\n"
        "  trace_options\t\t- Set format or modify how tracing happens\n"
        "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
        "\t\t\t  option name\n"
        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "\n  available_filter_functions - list of functions that can be filtered on\n"
        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
        "\t\t\t  functions\n"
        "\t     accepts: func_full_name or glob-matching-pattern\n"
        "\t     modules: Can select a group via module\n"
        "\t      Format: :mod:<module-name>\n"
        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
        "\t    triggers: a command to perform when function is hit\n"
        "\t      Format: <function>:<trigger>[:count]\n"
        "\t     trigger: traceon, traceoff\n"
        "\t\t      enable_event:<system>:<event>\n"
        "\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
        "\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t      snapshot\n"
#endif
        "\t\t      dump\n"
        "\t\t      cpudump\n"
        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
        "\t     The first one will disable tracing every time do_fault is hit\n"
        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
        "\t       The first time do trap is hit and it disables tracing, the\n"
        "\t       counter will decrement to 2. If tracing is already disabled,\n"
        "\t       the counter will not decrement. It only decrements when the\n"
        "\t       trigger did work\n"
        "\t     To remove trigger without count:\n"
        "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
        "\t     To remove trigger with a count:\n"
        "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
        "\t    modules: Can select a group via module command :mod:\n"
        "\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
        "\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
        "\t\t\t  snapshot buffer. Read the contents for more\n"
        "\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
        "  stack_trace\t\t- Shows the max stack trace when active\n"
        "  stack_max_size\t- Shows current max stack size that was traced\n"
        "\t\t\t  Write into this file to reset the max size (trigger a\n"
        "\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
        "\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_KPROBE_EVENTS
        "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
        "\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
        "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
        "\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
        "\t  accepts: event-definitions (one definition per line)\n"
        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
        "\t           -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
        "\t    place: <path>:<offset>\n"
#endif
        "\t     args: <name>=fetcharg[:type]\n"
        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
        "\t           $stack<index>, $stack, $retval, $comm\n"
        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
        "\t           b<bit-width>@<bit-offset>/<container-size>\n"
#endif
        "  events/\t\t- Directory containing all trace event subsystems:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
        "\t\t\t  events\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "  events/<system>/<event>/\t- Directory containing control files for\n"
        "\t\t\t  <event>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "      trigger\t\t- If set, a command to perform when event is hit\n"
        "\t    Format: <trigger>[:count][if <filter>]\n"
        "\t   trigger: traceon, traceoff\n"
        "\t            enable_event:<system>:<event>\n"
        "\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
        "\t            enable_hist:<system>:<event>\n"
        "\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
        "\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
        "\t\t    hist (see below)\n"
#endif
        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
        "\t                  events/block/block_unplug/trigger\n"
        "\t   The first disables tracing every time block_unplug is hit.\n"
        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
        "\t   Like function triggers, the counter is only decremented if it\n"
        "\t    enabled or disabled tracing.\n"
        "\t   To remove a trigger without a count:\n"
        "\t     echo '!<trigger> > <system>/<event>/trigger\n"
        "\t   To remove a trigger with a count:\n"
        "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
        "\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
        "\t    Format: hist:keys=<field1[,field2,...]>\n"
        "\t            [:values=<field1[,field2,...]>]\n"
        "\t            [:sort=<field1[,field2,...]>]\n"
        "\t            [:size=#entries]\n"
        "\t            [:pause][:continue][:clear]\n"
        "\t            [:name=histname1]\n"
        "\t            [if <filter>]\n\n"
        "\t    When a matching event is hit, an entry is added to a hash\n"
        "\t    table using the key(s) and value(s) named, and the value of a\n"
        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
        "\t    correspond to fields in the event's format description.  Keys\n"
        "\t    can be any field, or the special string 'stacktrace'.\n"
        "\t    Compound keys consisting of up to two fields can be specified\n"
        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
        "\t    fields.  Sort keys consisting of up to two fields can be\n"
        "\t    specified using the 'sort' keyword.  The sort direction can\n"
        "\t    be modified by appending '.descending' or '.ascending' to a\n"
        "\t    sort field.  The 'size' parameter can be used to specify more\n"
        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
        "\t    its histogram data will be shared with other triggers of the\n"
        "\t    same name, and trigger hits will update this common data.\n\n"
        "\t    Reading the 'hist' file for the event will dump the hash\n"
        "\t    table in its entirety to stdout.  If there are multiple hist\n"
        "\t    triggers attached to an event, there will be a table for each\n"
        "\t    trigger in the output.  The table displayed for a named\n"
        "\t    trigger will be the same as any other instance having the\n"
        "\t    same name.  The default format used to display a given field\n"
        "\t    can be modified by appending any of the following modifiers\n"
        "\t    to the field name, as applicable:\n\n"
        "\t            .hex        display a number as a hex value\n"
        "\t            .sym        display an address as a symbol\n"
        "\t            .sym-offset display an address as a symbol and offset\n"
        "\t            .execname   display a common_pid as a program name\n"
        "\t            .syscall    display a syscall id as a syscall name\n"
        "\t            .log2       display log2 value rather than raw number\n"
        "\t            .usecs      display a common_timestamp in microseconds\n\n"
        "\t    The 'pause' parameter can be used to pause an existing hist\n"
        "\t    trigger or to start a hist trigger but not log any events\n"
        "\t    until told to do so.  'continue' can be used to start or\n"
        "\t    restart a paused hist trigger.\n\n"
        "\t    The 'clear' parameter will clear the contents of a running\n"
        "\t    hist trigger and leave its current paused/active state\n"
        "\t    unchanged.\n\n"
        "\t    The enable_hist and disable_hist triggers can be used to\n"
        "\t    have one event conditionally start and stop another event's\n"
        "\t    already-attached hist trigger.  The syntax is analogous to\n"
        "\t    the enable_event and disable_event triggers.\n"
#endif
;
4722 
4723 static ssize_t
4724 tracing_readme_read(struct file *filp, char __user *ubuf,
4725                        size_t cnt, loff_t *ppos)
4726 {
4727         return simple_read_from_buffer(ubuf, cnt, ppos,
4728                                         readme_msg, strlen(readme_msg));
4729 }
4730 
/* File operations for the read-only "README" help file. */
static const struct file_operations tracing_readme_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_readme_read,
        .llseek         = generic_file_llseek,
};
4736 
4737 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4738 {
4739         int *ptr = v;
4740 
4741         if (*pos || m->count)
4742                 ptr++;
4743 
4744         (*pos)++;
4745 
4746         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4747                 if (trace_find_tgid(*ptr))
4748                         return ptr;
4749         }
4750 
4751         return NULL;
4752 }
4753 
/*
 * seq_file ->start for "saved_tgids": replay ->next from slot 0 until the
 * requested position is reached.  Returns NULL when no tgids are recorded
 * (tgid_map is only allocated once record-tgid is enabled) or when *pos is
 * past the last populated slot.
 */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
        void *v;
        loff_t l = 0;

        if (!tgid_map)
                return NULL;

        v = &tgid_map[0];
        while (l <= *pos) {
                v = saved_tgids_next(m, v, &l);
                if (!v)
                        return NULL;
        }

        return v;
}
4771 
/* seq_file ->stop for "saved_tgids": no lock was taken, nothing to release. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
4775 
4776 static int saved_tgids_show(struct seq_file *m, void *v)
4777 {
4778         int pid = (int *)v - tgid_map;
4779 
4780         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4781         return 0;
4782 }
4783 
/* seq_file iterator over the pid->tgid map for the "saved_tgids" file. */
static const struct seq_operations tracing_saved_tgids_seq_ops = {
        .start          = saved_tgids_start,
        .stop           = saved_tgids_stop,
        .next           = saved_tgids_next,
        .show           = saved_tgids_show,
};
4790 
4791 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4792 {
4793         if (tracing_disabled)
4794                 return -ENODEV;
4795 
4796         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4797 }
4798 
4799 
/* File operations for the read-only "saved_tgids" seq_file. */
static const struct file_operations tracing_saved_tgids_fops = {
        .open           = tracing_saved_tgids_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
4806 
/*
 * seq_file ->next for "saved_cmdlines": advance to the next slot of
 * map_cmdline_to_pid that holds a real pid.  Called with trace_cmdline_lock
 * held (taken in ->start, dropped in ->stop).
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
        unsigned int *ptr = v;

        /* Step past the already-shown entry unless this is the initial call */
        if (*pos || m->count)
                ptr++;

        (*pos)++;

        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
             ptr++) {
                /* Skip unused slots (NO_CMDLINE_MAP; -1 looks redundant with it) */
                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
                        continue;

                return ptr;
        }

        return NULL;
}
4826 
/*
 * seq_file ->start for "saved_cmdlines": take trace_cmdline_lock (with
 * preemption off, as required for arch_spin_lock) and walk to *pos.
 * The lock is intentionally still held on the NULL return path; seq_file
 * guarantees ->stop is called after ->start, which releases it.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
        void *v;
        loff_t l = 0;

        preempt_disable();
        arch_spin_lock(&trace_cmdline_lock);

        v = &savedcmd->map_cmdline_to_pid[0];
        while (l <= *pos) {
                v = saved_cmdlines_next(m, v, &l);
                if (!v)
                        return NULL;
        }

        return v;
}
4844 
/* seq_file ->stop: release the lock taken in saved_cmdlines_start(). */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
        arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
}
4850 
4851 static int saved_cmdlines_show(struct seq_file *m, void *v)
4852 {
4853         char buf[TASK_COMM_LEN];
4854         unsigned int *pid = v;
4855 
4856         __trace_find_cmdline(*pid, buf);
4857         seq_printf(m, "%d %s\n", *pid, buf);
4858         return 0;
4859 }
4860 
/* seq_file iterator over the pid->comm map for the "saved_cmdlines" file. */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
        .start          = saved_cmdlines_start,
        .next           = saved_cmdlines_next,
        .stop           = saved_cmdlines_stop,
        .show           = saved_cmdlines_show,
};
4867 
4868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4869 {
4870         if (tracing_disabled)
4871                 return -ENODEV;
4872 
4873         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4874 }
4875 
/* File operations for the read-only "saved_cmdlines" seq_file. */
static const struct file_operations tracing_saved_cmdlines_fops = {
        .open           = tracing_saved_cmdlines_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
4882 
4883 static ssize_t
4884 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4885                                  size_t cnt, loff_t *ppos)
4886 {
4887         char buf[64];
4888         int r;
4889 
4890         arch_spin_lock(&trace_cmdline_lock);
4891         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4892         arch_spin_unlock(&trace_cmdline_lock);
4893 
4894         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4895 }
4896 
/* Free a saved_cmdlines_buffer together with both of its backing arrays. */
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
        kfree(s->saved_cmdlines);
        kfree(s->map_cmdline_to_pid);
        kfree(s);
}
4903 
/*
 * Replace the global savedcmd buffer with a new one holding @val entries.
 * The new buffer is allocated outside the lock, swapped in under
 * trace_cmdline_lock, and the old buffer is freed after the lock is
 * dropped (kfree must not run under an arch spinlock).
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int tracing_resize_saved_cmdlines(unsigned int val)
{
        struct saved_cmdlines_buffer *s, *savedcmd_temp;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        if (allocate_cmdlines_buffer(val, s) < 0) {
                kfree(s);
                return -ENOMEM;
        }

        arch_spin_lock(&trace_cmdline_lock);
        savedcmd_temp = savedcmd;
        savedcmd = s;
        arch_spin_unlock(&trace_cmdline_lock);
        free_saved_cmdlines_buffer(savedcmd_temp);

        return 0;
}
4925 
/*
 * Write handler for "saved_cmdlines_size": parse a decimal entry count
 * and resize the saved-cmdlines buffer accordingly.
 */
static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
                                  size_t cnt, loff_t *ppos)
{
        unsigned long val;
        int ret;

        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
        if (!val || val > PID_MAX_DEFAULT)
                return -EINVAL;

        ret = tracing_resize_saved_cmdlines((unsigned int)val);
        if (ret < 0)
                return ret;

        *ppos += cnt;

        return cnt;
}
4949 
/* File operations for "saved_cmdlines_size" (read current size / write new). */
static const struct file_operations tracing_saved_cmdlines_size_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_saved_cmdlines_size_read,
        .write          = tracing_saved_cmdlines_size_write,
};
4955 
4956 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Normalize an eval-map cursor: if @ptr landed on a tail item (its
 * eval_string is NULL), follow tail.next to the next chained array and
 * skip that array's head item.  Returns NULL at the end of the last array.
 */
static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item *ptr)
{
        if (!ptr->map.eval_string) {
                if (ptr->tail.next) {
                        ptr = ptr->tail.next;
                        /* Set ptr to the next real item (skip head) */
                        ptr++;
                } else
                        return NULL;
        }
        return ptr;
}
4970 
/*
 * seq_file ->next for "eval_map": step to the following map entry,
 * hopping across chained arrays via update_eval_map().
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
        union trace_eval_map_item *ptr = v;

        /*
         * Paranoid! If ptr points to end, we don't want to increment past it.
         * This really should never happen.
         */
        ptr = update_eval_map(ptr);
        if (WARN_ON_ONCE(!ptr))
                return NULL;

        ptr++;

        (*pos)++;

        /* The increment may have landed on a tail item; normalize again */
        ptr = update_eval_map(ptr);

        return ptr;
}
4991 
/*
 * seq_file ->start for "eval_map": take trace_eval_mutex (released in
 * ->stop), skip the head item of the first array, and advance to *pos.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
        union trace_eval_map_item *v;
        loff_t l = 0;

        mutex_lock(&trace_eval_mutex);

        v = trace_eval_maps;
        if (v)
                v++;    /* skip the head item; real entries follow it */

        while (v && l < *pos) {
                v = eval_map_next(m, v, &l);
        }

        return v;
}
5009 
/* seq_file ->stop: drop the mutex taken in eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
        mutex_unlock(&trace_eval_mutex);
}
5014 
/* seq_file ->show: print one map entry as "<string> <value> (<system>)". */
static int eval_map_show(struct seq_file *m, void *v)
{
        union trace_eval_map_item *ptr = v;

        seq_printf(m, "%s %ld (%s)\n",
                   ptr->map.eval_string, ptr->map.eval_value,
                   ptr->map.system);

        return 0;
}
5025 
/* seq_file iterator over all registered eval maps for the "eval_map" file. */
static const struct seq_operations tracing_eval_map_seq_ops = {
        .start          = eval_map_start,
        .next           = eval_map_next,
        .stop           = eval_map_stop,
        .show           = eval_map_show,
};
5032 
5033 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5034 {
5035         if (tracing_disabled)
5036                 return -ENODEV;
5037 
5038         return seq_open(filp, &tracing_eval_map_seq_ops);
5039 }
5040 
/* File operations for the read-only "eval_map" seq_file. */
static const struct file_operations tracing_eval_map_fops = {
        .open           = tracing_eval_map_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
5047 
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
        /* Return tail of array given the head: head + length entries + 1 */
        return ptr + ptr->head.length + 1;
}
5054 
5055 static void
5056 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5057                            int len)
5058 {
5059         struct trace_eval_map **stop;
5060         struct trace_eval_map **map;
5061         union trace_eval_map_item *map_array;
5062         union trace_eval_map_item *ptr;
5063 
5064         stop = start + len;
5065 
5066         /*
5067          * The trace_eval_maps contains the map plus a head and tail item,
5068          * where the head holds the module and length of array, and the
5069          * tail holds a pointer to the next list.
5070          */
5071         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5072         if (!map_array) {
5073                 pr_warn("Unable to allocate trace eval mapping\n");
5074                 return;
5075         }
5076 
5077         mutex_lock(&trace_eval_mutex);
5078 
5079         if (!trace_eval_maps)
5080                 trace_eval_maps = map_array;
5081         else {
5082                 ptr = trace_eval_maps;
5083                 for (;;) {
5084                         ptr = trace_eval_jmp_to_tail(ptr);
5085                         if (!ptr->tail.next)
5086                                 break;
5087                         ptr = ptr->tail.next;
5088 
5089                 }
5090                 ptr->tail.next = map_array;
5091         }
5092         map_array->head.mod = mod;
5093         map_array->head.length = len;
5094         map_array++;
5095 
5096         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5097                 map_array->map = **map;
5098                 map_array++;
5099         }
5100         memset(map_array, 0, sizeof(*map_array));
5101 
5102         mutex_unlock(&trace_eval_mutex);
5103 }
5104 
/* Create the read-only "eval_map" file under the given tracefs directory. */
static void trace_create_eval_file(struct dentry *d_tracer)
{
        trace_create_file("eval_map", 0444, d_tracer,
                          NULL, &tracing_eval_map_fops);
}
5110 
#else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Without the "eval_map" file these hooks compile away to no-ops. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
                              struct trace_eval_map **start, int len) { }
#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5116 
/*
 * Register @len eval maps from @mod: update the event print formats to
 * use them, then (when configured) expose them via the eval_map file.
 */
static void trace_insert_eval_map(struct module *mod,
                                  struct trace_eval_map **start, int len)
{
        /* Nothing to register */
        if (len <= 0)
                return;

        /* The redundant "map = start" alias local was dropped */
        trace_event_eval_update(start, len);

        trace_insert_eval_map_file(mod, start, len);
}
5131 
/*
 * Read handler for "current_tracer": report the active tracer's name.
 * trace_types_lock guards current_trace against a concurrent tracer switch.
 */
static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
{
        struct trace_array *tr = filp->private_data;
        char buf[MAX_TRACER_SIZE+2];    /* name + '\n' + NUL */
        int r;

        mutex_lock(&trace_types_lock);
        r = sprintf(buf, "%s\n", tr->current_trace->name);
        mutex_unlock(&trace_types_lock);

        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5146 
/* Reset @tr's per-cpu buffers and run tracer @t's init hook on it. */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
        tracing_reset_online_cpus(&tr->trace_buffer);
        return t->init(tr);
}
5152 
/* Record @val as the entry count in every tracing CPU's per-cpu data. */
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
{
        int cpu;

        for_each_tracing_cpu(cpu)
                per_cpu_ptr(buf->data, cpu)->entries = val;
}
5160 
5161 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize @trace_buf to match the recorded entry counts of @size_buf,
 * either for every tracing CPU (@cpu_id == RING_BUFFER_ALL_CPUS) or for
 * the single CPU @cpu_id.  On success the new sizes are mirrored into
 * @trace_buf's per-cpu entry counts; returns the first resize error.
 */
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
                                        struct trace_buffer *size_buf, int cpu_id)
{
        int cpu, ret = 0;

        if (cpu_id == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
                        if (ret < 0)
                                break;
                        per_cpu_ptr(trace_buf->data, cpu)->entries =
                                per_cpu_ptr(size_buf->data, cpu)->entries;
                }
        } else {
                ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
                if (ret == 0)
                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
        }

        return ret;
}
5187 #endif /* CONFIG_TRACER_MAX_TRACE */
5188 
/*
 * Resize @tr's main ring buffer (and, for latency tracers on the global
 * array, the max/snapshot buffer) to @size bytes for @cpu or all CPUs.
 * Caller must hold trace_types_lock.  Returns 0 on success or a negative
 * errno from ring_buffer_resize().
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
                                        unsigned long size, int cpu)
{
        int ret;

        /*
         * If kernel or user changes the size of the ring buffer
         * we use the size that was given, and we can forget about
         * expanding it later.
         */
        ring_buffer_expanded = true;

        /* May be called before buffers are initialized */
        if (!tr->trace_buffer.buffer)
                return 0;

        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
        if (ret < 0)
                return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
        /* The max buffer only needs to track the main buffer's size when in use */
        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
            !tr->current_trace->use_max_tr)
                goto out;

        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
        if (ret < 0) {
                /* Roll the main buffer back to its previously recorded size */
                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
                                                     &tr->trace_buffer, cpu);
                if (r < 0) {
                        /*
                         * AARGH! We are left with different
                         * size max buffer!!!!
                         * The max buffer is our "snapshot" buffer.
                         * When a tracer needs a snapshot (one of the
                         * latency tracers), it swaps the max buffer
                         * with the saved snap shot. We succeeded to
                         * update the size of the main buffer, but failed to
                         * update the size of the max buffer. But when we tried
                         * to reset the main buffer to the original size, we
                         * failed there too. This is very unlikely to
                         * happen, but if it does, warn and kill all
                         * tracing.
                         */
                        WARN_ON(1);
                        tracing_disabled = 1;
                }
                return ret;
        }

        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->max_buffer, size);
        else
                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->trace_buffer, size);
        else
                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;

        return ret;
}
5254 
/*
 * Locked wrapper around __tracing_resize_ring_buffer(): validates that a
 * specific @cpu_id is actually being traced, and maps any resize failure
 * to -ENOMEM.  Returns the new size on success.
 */
static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
                                          unsigned long size, int cpu_id)
{
        int ret = size;

        mutex_lock(&trace_types_lock);

        if (cpu_id != RING_BUFFER_ALL_CPUS) {
                /* make sure, this cpu is enabled in the mask */
                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
        if (ret < 0)
                ret = -ENOMEM;

out:
        mutex_unlock(&trace_types_lock);

        return ret;
}
5279 
5280 
5281 /**
5282  * tracing_update_buffers - used by tracing facility to expand ring buffers
5283  *
5284  * To save on memory when the tracing is never used on a system with it
5285  * configured in. The ring buffers are set to a minimum size. But once
5286  * a user starts to use the tracing facility, then they need to grow
5287  * to their default size.
5288  *
5289  * This function is to be called when a tracer is about to be used.
5290  */
5291 int tracing_update_buffers(void)
5292 {
5293         int ret = 0;
5294 
5295         mutex_lock(&trace_types_lock);
5296         if (!ring_buffer_expanded)
5297                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5298                                                 RING_BUFFER_ALL_CPUS);
5299         mutex_unlock(&trace_types_lock);
5300 
5301         return ret;
5302 }
5303 
5304 struct trace_option_dentry;
5305 
5306 static void
5307 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5308 
5309 /*
5310  * Used to clear out the tracer before deletion of an instance.
5311  * Must have trace_types_lock held.
5312  */
5313 static void tracing_set_nop(struct trace_array *tr)
5314 {
5315         if (tr->current_trace == &nop_trace)
5316                 return;
5317         
5318         tr->current_trace->enabled--;
5319 
5320         if (tr->current_trace->reset)
5321                 tr->current_trace->reset(tr);
5322 
5323         tr->current_trace = &nop_trace;
5324 }
5325 
5326 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5327 {
5328         /* Only enable if the directory has been created already. */
5329         if (!tr->dir)
5330                 return;
5331 
5332         create_trace_option_files(tr, t);
5333 }
5334 
/*
 * tracing_set_tracer - switch @tr to the tracer named @buf.
 *
 * Takes trace_types_lock.  Expands the ring buffer on first use, looks
 * the name up in the registered trace_types list, tears the current
 * tracer down to nop_trace, then installs the new tracer (allocating
 * or freeing the max/snapshot buffer as the new tracer requires).
 *
 * Returns 0 on success, -EINVAL for an unknown or disallowed tracer,
 * -EBUSY while trace_pipe readers pin the current tracer, or a
 * negative errno from buffer resizing / snapshot allocation / init.
 */
static int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	/* Buffers stay at minimal size until tracing is first used. */
	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Find the named tracer in the registered list. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	/* Already the current tracer: nothing to do (returns 0). */
	if (t == tr->current_trace)
		goto out;

	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->current_trace->ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	/* Current trace needs to be nop_trace before synchronize_sched */
	tr->current_trace = &nop_trace;

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->allocated_snapshot;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_sched();
		free_snapshot(tr);
	}
#endif

#ifdef CONFIG_TRACER_MAX_TRACE
	/* The new tracer needs a snapshot buffer the old one lacked. */
	if (t->use_max_tr && !had_max_tr) {
		ret = alloc_snapshot(tr);
		if (ret < 0)
			goto out;
	}
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			/* NOTE(review): on failure tr is left on nop_trace. */
			goto out;
	}

	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
5431 
5432 static ssize_t
5433 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5434                         size_t cnt, loff_t *ppos)
5435 {
5436         struct trace_array *tr = filp->private_data;
5437         char buf[MAX_TRACER_SIZE+1];
5438         int i;
5439         size_t ret;
5440         int err;
5441 
5442         ret = cnt;
5443 
5444         if (cnt > MAX_TRACER_SIZE)
5445                 cnt = MAX_TRACER_SIZE;
5446 
5447         if (copy_from_user(buf, ubuf, cnt))
5448                 return -EFAULT;
5449 
5450         buf[cnt] = 0;
5451 
5452         /* strip ending whitespace. */
5453         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5454                 buf[i] = 0;
5455 
5456         err = tracing_set_tracer(tr, buf);
5457         if (err)
5458                 return err;
5459 
5460         *ppos += ret;
5461 
5462         return ret;
5463 }
5464 
5465 static ssize_t
5466 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5467                    size_t cnt, loff_t *ppos)
5468 {
5469         char buf[64];
5470         int r;
5471 
5472         r = snprintf(buf, sizeof(buf), "%ld\n",
5473                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5474         if (r > sizeof(buf))
5475                 r = sizeof(buf);
5476         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5477 }
5478 
5479 static ssize_t
5480 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5481                     size_t cnt, loff_t *ppos)
5482 {
5483         unsigned long val;
5484         int ret;
5485 
5486         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5487         if (ret)
5488                 return ret;
5489 
5490         *ptr = val * 1000;
5491 
5492         return cnt;
5493 }
5494 
5495 static ssize_t
5496 tracing_thresh_read(struct file *filp, char __user *ubuf,
5497                     size_t cnt, loff_t *ppos)
5498 {
5499         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5500 }
5501 
5502 static ssize_t
5503 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5504                      size_t cnt, loff_t *ppos)
5505 {
5506         struct trace_array *tr = filp->private_data;
5507         int ret;
5508 
5509         mutex_lock(&trace_types_lock);
5510         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5511         if (ret < 0)
5512                 goto out;
5513 
5514         if (tr->current_trace->update_thresh) {
5515                 ret = tr->current_trace->update_thresh(tr);
5516                 if (ret < 0)
5517                         goto out;
5518         }
5519 
5520         ret = cnt;
5521 out:
5522         mutex_unlock(&trace_types_lock);
5523 
5524         return ret;
5525 }
5526 
5527 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5528 
5529 static ssize_t
5530 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5531                      size_t cnt, loff_t *ppos)
5532 {
5533         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5534 }
5535 
5536 static ssize_t
5537 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5538                       size_t cnt, loff_t *ppos)
5539 {
5540         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5541 }
5542 
5543 #endif
5544 
5545 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5546 {
5547         struct trace_array *tr = inode->i_private;
5548         struct trace_iterator *iter;
5549         int ret = 0;
5550 
5551         if (tracing_disabled)
5552                 return -ENODEV;
5553 
5554         if (trace_array_get(tr) < 0)
5555                 return -ENODEV;
5556 
5557         mutex_lock(&trace_types_lock);
5558 
5559         /* create a buffer to store the information to pass to userspace */
5560         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5561         if (!iter) {
5562                 ret = -ENOMEM;
5563                 __trace_array_put(tr);
5564                 goto out;
5565         }
5566 
5567         trace_seq_init(&iter->seq);
5568         iter->trace = tr->current_trace;
5569 
5570         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5571                 ret = -ENOMEM;
5572                 goto fail;
5573         }
5574 
5575         /* trace pipe does not show start of buffer */
5576         cpumask_setall(iter->started);
5577 
5578         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5579                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5580 
5581         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5582         if (trace_clocks[tr->clock_id].in_ns)
5583                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5584 
5585         iter->tr = tr;
5586         iter->trace_buffer = &tr->trace_buffer;
5587         iter->cpu_file = tracing_get_cpu(inode);
5588         mutex_init(&iter->mutex);
5589         filp->private_data = iter;
5590 
5591         if (iter->trace->pipe_open)
5592                 iter->trace->pipe_open(iter);
5593 
5594         nonseekable_open(inode, filp);
5595 
5596         tr->current_trace->ref++;
5597 out:
5598         mutex_unlock(&trace_types_lock);
5599         return ret;
5600 
5601 fail:
5602         kfree(iter->trace);
5603         kfree(iter);
5604         __trace_array_put(tr);
5605         mutex_unlock(&trace_types_lock);
5606         return ret;
5607 }
5608 
5609 static int tracing_release_pipe(struct inode *inode, struct file *file)
5610 {
5611         struct trace_iterator *iter = file->private_data;
5612         struct trace_array *tr = inode->i_private;
5613 
5614         mutex_lock(&trace_types_lock);
5615 
5616         tr->current_trace->ref--;
5617 
5618         if (iter->trace->pipe_close)
5619                 iter->trace->pipe_close(iter);
5620 
5621         mutex_unlock(&trace_types_lock);
5622 
5623         free_cpumask_var(iter->started);
5624         mutex_destroy(&iter->mutex);
5625         kfree(iter);
5626 
5627         trace_array_put(tr);
5628 
5629         return 0;
5630 }
5631 
5632 static __poll_t
5633 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5634 {
5635         struct trace_array *tr = iter->tr;
5636 
5637         /* Iterators are static, they should be filled or empty */
5638         if (trace_buffer_iter(iter, iter->cpu_file))
5639                 return EPOLLIN | EPOLLRDNORM;
5640 
5641         if (tr->trace_flags & TRACE_ITER_BLOCK)
5642                 /*
5643                  * Always select as readable when in blocking mode
5644                  */
5645                 return EPOLLIN | EPOLLRDNORM;
5646         else
5647                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5648                                              filp, poll_table);
5649 }
5650 
5651 static __poll_t
5652 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5653 {
5654         struct trace_iterator *iter = filp->private_data;
5655 
5656         return trace_poll(iter, filp, poll_table);
5657 }
5658 
/*
 * Block until the trace buffer has entries to consume.
 * Must be called with iter->mutex held.
 *
 * Returns 1 when data is available, -EAGAIN for a non-blocking file,
 * or a negative errno from wait_on_pipe().
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Do not hold iter->mutex across the sleep. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, false);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
5695 
5696 /*
5697  * Consumer reader.
5698  */
5699 static ssize_t
5700 tracing_read_pipe(struct file *filp, char __user *ubuf,
5701                   size_t cnt, loff_t *ppos)
5702 {
5703         struct trace_iterator *iter = filp->private_data;
5704         ssize_t sret;
5705 
5706         /*
5707          * Avoid more than one consumer on a single file descriptor
5708          * This is just a matter of traces coherency, the ring buffer itself
5709          * is protected.
5710          */
5711         mutex_lock(&iter->mutex);
5712 
5713         /* return any leftover data */
5714         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5715         if (sret != -EBUSY)
5716                 goto out;
5717 
5718         trace_seq_init(&iter->seq);
5719 
5720         if (iter->trace->read) {
5721                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5722                 if (sret)
5723                         goto out;
5724         }
5725 
5726 waitagain:
5727         sret = tracing_wait_pipe(filp);
5728         if (sret <= 0)
5729                 goto out;
5730 
5731         /* stop when tracing is finished */
5732         if (trace_empty(iter)) {
5733                 sret = 0;
5734                 goto out;
5735         }
5736 
5737         if (cnt >= PAGE_SIZE)
5738                 cnt = PAGE_SIZE - 1;
5739 
5740         /* reset all but tr, trace, and overruns */
5741         memset(&iter->seq, 0,
5742                sizeof(struct trace_iterator) -
5743                offsetof(struct trace_iterator, seq));
5744         cpumask_clear(iter->started);
5745         iter->pos = -1;
5746 
5747         trace_event_read_lock();
5748         trace_access_lock(iter->cpu_file);
5749         while (trace_find_next_entry_inc(iter) != NULL) {
5750                 enum print_line_t ret;
5751                 int save_len = iter->seq.seq.len;
5752 
5753                 ret = print_trace_line(iter);
5754                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5755                         /* don't print partial lines */
5756                         iter->seq.seq.len = save_len;
5757                         break;
5758                 }
5759                 if (ret != TRACE_TYPE_NO_CONSUME)
5760                         trace_consume(iter);
5761 
5762                 if (trace_seq_used(&iter->seq) >= cnt)
5763                         break;
5764 
5765                 /*
5766                  * Setting the full flag means we reached the trace_seq buffer
5767                  * size and we should leave by partial output condition above.
5768                  * One of the trace_seq_* functions is not used properly.
5769                  */
5770                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5771                           iter->ent->type);
5772         }
5773         trace_access_unlock(iter->cpu_file);
5774         trace_event_read_unlock();
5775 
5776         /* Now copy what we have to the user */
5777         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5778         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5779                 trace_seq_init(&iter->seq);
5780 
5781         /*
5782          * If there was nothing to send to user, in spite of consuming trace
5783          * entries, go back to wait for more entries.
5784          */
5785         if (sret == -EBUSY)
5786                 goto waitagain;
5787 
5788 out:
5789         mutex_unlock(&iter->mutex);
5790 
5791         return sret;
5792 }
5793 
5794 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5795                                      unsigned int idx)
5796 {
5797         __free_page(spd->pages[idx]);
5798 }
5799 
/* Spliced trace pages use the stock pipe buffer callbacks. */
static const struct pipe_buf_operations tracing_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
	.release		= generic_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= generic_pipe_buf_get,
};
5807 
/*
 * Format trace entries into iter->seq until at most @rem bytes have
 * been produced, consuming each fully printed entry.  Returns how much
 * of @rem is still unused; 0 means stop (budget exhausted, overflow,
 * or no more entries).
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		/* The line did not fit: roll the seq back and stop. */
		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes this line added; roll back if it busts the budget. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
5854 
/*
 * Splice handler for trace_pipe: format consumed trace entries into
 * freshly allocated pages and hand them to @pipe.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &tracing_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* A tracer may override splice_read; a non-zero result is final. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	/* Nothing filled means return 0, not an error. */
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
5940 
5941 static ssize_t
5942 tracing_entries_read(struct file *filp, char __user *ubuf,
5943                      size_t cnt, loff_t *ppos)
5944 {
5945         struct inode *inode = file_inode(filp);
5946         struct trace_array *tr = inode->i_private;
5947         int cpu = tracing_get_cpu(inode);
5948         char buf[64];
5949         int r = 0;
5950         ssize_t ret;
5951 
5952         mutex_lock(&trace_types_lock);
5953 
5954         if (cpu == RING_BUFFER_ALL_CPUS) {
5955                 int cpu, buf_size_same;
5956                 unsigned long size;
5957 
5958                 size = 0;
5959                 buf_size_same = 1;
5960                 /* check if all cpu sizes are same */
5961                 for_each_tracing_cpu(cpu) {
5962                         /* fill in the size from first enabled cpu */
5963                         if (size == 0)
5964                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5965                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5966                                 buf_size_same = 0;
5967                                 break;
5968                         }
5969                 }
5970 
5971                 if (buf_size_same) {
5972                         if (!ring_buffer_expanded)
5973                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5974                                             size >> 10,
5975                                             trace_buf_size >> 10);
5976                         else
5977                                 r = sprintf(buf, "%lu\n", size >> 10);
5978                 } else
5979                         r = sprintf(buf, "X\n");
5980         } else
5981                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5982 
5983         mutex_unlock(&trace_types_lock);
5984 
5985         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5986         return ret;
5987 }
5988 
5989 static ssize_t
5990 tracing_entries_write(struct file *filp, const char __user *ubuf,
5991                       size_t cnt, loff_t *ppos)
5992 {
5993         struct inode *inode = file_inode(filp);
5994         struct trace_array *tr = inode->i_private;
5995         unsigned long val;
5996         int ret;
5997 
5998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999         if (ret)
6000                 return ret;
6001 
6002         /* must have at least 1 entry */
6003         if (!val)
6004                 return -EINVAL;
6005 
6006         /* value is in KB */
6007         val <<= 10;
6008         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6009         if (ret < 0)
6010                 return ret;
6011 
6012         *ppos += cnt;
6013 
6014         return cnt;
6015 }
6016 
6017 static ssize_t
6018 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6019                                 size_t cnt, loff_t *ppos)
6020 {
6021         struct trace_array *tr = filp->private_data;
6022         char buf[64];
6023         int r, cpu;
6024         unsigned long size = 0, expanded_size = 0;
6025 
6026         mutex_lock(&trace_types_lock);
6027         for_each_tracing_cpu(cpu) {
6028                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6029                 if (!ring_buffer_expanded)
6030                         expanded_size += trace_buf_size >> 10;
6031         }
6032         if (ring_buffer_expanded)
6033                 r = sprintf(buf, "%lu\n", size);
6034         else
6035                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6036         mutex_unlock(&trace_types_lock);
6037 
6038         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6039 }
6040 
6041 static ssize_t
6042 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6043                           size_t cnt, loff_t *ppos)
6044 {
6045         /*
6046          * There is no need to read what the user has written, this function
6047          * is just to make sure that there is no error when "echo" is used
6048          */
6049 
6050         *ppos += cnt;
6051 
6052         return cnt;
6053 }
6054 
6055 static int
6056 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6057 {
6058         struct trace_array *tr = inode->i_private;
6059 
6060         /* disable tracing ? */
6061         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6062                 tracer_tracing_off(tr);
6063         /* resize the ring buffer to 0 */
6064         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6065 
6066         trace_array_put(tr);
6067 
6068         return 0;
6069 }
6070 
/*
 * Write handler for trace_marker: record a user-supplied string into
 * the ring buffer as a TRACE_PRINT event.  If the user page faults
 * during the atomic copy, the literal "<faulted>" is recorded instead
 * and -EFAULT is returned.
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	const char faulted[] = "<faulted>";
	ssize_t written;
	int size;
	int len;

/* Used in tracing_mark_raw_write() as well */
#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */

	if (tracing_disabled)
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
		return -EINVAL;

	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);

	local_save_flags(irq_flags);
	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */

	/* If less than "<faulted>", then make sure we can still add that */
	if (cnt < FAULTED_SIZE)
		size += FAULTED_SIZE - cnt;

	buffer = tr->trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    irq_flags, preempt_count());
	if (unlikely(!event))
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	entry->ip = _THIS_IP_;

	/* Copy cannot sleep here: the reservation pins the buffer. */
	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
	if (len) {
		/* Fault: substitute the marker text and report -EFAULT. */
		memcpy(&entry->buf, faulted, FAULTED_SIZE);
		cnt = FAULTED_SIZE;
		written = -EFAULT;
	} else
		written = cnt;
	len = cnt;

	/* Terminate with a newline (added if missing) and a NUL. */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	__buffer_unlock_commit(buffer, event);

	if (written > 0)
		*fpos += written;

	return written;
}
6138 
/* Limit it for now to 3K (including tag) */
#define RAW_DATA_MAX_SIZE (1024*3)

/*
 * Write handler for trace_marker_raw: record a binary blob, led by a
 * user-chosen tag id (an int), as a TRACE_RAW_DATA event.  On a
 * faulted copy the id becomes -1, "<faulted>" is stored as the data,
 * and -EFAULT is returned.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct raw_data_entry *entry;
	const char faulted[] = "<faulted>";
	unsigned long irq_flags;
	ssize_t written;
	int size;
	int len;

/* Room for the fault marker plus the leading tag id. */
#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))

	if (tracing_disabled)
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
		return -EINVAL;

	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);

	local_save_flags(irq_flags);
	size = sizeof(*entry) + cnt;
	/* Reserve enough room to store the fault marker if needed. */
	if (cnt < FAULT_SIZE_ID)
		size += FAULT_SIZE_ID - cnt;

	buffer = tr->trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    irq_flags, preempt_count());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);

	/* Copy cannot sleep here: the reservation pins the buffer. */
	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
	if (len) {
		entry->id = -1;
		memcpy(&entry->buf, faulted, FAULTED_SIZE);
		written = -EFAULT;
	} else
		written = cnt;

	__buffer_unlock_commit(buffer, event);

	if (written > 0)
		*fpos += written;

	return written;
}
6202 
6203 static int tracing_clock_show(struct seq_file *m, void *v)
6204 {
6205         struct trace_array *tr = m->private;
6206         int i;
6207 
6208         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6209                 seq_printf(m,
6210                         "%s%s%s%s", i ? " " : "",
6211                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6212                         i == tr->clock_id ? "]" : "");
6213         seq_putc(m, '\n');
6214 
6215         return 0;
6216 }
6217 
/*
 * Switch the trace clock of @tr to the clock named @clockstr.
 *
 * Returns 0 on success or -EINVAL if no clock of that name exists.
 * Both the live and the snapshot buffer are reset, since timestamps
 * taken with the old clock cannot be compared with the new one's.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
        int i;

        /* Look the clock up by name */
        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
                if (strcmp(trace_clocks[i].name, clockstr) == 0)
                        break;
        }
        if (i == ARRAY_SIZE(trace_clocks))
                return -EINVAL;

        mutex_lock(&trace_types_lock);

        tr->clock_id = i;

        ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);

        /*
         * New clock may not be consistent with the previous clock.
         * Reset the buffer so that it doesn't have incomparable timestamps.
         */
        tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
        /* The snapshot buffer must use the same clock as the live one */
        if (tr->max_buffer.buffer)
                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
        tracing_reset_online_cpus(&tr->max_buffer);
#endif

        mutex_unlock(&trace_types_lock);

        return 0;
}
6251 
6252 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6253                                    size_t cnt, loff_t *fpos)
6254 {
6255         struct seq_file *m = filp->private_data;
6256         struct trace_array *tr = m->private;
6257         char buf[64];
6258         const char *clockstr;
6259         int ret;
6260 
6261         if (cnt >= sizeof(buf))
6262                 return -EINVAL;
6263 
6264         if (copy_from_user(buf, ubuf, cnt))
6265                 return -EFAULT;
6266 
6267         buf[cnt] = 0;
6268 
6269         clockstr = strstrip(buf);
6270 
6271         ret = tracing_set_clock(tr, clockstr);
6272         if (ret)
6273                 return ret;
6274 
6275         *fpos += cnt;
6276 
6277         return cnt;
6278 }
6279 
6280 static int tracing_clock_open(struct inode *inode, struct file *file)
6281 {
6282         struct trace_array *tr = inode->i_private;
6283         int ret;
6284 
6285         if (tracing_disabled)
6286                 return -ENODEV;
6287 
6288         if (trace_array_get(tr))
6289                 return -ENODEV;
6290 
6291         ret = single_open(file, tracing_clock_show, inode->i_private);
6292         if (ret < 0)
6293                 trace_array_put(tr);
6294 
6295         return ret;
6296 }
6297 
6298 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6299 {
6300         struct trace_array *tr = m->private;
6301 
6302         mutex_lock(&trace_types_lock);
6303 
6304         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6305                 seq_puts(m, "delta [absolute]\n");
6306         else
6307                 seq_puts(m, "[delta] absolute\n");
6308 
6309         mutex_unlock(&trace_types_lock);
6310 
6311         return 0;
6312 }
6313 
6314 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6315 {
6316         struct trace_array *tr = inode->i_private;
6317         int ret;
6318 
6319         if (tracing_disabled)
6320                 return -ENODEV;
6321 
6322         if (trace_array_get(tr))
6323                 return -ENODEV;
6324 
6325         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6326         if (ret < 0)
6327                 trace_array_put(tr);
6328 
6329         return ret;
6330 }
6331 
/*
 * Enable or disable absolute (rather than delta) ring buffer timestamps
 * for @tr.  Usage is refcounted: the mode is switched on by the first
 * enable and switched off only when the last user disables it.
 *
 * Returns 0 on success, or -EINVAL on an unbalanced disable.
 */
int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
{
        int ret = 0;

        mutex_lock(&trace_types_lock);

        /* Someone already switched it on; just take another reference */
        if (abs && tr->time_stamp_abs_ref++)
                goto out;

        if (!abs) {
                /* Disable without a matching enable */
                if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
                        ret = -EINVAL;
                        goto out;
                }

                /* Other users remain; keep absolute timestamps enabled */
                if (--tr->time_stamp_abs_ref)
                        goto out;
        }

        ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);

#ifdef CONFIG_TRACER_MAX_TRACE
        /* The snapshot buffer must use the same timestamp mode */
        if (tr->max_buffer.buffer)
                ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
#endif
 out:
        mutex_unlock(&trace_types_lock);

        return ret;
}
6362 
/* Per-open state for the per-cpu trace_pipe_raw files */
struct ftrace_buffer_info {
        struct trace_iterator   iter;           /* iterator over the buffer being read */
        void                    *spare;         /* staging ring-buffer page for read() */
        unsigned int            spare_cpu;      /* cpu the spare page was allocated for */
        unsigned int            read;           /* bytes of the spare page already consumed */
};
6369 
6370 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Open handler for the snapshot file.
 *
 * A readable open builds a full trace iterator over the snapshot (max)
 * buffer.  A write-only open still allocates a stub seq_file so that
 * the write handler can find the iterator in m->private.
 */
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
        struct seq_file *m;
        int ret = 0;

        if (trace_array_get(tr) < 0)
                return -ENODEV;

        if (file->f_mode & FMODE_READ) {
                /* true: iterate the snapshot buffer, not the live one */
                iter = __tracing_open(inode, file, true);
                if (IS_ERR(iter))
                        ret = PTR_ERR(iter);
        } else {
                /* Writes still need the seq_file to hold the private data */
                ret = -ENOMEM;
                m = kzalloc(sizeof(*m), GFP_KERNEL);
                if (!m)
                        goto out;
                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
                if (!iter) {
                        kfree(m);
                        goto out;
                }
                ret = 0;

                iter->tr = tr;
                iter->trace_buffer = &tr->max_buffer;
                iter->cpu_file = tracing_get_cpu(inode);
                m->private = iter;
                file->private_data = m;
        }
out:
        /* Drop the reference taken above on any failure */
        if (ret < 0)
                trace_array_put(tr);

        return ret;
}
6410 
/*
 * Write handler for the snapshot file.  The value written selects the
 * action:
 *   0  - free the snapshot buffer (only on the all-cpus file)
 *   1  - allocate the snapshot buffer if necessary and take a snapshot
 *        (swap the live buffer with the snapshot buffer)
 *   >1 - clear the contents of the snapshot buffer
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
{
        struct seq_file *m = filp->private_data;
        struct trace_iterator *iter = m->private;
        struct trace_array *tr = iter->tr;
        unsigned long val;
        int ret;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        mutex_lock(&trace_types_lock);

        /* A tracer that uses the snapshot buffer itself owns it */
        if (tr->current_trace->use_max_tr) {
                ret = -EBUSY;
                goto out;
        }

        switch (val) {
        case 0:
                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
                        ret = -EINVAL;
                        break;
                }
                if (tr->allocated_snapshot)
                        free_snapshot(tr);
                break;
        case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
                        ret = -EINVAL;
                        break;
                }
#endif
                if (!tr->allocated_snapshot) {
                        ret = alloc_snapshot(tr);
                        if (ret < 0)
                                break;
                }
                local_irq_disable();
                /* Now, we're going to swap */
                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
                        update_max_tr(tr, current, smp_processor_id());
                else
                        update_max_tr_single(tr, current, iter->cpu_file);
                local_irq_enable();
                break;
        default:
                /* Any other value just clears the snapshot buffer */
                if (tr->allocated_snapshot) {
                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
                                tracing_reset_online_cpus(&tr->max_buffer);
                        else
                                tracing_reset(&tr->max_buffer, iter->cpu_file);
                }
                break;
        }

        if (ret >= 0) {
                *ppos += cnt;
                ret = cnt;
        }
out:
        mutex_unlock(&trace_types_lock);
        return ret;
}
6484 
6485 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6486 {
6487         struct seq_file *m = file->private_data;
6488         int ret;
6489 
6490         ret = tracing_release(inode, file);
6491 
6492         if (file->f_mode & FMODE_READ)
6493                 return ret;
6494 
6495         /* If write only, the seq_file is just a stub */
6496         if (m)
6497                 kfree(m->private);
6498         kfree(m);
6499 
6500         return 0;
6501 }
6502 
/*
 * The tracing_buffers_*() handlers are defined further down;
 * snapshot_raw_open() reuses them, hence the forward declarations.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
                                    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6509 
/*
 * Open handler for snapshot_raw: same setup as trace_pipe_raw, but the
 * iterator is redirected at the snapshot (max) buffer.
 */
static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
        struct ftrace_buffer_info *info;
        int ret;

        ret = tracing_buffers_open(inode, filp);
        if (ret < 0)
                return ret;

        info = filp->private_data;

        /* The current tracer owns the snapshot buffer; don't fight over it */
        if (info->iter.trace->use_max_tr) {
                tracing_buffers_release(inode, filp);
                return -EBUSY;
        }

        info->iter.snapshot = true;
        info->iter.trace_buffer = &info->iter.tr->max_buffer;

        return ret;
}
6531 
6532 #endif /* CONFIG_TRACER_SNAPSHOT */
6533 
6534 
/* File operations for the tracing_thresh latency-threshold file */
static const struct file_operations tracing_thresh_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_thresh_read,
        .write          = tracing_thresh_write,
        .llseek         = generic_file_llseek,
};

#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
/* File operations for tracing_max_latency */
static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
        .write          = tracing_max_lat_write,
        .llseek         = generic_file_llseek,
};
#endif

/* File operations for current_tracer */
static const struct file_operations set_tracer_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_set_trace_read,
        .write          = tracing_set_trace_write,
        .llseek         = generic_file_llseek,
};

/* File operations for trace_pipe (consuming, text-mode reads) */
static const struct file_operations tracing_pipe_fops = {
        .open           = tracing_open_pipe,
        .poll           = tracing_poll_pipe,
        .read           = tracing_read_pipe,
        .splice_read    = tracing_splice_read_pipe,
        .release        = tracing_release_pipe,
        .llseek         = no_llseek,
};

/* File operations for buffer_size_kb */
static const struct file_operations tracing_entries_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_entries_read,
        .write          = tracing_entries_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};

/* File operations for buffer_total_size_kb (read-only) */
static const struct file_operations tracing_total_entries_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_total_entries_read,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};

/* File operations for free_buffer (write-only trigger) */
static const struct file_operations tracing_free_buffer_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_free_buffer_write,
        .release        = tracing_free_buffer_release,
};

/* File operations for trace_marker (text markers) */
static const struct file_operations tracing_mark_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_mark_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};

/* File operations for trace_marker_raw (binary markers) */
static const struct file_operations tracing_mark_raw_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_mark_raw_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};

/* File operations for trace_clock */
static const struct file_operations trace_clock_fops = {
        .open           = tracing_clock_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = tracing_single_release_tr,
        .write          = tracing_clock_write,
};

/* File operations for timestamp_mode (read-only) */
static const struct file_operations trace_time_stamp_mode_fops = {
        .open           = tracing_time_stamp_mode_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = tracing_single_release_tr,
};

#ifdef CONFIG_TRACER_SNAPSHOT
/* File operations for the snapshot file */
static const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
        .read           = seq_read,
        .write          = tracing_snapshot_write,
        .llseek         = tracing_lseek,
        .release        = tracing_snapshot_release,
};

/* File operations for snapshot_raw_pipe */
static const struct file_operations snapshot_raw_fops = {
        .open           = snapshot_raw_open,
        .read           = tracing_buffers_read,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
        .llseek         = no_llseek,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
6635 
/*
 * Open handler for the per-cpu trace_pipe_raw files.  Allocates an
 * ftrace_buffer_info with an iterator over the live trace buffer and
 * pins both the trace array and the current tracer.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
        struct trace_array *tr = inode->i_private;
        struct ftrace_buffer_info *info;
        int ret;

        if (tracing_disabled)
                return -ENODEV;

        if (trace_array_get(tr) < 0)
                return -ENODEV;

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                trace_array_put(tr);
                return -ENOMEM;
        }

        mutex_lock(&trace_types_lock);

        info->iter.tr           = tr;
        info->iter.cpu_file     = tracing_get_cpu(inode);
        info->iter.trace        = tr->current_trace;
        info->iter.trace_buffer = &tr->trace_buffer;
        info->spare             = NULL;
        /* Force reading ring buffer for first read */
        info->read              = (unsigned int)-1;

        filp->private_data = info;

        /* Keep the current tracer from being swapped out while open */
        tr->current_trace->ref++;

        mutex_unlock(&trace_types_lock);

        ret = nonseekable_open(inode, filp);
        if (ret < 0)
                trace_array_put(tr);

        return ret;
}
6676 
6677 static __poll_t
6678 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6679 {
6680         struct ftrace_buffer_info *info = filp->private_data;
6681         struct trace_iterator *iter = &info->iter;
6682 
6683         return trace_poll(iter, filp, poll_table);
6684 }
6685 
/*
 * Read handler for trace_pipe_raw: hands raw ring-buffer page contents
 * to user space.  Data is staged through a "spare" ring buffer page so
 * that a partially consumed page can be continued on the next read.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
                     size_t count, loff_t *ppos)
{
        struct ftrace_buffer_info *info = filp->private_data;
        struct trace_iterator *iter = &info->iter;
        ssize_t ret = 0;
        ssize_t size;

        if (!count)
                return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
        /* The current tracer is using the snapshot buffer itself */
        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
                return -EBUSY;
#endif

        /* Lazily allocate the staging page on first read */
        if (!info->spare) {
                info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
                                                          iter->cpu_file);
                if (IS_ERR(info->spare)) {
                        ret = PTR_ERR(info->spare);
                        info->spare = NULL;
                } else {
                        info->spare_cpu = iter->cpu_file;
                }
        }
        if (!info->spare)
                return ret;

        /* Do we have previous read data to read? */
        if (info->read < PAGE_SIZE)
                goto read;

 again:
        trace_access_lock(iter->cpu_file);
        ret = ring_buffer_read_page(iter->trace_buffer->buffer,
                                    &info->spare,
                                    count,
                                    iter->cpu_file, 0);
        trace_access_unlock(iter->cpu_file);

        if (ret < 0) {
                /* Buffer empty: either fail fast or block until data shows up */
                if (trace_empty(iter)) {
                        if ((filp->f_flags & O_NONBLOCK))
                                return -EAGAIN;

                        ret = wait_on_pipe(iter, false);
                        if (ret)
                                return ret;

                        goto again;
                }
                return 0;
        }

        info->read = 0;
 read:
        /* Copy out as much of the staged page as the caller asked for */
        size = PAGE_SIZE - info->read;
        if (size > count)
                size = count;

        ret = copy_to_user(ubuf, info->spare + info->read, size);
        if (ret == size)
                return -EFAULT;

        /* Partial copy still counts as progress */
        size -= ret;

        *ppos += size;
        info->read += size;

        return size;
}
6759 
/*
 * Release handler for trace_pipe_raw: drops the tracer and trace array
 * references taken at open time and frees the staging page.
 */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
        struct ftrace_buffer_info *info = file->private_data;
        struct trace_iterator *iter = &info->iter;

        mutex_lock(&trace_types_lock);

        /* Unpin the tracer that was pinned in tracing_buffers_open() */
        iter->tr->current_trace->ref--;

        __trace_array_put(iter->tr);

        if (info->spare)
                ring_buffer_free_read_page(iter->trace_buffer->buffer,
                                           info->spare_cpu, info->spare);
        kfree(info);

        mutex_unlock(&trace_types_lock);

        return 0;
}
6780 
/*
 * Reference on a ring-buffer page handed to a pipe by splice.  The page
 * is returned to the ring buffer once the last reference is dropped.
 */
struct buffer_ref {
        struct ring_buffer      *buffer;        /* ring buffer the page came from */
        void                    *page;          /* the page of trace data */
        int                     cpu;            /* cpu buffer the page was read from */
        int                     ref;            /* outstanding references */
};
6787 
6788 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6789                                     struct pipe_buffer *buf)
6790 {
6791         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6792 
6793         if (--ref->ref)
6794                 return;
6795 
6796         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6797         kfree(ref);
6798         buf->private = 0;
6799 }
6800 
/*
 * Take an extra reference on the ring-buffer page attached to a pipe
 * buffer (e.g. when the pipe contents are duplicated).
 * NOTE(review): a plain int counter can overflow if enough references
 * accumulate; later kernels hardened this counter — verify against the
 * pipe_buf_operations contract of this tree.
 */
static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
                                struct pipe_buffer *buf)
{
        struct buffer_ref *ref = (struct buffer_ref *)buf->private;

        ref->ref++;
}
6808 
6809 /* Pipe buffer operations for a buffer. */
6810 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6811         .can_merge              = 0,
6812         .confirm                = generic_pipe_buf_confirm,
6813         .release                = buffer_pipe_buf_release,
6814         .steal                  = generic_pipe_buf_steal,
6815         .get                    = buffer_pipe_buf_get,
6816 };
6817 
6818 /*
6819  * Callback from splice_to_pipe(), if we need to release some pages
6820  * at the end of the spd in case we error'ed out in filling the pipe.
6821  */
6822 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6823 {
6824         struct buffer_ref *ref =
6825                 (struct buffer_ref *)spd->partial[i].private;
6826 
6827         if (--ref->ref)
6828                 return;
6829 
6830         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6831         kfree(ref);
6832         spd->partial[i].private = 0;
6833 }
6834 
/*
 * Splice handler for trace_pipe_raw: moves whole ring-buffer pages into
 * a pipe without copying.  Each page is wrapped in a refcounted
 * buffer_ref that returns the page to the ring buffer when released.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                            struct pipe_inode_info *pipe, size_t len,
                            unsigned int flags)
{
        struct ftrace_buffer_info *info = file->private_data;
        struct trace_iterator *iter = &info->iter;
        struct partial_page partial_def[PIPE_DEF_BUFFERS];
        struct page *pages_def[PIPE_DEF_BUFFERS];
        struct splice_pipe_desc spd = {
                .pages          = pages_def,
                .partial        = partial_def,
                .nr_pages_max   = PIPE_DEF_BUFFERS,
                .ops            = &buffer_pipe_buf_ops,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
        int entries, i;
        ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
        /* The current tracer is using the snapshot buffer itself */
        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
                return -EBUSY;
#endif

        /* Only page-aligned offsets and at least page-sized lengths work */
        if (*ppos & (PAGE_SIZE - 1))
                return -EINVAL;

        if (len & (PAGE_SIZE - 1)) {
                if (len < PAGE_SIZE)
                        return -EINVAL;
                len &= PAGE_MASK;
        }

        if (splice_grow_spd(pipe, &spd))
                return -ENOMEM;

 again:
        trace_access_lock(iter->cpu_file);
        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);

        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;

                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
                if (!ref) {
                        ret = -ENOMEM;
                        break;
                }

                ref->ref = 1;
                ref->buffer = iter->trace_buffer->buffer;
                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
                if (IS_ERR(ref->page)) {
                        ret = PTR_ERR(ref->page);
                        ref->page = NULL;
                        kfree(ref);
                        break;
                }
                ref->cpu = iter->cpu_file;

                /* Swap a full page of trace data into ref->page */
                r = ring_buffer_read_page(ref->buffer, &ref->page,
                                          len, iter->cpu_file, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
                                                   ref->page);
                        kfree(ref);
                        break;
                }

                page = virt_to_page(ref->page);

                spd.pages[i] = page;
                spd.partial[i].len = PAGE_SIZE;
                spd.partial[i].offset = 0;
                spd.partial[i].private = (unsigned long)ref;
                spd.nr_pages++;
                *ppos += PAGE_SIZE;

                entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
        }

        trace_access_unlock(iter->cpu_file);
        spd.nr_pages = i;

        /* did we read anything? */
        if (!spd.nr_pages) {
                /* Report an earlier allocation/read error, if any */
                if (ret)
                        goto out;

                /* Nothing yet: fail or wait for data, depending on flags */
                ret = -EAGAIN;
                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
                        goto out;

                ret = wait_on_pipe(iter, true);
                if (ret)
                        goto out;

                goto again;
        }

        ret = splice_to_pipe(pipe, &spd);
out:
        splice_shrink_spd(&spd);

        return ret;
}
6943 
/* File operations for the per-cpu trace_pipe_raw files */
static const struct file_operations tracing_buffers_fops = {
        .open           = tracing_buffers_open,
        .read           = tracing_buffers_read,
        .poll           = tracing_buffers_poll,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
        .llseek         = no_llseek,
};
6952 
/*
 * Read handler for the per-cpu "stats" file: formats the entry,
 * overrun, byte and timestamp counters of one cpu buffer into a
 * trace_seq and copies it to user space.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
                   size_t count, loff_t *ppos)
{
        struct inode *inode = file_inode(filp);
        struct trace_array *tr = inode->i_private;
        struct trace_buffer *trace_buf = &tr->trace_buffer;
        int cpu = tracing_get_cpu(inode);
        struct trace_seq *s;
        unsigned long cnt;
        unsigned long long t;
        unsigned long usec_rem;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "entries: %ld\n", cnt);

        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "overrun: %ld\n", cnt);

        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "commit overrun: %ld\n", cnt);

        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "bytes: %ld\n", cnt);

        if (trace_clocks[tr->clock_id].in_ns) {
                /* local or global for trace_clock */
                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
                                                                t, usec_rem);

                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
        } else {
                /* counter or tsc mode for trace_clock */
                trace_seq_printf(s, "oldest event ts: %llu\n",
                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

                trace_seq_printf(s, "now ts: %llu\n",
                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
        }

        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "dropped events: %ld\n", cnt);

        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "read events: %ld\n", cnt);

        count = simple_read_from_buffer(ubuf, count, ppos,
                                        s->buffer, trace_seq_used(s));

        kfree(s);

        return count;
}
7016 
/* File operations for the per-cpu stats files */
static const struct file_operations tracing_stats_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_stats_read,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
7023 
7024 #ifdef CONFIG_DYNAMIC_FTRACE
7025 
7026 static ssize_t
7027 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7028                   size_t cnt, loff_t *ppos)
7029 {
7030         unsigned long *p = filp->private_data;
7031         char buf[64]; /* Not too big for a shallow stack */
7032         int r;
7033 
7034         r = scnprintf(buf, 63, "%ld", *p);
7035         buf[r++] = '\n';
7036 
7037         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7038 }
7039 
/* File operations for dyn_ftrace_total_info */
static const struct file_operations tracing_dyn_info_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_read_dyn_info,
        .llseek         = generic_file_llseek,
};
7045 #endif /* CONFIG_DYNAMIC_FTRACE */
7046 
7047 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * ftrace probe callback for "func:snapshot": take a snapshot of @tr
 * every time the traced function is hit.  ip/parent_ip/ops/data are
 * part of the probe callback signature and unused here.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
                struct trace_array *tr, struct ftrace_probe_ops *ops,
                void *data)
{
        tracing_snapshot_instance(tr);
}
7055 
7056 static void
7057 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7058                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7059                       void *data)
7060 {
7061         struct ftrace_func_mapper *mapper = data;
7062         long *count = NULL;
7063 
7064         if (mapper)
7065                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7066 
7067         if (count) {
7068 
7069                 if (*count <= 0)
7070                         return;
7071 
7072                 (*count)--;
7073         }
7074 
7075         tracing_snapshot_instance(tr);
7076 }
7077 
7078 static int
7079 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7080                       struct ftrace_probe_ops *ops, void *data)
7081 {
7082         struct ftrace_func_mapper *mapper = data;
7083         long *count = NULL;
7084 
7085         seq_printf(m, "%ps:", (void *)ip);
7086 
7087         seq_puts(m, "snapshot");
7088 
7089         if (mapper)
7090                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7091 
7092         if (count)
7093                 seq_printf(m, ":count=%ld\n", *count);
7094         else
7095                 seq_puts(m, ":unlimited\n");
7096 
7097         return 0;
7098 }
7099 
/*
 * Probe init: called when a counted snapshot probe is attached to @ip.
 * Lazily allocates the shared ip -> count mapper and records the
 * initial count (passed in via @init_data) for this ip.
 */
static int
ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
                     unsigned long ip, void *init_data, void **data)
{
        struct ftrace_func_mapper *mapper = *data;

        if (!mapper) {
                mapper = allocate_ftrace_func_mapper();
                if (!mapper)
                        return -ENOMEM;
                *data = mapper;
        }

        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
}
7115 
7116 static void
7117 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7118                      unsigned long ip, void *data)
7119 {
7120         struct ftrace_func_mapper *mapper = data;
7121 
7122         if (!ip) {
7123                 if (!mapper)
7124                         return;
7125                 free_ftrace_func_mapper(mapper, NULL);
7126                 return;
7127         }
7128 
7129         ftrace_func_mapper_remove_ip(mapper, ip);
7130 }
7131 
/* Probe ops for "<func>:snapshot" with no count: snapshot on every hit */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};
7136 
/* Probe ops for "<func>:snapshot:count": per-ip counter kept in a func mapper */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
	.init			= ftrace_snapshot_init,
	.free			= ftrace_snapshot_free,
};
7143 
/*
 * Parse and register the set_ftrace_filter "snapshot" command:
 *   <func>:snapshot[:count]  - arm a snapshot probe (optionally limited)
 *   !<func>:snapshot         - remove a previously registered probe
 * Returns 0 on success or a negative errno.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;	/* default when no count is given */
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	/* A count parameter selects the counting probe ops */
	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!')
		return unregister_ftrace_function_probe_func(glob+1, tr, ops);

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	/* Empty count (trailing ':') behaves like no count at all */
	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	/* Make sure the snapshot buffer exists before arming the probe */
	ret = alloc_snapshot(tr);
	if (ret < 0)
		goto out;

	ret = register_ftrace_function_probe(glob, tr, ops, count);

 out:
	return ret < 0 ? ret : 0;
}
7191 
/* set_ftrace_filter command descriptor: "<func>:snapshot[:count]" */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
7196 
/* Register the "snapshot" command with the ftrace command parser at boot */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
7201 #else
/* Stub: needs both CONFIG_TRACER_SNAPSHOT and CONFIG_DYNAMIC_FTRACE */
static inline __init int register_snapshot_cmd(void) { return 0; }
7203 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7204 
7205 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7206 {
7207         if (WARN_ON(!tr->dir))
7208                 return ERR_PTR(-ENODEV);
7209 
7210         /* Top directory uses NULL as the parent */
7211         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7212                 return NULL;
7213 
7214         /* All sub buffers have a descriptor */
7215         return tr->dir;
7216 }
7217 
/*
 * Return (creating on first use) the "per_cpu" tracefs directory for
 * @tr.  Returns NULL on failure; a warning is emitted once.
 */
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	/* Cached after the first successful creation */
	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	WARN_ONCE(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}
7236 
/*
 * Like trace_create_file(), but stashes cpu+1 in the new inode's i_cdev
 * so the read side can recover which CPU the file belongs to.
 */
static struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}
7247 
/*
 * Create the per_cpu/cpu<N> subtree for one CPU of a trace array:
 * per-cpu views of the trace, pipes, stats, sizing and (if configured)
 * snapshot files.  Failures only warn; tracing continues without the
 * missing files.
 */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", 0644, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", 0444, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
				tr, cpu, &tracing_entries_fops);

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_cpu_file("snapshot", 0644, d_cpu,
				tr, cpu, &snapshot_fops);

	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
				tr, cpu, &snapshot_raw_fops);
#endif
}
7290 
7291 #ifdef CONFIG_FTRACE_SELFTEST
7292 /* Let selftest have access to static functions in this file */
7293 #include "trace_selftest.c"
7294 #endif
7295 
7296 static ssize_t
7297 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7298                         loff_t *ppos)
7299 {
7300         struct trace_option_dentry *topt = filp->private_data;
7301         char *buf;
7302 
7303         if (topt->flags->val & topt->opt->bit)
7304                 buf = "1\n";
7305         else
7306                 buf = "\n";
7307 
7308         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7309 }
7310 
/*
 * Write handler for a tracer-specific option file: accepts "0" or "1"
 * and updates the corresponding tracer option if it actually changes.
 */
static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Only call down when the bit would actually flip */
	if (!!(topt->flags->val & topt->opt->bit) != val) {
		mutex_lock(&trace_types_lock);
		/*
		 * NOTE(review): last argument looks like a "negate" flag
		 * (!val: enable -> 0, disable -> 1) — confirm against
		 * __set_tracer_option() before changing.
		 */
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		mutex_unlock(&trace_types_lock);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}
7339 
7340 
/* fops for per-tracer option files created under options/ */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_generic,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek = generic_file_llseek,
};
7347 
7348 /*
7349  * In order to pass in both the trace_array descriptor as well as the index
7350  * to the flag that the trace option file represents, the trace_array
7351  * has a character array of trace_flags_index[], which holds the index
7352  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7353  * The address of this character array is passed to the flag option file
7354  * read/write callbacks.
7355  *
7356  * In order to extract both the index and the trace_array descriptor,
7357  * get_tr_index() uses the following algorithm.
7358  *
7359  *   idx = *ptr;
7360  *
7361  * As the pointer itself contains the address of the index (remember
7362  * index[1] == 1).
7363  *
7364  * Then to get the trace_array descriptor, by subtracting that index
7365  * from the ptr, we get to the start of the index itself.
7366  *
7367  *   ptr - idx == &index[0]
7368  *
7369  * Then a simple container_of() from that pointer gets us to the
7370  * trace_array descriptor.
7371  */
/*
 * Decode the pointer handed to a core option file: *data is the flag
 * index (trace_flags_index[i] == i), and stepping back by that index
 * reaches &trace_flags_index[0], from which container_of() recovers
 * the owning trace_array.  See the block comment above for the scheme.
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
7380 
7381 static ssize_t
7382 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7383                         loff_t *ppos)
7384 {
7385         void *tr_index = filp->private_data;
7386         struct trace_array *tr;
7387         unsigned int index;
7388         char *buf;
7389 
7390         get_tr_index(tr_index, &tr, &index);
7391 
7392         if (tr->trace_flags & (1 << index))
7393                 buf = "1\n";
7394         else
7395                 buf = "\n";
7396 
7397         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7398 }
7399 
7400 static ssize_t
7401 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7402                          loff_t *ppos)
7403 {
7404         void *tr_index = filp->private_data;
7405         struct trace_array *tr;
7406         unsigned int index;
7407         unsigned long val;
7408         int ret;
7409 
7410         get_tr_index(tr_index, &tr, &index);
7411 
7412         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7413         if (ret)
7414                 return ret;
7415 
7416         if (val != 0 && val != 1)
7417                 return -EINVAL;
7418 
7419         mutex_lock(&trace_types_lock);
7420         ret = set_tracer_flag(tr, 1 << index, val);
7421         mutex_unlock(&trace_types_lock);
7422 
7423         if (ret < 0)
7424                 return ret;
7425 
7426         *ppos += cnt;
7427 
7428         return cnt;
7429 }
7430 
/* fops for the core trace-flag option files (see get_tr_index()) */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
7437 
7438 struct dentry *trace_create_file(const char *name,
7439                                  umode_t mode,
7440                                  struct dentry *parent,
7441                                  void *data,
7442                                  const struct file_operations *fops)
7443 {
7444         struct dentry *ret;
7445 
7446         ret = tracefs_create_file(name, mode, parent, data, fops);
7447         if (!ret)
7448                 pr_warn("Could not create tracefs '%s' entry\n", name);
7449 
7450         return ret;
7451 }
7452 
7453 
/*
 * Return (creating on first use) the "options" tracefs directory for
 * @tr.  Returns NULL on failure.
 */
static struct dentry *trace_options_init_dentry(struct trace_array *tr)
{
	struct dentry *d_tracer;

	/* Cached after the first successful creation */
	if (tr->options)
		return tr->options;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->options = tracefs_create_dir("options", d_tracer);
	if (!tr->options) {
		pr_warn("Could not create tracefs directory 'options'\n");
		return NULL;
	}

	return tr->options;
}
7473 
/*
 * Create one options/<name> file wired to a single tracer option bit.
 * On failure topt->entry is left NULL; the caller checks and warns.
 */
static void
create_trace_option_file(struct trace_array *tr,
			 struct trace_option_dentry *topt,
			 struct tracer_flags *flags,
			 struct tracer_opt *opt)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	/* The topt struct is the file's private data; see trace_options_fops */
	topt->flags = flags;
	topt->opt = opt;
	topt->tr = tr;

	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
				    &trace_options_fops);

}
7494 
/*
 * Create an options/ file for each flag the given tracer supports, and
 * record the allocated topts array on @tr so instance_rmdir() can free
 * it when the instance goes away.
 */
static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_flags *flags;
	struct tracer_opt *opts;
	int cnt;
	int i;

	if (!tracer)
		return;

	flags = tracer->flags;

	if (!flags || !flags->opts)
		return;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return;

	for (i = 0; i < tr->nr_topts; i++) {
		/* Make sure there's no duplicate flags. */
		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
			return;
	}

	opts = flags->opts;

	/* Count the options (the array is terminated by a NULL name) */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
	if (!topts)
		return;

	/* Grow the per-array bookkeeping list by one entry */
	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		WARN_ONCE(topts[cnt].entry == NULL,
			  "Failed to create trace option: %s",
			  opts[cnt].name);
	}
}
7555 
/*
 * Create an options/ file for one of the core trace flags.  The file's
 * private data is &tr->trace_flags_index[index]; see get_tr_index()
 * for how the index and the trace_array are recovered from it.
 */
static struct dentry *
create_trace_option_core_file(struct trace_array *tr,
			      const char *option, long index)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return NULL;

	return trace_create_file(option, 0644, t_options,
				 (void *)&tr->trace_flags_index[index],
				 &trace_options_core_fops);
}
7570 
7571 static void create_trace_options_dir(struct trace_array *tr)
7572 {
7573         struct dentry *t_options;
7574         bool top_level = tr == &global_trace;
7575         int i;
7576 
7577         t_options = trace_options_init_dentry(tr);
7578         if (!t_options)
7579                 return;
7580 
7581         for (i = 0; trace_options[i]; i++) {
7582                 if (top_level ||
7583                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7584                         create_trace_option_core_file(tr, trace_options[i], i);
7585         }
7586 }
7587 
7588 static ssize_t
7589 rb_simple_read(struct file *filp, char __user *ubuf,
7590                size_t cnt, loff_t *ppos)
7591 {
7592         struct trace_array *tr = filp->private_data;
7593         char buf[64];
7594         int r;
7595 
7596         r = tracer_tracing_is_on(tr);
7597         r = sprintf(buf, "%d\n", r);
7598 
7599         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7600 }
7601 
/*
 * "tracing_on" write handler: a non-zero value turns the ring buffer
 * on (and calls the current tracer's ->start), zero turns it off (and
 * calls ->stop).
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		/* Serialize against tracer changes while toggling */
		mutex_lock(&trace_types_lock);
		if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
		}
		mutex_unlock(&trace_types_lock);
	}

	/*
	 * NOTE(review): ppos advances by one rather than cnt even though
	 * the whole write is consumed — looks intentional (the file has
	 * no meaningful offset); confirm before "fixing".
	 */
	(*ppos)++;

	return cnt;
}
7633 
/* fops for the "tracing_on" control file */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
7641 
/* The "instances" directory; mkdir/rmdir beneath it create/destroy trace arrays */
struct dentry *trace_instance_dir;
7643 
7644 static void
7645 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7646 
7647 static int
7648 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7649 {
7650         enum ring_buffer_flags rb_flags;
7651 
7652         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7653 
7654         buf->tr = tr;
7655 
7656         buf->buffer = ring_buffer_alloc(size, rb_flags);
7657         if (!buf->buffer)
7658                 return -ENOMEM;
7659 
7660         buf->data = alloc_percpu(struct trace_array_cpu);
7661         if (!buf->data) {
7662                 ring_buffer_free(buf->buffer);
7663                 buf->buffer = NULL;
7664                 return -ENOMEM;
7665         }
7666 
7667         /* Allocate the first page for all buffers */
7668         set_buffer_entries(&tr->trace_buffer,
7669                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7670 
7671         return 0;
7672 }
7673 
/*
 * Allocate the main trace buffer and, when CONFIG_TRACER_MAX_TRACE is
 * set, the max/snapshot buffer (minimally sized unless a boot-time
 * snapshot was requested).  Frees the main buffer if the max buffer
 * allocation fails.
 */
static int allocate_trace_buffers(struct trace_array *tr, int size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
	ret = allocate_trace_buffer(tr, &tr->max_buffer,
				    allocate_snapshot ? size : 1);
	if (WARN_ON(ret)) {
		/* Roll back the main buffer allocated above */
		ring_buffer_free(tr->trace_buffer.buffer);
		tr->trace_buffer.buffer = NULL;
		free_percpu(tr->trace_buffer.data);
		tr->trace_buffer.data = NULL;
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/*
	 * Only the top level trace array gets its snapshot allocated
	 * from the kernel command line.
	 */
	allocate_snapshot = false;
#endif
	return 0;
}
7702 
7703 static void free_trace_buffer(struct trace_buffer *buf)
7704 {
7705         if (buf->buffer) {
7706                 ring_buffer_free(buf->buffer);
7707                 buf->buffer = NULL;
7708                 free_percpu(buf->data);
7709                 buf->data = NULL;
7710         }
7711 }
7712 
/* Release both of a trace array's buffers (max buffer only if configured) */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	free_trace_buffer(&tr->max_buffer);
#endif
}
7724 
/*
 * Set up the identity map consumed by the core option files:
 * trace_flags_index[i] == i (see get_tr_index()).
 */
static void init_trace_flags_index(struct trace_array *tr)
{
	int i;

	/* Used by the trace options files */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
		tr->trace_flags_index[i] = i;
}
7733 
/*
 * Add option files for every registered tracer to @tr.  Callers hold
 * trace_types_lock (see update_tracer_options() and instance_mkdir()).
 */
static void __update_tracer_options(struct trace_array *tr)
{
	struct tracer *t;

	for (t = trace_types; t; t = t->next)
		add_tracer_options(tr, t);
}
7741 
/* Locked wrapper around __update_tracer_options() */
static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}
7748 
/*
 * tracefs callback for "mkdir instances/<name>": allocate a new trace
 * array, give it its own buffers, tracefs directory, events and option
 * files, and add it to ftrace_trace_arrays.  Returns 0 or a -errno.
 */
static int instance_mkdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	/* Lock order: event_mutex before trace_types_lock (matches rmdir) */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	/* Reject duplicate instance names */
	ret = -EEXIST;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0)
			goto out_unlock;
	}

	ret = -ENOMEM;
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out_unlock;

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	/* Inherit the global flags, minus those that must start cleared */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	/* New instances start with no tracer selected */
	tr->current_trace = &nop_trace;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	tr->dir = tracefs_create_dir(name, trace_instance_dir);
	if (!tr->dir)
		goto out_free_tr;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove_recursive(tr->dir);
		goto out_free_tr;
	}

	ftrace_init_trace_array(tr);

	init_tracer_tracefs(tr, tr->dir);
	init_trace_flags_index(tr);
	__update_tracer_options(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	return 0;

	/* Error path: kfree/free_* below are all safe on NULL/partial state */
 out_free_tr:
	free_trace_buffers(tr);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

 out_unlock:
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	return ret;

}
7828 
/*
 * tracefs callback for "rmdir instances/<name>": unhook the instance's
 * tracer, events and probes, remove its tracefs directory and free all
 * of its memory.  Fails with -EBUSY while the instance is referenced.
 */
static int instance_rmdir(const char *name)
{
	struct trace_array *tr;
	int found = 0;
	int ret;
	int i;

	/* Lock order: event_mutex before trace_types_lock (matches mkdir) */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = -ENODEV;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0) {
			found = 1;
			break;
		}
	}
	if (!found)
		goto out_unlock;

	/* Refuse while open files or the current tracer hold references */
	ret = -EBUSY;
	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
		goto out_unlock;

	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove_recursive(tr->dir);
	free_trace_buffers(tr);

	/* Free the bookkeeping built by create_trace_option_files() */
	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	ret = 0;

 out_unlock:
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	return ret;
}
7886 
/*
 * Create the "instances" directory at boot; mkdir/rmdir inside it are
 * routed to instance_mkdir()/instance_rmdir().
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (WARN_ON(!trace_instance_dir))
		return;
}
7895 
/*
 * Populate a trace array's tracefs directory with all of its control
 * and output files, the options/ directory, the per-cpu subtree, and
 * the function-tracing files.  Used for both the top-level directory
 * and each instance.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", 0444, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", 0644, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", 0644, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", 0644, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", 0644, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", 0444, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", 0644, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", 0644, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", 0644, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	create_trace_options_dir(tr);

#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
	trace_create_file("tracing_max_latency", 0644, d_tracer,
			&tr->max_latency, &tracing_max_lat_fops);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		WARN(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", 0644, d_tracer,
			  tr, &snapshot_fops);
#endif

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
7963 
/*
 * Automount callback for the debugfs "tracing" entry: mounts tracefs
 * at @mntpt.  Returns the vfsmount or NULL on failure.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;
	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
	put_filesystem(type);	/* drop the reference taken by get_fs_type() */
	if (IS_ERR(mnt))
		return NULL;
	/*
	 * NOTE(review): extra mount reference taken here — presumably held
	 * for the lifetime of the automount point; confirm against the
	 * d_automount contract before changing.
	 */
	mntget(mnt);

	return mnt;
}
7985 
7986 /**
7987  * tracing_init_dentry - initialize top level trace array
7988  *
7989  * This is called when creating files or directories in the tracing
7990  * directory. It is called via fs_initcall() by any of the boot up code
7991  * and expects to return the dentry of the top level tracing directory.
7992  */
7993 struct dentry *tracing_init_dentry(void)
7994 {
7995         struct trace_array *tr = &global_trace;
7996 
7997         /* The top level trace array uses  NULL as parent */
7998         if (tr->dir)
7999                 return NULL;
8000 
8001         if (WARN_ON(!tracefs_initialized()) ||
8002                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8003                  WARN_ON(!debugfs_initialized())))
8004                 return ERR_PTR(-ENODEV);
8005 
8006         /*
8007          * As there may still be users that expect the tracing
8008          * files to exist in debugfs/tracing, we must automount
8009          * the tracefs file system there, so older tools still
8010          * work with the newer kerenl.
8011          */
8012         tr->dir = debugfs_create_automount("tracing", NULL,
8013                                            trace_automount, NULL);
8014         if (!tr->dir) {
8015                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8016                 return ERR_PTR(-ENOMEM);
8017         }
8018 
8019         return NULL;
8020 }
8021 
8022 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8023 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8024 
/* Register the eval maps built into the core kernel image at boot */
static void __init trace_eval_init(void)
{
	int len;

	/* The section bounds delimit the array of built-in eval maps */
	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
}
8032 
8033 #ifdef CONFIG_MODULES
/* Register a newly loaded module's eval maps (skipped for bad-taint modules) */
static void trace_module_add_evals(struct module *mod)
{
	if (!mod->num_trace_evals)
		return;

	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
8048 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unregister a module's eval maps when it is unloaded.
 *
 * The global trace_eval_maps list interleaves "head" entries (one per
 * registrant) with their map entries; trace_eval_jmp_to_tail() skips
 * over a registrant's maps to the "tail" link.  Walk the list looking
 * for the head belonging to @mod, unlink that whole segment, and free
 * it.  Serialized against readers by trace_eval_mutex.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Find the segment whose head was registered by this module */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	/* Splice the module's segment out of the list, then free it */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8080 
8081 static int trace_module_notify(struct notifier_block *self,
8082                                unsigned long val, void *data)
8083 {
8084         struct module *mod = data;
8085 
8086         switch (val) {
8087         case MODULE_STATE_COMING:
8088                 trace_module_add_evals(mod);
8089                 break;
8090         case MODULE_STATE_GOING:
8091                 trace_module_remove_evals(mod);
8092                 break;
8093         }
8094 
8095         return 0;
8096 }
8097 
/* Hooked into the module notifier chain by tracer_init_tracefs() */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
8102 #endif /* CONFIG_MODULES */
8103 
/*
 * tracer_init_tracefs - create the tracefs control files at boot
 *
 * Creates the top-level tracing directory (if possible), populates it
 * for the global trace array, and registers the module notifier so
 * eval maps from modules are tracked.  Always returns 0: failure to
 * set up the directory is not fatal to the tracer itself.
 */
static __init int tracer_init_tracefs(void)
{
	struct dentry *d_tracer;

	trace_access_lock_init();

	d_tracer = tracing_init_dentry();
	/* No tracefs available: silently skip file creation */
	if (IS_ERR(d_tracer))
		return 0;

	init_tracer_tracefs(&global_trace, d_tracer);
	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);

	trace_create_file("tracing_thresh", 0644, d_tracer,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, d_tracer,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, d_tracer,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", 0444, d_tracer,
			NULL, &tracing_saved_tgids_fops);

	/* Built-in eval maps must be registered before the eval file */
	trace_eval_init();

	trace_create_eval_file(d_tracer);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif

	create_trace_instances(d_tracer);

	update_tracer_options(&global_trace);

	return 0;
}
8151 
8152 static int trace_panic_handler(struct notifier_block *this,
8153                                unsigned long event, void *unused)
8154 {
8155         if (ftrace_dump_on_oops)
8156                 ftrace_dump(ftrace_dump_on_oops);
8157         return NOTIFY_OK;
8158 }
8159 
/* Registered on the panic notifier chain to dump trace buffers */
static struct notifier_block trace_panic_notifier = {
	.notifier_call  = trace_panic_handler,
	.next           = NULL,
	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
};
8165 
8166 static int trace_die_handler(struct notifier_block *self,
8167                              unsigned long val,
8168                              void *data)
8169 {
8170         switch (val) {
8171         case DIE_OOPS:
8172                 if (ftrace_dump_on_oops)
8173                         ftrace_dump(ftrace_dump_on_oops);
8174                 break;
8175         default:
8176                 break;
8177         }
8178         return NOTIFY_OK;
8179 }
8180 
/* Registered on the die notifier chain; runs before the panic handler */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};
8185 
/*
 * The printk buffer tops out at 1024 bytes; we really don't need it
 * that big here.  Nothing should be printing 1000 characters anyway.
 */
8190 #define TRACE_MAX_PRINT         1000
8191 
8192 /*
8193  * Define here KERN_TRACE so that we have one place to modify
8194  * it if we decide to change what log level the ftrace dump
8195  * should be at.
8196  */
8197 #define KERN_TRACE              KERN_EMERG
8198 
/*
 * trace_printk_seq - emit a trace_seq buffer via printk and reset it
 *
 * Used by ftrace_dump() to push rendered trace lines to the console.
 * The length is clamped twice (to TRACE_MAX_PRINT and then to the
 * buffer size) before the buffer is forcibly NUL-terminated, so a
 * corrupt or overlong seq can never run printk past the buffer.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	/* KERN_TRACE is KERN_EMERG so the dump reaches the console */
	printk(KERN_TRACE "%s", s->buffer);

	/* Ready the seq for the next line */
	trace_seq_init(s);
}
8221 
8222 void trace_init_global_iter(struct trace_iterator *iter)
8223 {
8224         iter->tr = &global_trace;
8225         iter->trace = iter->tr->current_trace;
8226         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8227         iter->