
TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c


  1 /*
  2  * ring buffer based function tracer
  3  *
  4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  6  *
  7  * Originally taken from the RT patch by:
  8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
  9  *
 10  * Based on code from the latency_tracer, that is:
 11  *  Copyright (C) 2004-2006 Ingo Molnar
 12  *  Copyright (C) 2004 Nadia Yvette Chambers
 13  */
 14 #include <linux/ring_buffer.h>
 15 #include <generated/utsrelease.h>
 16 #include <linux/stacktrace.h>
 17 #include <linux/writeback.h>
 18 #include <linux/kallsyms.h>
 19 #include <linux/seq_file.h>
 20 #include <linux/notifier.h>
 21 #include <linux/irqflags.h>
 22 #include <linux/debugfs.h>
 23 #include <linux/tracefs.h>
 24 #include <linux/pagemap.h>
 25 #include <linux/hardirq.h>
 26 #include <linux/linkage.h>
 27 #include <linux/uaccess.h>
 28 #include <linux/vmalloc.h>
 29 #include <linux/ftrace.h>
 30 #include <linux/module.h>
 31 #include <linux/percpu.h>
 32 #include <linux/splice.h>
 33 #include <linux/kdebug.h>
 34 #include <linux/string.h>
 35 #include <linux/mount.h>
 36 #include <linux/rwsem.h>
 37 #include <linux/slab.h>
 38 #include <linux/ctype.h>
 39 #include <linux/init.h>
 40 #include <linux/poll.h>
 41 #include <linux/nmi.h>
 42 #include <linux/fs.h>
 43 #include <linux/trace.h>
 44 #include <linux/sched/rt.h>
 45 
 46 #include "trace.h"
 47 #include "trace_output.h"
 48 
 49 /*
 50  * On boot up, the ring buffer is set to the minimum size, so that
 51  * we do not waste memory on systems that are not using tracing.
 52  */
 53 bool ring_buffer_expanded;
 54 
 55 /*
 56  * We need to change this state when a selftest is running.
 57  * A selftest will look into the ring-buffer to count the
 58  * entries inserted during the selftest, but concurrent
 59  * insertions into the ring-buffer, such as trace_printk(), could occur
 60  * at the same time, giving false positive or negative results.
 61  */
 62 static bool __read_mostly tracing_selftest_running;
 63 
 64 /*
 65  * If a tracer is running, we do not want to run SELFTEST.
 66  */
 67 bool __read_mostly tracing_selftest_disabled;
 68 
 69 /* Pipe tracepoints to printk */
 70 struct trace_iterator *tracepoint_print_iter;
 71 int tracepoint_printk;
 72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 73 
 74 /* For tracers that don't implement custom flags */
 75 static struct tracer_opt dummy_tracer_opt[] = {
 76         { }
 77 };
 78 
 79 static int
 80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 81 {
 82         return 0;
 83 }
 84 
 85 /*
 86  * To prevent the comm cache from being overwritten when no
 87  * tracing is active, only save the comm when a trace event
 88  * occurred.
 89  */
 90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 91 
 92 /*
 93  * Kill all tracing for good (never come back).
 94  * It is initialized to 1 but will turn to zero if the initialization
 95  * of the tracer is successful. But that is the only place that sets
 96  * this back to zero.
 97  */
 98 static int tracing_disabled = 1;
 99 
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
 108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
 111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" in the kernel command line, or setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered oops
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
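/*
 * Example (see set_ftrace_dump_on_oops() below):
 *   ftrace_dump_on_oops           on the command line selects DUMP_ALL
 *   ftrace_dump_on_oops=orig_cpu  selects DUMP_ORIG (only the oops CPU)
 */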
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129 
130 union trace_enum_map_item;
131 
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_enum_mutex);
142 
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
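/*
 * Illustrative layout of one saved array (as described in the comment above):
 *
 *   [ head (.mod, .length = N) ][ map 0 ] ... [ map N-1 ][ tail (.next, .end = NULL) ]
 */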
155 
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183 
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188 
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238 
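/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */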
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         TRACE_ITER_EVENT_FORK
261 
262 /*
263  * The global_trace is the descriptor that holds the tracing
264  * buffers for the live tracing. For each CPU, it contains
265  * a link list of pages that will store trace entries. The
266  * page descriptor of the pages in the memory is used to hold
267  * the link list by linking the lru item in the page descriptor
268  * to each of the pages in the buffer per CPU.
269  *
270  * For each active CPU there is a data field that holds the
271  * pages for the buffer for that CPU. Each CPU has the same number
272  * of pages allocated for its buffer.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277 
278 LIST_HEAD(ftrace_trace_arrays);
279 
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284 
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294 
295         return ret;
296 }
297 
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303 
304 void trace_array_put(struct trace_array *this_tr)
305 {
306         mutex_lock(&trace_types_lock);
307         __trace_array_put(this_tr);
308         mutex_unlock(&trace_types_lock);
309 }
310 
311 int call_filter_check_discard(struct trace_event_call *call, void *rec,
312                               struct ring_buffer *buffer,
313                               struct ring_buffer_event *event)
314 {
315         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
316             !filter_match_preds(call->filter, rec)) {
317                 __trace_event_discard_commit(buffer, event);
318                 return 1;
319         }
320 
321         return 0;
322 }
323 
324 void trace_free_pid_list(struct trace_pid_list *pid_list)
325 {
326         vfree(pid_list->pids);
327         kfree(pid_list);
328 }
329 
330 /**
331  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
332  * @filtered_pids: The list of pids to check
333  * @search_pid: The PID to find in @filtered_pids
334  *
 335  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
336  */
337 bool
338 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
339 {
340         /*
341          * If pid_max changed after filtered_pids was created, we
342          * by default ignore all pids greater than the previous pid_max.
343          */
344         if (search_pid >= filtered_pids->pid_max)
345                 return false;
346 
347         return test_bit(search_pid, filtered_pids->pids);
348 }
349 
350 /**
351  * trace_ignore_this_task - should a task be ignored for tracing
352  * @filtered_pids: The list of pids to check
353  * @task: The task that should be ignored if not filtered
354  *
355  * Checks if @task should be traced or not from @filtered_pids.
356  * Returns true if @task should *NOT* be traced.
357  * Returns false if @task should be traced.
358  */
359 bool
360 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
361 {
362         /*
363          * Return false, because if filtered_pids does not exist,
364          * all pids are good to trace.
365          */
366         if (!filtered_pids)
367                 return false;
368 
369         return !trace_find_filtered_pid(filtered_pids, task->pid);
370 }
371 
372 /**
373  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
374  * @pid_list: The list to modify
375  * @self: The current task for fork or NULL for exit
376  * @task: The task to add or remove
377  *
378  * If adding a task, if @self is defined, the task is only added if @self
379  * is also included in @pid_list. This happens on fork and tasks should
380  * only be added when the parent is listed. If @self is NULL, then the
381  * @task pid will be removed from the list, which would happen on exit
382  * of a task.
383  */
384 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
385                                   struct task_struct *self,
386                                   struct task_struct *task)
387 {
388         if (!pid_list)
389                 return;
390 
391         /* For forks, we only add if the forking task is listed */
392         if (self) {
393                 if (!trace_find_filtered_pid(pid_list, self->pid))
394                         return;
395         }
396 
397         /* Sorry, but we don't support pid_max changing after setting */
398         if (task->pid >= pid_list->pid_max)
399                 return;
400 
401         /* "self" is set for forks, and NULL for exits */
402         if (self)
403                 set_bit(task->pid, pid_list->pids);
404         else
405                 clear_bit(task->pid, pid_list->pids);
406 }
407 
408 /**
409  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
410  * @pid_list: The pid list to show
411  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
412  * @pos: The position of the file
413  *
414  * This is used by the seq_file "next" operation to iterate the pids
415  * listed in a trace_pid_list structure.
416  *
417  * Returns the pid+1 as we want to display pid of zero, but NULL would
418  * stop the iteration.
419  */
420 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
421 {
422         unsigned long pid = (unsigned long)v;
423 
424         (*pos)++;
425 
 426         /* pid is already +1 of the actual previous bit */
427         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
428 
429         /* Return pid + 1 to allow zero to be represented */
430         if (pid < pid_list->pid_max)
431                 return (void *)(pid + 1);
432 
433         return NULL;
434 }
435 
436 /**
437  * trace_pid_start - Used for seq_file to start reading pid lists
438  * @pid_list: The pid list to show
439  * @pos: The position of the file
440  *
441  * This is used by seq_file "start" operation to start the iteration
442  * of listing pids.
443  *
444  * Returns the pid+1 as we want to display pid of zero, but NULL would
445  * stop the iteration.
446  */
447 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
448 {
449         unsigned long pid;
450         loff_t l = 0;
451 
452         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
453         if (pid >= pid_list->pid_max)
454                 return NULL;
455 
456         /* Return pid + 1 so that zero can be the exit value */
457         for (pid++; pid && l < *pos;
458              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
459                 ;
460         return (void *)pid;
461 }
462 
463 /**
464  * trace_pid_show - show the current pid in seq_file processing
465  * @m: The seq_file structure to write into
466  * @v: A void pointer of the pid (+1) value to display
467  *
468  * Can be directly used by seq_file operations to display the current
469  * pid value.
470  */
471 int trace_pid_show(struct seq_file *m, void *v)
472 {
473         unsigned long pid = (unsigned long)v - 1;
474 
475         seq_printf(m, "%lu\n", pid);
476         return 0;
477 }
478 
479 /* 128 should be much more than enough */
480 #define PID_BUF_SIZE            127
481 
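/*
 * Parse a user-space buffer of pid numbers and build a new pid list in
 * *new_pid_list, copying over any pids already set in @filtered_pids.
 * Returns the number of bytes consumed, or a negative error code.
 */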
482 int trace_pid_write(struct trace_pid_list *filtered_pids,
483                     struct trace_pid_list **new_pid_list,
484                     const char __user *ubuf, size_t cnt)
485 {
486         struct trace_pid_list *pid_list;
487         struct trace_parser parser;
488         unsigned long val;
489         int nr_pids = 0;
490         ssize_t read = 0;
491         ssize_t ret = 0;
492         loff_t pos;
493         pid_t pid;
494 
495         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
496                 return -ENOMEM;
497 
498         /*
499          * Always recreate a new array. The write is an all or nothing
500          * operation. Always create a new array when adding new pids by
501          * the user. If the operation fails, then the current list is
502          * not modified.
503          */
504         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
505         if (!pid_list)
506                 return -ENOMEM;
507 
508         pid_list->pid_max = READ_ONCE(pid_max);
509 
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513 
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 kfree(pid_list);
517                 return -ENOMEM;
518         }
519 
520         if (filtered_pids) {
521                 /* copy the current bits to the new max */
522                 for_each_set_bit(pid, filtered_pids->pids,
523                                  filtered_pids->pid_max) {
524                         set_bit(pid, pid_list->pids);
525                         nr_pids++;
526                 }
527         }
528 
529         while (cnt > 0) {
530 
531                 pos = 0;
532 
533                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
534                 if (ret < 0 || !trace_parser_loaded(&parser))
535                         break;
536 
537                 read += ret;
538                 ubuf += ret;
539                 cnt -= ret;
540 
541                 parser.buffer[parser.idx] = 0;
542 
543                 ret = -EINVAL;
544                 if (kstrtoul(parser.buffer, 0, &val))
545                         break;
546                 if (val >= pid_list->pid_max)
547                         break;
548 
549                 pid = (pid_t)val;
550 
551                 set_bit(pid, pid_list->pids);
552                 nr_pids++;
553 
554                 trace_parser_clear(&parser);
555                 ret = 0;
556         }
557         trace_parser_put(&parser);
558 
559         if (ret < 0) {
560                 trace_free_pid_list(pid_list);
561                 return ret;
562         }
563 
564         if (!nr_pids) {
565                 /* Cleared the list of pids */
566                 trace_free_pid_list(pid_list);
567                 read = ret;
568                 pid_list = NULL;
569         }
570 
571         *new_pid_list = pid_list;
572 
573         return read;
574 }
575 
576 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
577 {
578         u64 ts;
579 
580         /* Early boot up does not have a buffer yet */
581         if (!buf->buffer)
582                 return trace_clock_local();
583 
584         ts = ring_buffer_time_stamp(buf->buffer, cpu);
585         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
586 
587         return ts;
588 }
589 
590 u64 ftrace_now(int cpu)
591 {
592         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
593 }
594 
595 /**
596  * tracing_is_enabled - Show if global_trace has been disabled
597  *
598  * Shows if the global trace has been enabled or not. It uses the
 599  * mirror flag "buffer_disabled", which can be checked in fast paths such
 600  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
601  * need to know the accurate state, use tracing_is_on() which is a little
602  * slower, but accurate.
603  */
604 int tracing_is_enabled(void)
605 {
606         /*
607          * For quick access (irqsoff uses this in fast path), just
608          * return the mirror variable of the state of the ring buffer.
609          * It's a little racy, but we don't really care.
610          */
611         smp_rmb();
612         return !global_trace.buffer_disabled;
613 }
614 
615 /*
616  * trace_buf_size is the size in bytes that is allocated
617  * for a buffer. Note, the number of bytes is always rounded
618  * to page size.
619  *
620  * This number is purposely set to a low number of 16384.
621  * If the dump on oops happens, it will be much appreciated
 622  * to not have to wait for all that output. Anyway, this is
 623  * configurable at both boot time and run time.
624  */
625 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
626 
627 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
628 
629 /* trace_types holds a link list of available tracers. */
630 static struct tracer            *trace_types __read_mostly;
631 
632 /*
633  * trace_types_lock is used to protect the trace_types list.
634  */
635 DEFINE_MUTEX(trace_types_lock);
636 
637 /*
638  * serialize the access of the ring buffer
639  *
 640  * The ring buffer serializes readers, but that is only low-level protection.
 641  * The validity of the events (returned by ring_buffer_peek() etc.)
 642  * is not protected by the ring buffer.
 643  *
 644  * The content of events may become garbage if we allow another process to
 645  * consume these events concurrently:
 646  *   A) the page of the consumed events may become a normal page
 647  *      (not a reader page) in the ring buffer, and this page will be
 648  *      rewritten by the events producer.
 649  *   B) The page of the consumed events may become a page for splice_read,
 650  *      and this page will be returned to the system.
 651  *
 652  * These primitives allow multiple processes to access different cpu ring
 653  * buffers concurrently.
 654  *
 655  * These primitives don't distinguish read-only and read-consume access.
 656  * Multiple read-only accesses are also serialized.
657  */
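/*
 * On SMP (below), this is implemented with a global rw_semaphore plus a
 * per-cpu mutex: per-cpu readers take the rwsem for read and their cpu's
 * mutex, while RING_BUFFER_ALL_CPUS access takes the rwsem for write.
 */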
658 
659 #ifdef CONFIG_SMP
660 static DECLARE_RWSEM(all_cpu_access_lock);
661 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
662 
663 static inline void trace_access_lock(int cpu)
664 {
665         if (cpu == RING_BUFFER_ALL_CPUS) {
666                 /* gain it for accessing the whole ring buffer. */
667                 down_write(&all_cpu_access_lock);
668         } else {
669                 /* gain it for accessing a cpu ring buffer. */
670 
671                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
672                 down_read(&all_cpu_access_lock);
673 
674                 /* Secondly block other access to this @cpu ring buffer. */
675                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
676         }
677 }
678 
679 static inline void trace_access_unlock(int cpu)
680 {
681         if (cpu == RING_BUFFER_ALL_CPUS) {
682                 up_write(&all_cpu_access_lock);
683         } else {
684                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
685                 up_read(&all_cpu_access_lock);
686         }
687 }
688 
689 static inline void trace_access_lock_init(void)
690 {
691         int cpu;
692 
693         for_each_possible_cpu(cpu)
694                 mutex_init(&per_cpu(cpu_access_lock, cpu));
695 }
696 
697 #else
698 
699 static DEFINE_MUTEX(access_lock);
700 
701 static inline void trace_access_lock(int cpu)
702 {
703         (void)cpu;
704         mutex_lock(&access_lock);
705 }
706 
707 static inline void trace_access_unlock(int cpu)
708 {
709         (void)cpu;
710         mutex_unlock(&access_lock);
711 }
712 
713 static inline void trace_access_lock_init(void)
714 {
715 }
716 
717 #endif
718 
719 #ifdef CONFIG_STACKTRACE
720 static void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                  unsigned long flags,
722                                  int skip, int pc, struct pt_regs *regs);
723 static inline void ftrace_trace_stack(struct trace_array *tr,
724                                       struct ring_buffer *buffer,
725                                       unsigned long flags,
726                                       int skip, int pc, struct pt_regs *regs);
727 
728 #else
729 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
730                                         unsigned long flags,
731                                         int skip, int pc, struct pt_regs *regs)
732 {
733 }
734 static inline void ftrace_trace_stack(struct trace_array *tr,
735                                       struct ring_buffer *buffer,
736                                       unsigned long flags,
737                                       int skip, int pc, struct pt_regs *regs)
738 {
739 }
740 
741 #endif
742 
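/*
 * Fill in the common trace_entry header (flags, preempt count, pid, etc.)
 * and the event type of a freshly reserved ring buffer event.
 */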
743 static __always_inline void
744 trace_event_setup(struct ring_buffer_event *event,
745                   int type, unsigned long flags, int pc)
746 {
747         struct trace_entry *ent = ring_buffer_event_data(event);
748 
749         tracing_generic_entry_update(ent, flags, pc);
750         ent->type = type;
751 }
752 
753 static __always_inline struct ring_buffer_event *
754 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
755                           int type,
756                           unsigned long len,
757                           unsigned long flags, int pc)
758 {
759         struct ring_buffer_event *event;
760 
761         event = ring_buffer_lock_reserve(buffer, len);
762         if (event != NULL)
763                 trace_event_setup(event, type, flags, pc);
764 
765         return event;
766 }
767 
768 static void tracer_tracing_on(struct trace_array *tr)
769 {
770         if (tr->trace_buffer.buffer)
771                 ring_buffer_record_on(tr->trace_buffer.buffer);
772         /*
773          * This flag is looked at when buffers haven't been allocated
774          * yet, or by some tracers (like irqsoff), that just want to
775          * know if the ring buffer has been disabled, but it can handle
776          * races of where it gets disabled but we still do a record.
777          * As the check is in the fast path of the tracers, it is more
778          * important to be fast than accurate.
779          */
780         tr->buffer_disabled = 0;
781         /* Make the flag seen by readers */
782         smp_wmb();
783 }
784 
785 /**
786  * tracing_on - enable tracing buffers
787  *
788  * This function enables tracing buffers that may have been
789  * disabled with tracing_off.
790  */
791 void tracing_on(void)
792 {
793         tracer_tracing_on(&global_trace);
794 }
795 EXPORT_SYMBOL_GPL(tracing_on);
796 
797 
798 static __always_inline void
799 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
800 {
801         __this_cpu_write(trace_cmdline_save, true);
802 
803         /* If this is the temp buffer, we need to commit fully */
804         if (this_cpu_read(trace_buffered_event) == event) {
805                 /* Length is in event->array[0] */
806                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
807                 /* Release the temp buffer */
808                 this_cpu_dec(trace_buffered_event_cnt);
809         } else
810                 ring_buffer_unlock_commit(buffer, event);
811 }
812 
813 /**
814  * __trace_puts - write a constant string into the trace buffer.
815  * @ip:    The address of the caller
816  * @str:   The constant string to write
817  * @size:  The size of the string.
818  */
819 int __trace_puts(unsigned long ip, const char *str, int size)
820 {
821         struct ring_buffer_event *event;
822         struct ring_buffer *buffer;
823         struct print_entry *entry;
824         unsigned long irq_flags;
825         int alloc;
826         int pc;
827 
828         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
829                 return 0;
830 
831         pc = preempt_count();
832 
833         if (unlikely(tracing_selftest_running || tracing_disabled))
834                 return 0;
835 
836         alloc = sizeof(*entry) + size + 2; /* possible \n added */
837 
838         local_save_flags(irq_flags);
839         buffer = global_trace.trace_buffer.buffer;
840         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
841                                             irq_flags, pc);
842         if (!event)
843                 return 0;
844 
845         entry = ring_buffer_event_data(event);
846         entry->ip = ip;
847 
848         memcpy(&entry->buf, str, size);
849 
850         /* Add a newline if necessary */
851         if (entry->buf[size - 1] != '\n') {
852                 entry->buf[size] = '\n';
853                 entry->buf[size + 1] = '\0';
854         } else
855                 entry->buf[size] = '\0';
856 
857         __buffer_unlock_commit(buffer, event);
858         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
859 
860         return size;
861 }
862 EXPORT_SYMBOL_GPL(__trace_puts);
863 
864 /**
865  * __trace_bputs - write the pointer to a constant string into trace buffer
866  * @ip:    The address of the caller
867  * @str:   The constant string to write to the buffer to
868  */
869 int __trace_bputs(unsigned long ip, const char *str)
870 {
871         struct ring_buffer_event *event;
872         struct ring_buffer *buffer;
873         struct bputs_entry *entry;
874         unsigned long irq_flags;
875         int size = sizeof(struct bputs_entry);
876         int pc;
877 
878         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
879                 return 0;
880 
881         pc = preempt_count();
882 
883         if (unlikely(tracing_selftest_running || tracing_disabled))
884                 return 0;
885 
886         local_save_flags(irq_flags);
887         buffer = global_trace.trace_buffer.buffer;
888         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
889                                             irq_flags, pc);
890         if (!event)
891                 return 0;
892 
893         entry = ring_buffer_event_data(event);
894         entry->ip                       = ip;
895         entry->str                      = str;
896 
897         __buffer_unlock_commit(buffer, event);
898         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
899 
900         return 1;
901 }
902 EXPORT_SYMBOL_GPL(__trace_bputs);
903 
904 #ifdef CONFIG_TRACER_SNAPSHOT
905 /**
 906  * tracing_snapshot - take a snapshot of the current buffer.
907  *
908  * This causes a swap between the snapshot buffer and the current live
909  * tracing buffer. You can use this to take snapshots of the live
910  * trace when some condition is triggered, but continue to trace.
911  *
 912  * Note, make sure to allocate the snapshot either with
 913  * tracing_snapshot_alloc(), or by doing it manually
 914  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
915  *
916  * If the snapshot buffer is not allocated, it will stop tracing.
917  * Basically making a permanent snapshot.
918  */
919 void tracing_snapshot(void)
920 {
921         struct trace_array *tr = &global_trace;
922         struct tracer *tracer = tr->current_trace;
923         unsigned long flags;
924 
925         if (in_nmi()) {
926                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
927                 internal_trace_puts("*** snapshot is being ignored        ***\n");
928                 return;
929         }
930 
931         if (!tr->allocated_snapshot) {
932                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
933                 internal_trace_puts("*** stopping trace here!   ***\n");
934                 tracing_off();
935                 return;
936         }
937 
938         /* Note, snapshot can not be used when the tracer uses it */
939         if (tracer->use_max_tr) {
940                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
941                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
942                 return;
943         }
944 
945         local_irq_save(flags);
946         update_max_tr(tr, current, smp_processor_id());
947         local_irq_restore(flags);
948 }
949 EXPORT_SYMBOL_GPL(tracing_snapshot);
950 
951 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
952                                         struct trace_buffer *size_buf, int cpu_id);
953 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
954 
955 static int alloc_snapshot(struct trace_array *tr)
956 {
957         int ret;
958 
959         if (!tr->allocated_snapshot) {
960 
961                 /* allocate spare buffer */
962                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
963                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
964                 if (ret < 0)
965                         return ret;
966 
967                 tr->allocated_snapshot = true;
968         }
969 
970         return 0;
971 }
972 
973 static void free_snapshot(struct trace_array *tr)
974 {
975         /*
 976          * We don't free the ring buffer; instead, we resize it because
 977          * the max_tr ring buffer has some state (e.g. ring->clock) and
 978          * we want to preserve it.
979          */
980         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
981         set_buffer_entries(&tr->max_buffer, 1);
982         tracing_reset_online_cpus(&tr->max_buffer);
983         tr->allocated_snapshot = false;
984 }
985 
986 /**
987  * tracing_alloc_snapshot - allocate snapshot buffer.
988  *
989  * This only allocates the snapshot buffer if it isn't already
990  * allocated - it doesn't also take a snapshot.
991  *
992  * This is meant to be used in cases where the snapshot buffer needs
993  * to be set up for events that can't sleep but need to be able to
994  * trigger a snapshot.
995  */
996 int tracing_alloc_snapshot(void)
997 {
998         struct trace_array *tr = &global_trace;
999         int ret;
1000 
1001         ret = alloc_snapshot(tr);
1002         WARN_ON(ret < 0);
1003 
1004         return ret;
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1007 
1008 /**
1009  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1010  *
1011  * This is similar to tracing_snapshot(), but it will allocate the
1012  * snapshot buffer if it isn't already allocated. Use this only
1013  * where it is safe to sleep, as the allocation may sleep.
1014  *
1015  * This causes a swap between the snapshot buffer and the current live
1016  * tracing buffer. You can use this to take snapshots of the live
1017  * trace when some condition is triggered, but continue to trace.
1018  */
1019 void tracing_snapshot_alloc(void)
1020 {
1021         int ret;
1022 
1023         ret = tracing_alloc_snapshot();
1024         if (ret < 0)
1025                 return;
1026 
1027         tracing_snapshot();
1028 }
1029 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1030 #else
1031 void tracing_snapshot(void)
1032 {
1033         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1034 }
1035 EXPORT_SYMBOL_GPL(tracing_snapshot);
1036 int tracing_alloc_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1039         return -ENODEV;
1040 }
1041 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1042 void tracing_snapshot_alloc(void)
1043 {
1044         /* Give warning */
1045         tracing_snapshot();
1046 }
1047 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1048 #endif /* CONFIG_TRACER_SNAPSHOT */
1049 
1050 static void tracer_tracing_off(struct trace_array *tr)
1051 {
1052         if (tr->trace_buffer.buffer)
1053                 ring_buffer_record_off(tr->trace_buffer.buffer);
1054         /*
1055          * This flag is looked at when buffers haven't been allocated
1056          * yet, or by some tracers (like irqsoff), that just want to
1057          * know if the ring buffer has been disabled, but it can handle
1058          * races of where it gets disabled but we still do a record.
1059          * As the check is in the fast path of the tracers, it is more
1060          * important to be fast than accurate.
1061          */
1062         tr->buffer_disabled = 1;
1063         /* Make the flag seen by readers */
1064         smp_wmb();
1065 }
1066 
1067 /**
1068  * tracing_off - turn off tracing buffers
1069  *
1070  * This function stops the tracing buffers from recording data.
1071  * It does not disable any overhead the tracers themselves may
1072  * be causing. This function simply causes all recording to
1073  * the ring buffers to fail.
1074  */
1075 void tracing_off(void)
1076 {
1077         tracer_tracing_off(&global_trace);
1078 }
1079 EXPORT_SYMBOL_GPL(tracing_off);
1080 
1081 void disable_trace_on_warning(void)
1082 {
1083         if (__disable_trace_on_warning)
1084                 tracing_off();
1085 }
1086 
1087 /**
1088  * tracer_tracing_is_on - show real state of ring buffer enabled
1089  * @tr: the trace array to know if the ring buffer is enabled
1090  *
1091  * Shows real state of the ring buffer if it is enabled or not.
1092  */
1093 int tracer_tracing_is_on(struct trace_array *tr)
1094 {
1095         if (tr->trace_buffer.buffer)
1096                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1097         return !tr->buffer_disabled;
1098 }
1099 
1100 /**
1101  * tracing_is_on - show state of ring buffers enabled
1102  */
1103 int tracing_is_on(void)
1104 {
1105         return tracer_tracing_is_on(&global_trace);
1106 }
1107 EXPORT_SYMBOL_GPL(tracing_is_on);
1108 
1109 static int __init set_buf_size(char *str)
1110 {
1111         unsigned long buf_size;
1112 
1113         if (!str)
1114                 return 0;
1115         buf_size = memparse(str, &str);
1116         /* nr_entries can not be zero */
1117         if (buf_size == 0)
1118                 return 0;
1119         trace_buf_size = buf_size;
1120         return 1;
1121 }
1122 __setup("trace_buf_size=", set_buf_size);
1123 
1124 static int __init set_tracing_thresh(char *str)
1125 {
1126         unsigned long threshold;
1127         int ret;
1128 
1129         if (!str)
1130                 return 0;
1131         ret = kstrtoul(str, 0, &threshold);
1132         if (ret < 0)
1133                 return 0;
1134         tracing_thresh = threshold * 1000;
1135         return 1;
1136 }
1137 __setup("tracing_thresh=", set_tracing_thresh);
1138 
1139 unsigned long nsecs_to_usecs(unsigned long nsecs)
1140 {
1141         return nsecs / 1000;
1142 }
1143 
1144 /*
1145  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1146  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1147  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1148  * of strings in the order that the enums were defined.
1149  */
1150 #undef C
1151 #define C(a, b) b
1152 
1153 /* These must match the bit positions in trace_iterator_flags */
1154 static const char *trace_options[] = {
1155         TRACE_FLAGS
1156         NULL
1157 };
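/*
 * For example, with C(a, b) defined as "b", an entry such as
 * C(PRINT_PARENT, "print-parent") in TRACE_FLAGS (from trace.h) contributes
 * only the string "print-parent", so trace_options[] ends up as a
 * NULL-terminated array of the option-name strings, in enum order.
 */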
1158 
1159 static struct {
1160         u64 (*func)(void);
1161         const char *name;
1162         int in_ns;              /* is this clock in nanoseconds? */
1163 } trace_clocks[] = {
1164         { trace_clock_local,            "local",        1 },
1165         { trace_clock_global,           "global",       1 },
1166         { trace_clock_counter,          "counter",      0 },
1167         { trace_clock_jiffies,          "uptime",       0 },
1168         { trace_clock,                  "perf",         1 },
1169         { ktime_get_mono_fast_ns,       "mono",         1 },
1170         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1171         { ktime_get_boot_fast_ns,       "boot",         1 },
1172         ARCH_TRACE_CLOCKS
1173 };
1174 
1175 /*
1176  * trace_parser_get_init - gets the buffer for trace parser
1177  */
1178 int trace_parser_get_init(struct trace_parser *parser, int size)
1179 {
1180         memset(parser, 0, sizeof(*parser));
1181 
1182         parser->buffer = kmalloc(size, GFP_KERNEL);
1183         if (!parser->buffer)
1184                 return 1;
1185 
1186         parser->size = size;
1187         return 0;
1188 }
1189 
1190 /*
1191  * trace_parser_put - frees the buffer for trace parser
1192  */
1193 void trace_parser_put(struct trace_parser *parser)
1194 {
1195         kfree(parser->buffer);
1196 }
1197 
1198 /*
1199  * trace_get_user - reads the user input string separated by space
1200  * (matched by isspace(ch))
1201  *
1202  * For each string found the 'struct trace_parser' is updated,
1203  * and the function returns.
1204  *
1205  * Returns number of bytes read.
1206  *
1207  * See kernel/trace/trace.h for 'struct trace_parser' details.
1208  */
1209 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1210         size_t cnt, loff_t *ppos)
1211 {
1212         char ch;
1213         size_t read = 0;
1214         ssize_t ret;
1215 
1216         if (!*ppos)
1217                 trace_parser_clear(parser);
1218 
1219         ret = get_user(ch, ubuf++);
1220         if (ret)
1221                 goto out;
1222 
1223         read++;
1224         cnt--;
1225 
1226         /*
1227          * The parser is not finished with the last write,
1228          * continue reading the user input without skipping spaces.
1229          */
1230         if (!parser->cont) {
1231                 /* skip white space */
1232                 while (cnt && isspace(ch)) {
1233                         ret = get_user(ch, ubuf++);
1234                         if (ret)
1235                                 goto out;
1236                         read++;
1237                         cnt--;
1238                 }
1239 
1240                 /* only spaces were written */
1241                 if (isspace(ch)) {
1242                         *ppos += read;
1243                         ret = read;
1244                         goto out;
1245                 }
1246 
1247                 parser->idx = 0;
1248         }
1249 
1250         /* read the non-space input */
1251         while (cnt && !isspace(ch)) {
1252                 if (parser->idx < parser->size - 1)
1253                         parser->buffer[parser->idx++] = ch;
1254                 else {
1255                         ret = -EINVAL;
1256                         goto out;
1257                 }
1258                 ret = get_user(ch, ubuf++);
1259                 if (ret)
1260                         goto out;
1261                 read++;
1262                 cnt--;
1263         }
1264 
1265         /* We either got finished input or we have to wait for another call. */
1266         if (isspace(ch)) {
1267                 parser->buffer[parser->idx] = 0;
1268                 parser->cont = false;
1269         } else if (parser->idx < parser->size - 1) {
1270                 parser->cont = true;
1271                 parser->buffer[parser->idx++] = ch;
1272         } else {
1273                 ret = -EINVAL;
1274                 goto out;
1275         }
1276 
1277         *ppos += read;
1278         ret = read;
1279 
1280 out:
1281         return ret;
1282 }
1283 
1284 /* TODO add a seq_buf_to_buffer() */
1285 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1286 {
1287         int len;
1288 
1289         if (trace_seq_used(s) <= s->seq.readpos)
1290                 return -EBUSY;
1291 
1292         len = trace_seq_used(s) - s->seq.readpos;
1293         if (cnt > len)
1294                 cnt = len;
1295         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1296 
1297         s->seq.readpos += cnt;
1298         return cnt;
1299 }
1300 
1301 unsigned long __read_mostly     tracing_thresh;
1302 
1303 #ifdef CONFIG_TRACER_MAX_TRACE
1304 /*
1305  * Copy the new maximum trace into the separate maximum-trace
1306  * structure. (this way the maximum trace is permanently saved,
1307  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1308  */
1309 static void
1310 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312         struct trace_buffer *trace_buf = &tr->trace_buffer;
1313         struct trace_buffer *max_buf = &tr->max_buffer;
1314         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1315         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1316 
1317         max_buf->cpu = cpu;
1318         max_buf->time_start = data->preempt_timestamp;
1319 
1320         max_data->saved_latency = tr->max_latency;
1321         max_data->critical_start = data->critical_start;
1322         max_data->critical_end = data->critical_end;
1323 
1324         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1325         max_data->pid = tsk->pid;
1326         /*
1327          * If tsk == current, then use current_uid(), as that does not use
1328          * RCU. The irq tracer can be called out of RCU scope.
1329          */
1330         if (tsk == current)
1331                 max_data->uid = current_uid();
1332         else
1333                 max_data->uid = task_uid(tsk);
1334 
1335         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1336         max_data->policy = tsk->policy;
1337         max_data->rt_priority = tsk->rt_priority;
1338 
1339         /* record this task's comm */
1340         tracing_record_cmdline(tsk);
1341 }
1342 
1343 /**
1344  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1345  * @tr: tracer
1346  * @tsk: the task with the latency
1347  * @cpu: The cpu that initiated the trace.
1348  *
1349  * Flip the buffers between the @tr and the max_tr and record information
1350  * about which task was the cause of this latency.
1351  */
1352 void
1353 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1354 {
1355         struct ring_buffer *buf;
1356 
1357         if (tr->stop_count)
1358                 return;
1359 
1360         WARN_ON_ONCE(!irqs_disabled());
1361 
1362         if (!tr->allocated_snapshot) {
1363                 /* Only the nop tracer should hit this when disabling */
1364                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1365                 return;
1366         }
1367 
1368         arch_spin_lock(&tr->max_lock);
1369 
1370         buf = tr->trace_buffer.buffer;
1371         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1372         tr->max_buffer.buffer = buf;
1373 
1374         __update_max_tr(tr, tsk, cpu);
1375         arch_spin_unlock(&tr->max_lock);
1376 }
1377 
1378 /**
1379  * update_max_tr_single - only copy one trace over, and reset the rest
1380  * @tr: tracer
1381  * @tsk: task with the latency
1382  * @cpu: the cpu of the buffer to copy.
1383  *
1384  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1385  */
1386 void
1387 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1388 {
1389         int ret;
1390 
1391         if (tr->stop_count)
1392                 return;
1393 
1394         WARN_ON_ONCE(!irqs_disabled());
1395         if (!tr->allocated_snapshot) {
1396                 /* Only the nop tracer should hit this when disabling */
1397                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1398                 return;
1399         }
1400 
1401         arch_spin_lock(&tr->max_lock);
1402 
1403         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1404 
1405         if (ret == -EBUSY) {
1406                 /*
1407                  * We failed to swap the buffer due to a commit taking
1408                  * place on this CPU. We fail to record, but we reset
1409                  * the max trace buffer (no one writes directly to it)
1410                  * and flag that it failed.
1411                  */
1412                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1413                         "Failed to swap buffers due to commit in progress\n");
1414         }
1415 
1416         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1417 
1418         __update_max_tr(tr, tsk, cpu);
1419         arch_spin_unlock(&tr->max_lock);
1420 }
1421 #endif /* CONFIG_TRACER_MAX_TRACE */
1422 
1423 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1424 {
1425         /* Iterators are static, they should be filled or empty */
1426         if (trace_buffer_iter(iter, iter->cpu_file))
1427                 return 0;
1428 
1429         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1430                                 full);
1431 }
1432 
1433 #ifdef CONFIG_FTRACE_STARTUP_TEST
1434 static int run_tracer_selftest(struct tracer *type)
1435 {
1436         struct trace_array *tr = &global_trace;
1437         struct tracer *saved_tracer = tr->current_trace;
1438         int ret;
1439 
1440         if (!type->selftest || tracing_selftest_disabled)
1441                 return 0;
1442 
1443         /*
1444          * Run a selftest on this tracer.
1445          * Here we reset the trace buffer, and set the current
1446          * tracer to be this tracer. The tracer can then run some
1447          * internal tracing to verify that everything is in order.
1448          * If we fail, we do not register this tracer.
1449          */
1450         tracing_reset_online_cpus(&tr->trace_buffer);
1451 
1452         tr->current_trace = type;
1453 
1454 #ifdef CONFIG_TRACER_MAX_TRACE
1455         if (type->use_max_tr) {
1456                 /* If we expanded the buffers, make sure the max is expanded too */
1457                 if (ring_buffer_expanded)
1458                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1459                                            RING_BUFFER_ALL_CPUS);
1460                 tr->allocated_snapshot = true;
1461         }
1462 #endif
1463 
1464         /* the test is responsible for initializing and enabling */
1465         pr_info("Testing tracer %s: ", type->name);
1466         ret = type->selftest(type, tr);
1467         /* the test is responsible for resetting too */
1468         tr->current_trace = saved_tracer;
1469         if (ret) {
1470                 printk(KERN_CONT "FAILED!\n");
1471                 /* Add the warning after printing 'FAILED' */
1472                 WARN_ON(1);
1473                 return -1;
1474         }
1475         /* Only reset on passing, to avoid touching corrupted buffers */
1476         tracing_reset_online_cpus(&tr->trace_buffer);
1477 
1478 #ifdef CONFIG_TRACER_MAX_TRACE
1479         if (type->use_max_tr) {
1480                 tr->allocated_snapshot = false;
1481 
1482                 /* Shrink the max buffer again */
1483                 if (ring_buffer_expanded)
1484                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1485                                            RING_BUFFER_ALL_CPUS);
1486         }
1487 #endif
1488 
1489         printk(KERN_CONT "PASSED\n");
1490         return 0;
1491 }
1492 #else
1493 static inline int run_tracer_selftest(struct tracer *type)
1494 {
1495         return 0;
1496 }
1497 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1498 
1499 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1500 
1501 static void __init apply_trace_boot_options(void);
1502 
1503 /**
1504  * register_tracer - register a tracer with the ftrace system.
1505  * @type: the plugin for the tracer
1506  *
1507  * Register a new plugin tracer.
1508  */
1509 int __init register_tracer(struct tracer *type)
1510 {
1511         struct tracer *t;
1512         int ret = 0;
1513 
1514         if (!type->name) {
1515                 pr_info("Tracer must have a name\n");
1516                 return -1;
1517         }
1518 
1519         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1520                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1521                 return -1;
1522         }
1523 
1524         mutex_lock(&trace_types_lock);
1525 
1526         tracing_selftest_running = true;
1527 
1528         for (t = trace_types; t; t = t->next) {
1529                 if (strcmp(type->name, t->name) == 0) {
1530                         /* already found */
1531                         pr_info("Tracer %s already registered\n",
1532                                 type->name);
1533                         ret = -1;
1534                         goto out;
1535                 }
1536         }
1537 
1538         if (!type->set_flag)
1539                 type->set_flag = &dummy_set_flag;
1540         if (!type->flags) {
1541                 /* allocate a dummy tracer_flags */
1542                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1543                 if (!type->flags) {
1544                         ret = -ENOMEM;
1545                         goto out;
1546                 }
1547                 type->flags->val = 0;
1548                 type->flags->opts = dummy_tracer_opt;
1549         } else
1550                 if (!type->flags->opts)
1551                         type->flags->opts = dummy_tracer_opt;
1552 
1553         /* store the tracer for __set_tracer_option */
1554         type->flags->trace = type;
1555 
1556         ret = run_tracer_selftest(type);
1557         if (ret < 0)
1558                 goto out;
1559 
1560         type->next = trace_types;
1561         trace_types = type;
1562         add_tracer_options(&global_trace, type);
1563 
1564  out:
1565         tracing_selftest_running = false;
1566         mutex_unlock(&trace_types_lock);
1567 
1568         if (ret || !default_bootup_tracer)
1569                 goto out_unlock;
1570 
1571         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1572                 goto out_unlock;
1573 
1574         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1575         /* Do we want this tracer to start on bootup? */
1576         tracing_set_tracer(&global_trace, type->name);
1577         default_bootup_tracer = NULL;
1578 
1579         apply_trace_boot_options();
1580 
1581         /* disable other selftests, since this will break them. */
1582         tracing_selftest_disabled = true;
1583 #ifdef CONFIG_FTRACE_STARTUP_TEST
1584         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1585                type->name);
1586 #endif
1587 
1588  out_unlock:
1589         return ret;
1590 }
1591 
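/*
 * Empty the ring buffer of @buf for the given @cpu. Recording is disabled
 * around the reset so that in-flight commits can finish first.
 */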
1592 void tracing_reset(struct trace_buffer *buf, int cpu)
1593 {
1594         struct ring_buffer *buffer = buf->buffer;
1595 
1596         if (!buffer)
1597                 return;
1598 
1599         ring_buffer_record_disable(buffer);
1600 
1601         /* Make sure all commits have finished */
1602         synchronize_sched();
1603         ring_buffer_reset_cpu(buffer, cpu);
1604 
1605         ring_buffer_record_enable(buffer);
1606 }
1607 
1608 void tracing_reset_online_cpus(struct trace_buffer *buf)
1609 {
1610         struct ring_buffer *buffer = buf->buffer;
1611         int cpu;
1612 
1613         if (!buffer)
1614                 return;
1615 
1616         ring_buffer_record_disable(buffer);
1617 
1618         /* Make sure all commits have finished */
1619         synchronize_sched();
1620 
1621         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1622 
1623         for_each_online_cpu(cpu)
1624                 ring_buffer_reset_cpu(buffer, cpu);
1625 
1626         ring_buffer_record_enable(buffer);
1627 }
1628 
1629 /* Must have trace_types_lock held */
1630 void tracing_reset_all_online_cpus(void)
1631 {
1632         struct trace_array *tr;
1633 
1634         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1635                 tracing_reset_online_cpus(&tr->trace_buffer);
1636 #ifdef CONFIG_TRACER_MAX_TRACE
1637                 tracing_reset_online_cpus(&tr->max_buffer);
1638 #endif
1639         }
1640 }
1641 
1642 #define SAVED_CMDLINES_DEFAULT 128
1643 #define NO_CMDLINE_MAP UINT_MAX
1644 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1645 struct saved_cmdlines_buffer {
1646         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1647         unsigned *map_cmdline_to_pid;
1648         unsigned cmdline_num;
1649         int cmdline_idx;
1650         char *saved_cmdlines;
1651 };
1652 static struct saved_cmdlines_buffer *savedcmd;
1653 
1654 /* temporary disable recording */
1655 static atomic_t trace_record_cmdline_disabled __read_mostly;
1656 
1657 static inline char *get_saved_cmdlines(int idx)
1658 {
1659         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1660 }
1661 
1662 static inline void set_cmdline(int idx, const char *cmdline)
1663 {
1664         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1665 }
1666 
1667 static int allocate_cmdlines_buffer(unsigned int val,
1668                                     struct saved_cmdlines_buffer *s)
1669 {
1670         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1671                                         GFP_KERNEL);
1672         if (!s->map_cmdline_to_pid)
1673                 return -ENOMEM;
1674 
1675         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1676         if (!s->saved_cmdlines) {
1677                 kfree(s->map_cmdline_to_pid);
1678                 return -ENOMEM;
1679         }
1680 
1681         s->cmdline_idx = 0;
1682         s->cmdline_num = val;
1683         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1684                sizeof(s->map_pid_to_cmdline));
1685         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1686                val * sizeof(*s->map_cmdline_to_pid));
1687 
1688         return 0;
1689 }
1690 
1691 static int trace_create_savedcmd(void)
1692 {
1693         int ret;
1694 
1695         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1696         if (!savedcmd)
1697                 return -ENOMEM;
1698 
1699         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1700         if (ret < 0) {
1701                 kfree(savedcmd);
1702                 savedcmd = NULL;
1703                 return -ENOMEM;
1704         }
1705 
1706         return 0;
1707 }
1708 
1709 int is_tracing_stopped(void)
1710 {
1711         return global_trace.stop_count;
1712 }
1713 
1714 /**
1715  * tracing_start - quick start of the tracer
1716  *
1717  * If tracing is enabled but was stopped by tracing_stop,
1718  * this will start the tracer back up.
1719  */
1720 void tracing_start(void)
1721 {
1722         struct ring_buffer *buffer;
1723         unsigned long flags;
1724 
1725         if (tracing_disabled)
1726                 return;
1727 
1728         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1729         if (--global_trace.stop_count) {
1730                 if (global_trace.stop_count < 0) {
1731                         /* Someone screwed up their debugging */
1732                         WARN_ON_ONCE(1);
1733                         global_trace.stop_count = 0;
1734                 }
1735                 goto out;
1736         }
1737 
1738         /* Prevent the buffers from switching */
1739         arch_spin_lock(&global_trace.max_lock);
1740 
1741         buffer = global_trace.trace_buffer.buffer;
1742         if (buffer)
1743                 ring_buffer_record_enable(buffer);
1744 
1745 #ifdef CONFIG_TRACER_MAX_TRACE
1746         buffer = global_trace.max_buffer.buffer;
1747         if (buffer)
1748                 ring_buffer_record_enable(buffer);
1749 #endif
1750 
1751         arch_spin_unlock(&global_trace.max_lock);
1752 
1753  out:
1754         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1755 }
1756 
1757 static void tracing_start_tr(struct trace_array *tr)
1758 {
1759         struct ring_buffer *buffer;
1760         unsigned long flags;
1761 
1762         if (tracing_disabled)
1763                 return;
1764 
1765         /* If global, we need to also start the max tracer */
1766         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1767                 return tracing_start();
1768 
1769         raw_spin_lock_irqsave(&tr->start_lock, flags);
1770 
1771         if (--tr->stop_count) {
1772                 if (tr->stop_count < 0) {
1773                         /* Someone screwed up their debugging */
1774                         WARN_ON_ONCE(1);
1775                         tr->stop_count = 0;
1776                 }
1777                 goto out;
1778         }
1779 
1780         buffer = tr->trace_buffer.buffer;
1781         if (buffer)
1782                 ring_buffer_record_enable(buffer);
1783 
1784  out:
1785         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1786 }
1787 
1788 /**
1789  * tracing_stop - quick stop of the tracer
1790  *
1791  * Light weight way to stop tracing. Use in conjunction with
1792  * tracing_start.
1793  */
1794 void tracing_stop(void)
1795 {
1796         struct ring_buffer *buffer;
1797         unsigned long flags;
1798 
1799         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1800         if (global_trace.stop_count++)
1801                 goto out;
1802 
1803         /* Prevent the buffers from switching */
1804         arch_spin_lock(&global_trace.max_lock);
1805 
1806         buffer = global_trace.trace_buffer.buffer;
1807         if (buffer)
1808                 ring_buffer_record_disable(buffer);
1809 
1810 #ifdef CONFIG_TRACER_MAX_TRACE
1811         buffer = global_trace.max_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_disable(buffer);
1814 #endif
1815 
1816         arch_spin_unlock(&global_trace.max_lock);
1817 
1818  out:
1819         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1820 }
1821 
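/*
 * Illustrative sketch, not from trace.c: tracing_stop()/tracing_start()
 * nest via stop_count, so a debugging helper can bracket a region without
 * recording it, regardless of any other stoppers. The helper below is
 * hypothetical.
 */
static void example_run_untraced(void (*fn)(void *data), void *data)
{
	tracing_stop();		/* bumps global_trace.stop_count */
	fn(data);		/* runs with ring-buffer recording disabled */
	tracing_start();	/* recording resumes when the count drops to zero */
}
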
1822 static void tracing_stop_tr(struct trace_array *tr)
1823 {
1824         struct ring_buffer *buffer;
1825         unsigned long flags;
1826 
1827         /* If global, we need to also stop the max tracer */
1828         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1829                 return tracing_stop();
1830 
1831         raw_spin_lock_irqsave(&tr->start_lock, flags);
1832         if (tr->stop_count++)
1833                 goto out;
1834 
1835         buffer = tr->trace_buffer.buffer;
1836         if (buffer)
1837                 ring_buffer_record_disable(buffer);
1838 
1839  out:
1840         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1841 }
1842 
1843 void trace_stop_cmdline_recording(void);
1844 
1845 static int trace_save_cmdline(struct task_struct *tsk)
1846 {
1847         unsigned pid, idx;
1848 
1849         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1850                 return 0;
1851 
1852         /*
1853          * It's not the end of the world if we don't get
1854          * the lock, but we also don't want to spin
1855          * nor do we want to disable interrupts,
1856          * so if we miss here, then better luck next time.
1857          */
1858         if (!arch_spin_trylock(&trace_cmdline_lock))
1859                 return 0;
1860 
1861         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1862         if (idx == NO_CMDLINE_MAP) {
1863                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1864 
1865                 /*
1866                  * Check whether the cmdline buffer at idx has a pid
1867                  * mapped. We are going to overwrite that entry so we
1868                  * need to clear the map_pid_to_cmdline. Otherwise we
1869                  * would read the new comm for the old pid.
1870                  */
1871                 pid = savedcmd->map_cmdline_to_pid[idx];
1872                 if (pid != NO_CMDLINE_MAP)
1873                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1874 
1875                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1876                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1877 
1878                 savedcmd->cmdline_idx = idx;
1879         }
1880 
1881         set_cmdline(idx, tsk->comm);
1882 
1883         arch_spin_unlock(&trace_cmdline_lock);
1884 
1885         return 1;
1886 }
1887 
1888 static void __trace_find_cmdline(int pid, char comm[])
1889 {
1890         unsigned map;
1891 
1892         if (!pid) {
1893                 strcpy(comm, "<idle>");
1894                 return;
1895         }
1896 
1897         if (WARN_ON_ONCE(pid < 0)) {
1898                 strcpy(comm, "<XXX>");
1899                 return;
1900         }
1901 
1902         if (pid > PID_MAX_DEFAULT) {
1903                 strcpy(comm, "<...>");
1904                 return;
1905         }
1906 
1907         map = savedcmd->map_pid_to_cmdline[pid];
1908         if (map != NO_CMDLINE_MAP)
1909                 strcpy(comm, get_saved_cmdlines(map));
1910         else
1911                 strcpy(comm, "<...>");
1912 }
1913 
1914 void trace_find_cmdline(int pid, char comm[])
1915 {
1916         preempt_disable();
1917         arch_spin_lock(&trace_cmdline_lock);
1918 
1919         __trace_find_cmdline(pid, comm);
1920 
1921         arch_spin_unlock(&trace_cmdline_lock);
1922         preempt_enable();
1923 }
1924 
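/*
 * Illustrative sketch, not from trace.c: output code typically uses
 * trace_find_cmdline() like the hypothetical helper below; the caller
 * supplies a TASK_COMM_LEN buffer and gets either the saved comm or
 * "<...>" when the pid is no longer in the map.
 */
static void example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}
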
1925 void tracing_record_cmdline(struct task_struct *tsk)
1926 {
1927         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1928                 return;
1929 
1930         if (!__this_cpu_read(trace_cmdline_save))
1931                 return;
1932 
1933         if (trace_save_cmdline(tsk))
1934                 __this_cpu_write(trace_cmdline_save, false);
1935 }
1936 
1937 void
1938 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1939                              int pc)
1940 {
1941         struct task_struct *tsk = current;
1942 
1943         entry->preempt_count            = pc & 0xff;
1944         entry->pid                      = (tsk) ? tsk->pid : 0;
1945         entry->flags =
1946 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1947                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1948 #else
1949                 TRACE_FLAG_IRQS_NOSUPPORT |
1950 #endif
1951                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1952                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1953                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1954                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1955                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1956 }
1957 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1958 
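/*
 * Illustrative sketch, not from trace.c: callers normally capture the irq
 * flags and preempt count themselves and let tracing_generic_entry_update()
 * pack them into the entry header, as in this hypothetical helper.
 */
static void example_fill_entry_header(struct trace_entry *ent)
{
	unsigned long irq_flags;
	int pc = preempt_count();

	local_save_flags(irq_flags);
	tracing_generic_entry_update(ent, irq_flags, pc);
	/* ent->pid, ent->flags and ent->preempt_count are now filled in */
}
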
1959 struct ring_buffer_event *
1960 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1961                           int type,
1962                           unsigned long len,
1963                           unsigned long flags, int pc)
1964 {
1965         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1966 }
1967 
1968 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1969 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1970 static int trace_buffered_event_ref;
1971 
1972 /**
1973  * trace_buffered_event_enable - enable buffering events
1974  *
1975  * When events are being filtered, it is quicker to use a temporary
1976  * buffer to write the event data into if there's a likely chance
1977  * that it will not be committed. The discard of the ring buffer
1978  * is not as fast as committing, and is much slower than copying
1979  * a commit.
1980  *
1981  * When an event is to be filtered, allocate per cpu buffers to
1982  * write the event data into, and if the event is filtered and discarded
1983  * it is simply dropped, otherwise, the entire data is to be committed
1984  * in one shot.
1985  */
1986 void trace_buffered_event_enable(void)
1987 {
1988         struct ring_buffer_event *event;
1989         struct page *page;
1990         int cpu;
1991 
1992         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1993 
1994         if (trace_buffered_event_ref++)
1995                 return;
1996 
1997         for_each_tracing_cpu(cpu) {
1998                 page = alloc_pages_node(cpu_to_node(cpu),
1999                                         GFP_KERNEL | __GFP_NORETRY, 0);
2000                 if (!page)
2001                         goto failed;
2002 
2003                 event = page_address(page);
2004                 memset(event, 0, sizeof(*event));
2005 
2006                 per_cpu(trace_buffered_event, cpu) = event;
2007 
2008                 preempt_disable();
2009                 if (cpu == smp_processor_id() &&
2010                     this_cpu_read(trace_buffered_event) !=
2011                     per_cpu(trace_buffered_event, cpu))
2012                         WARN_ON_ONCE(1);
2013                 preempt_enable();
2014         }
2015 
2016         return;
2017  failed:
2018         trace_buffered_event_disable();
2019 }
2020 
2021 static void enable_trace_buffered_event(void *data)
2022 {
2023         /* Probably not needed, but do it anyway */
2024         smp_rmb();
2025         this_cpu_dec(trace_buffered_event_cnt);
2026 }
2027 
2028 static void disable_trace_buffered_event(void *data)
2029 {
2030         this_cpu_inc(trace_buffered_event_cnt);
2031 }
2032 
2033 /**
2034  * trace_buffered_event_disable - disable buffering events
2035  *
2036  * When a filter is removed, it is faster to not use the buffered
2037  * events, and to commit directly into the ring buffer. Free up
2038  * the temp buffers when there are no more users. This requires
2039  * special synchronization with current events.
2040  */
2041 void trace_buffered_event_disable(void)
2042 {
2043         int cpu;
2044 
2045         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2046 
2047         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2048                 return;
2049 
2050         if (--trace_buffered_event_ref)
2051                 return;
2052 
2053         preempt_disable();
2054         /* For each CPU, set the buffer as used. */
2055         smp_call_function_many(tracing_buffer_mask,
2056                                disable_trace_buffered_event, NULL, 1);
2057         preempt_enable();
2058 
2059         /* Wait for all current users to finish */
2060         synchronize_sched();
2061 
2062         for_each_tracing_cpu(cpu) {
2063                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2064                 per_cpu(trace_buffered_event, cpu) = NULL;
2065         }
2066         /*
2067          * Make sure trace_buffered_event is NULL before clearing
2068          * trace_buffered_event_cnt.
2069          */
2070         smp_wmb();
2071 
2072         preempt_disable();
2073         /* Do the work on each cpu */
2074         smp_call_function_many(tracing_buffer_mask,
2075                                enable_trace_buffered_event, NULL, 1);
2076         preempt_enable();
2077 }
2078 
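/*
 * Illustrative sketch, not from trace.c: the buffered-event path is
 * reference counted and, per the WARN_ON_ONCE() checks above, must only be
 * toggled with event_mutex held. The helper below is hypothetical.
 */
static void example_set_event_filtering(bool filtering)
{
	mutex_lock(&event_mutex);
	if (filtering)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
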
2079 static struct ring_buffer *temp_buffer;
2080 
2081 struct ring_buffer_event *
2082 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2083                           struct trace_event_file *trace_file,
2084                           int type, unsigned long len,
2085                           unsigned long flags, int pc)
2086 {
2087         struct ring_buffer_event *entry;
2088         int val;
2089 
2090         *current_rb = trace_file->tr->trace_buffer.buffer;
2091 
2092         if ((trace_file->flags &
2093              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2094             (entry = this_cpu_read(trace_buffered_event))) {
2095                 /* Try to use the per cpu buffer first */
2096                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2097                 if (val == 1) {
2098                         trace_event_setup(entry, type, flags, pc);
2099                         entry->array[0] = len;
2100                         return entry;
2101                 }
2102                 this_cpu_dec(trace_buffered_event_cnt);
2103         }
2104 
2105         entry = __trace_buffer_lock_reserve(*current_rb,
2106                                             type, len, flags, pc);
2107         /*
2108          * If tracing is off, but we have triggers enabled
2109          * we still need to look at the event data. Use the temp_buffer
2110          * to store the trace event for the trigger to use. It's recursion
2111          * safe and will not be recorded anywhere.
2112          */
2113         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2114                 *current_rb = temp_buffer;
2115                 entry = __trace_buffer_lock_reserve(*current_rb,
2116                                                     type, len, flags, pc);
2117         }
2118         return entry;
2119 }
2120 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2121 
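/*
 * Illustrative sketch, not from trace.c: the generated trace-event code
 * pairs trace_event_buffer_lock_reserve() with event_trigger_unlock_commit()
 * roughly as below. The helper, "type" and "len" are hypothetical; real
 * callers use the event's registered type and sizeof() its entry structure.
 */
static void example_write_event(struct trace_event_file *file,
				int type, unsigned long len)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	unsigned long irq_flags;
	int pc = preempt_count();

	local_save_flags(irq_flags);
	event = trace_event_buffer_lock_reserve(&buffer, file, type, len,
						irq_flags, pc);
	if (!event)
		return;

	/* fill ring_buffer_event_data(event) with the event payload here */

	event_trigger_unlock_commit(file, buffer, event,
				    ring_buffer_event_data(event),
				    irq_flags, pc);
}
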
2122 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2123 static DEFINE_MUTEX(tracepoint_printk_mutex);
2124 
2125 static void output_printk(struct trace_event_buffer *fbuffer)
2126 {
2127         struct trace_event_call *event_call;
2128         struct trace_event *event;
2129         unsigned long flags;
2130         struct trace_iterator *iter = tracepoint_print_iter;
2131 
2132         /* We should never get here if iter is NULL */
2133         if (WARN_ON_ONCE(!iter))
2134                 return;
2135 
2136         event_call = fbuffer->trace_file->event_call;
2137         if (!event_call || !event_call->event.funcs ||
2138             !event_call->event.funcs->trace)
2139                 return;
2140 
2141         event = &fbuffer->trace_file->event_call->event;
2142 
2143         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2144         trace_seq_init(&iter->seq);
2145         iter->ent = fbuffer->entry;
2146         event_call->event.funcs->trace(iter, 0, event);
2147         trace_seq_putc(&iter->seq, 0);
2148         printk("%s", iter->seq.buffer);
2149 
2150         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2151 }
2152 
2153 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2154                              void __user *buffer, size_t *lenp,
2155                              loff_t *ppos)
2156 {
2157         int save_tracepoint_printk;
2158         int ret;
2159 
2160         mutex_lock(&tracepoint_printk_mutex);
2161         save_tracepoint_printk = tracepoint_printk;
2162 
2163         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2164 
2165         /*
2166          * This will force exiting early, as tracepoint_printk
2167          * is always zero when tracepoint_print_iter is not allocated
2168          */
2169         if (!tracepoint_print_iter)
2170                 tracepoint_printk = 0;
2171 
2172         if (save_tracepoint_printk == tracepoint_printk)
2173                 goto out;
2174 
2175         if (tracepoint_printk)
2176                 static_key_enable(&tracepoint_printk_key.key);
2177         else
2178                 static_key_disable(&tracepoint_printk_key.key);
2179 
2180  out:
2181         mutex_unlock(&tracepoint_printk_mutex);
2182 
2183         return ret;
2184 }
2185 
2186 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2187 {
2188         if (static_key_false(&tracepoint_printk_key.key))
2189                 output_printk(fbuffer);
2190 
2191         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2192                                     fbuffer->event, fbuffer->entry,
2193                                     fbuffer->flags, fbuffer->pc);
2194 }
2195 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2196 
2197 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2198                                      struct ring_buffer *buffer,
2199                                      struct ring_buffer_event *event,
2200                                      unsigned long flags, int pc,
2201                                      struct pt_regs *regs)
2202 {
2203         __buffer_unlock_commit(buffer, event);
2204 
2205         /*
2206          * If regs is not set, then skip the following callers:
2207          *   trace_buffer_unlock_commit_regs
2208          *   event_trigger_unlock_commit
2209          *   trace_event_buffer_commit
2210          *   trace_event_raw_event_sched_switch
2211          * Note, we can still get here via blktrace, wakeup tracer
2212          * and mmiotrace, but that's ok if they lose a function or
2213          * two. They are not that meaningful.
2214          */
2215         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2216         ftrace_trace_userstack(buffer, flags, pc);
2217 }
2218 
2219 /*
2220  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2221  */
2222 void
2223 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2224                                    struct ring_buffer_event *event)
2225 {
2226         __buffer_unlock_commit(buffer, event);
2227 }
2228 
2229 static void
2230 trace_process_export(struct trace_export *export,
2231                struct ring_buffer_event *event)
2232 {
2233         struct trace_entry *entry;
2234         unsigned int size = 0;
2235 
2236         entry = ring_buffer_event_data(event);
2237         size = ring_buffer_event_length(event);
2238         export->write(entry, size);
2239 }
2240 
2241 static DEFINE_MUTEX(ftrace_export_lock);
2242 
2243 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2244 
2245 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2246 
2247 static inline void ftrace_exports_enable(void)
2248 {
2249         static_branch_enable(&ftrace_exports_enabled);
2250 }
2251 
2252 static inline void ftrace_exports_disable(void)
2253 {
2254         static_branch_disable(&ftrace_exports_enabled);
2255 }
2256 
2257 void ftrace_exports(struct ring_buffer_event *event)
2258 {
2259         struct trace_export *export;
2260 
2261         preempt_disable_notrace();
2262 
2263         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2264         while (export) {
2265                 trace_process_export(export, event);
2266                 export = rcu_dereference_raw_notrace(export->next);
2267         }
2268 
2269         preempt_enable_notrace();
2270 }
2271 
2272 static inline void
2273 add_trace_export(struct trace_export **list, struct trace_export *export)
2274 {
2275         rcu_assign_pointer(export->next, *list);
2276         /*
2277          * We are entering export into the list but another
2278          * CPU might be walking that list. We need to make sure
2279          * the export->next pointer is valid before another CPU sees
2280          * the export pointer included into the list.
2281          */
2282         rcu_assign_pointer(*list, export);
2283 }
2284 
2285 static inline int
2286 rm_trace_export(struct trace_export **list, struct trace_export *export)
2287 {
2288         struct trace_export **p;
2289 
2290         for (p = list; *p != NULL; p = &(*p)->next)
2291                 if (*p == export)
2292                         break;
2293 
2294         if (*p != export)
2295                 return -1;
2296 
2297         rcu_assign_pointer(*p, (*p)->next);
2298 
2299         return 0;
2300 }
2301 
2302 static inline void
2303 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2304 {
2305         if (*list == NULL)
2306                 ftrace_exports_enable();
2307 
2308         add_trace_export(list, export);
2309 }
2310 
2311 static inline int
2312 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2313 {
2314         int ret;
2315 
2316         ret = rm_trace_export(list, export);
2317         if (*list == NULL)
2318                 ftrace_exports_disable();
2319 
2320         return ret;
2321 }
2322 
2323 int register_ftrace_export(struct trace_export *export)
2324 {
2325         if (WARN_ON_ONCE(!export->write))
2326                 return -1;
2327 
2328         mutex_lock(&ftrace_export_lock);
2329 
2330         add_ftrace_export(&ftrace_exports_list, export);
2331 
2332         mutex_unlock(&ftrace_export_lock);
2333 
2334         return 0;
2335 }
2336 EXPORT_SYMBOL_GPL(register_ftrace_export);
2337 
2338 int unregister_ftrace_export(struct trace_export *export)
2339 {
2340         int ret;
2341 
2342         mutex_lock(&ftrace_export_lock);
2343 
2344         ret = rm_ftrace_export(&ftrace_exports_list, export);
2345 
2346         mutex_unlock(&ftrace_export_lock);
2347 
2348         return ret;
2349 }
2350 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2351 
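/*
 * Illustrative sketch, not from trace.c: a minimal trace_export as it might
 * be passed to register_ftrace_export(). Only .write is required; the
 * callback name and destination are hypothetical, and its prototype is
 * assumed from the trace_process_export() call above (raw entry plus length).
 */
static void example_export_write(const void *entry, unsigned int size)
{
	/* forward the raw entry to some out-of-band sink (e.g. firmware, net) */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

/*
 * Usage: register_ftrace_export(&example_export) starts receiving function
 * trace entries; unregister_ftrace_export(&example_export) stops them and
 * returns -1 if the export was never on the list.
 */
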
2352 void
2353 trace_function(struct trace_array *tr,
2354                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2355                int pc)
2356 {
2357         struct trace_event_call *call = &event_function;
2358         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2359         struct ring_buffer_event *event;
2360         struct ftrace_entry *entry;
2361 
2362         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2363                                             flags, pc);
2364         if (!event)
2365                 return;
2366         entry   = ring_buffer_event_data(event);
2367         entry->ip                       = ip;
2368         entry->parent_ip                = parent_ip;
2369 
2370         if (!call_filter_check_discard(call, entry, buffer, event)) {
2371                 if (static_branch_unlikely(&ftrace_exports_enabled))
2372                         ftrace_exports(event);
2373                 __buffer_unlock_commit(buffer, event);
2374         }
2375 }
2376 
2377 #ifdef CONFIG_STACKTRACE
2378 
2379 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2380 struct ftrace_stack {
2381         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2382 };
2383 
2384 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2385 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2386 
2387 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2388                                  unsigned long flags,
2389                                  int skip, int pc, struct pt_regs *regs)
2390 {
2391         struct trace_event_call *call = &event_kernel_stack;
2392         struct ring_buffer_event *event;
2393         struct stack_entry *entry;
2394         struct stack_trace trace;
2395         int use_stack;
2396         int size = FTRACE_STACK_ENTRIES;
2397 
2398         trace.nr_entries        = 0;
2399         trace.skip              = skip;
2400 
2401         /*
2402          * Add two, for this function and the call to save_stack_trace()
2403          * If regs is set, then these functions will not be in the way.
2404          */
2405         if (!regs)
2406                 trace.skip += 2;
2407 
2408         /*
2409          * Since events can happen in NMIs there's no safe way to
2410          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2411          * or NMI comes in, it will just have to use the default
2412          * FTRACE_STACK_ENTRIES.
2413          */
2414         preempt_disable_notrace();
2415 
2416         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2417         /*
2418          * We don't need any atomic variables, just a barrier.
2419          * If an interrupt comes in, we don't care, because it would
2420          * have exited and put the counter back to what we want.
2421          * We just need a barrier to keep gcc from moving things
2422          * around.
2423          */
2424         barrier();
2425         if (use_stack == 1) {
2426                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2427                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2428 
2429                 if (regs)
2430                         save_stack_trace_regs(regs, &trace);
2431                 else
2432                         save_stack_trace(&trace);
2433 
2434                 if (trace.nr_entries > size)
2435                         size = trace.nr_entries;
2436         } else
2437                 /* From now on, use_stack is a boolean */
2438                 use_stack = 0;
2439 
2440         size *= sizeof(unsigned long);
2441 
2442         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2443                                             sizeof(*entry) + size, flags, pc);
2444         if (!event)
2445                 goto out;
2446         entry = ring_buffer_event_data(event);
2447 
2448         memset(&entry->caller, 0, size);
2449 
2450         if (use_stack)
2451                 memcpy(&entry->caller, trace.entries,
2452                        trace.nr_entries * sizeof(unsigned long));
2453         else {
2454                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2455                 trace.entries           = entry->caller;
2456                 if (regs)
2457                         save_stack_trace_regs(regs, &trace);
2458                 else
2459                         save_stack_trace(&trace);
2460         }
2461 
2462         entry->size = trace.nr_entries;
2463 
2464         if (!call_filter_check_discard(call, entry, buffer, event))
2465                 __buffer_unlock_commit(buffer, event);
2466 
2467  out:
2468         /* Again, don't let gcc optimize things here */
2469         barrier();
2470         __this_cpu_dec(ftrace_stack_reserve);
2471         preempt_enable_notrace();
2472 
2473 }
2474 
2475 static inline void ftrace_trace_stack(struct trace_array *tr,
2476                                       struct ring_buffer *buffer,
2477                                       unsigned long flags,
2478                                       int skip, int pc, struct pt_regs *regs)
2479 {
2480         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2481                 return;
2482 
2483         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2484 }
2485 
2486 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2487                    int pc)
2488 {
2489         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2490 }
2491 
2492 /**
2493  * trace_dump_stack - record a stack back trace in the trace buffer
2494  * @skip: Number of functions to skip (helper handlers)
2495  */
2496 void trace_dump_stack(int skip)
2497 {
2498         unsigned long flags;
2499 
2500         if (tracing_disabled || tracing_selftest_running)
2501                 return;
2502 
2503         local_save_flags(flags);
2504 
2505         /*
2506          * Skip 3 more, seems to get us at the caller of
2507          * this function.
2508          */
2509         skip += 3;
2510         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2511                              flags, skip, preempt_count(), NULL);
2512 }
2513 
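/*
 * Illustrative sketch, not from trace.c: trace_dump_stack() is meant to be
 * dropped into code being debugged; skip=0 records the backtrace starting
 * at the caller. The surrounding condition is hypothetical.
 */
static void example_debug_check(int bad_condition)
{
	if (WARN_ON_ONCE(bad_condition))
		trace_dump_stack(0);	/* snapshot this call chain into the ring buffer */
}
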
2514 static DEFINE_PER_CPU(int, user_stack_count);
2515 
2516 void
2517 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2518 {
2519         struct trace_event_call *call = &event_user_stack;
2520         struct ring_buffer_event *event;
2521         struct userstack_entry *entry;
2522         struct stack_trace trace;
2523 
2524         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2525                 return;
2526 
2527         /*
2528          * NMIs cannot handle page faults, even with fixups.
2529          * The save user stack can (and often does) fault.
2530          */
2531         if (unlikely(in_nmi()))
2532                 return;
2533 
2534         /*
2535          * prevent recursion, since the user stack tracing may
2536          * trigger other kernel events.
2537          */
2538         preempt_disable();
2539         if (__this_cpu_read(user_stack_count))
2540                 goto out;
2541 
2542         __this_cpu_inc(user_stack_count);
2543 
2544         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2545                                             sizeof(*entry), flags, pc);
2546         if (!event)
2547                 goto out_drop_count;
2548         entry   = ring_buffer_event_data(event);
2549 
2550         entry->tgid             = current->tgid;
2551         memset(&entry->caller, 0, sizeof(entry->caller));
2552 
2553         trace.nr_entries        = 0;
2554         trace.max_entries       = FTRACE_STACK_ENTRIES;
2555         trace.skip              = 0;
2556         trace.entries           = entry->caller;
2557 
2558         save_stack_trace_user(&trace);
2559         if (!call_filter_check_discard(call, entry, buffer, event))
2560                 __buffer_unlock_commit(buffer, event);
2561 
2562  out_drop_count:
2563         __this_cpu_dec(user_stack_count);
2564  out:
2565         preempt_enable();
2566 }
2567 
2568 #ifdef UNUSED
2569 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2570 {
2571         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2572 }
2573 #endif /* UNUSED */
2574 
2575 #endif /* CONFIG_STACKTRACE */
2576 
2577 /* created for use with alloc_percpu */
2578 struct trace_buffer_struct {
2579         int nesting;
2580         char buffer[4][TRACE_BUF_SIZE];
2581 };
2582 
2583 static struct trace_buffer_struct *trace_percpu_buffer;
2584 
2585 /*
2586  * This allows for lockless recording.  If we're nested too deeply, then
2587  * this returns NULL.
2588  */
2589 static char *get_trace_buf(void)
2590 {
2591         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2592 
2593         if (!buffer || buffer->nesting >= 4)
2594                 return NULL;
2595 
2596         return &buffer->buffer[buffer->nesting++][0];
2597 }
2598 
2599 static void put_trace_buf(void)
2600 {
2601         this_cpu_dec(trace_percpu_buffer->nesting);
2602 }
2603 
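/*
 * Illustrative sketch, not from trace.c: get_trace_buf()/put_trace_buf()
 * rely on the caller disabling preemption, which is what keeps the per-CPU
 * nesting counter stable (see trace_vbprintk() below). The helper is
 * hypothetical.
 */
static int example_format_into_trace_buf(const char *msg)
{
	char *tbuf;
	int len = 0;

	preempt_disable_notrace();
	tbuf = get_trace_buf();
	if (tbuf) {
		len = snprintf(tbuf, TRACE_BUF_SIZE, "%s", msg);
		put_trace_buf();
	}
	preempt_enable_notrace();
	return len;
}
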
2604 static int alloc_percpu_trace_buffer(void)
2605 {
2606         struct trace_buffer_struct *buffers;
2607 
2608         buffers = alloc_percpu(struct trace_buffer_struct);
2609         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2610                 return -ENOMEM;
2611 
2612         trace_percpu_buffer = buffers;
2613         return 0;
2614 }
2615 
2616 static int buffers_allocated;
2617 
2618 void trace_printk_init_buffers(void)
2619 {
2620         if (buffers_allocated)
2621                 return;
2622 
2623         if (alloc_percpu_trace_buffer())
2624                 return;
2625 
2626         /* trace_printk() is for debug use only. Don't use it in production. */
2627 
2628         pr_warn("\n");
2629         pr_warn("**********************************************************\n");
2630         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2631         pr_warn("**                                                      **\n");
2632         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2633         pr_warn("**                                                      **\n");
2634         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2635         pr_warn("** unsafe for production use.                           **\n");
2636         pr_warn("**                                                      **\n");
2637         pr_warn("** If you see this message and you are not debugging    **\n");
2638         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2639         pr_warn("**                                                      **\n");
2640         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2641         pr_warn("**********************************************************\n");
2642 
2643         /* Expand the buffers to set size */
2644         tracing_update_buffers();
2645 
2646         buffers_allocated = 1;
2647 
2648         /*
2649          * trace_printk_init_buffers() can be called by modules.
2650          * If that happens, then we need to start cmdline recording
2651          * directly here. If global_trace.trace_buffer.buffer is
2652          * already allocated, then this was called by module code.
2653          */
2654         if (global_trace.trace_buffer.buffer)
2655                 tracing_start_cmdline_record();
2656 }
2657 
2658 void trace_printk_start_comm(void)
2659 {
2660         /* Start tracing comms if trace printk is set */
2661         if (!buffers_allocated)
2662                 return;
2663         tracing_start_cmdline_record();
2664 }
2665 
2666 static void trace_printk_start_stop_comm(int enabled)
2667 {
2668         if (!buffers_allocated)
2669                 return;
2670 
2671         if (enabled)
2672                 tracing_start_cmdline_record();
2673         else
2674                 tracing_stop_cmdline_record();
2675 }
2676 
2677 /**
2678  * trace_vbprintk - write a binary message to the tracing buffer
2679  * @ip: caller address; @fmt: format string; @args: va_list for @fmt
2680  */
2681 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2682 {
2683         struct trace_event_call *call = &event_bprint;
2684         struct ring_buffer_event *event;
2685         struct ring_buffer *buffer;
2686         struct trace_array *tr = &global_trace;
2687         struct bprint_entry *entry;
2688         unsigned long flags;
2689         char *tbuffer;
2690         int len = 0, size, pc;
2691 
2692         if (unlikely(tracing_selftest_running || tracing_disabled))
2693                 return 0;
2694 
2695         /* Don't pollute graph traces with trace_vprintk internals */
2696         pause_graph_tracing();
2697 
2698         pc = preempt_count();
2699         preempt_disable_notrace();
2700 
2701         tbuffer = get_trace_buf();
2702         if (!tbuffer) {
2703                 len = 0;
2704                 goto out_nobuffer;
2705         }
2706 
2707         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2708 
2709         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2710                 goto out;
2711 
2712         local_save_flags(flags);
2713         size = sizeof(*entry) + sizeof(u32) * len;
2714         buffer = tr->trace_buffer.buffer;
2715         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2716                                             flags, pc);
2717         if (!event)
2718                 goto out;
2719         entry = ring_buffer_event_data(event);
2720         entry->ip                       = ip;
2721         entry->fmt                      = fmt;
2722 
2723         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2724         if (!call_filter_check_discard(call, entry, buffer, event)) {
2725                 __buffer_unlock_commit(buffer, event);
2726                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2727         }
2728 
2729 out:
2730         put_trace_buf();
2731 
2732 out_nobuffer:
2733         preempt_enable_notrace();
2734         unpause_graph_tracing();
2735 
2736         return len;
2737 }
2738 EXPORT_SYMBOL_GPL(trace_vbprintk);
2739 
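/*
 * Illustrative sketch, not from trace.c: trace_vbprintk() is the va_list
 * backend behind the trace_printk() binary fast path; a varargs wrapper
 * would look like this hypothetical helper (compare __trace_bprintk() in
 * trace_printk.c).
 */
static __printf(2, 3) int example_trace_bprintk(unsigned long ip,
						const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vbprintk(ip, fmt, ap);
	va_end(ap);
	return ret;
}
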
2740 static int
2741 __trace_array_vprintk(struct ring_buffer *buffer,
2742                       unsigned long ip, const char *fmt, va_list args)
2743 {
2744         struct trace_event_call *call = &event_print;
2745         struct ring_buffer_event *event;
2746         int len = 0, size, pc;
2747         struct print_entry *entry;
2748         unsigned long flags;
2749         char *tbuffer;
2750 
2751         if (tracing_disabled || tracing_selftest_running)
2752                 return 0;
2753 
2754         /* Don't pollute graph traces with trace_vprintk internals */
2755         pause_graph_tracing();
2756 
2757         pc = preempt_count();
2758         preempt_disable_notrace();
2759 
2760 
2761         tbuffer = get_trace_buf();
2762         if (!tbuffer) {
2763                 len = 0;
2764                 goto out_nobuffer;
2765         }
2766 
2767         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2768 
2769         local_save_flags(flags);
2770         size = sizeof(*entry) + len + 1;
2771         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2772                                             flags, pc);
2773         if (!event)
2774                 goto out;
2775         entry = ring_buffer_event_data(event);
2776         entry->ip = ip;
2777 
2778         memcpy(&entry->buf, tbuffer, len + 1);
2779         if (!call_filter_check_discard(call, entry, buffer, event)) {
2780                 __buffer_unlock_commit(buffer, event);
2781                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2782         }
2783 
2784 out:
2785         put_trace_buf();
2786 
2787 out_nobuffer:
2788         preempt_enable_notrace();
2789         unpause_graph_tracing();
2790 
2791         return len;
2792 }
2793 
2794 int trace_array_vprintk(struct trace_array *tr,
2795                         unsigned long ip, const char *fmt, va_list args)
2796 {
2797         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2798 }
2799 
2800 int trace_array_printk(struct trace_array *tr,
2801                        unsigned long ip, const char *fmt, ...)
2802 {
2803         int ret;
2804         va_list ap;
2805 
2806         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2807                 return 0;
2808 
2809         va_start(ap, fmt);
2810         ret = trace_array_vprintk(tr, ip, fmt, ap);
2811         va_end(ap);
2812         return ret;
2813 }
2814 
2815 int trace_array_printk_buf(struct ring_buffer *buffer,
2816                            unsigned long ip, const char *fmt, ...)
2817 {
2818         int ret;
2819         va_list ap;
2820 
2821         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2822                 return 0;
2823 
2824         va_start(ap, fmt);
2825         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2826         va_end(ap);
2827         return ret;
2828 }
2829 
2830 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2831 {
2832         return trace_array_vprintk(&global_trace, ip, fmt, args);
2833 }
2834 EXPORT_SYMBOL_GPL(trace_vprintk);
2835 
2836 static void trace_iterator_increment(struct trace_iterator *iter)
2837 {
2838         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2839 
2840         iter->idx++;
2841         if (buf_iter)
2842                 ring_buffer_read(buf_iter, NULL);
2843 }
2844 
2845 static struct trace_entry *
2846 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2847                 unsigned long *lost_events)
2848 {
2849         struct ring_buffer_event *event;
2850         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2851 
2852         if (buf_iter)
2853                 event = ring_buffer_iter_peek(buf_iter, ts);
2854         else
2855                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2856                                          lost_events);
2857 
2858         if (event) {
2859                 iter->ent_size = ring_buffer_event_length(event);
2860                 return ring_buffer_event_data(event);
2861         }
2862         iter->ent_size = 0;
2863         return NULL;
2864 }
2865 
2866 static struct trace_entry *
2867 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2868                   unsigned long *missing_events, u64 *ent_ts)
2869 {
2870         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2871         struct trace_entry *ent, *next = NULL;
2872         unsigned long lost_events = 0, next_lost = 0;
2873         int cpu_file = iter->cpu_file;
2874         u64 next_ts = 0, ts;
2875         int next_cpu = -1;
2876         int next_size = 0;
2877         int cpu;
2878 
2879         /*
2880          * If we are in a per_cpu trace file, don't bother iterating over
2881          * all the cpus; just peek at that one directly.
2882          */
2883         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2884                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2885                         return NULL;
2886                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2887                 if (ent_cpu)
2888                         *ent_cpu = cpu_file;
2889 
2890                 return ent;
2891         }
2892 
2893         for_each_tracing_cpu(cpu) {
2894 
2895                 if (ring_buffer_empty_cpu(buffer, cpu))
2896                         continue;
2897 
2898                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2899 
2900                 /*
2901                  * Pick the entry with the smallest timestamp:
2902                  */
2903                 if (ent && (!next || ts < next_ts)) {
2904                         next = ent;
2905                         next_cpu = cpu;
2906                         next_ts = ts;
2907                         next_lost = lost_events;
2908                         next_size = iter->ent_size;
2909                 }
2910         }
2911 
2912         iter->ent_size = next_size;
2913 
2914         if (ent_cpu)
2915                 *ent_cpu = next_cpu;
2916 
2917         if (ent_ts)
2918                 *ent_ts = next_ts;
2919 
2920         if (missing_events)
2921                 *missing_events = next_lost;
2922 
2923         return next;
2924 }
2925 
2926 /* Find the next real entry, without updating the iterator itself */
2927 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2928                                           int *ent_cpu, u64 *ent_ts)
2929 {
2930         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2931 }
2932 
2933 /* Find the next real entry, and increment the iterator to the next entry */
2934 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2935 {
2936         iter->ent = __find_next_entry(iter, &iter->cpu,
2937                                       &iter->lost_events, &iter->ts);
2938 
2939         if (iter->ent)
2940                 trace_iterator_increment(iter);
2941 
2942         return iter->ent ? iter : NULL;
2943 }
2944 
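/*
 * Illustrative sketch, not from trace.c: readers walk the merged,
 * timestamp-ordered stream by calling trace_find_next_entry_inc() in a
 * loop; each hit leaves the entry in iter->ent, with iter->cpu and iter->ts
 * describing where and when it was recorded. The walker is hypothetical.
 */
static void example_walk_entries(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* consume iter->ent here, e.g. hand it to a print routine */
	}
}
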
2945 static void trace_consume(struct trace_iterator *iter)
2946 {
2947         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2948                             &iter->lost_events);
2949 }
2950 
2951 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2952 {
2953         struct trace_iterator *iter = m->private;
2954         int i = (int)*pos;
2955         void *ent;
2956 
2957         WARN_ON_ONCE(iter->leftover);
2958 
2959         (*pos)++;
2960 
2961         /* can't go backwards */
2962         if (iter->idx > i)
2963                 return NULL;
2964 
2965         if (iter->idx < 0)
2966                 ent = trace_find_next_entry_inc(iter);
2967         else
2968                 ent = iter;
2969 
2970         while (ent && iter->idx < i)
2971                 ent = trace_find_next_entry_inc(iter);
2972 
2973         iter->pos = *pos;
2974 
2975         return ent;
2976 }
2977 
2978 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2979 {
2980         struct ring_buffer_event *event;
2981         struct ring_buffer_iter *buf_iter;
2982         unsigned long entries = 0;
2983         u64 ts;
2984 
2985         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2986 
2987         buf_iter = trace_buffer_iter(iter, cpu);
2988         if (!buf_iter)
2989                 return;
2990 
2991         ring_buffer_iter_reset(buf_iter);
2992 
2993         /*
2994          * With the max latency tracers, a reset may never have taken
2995          * place on a cpu. This shows up as timestamps that fall before
2996          * the start of the buffer.
2997          */
2998         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2999                 if (ts >= iter->trace_buffer->time_start)
3000                         break;
3001                 entries++;
3002                 ring_buffer_read(buf_iter, NULL);
3003         }
3004 
3005         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3006 }
3007 
3008 /*
3009  * The current tracer is copied to avoid holding a global lock
3010  * across the whole read.
3011  */
3012 static void *s_start(struct seq_file *m, loff_t *pos)
3013 {
3014         struct trace_iterator *iter = m->private;
3015         struct trace_array *tr = iter->tr;
3016         int cpu_file = iter->cpu_file;
3017         void *p = NULL;
3018         loff_t l = 0;
3019         int cpu;
3020 
3021         /*
3022          * copy the tracer to avoid using a global lock all around.
3023          * iter->trace is a copy of current_trace, the pointer to the
3024          * name may be used instead of a strcmp(), as iter->trace->name
3025          * will point to the same string as current_trace->name.
3026          */
3027         mutex_lock(&trace_types_lock);
3028         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3029                 *iter->trace = *tr->current_trace;
3030         mutex_unlock(&trace_types_lock);
3031 
3032 #ifdef CONFIG_TRACER_MAX_TRACE
3033         if (iter->snapshot && iter->trace->use_max_tr)
3034                 return ERR_PTR(-EBUSY);
3035 #endif
3036 
3037         if (!iter->snapshot)
3038                 atomic_inc(&trace_record_cmdline_disabled);
3039 
3040         if (*pos != iter->pos) {
3041                 iter->ent = NULL;
3042                 iter->cpu = 0;
3043                 iter->idx = -1;
3044 
3045                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3046                         for_each_tracing_cpu(cpu)
3047                                 tracing_iter_reset(iter, cpu);
3048                 } else
3049                         tracing_iter_reset(iter, cpu_file);
3050 
3051                 iter->leftover = 0;
3052                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3053                         ;
3054 
3055         } else {
3056                 /*
3057                  * If we overflowed the seq_file before, then we want
3058                  * to just reuse the trace_seq buffer again.
3059                  */
3060                 if (iter->leftover)
3061                         p = iter;
3062                 else {
3063                         l = *pos - 1;
3064                         p = s_next(m, p, &l);
3065                 }
3066         }
3067 
3068         trace_event_read_lock();
3069         trace_access_lock(cpu_file);
3070         return p;
3071 }
3072 
3073 static void s_stop(struct seq_file *m, void *p)
3074 {
3075         struct trace_iterator *iter = m->private;
3076 
3077 #ifdef CONFIG_TRACER_MAX_TRACE
3078         if (iter->snapshot && iter->trace->use_max_tr)
3079                 return;
3080 #endif
3081 
3082         if (!iter->snapshot)
3083                 atomic_dec(&trace_record_cmdline_disabled);
3084 
3085         trace_access_unlock(iter->cpu_file);
3086         trace_event_read_unlock();
3087 }
3088 
3089 static void
3090 get_total_entries(struct trace_buffer *buf,
3091                   unsigned long *total, unsigned long *entries)
3092 {
3093         unsigned long count;
3094         int cpu;
3095 
3096         *total = 0;
3097         *entries = 0;
3098 
3099         for_each_tracing_cpu(cpu) {
3100                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3101                 /*
3102                  * If this buffer has skipped entries, then we hold all
3103                  * entries for the trace and we need to ignore the
3104                  * ones before the time stamp.
3105                  */
3106                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3107                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3108                         /* total is the same as the entries */
3109                         *total += count;
3110                 } else
3111                         *total += count +
3112                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3113                 *entries += count;
3114         }
3115 }
3116 
3117 static void print_lat_help_header(struct seq_file *m)
3118 {
3119         seq_puts(m, "#                  _------=> CPU#            \n"
3120                     "#                 / _-----=> irqs-off        \n"
3121                     "#                | / _----=> need-resched    \n"
3122                     "#                || / _---=> hardirq/softirq \n"
3123                     "#                ||| / _--=> preempt-depth   \n"
3124                     "#                |||| /     delay            \n"
3125                     "#  cmd     pid   ||||| time  |   caller      \n"
3126                     "#     \\   /      |||||  \\    |   /         \n");
3127 }
3128 
3129 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3130 {
3131         unsigned long total;
3132         unsigned long entries;
3133 
3134         get_total_entries(buf, &total, &entries);
3135         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3136                    entries, total, num_online_cpus());
3137         seq_puts(m, "#\n");
3138 }
3139 
3140 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142         print_event_info(buf, m);
3143         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3144                     "#              | |       |          |         |\n");
3145 }
3146 
3147 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3148 {
3149         print_event_info(buf, m);
3150         seq_puts(m, "#                              _-----=> irqs-off\n"
3151                     "#                             / _----=> need-resched\n"
3152                     "#                            | / _---=> hardirq/softirq\n"
3153                     "#                            || / _--=> preempt-depth\n"
3154                     "#                            ||| /     delay\n"
3155                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3156                     "#              | |       |   ||||       |         |\n");
3157 }
3158 
3159 void
3160 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3161 {
3162         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3163         struct trace_buffer *buf = iter->trace_buffer;
3164         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3165         struct tracer *type = iter->trace;
3166         unsigned long entries;
3167         unsigned long total;
3168         const char *name = "preemption";
3169 
3170         name = type->name;
3171 
3172         get_total_entries(buf, &total, &entries);
3173 
3174         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3175                    name, UTS_RELEASE);
3176         seq_puts(m, "# -----------------------------------"
3177                  "---------------------------------\n");
3178         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3179                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3180                    nsecs_to_usecs(data->saved_latency),
3181                    entries,
3182                    total,
3183                    buf->cpu,
3184 #if defined(CONFIG_PREEMPT_NONE)
3185                    "server",
3186 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3187                    "desktop",
3188 #elif defined(CONFIG_PREEMPT)
3189                    "preempt",
3190 #else
3191                    "unknown",
3192 #endif
3193                    /* These are reserved for later use */
3194                    0, 0, 0, 0);
3195 #ifdef CONFIG_SMP
3196         seq_printf(m, " #P:%d)\n", num_online_cpus());
3197 #else
3198         seq_puts(m, ")\n");
3199 #endif
3200         seq_puts(m, "#    -----------------\n");
3201         seq_printf(m, "#    | task: %.16s-%d "
3202                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3203                    data->comm, data->pid,
3204                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3205                    data->policy, data->rt_priority);
3206         seq_puts(m, "#    -----------------\n");
3207 
3208         if (data->critical_start) {
3209                 seq_puts(m, "#  => started at: ");
3210                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3211                 trace_print_seq(m, &iter->seq);
3212                 seq_puts(m, "\n#  => ended at:   ");
3213                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3214                 trace_print_seq(m, &iter->seq);
3215                 seq_puts(m, "\n#\n");
3216         }
3217 
3218         seq_puts(m, "#\n");
3219 }
3220 
3221 static void test_cpu_buff_start(struct trace_iterator *iter)
3222 {
3223         struct trace_seq *s = &iter->seq;
3224         struct trace_array *tr = iter->tr;
3225 
3226         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3227                 return;
3228 
3229         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3230                 return;
3231 
3232         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3233                 return;
3234 
3235         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3236                 return;
3237 
3238         if (iter->started)
3239                 cpumask_set_cpu(iter->cpu, iter->started);
3240 
3241         /* Don't print started cpu buffer for the first entry of the trace */
3242         if (iter->idx > 1)
3243                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3244                                 iter->cpu);
3245 }
3246 
3247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3248 {
3249         struct trace_array *tr = iter->tr;
3250         struct trace_seq *s = &iter->seq;
3251         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3252         struct trace_entry *entry;
3253         struct trace_event *event;
3254 
3255         entry = iter->ent;
3256 
3257         test_cpu_buff_start(iter);
3258 
3259         event = ftrace_find_event(entry->type);
3260 
3261         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3262                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3263                         trace_print_lat_context(iter);
3264                 else
3265                         trace_print_context(iter);
3266         }
3267 
3268         if (trace_seq_has_overflowed(s))
3269                 return TRACE_TYPE_PARTIAL_LINE;
3270 
3271         if (event)
3272                 return event->funcs->trace(iter, sym_flags, event);
3273 
3274         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3275 
3276         return trace_handle_return(s);
3277 }
3278 
3279 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3280 {
3281         struct trace_array *tr = iter->tr;
3282         struct trace_seq *s = &iter->seq;
3283         struct trace_entry *entry;
3284         struct trace_event *event;
3285 
3286         entry = iter->ent;
3287 
3288         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3289                 trace_seq_printf(s, "%d %d %llu ",
3290                                  entry->pid, iter->cpu, iter->ts);
3291 
3292         if (trace_seq_has_overflowed(s))
3293                 return TRACE_TYPE_PARTIAL_LINE;
3294 
3295         event = ftrace_find_event(entry->type);
3296         if (event)
3297                 return event->funcs->raw(iter, 0, event);
3298 
3299         trace_seq_printf(s, "%d ?\n", entry->type);
3300 
3301         return trace_handle_return(s);
3302 }
3303 
3304 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3305 {
3306         struct trace_array *tr = iter->tr;
3307         struct trace_seq *s = &iter->seq;
3308         unsigned char newline = '\n';
3309         struct trace_entry *entry;
3310         struct trace_event *event;
3311 
3312         entry = iter->ent;
3313 
3314         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3315                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3316                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3317                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3318                 if (trace_seq_has_overflowed(s))
3319                         return TRACE_TYPE_PARTIAL_LINE;
3320         }
3321 
3322         event = ftrace_find_event(entry->type);
3323         if (event) {
3324                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3325                 if (ret != TRACE_TYPE_HANDLED)
3326                         return ret;
3327         }
3328 
3329         SEQ_PUT_FIELD(s, newline);
3330 
3331         return trace_handle_return(s);
3332 }
3333 
3334 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3335 {
3336         struct trace_array *tr = iter->tr;
3337         struct trace_seq *s = &iter->seq;
3338         struct trace_entry *entry;
3339         struct trace_event *event;
3340 
3341         entry = iter->ent;
3342 
3343         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3344                 SEQ_PUT_FIELD(s, entry->pid);
3345                 SEQ_PUT_FIELD(s, iter->cpu);
3346                 SEQ_PUT_FIELD(s, iter->ts);
3347                 if (trace_seq_has_overflowed(s))
3348                         return TRACE_TYPE_PARTIAL_LINE;
3349         }
3350 
3351         event = ftrace_find_event(entry->type);
3352         return event ? event->funcs->binary(iter, 0, event) :
3353                 TRACE_TYPE_HANDLED;
3354 }
3355 
3356 int trace_empty(struct trace_iterator *iter)
3357 {
3358         struct ring_buffer_iter *buf_iter;
3359         int cpu;
3360 
3361         /* If we are looking at one CPU buffer, only check that one */
3362         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3363                 cpu = iter->cpu_file;
3364                 buf_iter = trace_buffer_iter(iter, cpu);
3365                 if (buf_iter) {
3366                         if (!ring_buffer_iter_empty(buf_iter))
3367                                 return 0;
3368                 } else {
3369                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3370                                 return 0;
3371                 }
3372                 return 1;
3373         }
3374 
3375         for_each_tracing_cpu(cpu) {
3376                 buf_iter = trace_buffer_iter(iter, cpu);
3377                 if (buf_iter) {
3378                         if (!ring_buffer_iter_empty(buf_iter))
3379                                 return 0;
3380                 } else {
3381                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3382                                 return 0;
3383                 }
3384         }
3385 
3386         return 1;
3387 }
3388 
3389 /*  Called with trace_event_read_lock() held. */
3390 enum print_line_t print_trace_line(struct trace_iterator *iter)
3391 {
3392         struct trace_array *tr = iter->tr;
3393         unsigned long trace_flags = tr->trace_flags;
3394         enum print_line_t ret;
3395 
3396         if (iter->lost_events) {
3397                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3398                                  iter->cpu, iter->lost_events);
3399                 if (trace_seq_has_overflowed(&iter->seq))
3400                         return TRACE_TYPE_PARTIAL_LINE;
3401         }
3402 
3403         if (iter->trace && iter->trace->print_line) {
3404                 ret = iter->trace->print_line(iter);
3405                 if (ret != TRACE_TYPE_UNHANDLED)
3406                         return ret;
3407         }
3408 
3409         if (iter->ent->type == TRACE_BPUTS &&
3410                         trace_flags & TRACE_ITER_PRINTK &&
3411                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3412                 return trace_print_bputs_msg_only(iter);
3413 
3414         if (iter->ent->type == TRACE_BPRINT &&
3415                         trace_flags & TRACE_ITER_PRINTK &&
3416                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3417                 return trace_print_bprintk_msg_only(iter);
3418 
3419         if (iter->ent->type == TRACE_PRINT &&
3420                         trace_flags & TRACE_ITER_PRINTK &&
3421                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3422                 return trace_print_printk_msg_only(iter);
3423 
3424         if (trace_flags & TRACE_ITER_BIN)
3425                 return print_bin_fmt(iter);
3426 
3427         if (trace_flags & TRACE_ITER_HEX)
3428                 return print_hex_fmt(iter);
3429 
3430         if (trace_flags & TRACE_ITER_RAW)
3431                 return print_raw_fmt(iter);
3432 
3433         return print_trace_fmt(iter);
3434 }
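/*
 * The formatter chosen above follows trace_options flags that can be
 * toggled from user space. A sketch, assuming the usual "hex"/"raw"/"bin"
 * option names in trace_options[]:
 *
 *	echo hex > trace_options     # select print_hex_fmt()
 *	echo nohex > trace_options   # back to the default formatter
 */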
3435 
3436 void trace_latency_header(struct seq_file *m)
3437 {
3438         struct trace_iterator *iter = m->private;
3439         struct trace_array *tr = iter->tr;
3440 
3441         /* print nothing if the buffers are empty */
3442         if (trace_empty(iter))
3443                 return;
3444 
3445         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3446                 print_trace_header(m, iter);
3447 
3448         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3449                 print_lat_help_header(m);
3450 }
3451 
3452 void trace_default_header(struct seq_file *m)
3453 {
3454         struct trace_iterator *iter = m->private;
3455         struct trace_array *tr = iter->tr;
3456         unsigned long trace_flags = tr->trace_flags;
3457 
3458         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3459                 return;
3460 
3461         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3462                 /* print nothing if the buffers are empty */
3463                 if (trace_empty(iter))
3464                         return;
3465                 print_trace_header(m, iter);
3466                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3467                         print_lat_help_header(m);
3468         } else {
3469                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3470                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3471                                 print_func_help_header_irq(iter->trace_buffer, m);
3472                         else
3473                                 print_func_help_header(iter->trace_buffer, m);
3474                 }
3475         }
3476 }
3477 
3478 static void test_ftrace_alive(struct seq_file *m)
3479 {
3480         if (!ftrace_is_dead())
3481                 return;
3482         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3483                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3484 }
3485 
3486 #ifdef CONFIG_TRACER_MAX_TRACE
3487 static void show_snapshot_main_help(struct seq_file *m)
3488 {
3489         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3490                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3491                     "#                      Takes a snapshot of the main buffer.\n"
3492                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3493                     "#                      (Doesn't have to be '2', works with any number that\n"
3494                     "#                       is not a '0' or '1')\n");
3495 }
3496 
3497 static void show_snapshot_percpu_help(struct seq_file *m)
3498 {
3499         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3500 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3501         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3502                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3503 #else
3504         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3505                     "#                     Must use main snapshot file to allocate.\n");
3506 #endif
3507         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3508                     "#                      (Doesn't have to be '2', works with any number that\n"
3509                     "#                       is not a '0' or '1')\n");
3510 }
3511 
3512 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3513 {
3514         if (iter->tr->allocated_snapshot)
3515                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3516         else
3517                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3518 
3519         seq_puts(m, "# Snapshot commands:\n");
3520         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3521                 show_snapshot_main_help(m);
3522         else
3523                 show_snapshot_percpu_help(m);
3524 }
3525 #else
3526 /* Should never be called */
3527 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3528 #endif
3529 
3530 static int s_show(struct seq_file *m, void *v)
3531 {
3532         struct trace_iterator *iter = v;
3533         int ret;
3534 
3535         if (iter->ent == NULL) {
3536                 if (iter->tr) {
3537                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3538                         seq_puts(m, "#\n");
3539                         test_ftrace_alive(m);
3540                 }
3541                 if (iter->snapshot && trace_empty(iter))
3542                         print_snapshot_help(m, iter);
3543                 else if (iter->trace && iter->trace->print_header)
3544                         iter->trace->print_header(m);
3545                 else
3546                         trace_default_header(m);
3547 
3548         } else if (iter->leftover) {
3549                 /*
3550                  * If we filled the seq_file buffer earlier, we
3551                  * want to just show it now.
3552                  */
3553                 ret = trace_print_seq(m, &iter->seq);
3554 
3555                 /* ret should this time be zero, but you never know */
3556                 iter->leftover = ret;
3557 
3558         } else {
3559                 print_trace_line(iter);
3560                 ret = trace_print_seq(m, &iter->seq);
3561                 /*
3562                  * If we overflow the seq_file buffer, then it will
3563                  * ask us for this data again at start up.
3564                  * Use that instead.
3565                  *  ret is 0 if seq_file write succeeded.
3566                  *        -1 otherwise.
3567                  */
3568                 iter->leftover = ret;
3569         }
3570 
3571         return 0;
3572 }
3573 
3574 /*
3575  * Should be used after trace_array_get(), trace_types_lock
3576  * ensures that i_cdev was already initialized.
3577  */
3578 static inline int tracing_get_cpu(struct inode *inode)
3579 {
3580         if (inode->i_cdev) /* See trace_create_cpu_file() */
3581                 return (long)inode->i_cdev - 1;
3582         return RING_BUFFER_ALL_CPUS;
3583 }
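/*
 * Illustrative sketch (an assumption based on the reference to
 * trace_create_cpu_file() above, which is not shown here): the per-CPU
 * trace files are expected to stash "cpu + 1" in i_cdev when they are
 * created, so that a NULL i_cdev keeps meaning "all CPUs":
 *
 *	d_inode(dentry)->i_cdev = (void *)(long)(cpu + 1);
 *
 * tracing_get_cpu() then simply undoes that encoding.
 */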
3584 
3585 static const struct seq_operations tracer_seq_ops = {
3586         .start          = s_start,
3587         .next           = s_next,
3588         .stop           = s_stop,
3589         .show           = s_show,
3590 };
3591 
3592 static struct trace_iterator *
3593 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3594 {
3595         struct trace_array *tr = inode->i_private;
3596         struct trace_iterator *iter;
3597         int cpu;
3598 
3599         if (tracing_disabled)
3600                 return ERR_PTR(-ENODEV);
3601 
3602         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3603         if (!iter)
3604                 return ERR_PTR(-ENOMEM);
3605 
3606         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3607                                     GFP_KERNEL);
3608         if (!iter->buffer_iter)
3609                 goto release;
3610 
3611         /*
3612          * We make a copy of the current tracer to avoid concurrent
3613          * changes on it while we are reading.
3614          */
3615         mutex_lock(&trace_types_lock);
3616         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3617         if (!iter->trace)
3618                 goto fail;
3619 
3620         *iter->trace = *tr->current_trace;
3621 
3622         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3623                 goto fail;
3624 
3625         iter->tr = tr;
3626 
3627 #ifdef CONFIG_TRACER_MAX_TRACE
3628         /* Currently only the top directory has a snapshot */
3629         if (tr->current_trace->print_max || snapshot)
3630                 iter->trace_buffer = &tr->max_buffer;
3631         else
3632 #endif
3633                 iter->trace_buffer = &tr->trace_buffer;
3634         iter->snapshot = snapshot;
3635         iter->pos = -1;
3636         iter->cpu_file = tracing_get_cpu(inode);
3637         mutex_init(&iter->mutex);
3638 
3639         /* Notify the tracer early; before we stop tracing. */
3640         if (iter->trace && iter->trace->open)
3641                 iter->trace->open(iter);
3642 
3643         /* Annotate start of buffers if we had overruns */
3644         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3645                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3646 
3647         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3648         if (trace_clocks[tr->clock_id].in_ns)
3649                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3650 
3651         /* stop the trace while dumping if we are not opening "snapshot" */
3652         if (!iter->snapshot)
3653                 tracing_stop_tr(tr);
3654 
3655         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3656                 for_each_tracing_cpu(cpu) {
3657                         iter->buffer_iter[cpu] =
3658                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3659                 }
3660                 ring_buffer_read_prepare_sync();
3661                 for_each_tracing_cpu(cpu) {
3662                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3663                         tracing_iter_reset(iter, cpu);
3664                 }
3665         } else {
3666                 cpu = iter->cpu_file;
3667                 iter->buffer_iter[cpu] =
3668                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3669                 ring_buffer_read_prepare_sync();
3670                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3671                 tracing_iter_reset(iter, cpu);
3672         }
3673 
3674         mutex_unlock(&trace_types_lock);
3675 
3676         return iter;
3677 
3678  fail:
3679         mutex_unlock(&trace_types_lock);
3680         kfree(iter->trace);
3681         kfree(iter->buffer_iter);
3682 release:
3683         seq_release_private(inode, file);
3684         return ERR_PTR(-ENOMEM);
3685 }
3686 
3687 int tracing_open_generic(struct inode *inode, struct file *filp)
3688 {
3689         if (tracing_disabled)
3690                 return -ENODEV;
3691 
3692         filp->private_data = inode->i_private;
3693         return 0;
3694 }
3695 
3696 bool tracing_is_disabled(void)
3697 {
3698         return (tracing_disabled) ? true : false;
3699 }
3700 
3701 /*
3702  * Open and update trace_array ref count.
3703  * Must have the current trace_array passed to it.
3704  */
3705 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3706 {
3707         struct trace_array *tr = inode->i_private;
3708 
3709         if (tracing_disabled)
3710                 return -ENODEV;
3711 
3712         if (trace_array_get(tr) < 0)
3713                 return -ENODEV;
3714 
3715         filp->private_data = inode->i_private;
3716 
3717         return 0;
3718 }
3719 
3720 static int tracing_release(struct inode *inode, struct file *file)
3721 {
3722         struct trace_array *tr = inode->i_private;
3723         struct seq_file *m = file->private_data;
3724         struct trace_iterator *iter;
3725         int cpu;
3726 
3727         if (!(file->f_mode & FMODE_READ)) {
3728                 trace_array_put(tr);
3729                 return 0;
3730         }
3731 
3732         /* Writes do not use seq_file */
3733         iter = m->private;
3734         mutex_lock(&trace_types_lock);
3735 
3736         for_each_tracing_cpu(cpu) {
3737                 if (iter->buffer_iter[cpu])
3738                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3739         }
3740 
3741         if (iter->trace && iter->trace->close)
3742                 iter->trace->close(iter);
3743 
3744         if (!iter->snapshot)
3745                 /* reenable tracing if it was previously enabled */
3746                 tracing_start_tr(tr);
3747 
3748         __trace_array_put(tr);
3749 
3750         mutex_unlock(&trace_types_lock);
3751 
3752         mutex_destroy(&iter->mutex);
3753         free_cpumask_var(iter->started);
3754         kfree(iter->trace);
3755         kfree(iter->buffer_iter);
3756         seq_release_private(inode, file);
3757 
3758         return 0;
3759 }
3760 
3761 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3762 {
3763         struct trace_array *tr = inode->i_private;
3764 
3765         trace_array_put(tr);
3766         return 0;
3767 }
3768 
3769 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3770 {
3771         struct trace_array *tr = inode->i_private;
3772 
3773         trace_array_put(tr);
3774 
3775         return single_release(inode, file);
3776 }
3777 
3778 static int tracing_open(struct inode *inode, struct file *file)
3779 {
3780         struct trace_array *tr = inode->i_private;
3781         struct trace_iterator *iter;
3782         int ret = 0;
3783 
3784         if (trace_array_get(tr) < 0)
3785                 return -ENODEV;
3786 
3787         /* If this file was open for write, then erase contents */
3788         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3789                 int cpu = tracing_get_cpu(inode);
3790 
3791                 if (cpu == RING_BUFFER_ALL_CPUS)
3792                         tracing_reset_online_cpus(&tr->trace_buffer);
3793                 else
3794                         tracing_reset(&tr->trace_buffer, cpu);
3795         }
3796 
3797         if (file->f_mode & FMODE_READ) {
3798                 iter = __tracing_open(inode, file, false);
3799                 if (IS_ERR(iter))
3800                         ret = PTR_ERR(iter);
3801                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3802                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3803         }
3804 
3805         if (ret < 0)
3806                 trace_array_put(tr);
3807 
3808         return ret;
3809 }
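/*
 * Usage sketch from the user side (paths are assumptions about the usual
 * tracefs mount at /sys/kernel/tracing or /sys/kernel/debug/tracing):
 *
 *	cat trace                   # read a static snapshot of the buffer
 *	echo > trace                # open with O_TRUNC, which resets the buffer
 *	echo > per_cpu/cpu1/trace   # reset only CPU 1's buffer
 */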
3810 
3811 /*
3812  * Some tracers are not suitable for instance buffers.
3813  * A tracer is always available for the global array (toplevel)
3814  * or if it explicitly states that it is.
3815  */
3816 static bool
3817 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3818 {
3819         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3820 }
3821 
3822 /* Find the next tracer that this trace array may use */
3823 static struct tracer *
3824 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3825 {
3826         while (t && !trace_ok_for_array(t, tr))
3827                 t = t->next;
3828 
3829         return t;
3830 }
3831 
3832 static void *
3833 t_next(struct seq_file *m, void *v, loff_t *pos)
3834 {
3835         struct trace_array *tr = m->private;
3836         struct tracer *t = v;
3837 
3838         (*pos)++;
3839 
3840         if (t)
3841                 t = get_tracer_for_array(tr, t->next);
3842 
3843         return t;
3844 }
3845 
3846 static void *t_start(struct seq_file *m, loff_t *pos)
3847 {
3848         struct trace_array *tr = m->private;
3849         struct tracer *t;
3850         loff_t l = 0;
3851 
3852         mutex_lock(&trace_types_lock);
3853 
3854         t = get_tracer_for_array(tr, trace_types);
3855         for (; t && l < *pos; t = t_next(m, t, &l))
3856                         ;
3857 
3858         return t;
3859 }
3860 
3861 static void t_stop(struct seq_file *m, void *p)
3862 {
3863         mutex_unlock(&trace_types_lock);
3864 }
3865 
3866 static int t_show(struct seq_file *m, void *v)
3867 {
3868         struct tracer *t = v;
3869 
3870         if (!t)
3871                 return 0;
3872 
3873         seq_puts(m, t->name);
3874         if (t->next)
3875                 seq_putc(m, ' ');
3876         else
3877                 seq_putc(m, '\n');
3878 
3879         return 0;
3880 }
3881 
3882 static const struct seq_operations show_traces_seq_ops = {
3883         .start          = t_start,
3884         .next           = t_next,
3885         .stop           = t_stop,
3886         .show           = t_show,
3887 };
3888 
3889 static int show_traces_open(struct inode *inode, struct file *file)
3890 {
3891         struct trace_array *tr = inode->i_private;
3892         struct seq_file *m;
3893         int ret;
3894 
3895         if (tracing_disabled)
3896                 return -ENODEV;
3897 
3898         ret = seq_open(file, &show_traces_seq_ops);
3899         if (ret)
3900                 return ret;
3901 
3902         m = file->private_data;
3903         m->private = tr;
3904 
3905         return 0;
3906 }
3907 
3908 static ssize_t
3909 tracing_write_stub(struct file *filp, const char __user *ubuf,
3910                    size_t count, loff_t *ppos)
3911 {
3912         return count;
3913 }
3914 
3915 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3916 {
3917         int ret;
3918 
3919         if (file->f_mode & FMODE_READ)
3920                 ret = seq_lseek(file, offset, whence);
3921         else
3922                 file->f_pos = ret = 0;
3923 
3924         return ret;
3925 }
3926 
3927 static const struct file_operations tracing_fops = {
3928         .open           = tracing_open,
3929         .read           = seq_read,
3930         .write          = tracing_write_stub,
3931         .llseek         = tracing_lseek,
3932         .release        = tracing_release,
3933 };
3934 
3935 static const struct file_operations show_traces_fops = {
3936         .open           = show_traces_open,
3937         .read           = seq_read,
3938         .release        = seq_release,
3939         .llseek         = seq_lseek,
3940 };
3941 
3942 /*
3943  * The tracer itself will not take this lock, but still we want
3944  * to provide a consistent cpumask to user-space:
3945  */
3946 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3947 
3948 /*
3949  * Temporary storage for the character representation of the
3950  * CPU bitmask (and one more byte for the newline):
3951  */
3952 static char mask_str[NR_CPUS + 1];
3953 
3954 static ssize_t
3955 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3956                      size_t count, loff_t *ppos)
3957 {
3958         struct trace_array *tr = file_inode(filp)->i_private;
3959         int len;
3960 
3961         mutex_lock(&tracing_cpumask_update_lock);
3962 
3963         len = snprintf(mask_str, count, "%*pb\n",
3964                        cpumask_pr_args(tr->tracing_cpumask));
3965         if (len >= count) {
3966                 count = -EINVAL;
3967                 goto out_err;
3968         }
3969         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3970 
3971 out_err:
3972         mutex_unlock(&tracing_cpumask_update_lock);
3973 
3974         return count;
3975 }
3976 
3977 static ssize_t
3978 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3979                       size_t count, loff_t *ppos)
3980 {
3981         struct trace_array *tr = file_inode(filp)->i_private;
3982         cpumask_var_t tracing_cpumask_new;
3983         int err, cpu;
3984 
3985         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3986                 return -ENOMEM;
3987 
3988         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3989         if (err)
3990                 goto err_unlock;
3991 
3992         mutex_lock(&tracing_cpumask_update_lock);
3993 
3994         local_irq_disable();
3995         arch_spin_lock(&tr->max_lock);
3996         for_each_tracing_cpu(cpu) {
3997                 /*
3998                  * Increase/decrease the disabled counter if we are
3999                  * about to flip a bit in the cpumask:
4000                  */
4001                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4002                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4003                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4004                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4005                 }
4006                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4007                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4008                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4009                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4010                 }
4011         }
4012         arch_spin_unlock(&tr->max_lock);
4013         local_irq_enable();
4014 
4015         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4016 
4017         mutex_unlock(&tracing_cpumask_update_lock);
4018         free_cpumask_var(tracing_cpumask_new);
4019 
4020         return count;
4021 
4022 err_unlock:
4023         free_cpumask_var(tracing_cpumask_new);
4024 
4025         return err;
4026 }
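/*
 * Usage sketch: the mask is parsed by cpumask_parse_user(), so it is the
 * usual hex cpumask format, e.g.:
 *
 *	echo 3 > tracing_cpumask    # trace only CPUs 0 and 1
 *	echo ff > tracing_cpumask   # trace CPUs 0-7
 */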
4027 
4028 static const struct file_operations tracing_cpumask_fops = {
4029         .open           = tracing_open_generic_tr,
4030         .read           = tracing_cpumask_read,
4031         .write          = tracing_cpumask_write,
4032         .release        = tracing_release_generic_tr,
4033         .llseek         = generic_file_llseek,
4034 };
4035 
4036 static int tracing_trace_options_show(struct seq_file *m, void *v)
4037 {
4038         struct tracer_opt *trace_opts;
4039         struct trace_array *tr = m->private;
4040         u32 tracer_flags;
4041         int i;
4042 
4043         mutex_lock(&trace_types_lock);
4044         tracer_flags = tr->current_trace->flags->val;
4045         trace_opts = tr->current_trace->flags->opts;
4046 
4047         for (i = 0; trace_options[i]; i++) {
4048                 if (tr->trace_flags & (1 << i))
4049                         seq_printf(m, "%s\n", trace_options[i]);
4050                 else
4051                         seq_printf(m, "no%s\n", trace_options[i]);
4052         }
4053 
4054         for (i = 0; trace_opts[i].name; i++) {
4055                 if (tracer_flags & trace_opts[i].bit)
4056                         seq_printf(m, "%s\n", trace_opts[i].name);
4057                 else
4058                         seq_printf(m, "no%s\n", trace_opts[i].name);
4059         }
4060         mutex_unlock(&trace_types_lock);
4061 
4062         return 0;
4063 }
4064 
4065 static int __set_tracer_option(struct trace_array *tr,
4066                                struct tracer_flags *tracer_flags,
4067                                struct tracer_opt *opts, int neg)
4068 {
4069         struct tracer *trace = tracer_flags->trace;
4070         int ret;
4071 
4072         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4073         if (ret)
4074                 return ret;
4075 
4076         if (neg)
4077                 tracer_flags->val &= ~opts->bit;
4078         else
4079                 tracer_flags->val |= opts->bit;
4080         return 0;
4081 }
4082 
4083 /* Try to assign a tracer specific option */
4084 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4085 {
4086         struct tracer *trace = tr->current_trace;
4087         struct tracer_flags *tracer_flags = trace->flags;
4088         struct tracer_opt *opts = NULL;
4089         int i;
4090 
4091         for (i = 0; tracer_flags->opts[i].name; i++) {
4092                 opts = &tracer_flags->opts[i];
4093 
4094                 if (strcmp(cmp, opts->name) == 0)
4095                         return __set_tracer_option(tr, trace->flags, opts, neg);
4096         }
4097 
4098         return -EINVAL;
4099 }
4100 
4101 /* Some tracers require overwrite to stay enabled */
4102 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4103 {
4104         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4105                 return -1;
4106 
4107         return 0;
4108 }
4109 
4110 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4111 {
4112         /* do nothing if flag is already set */
4113         if (!!(tr->trace_flags & mask) == !!enabled)
4114                 return 0;
4115 
4116         /* Give the tracer a chance to approve the change */
4117         if (tr->current_trace->flag_changed)
4118                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4119                         return -EINVAL;
4120 
4121         if (enabled)
4122                 tr->trace_flags |= mask;
4123         else
4124                 tr->trace_flags &= ~mask;
4125 
4126         if (mask == TRACE_ITER_RECORD_CMD)
4127                 trace_event_enable_cmd_record(enabled);
4128 
4129         if (mask == TRACE_ITER_EVENT_FORK)
4130                 trace_event_follow_fork(tr, enabled);
4131 
4132         if (mask == TRACE_ITER_OVERWRITE) {
4133                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4134 #ifdef CONFIG_TRACER_MAX_TRACE
4135                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4136 #endif
4137         }
4138 
4139         if (mask == TRACE_ITER_PRINTK) {
4140                 trace_printk_start_stop_comm(enabled);
4141                 trace_printk_control(enabled);
4142         }
4143 
4144         return 0;
4145 }
4146 
4147 static int trace_set_options(struct trace_array *tr, char *option)
4148 {
4149         char *cmp;
4150         int neg = 0;
4151         int ret = -ENODEV;
4152         int i;
4153         size_t orig_len = strlen(option);
4154 
4155         cmp = strstrip(option);
4156 
4157         if (strncmp(cmp, "no", 2) == 0) {
4158                 neg = 1;
4159                 cmp += 2;
4160         }
4161 
4162         mutex_lock(&trace_types_lock);
4163 
4164         for (i = 0; trace_options[i]; i++) {
4165                 if (strcmp(cmp, trace_options[i]) == 0) {
4166                         ret = set_tracer_flag(tr, 1 << i, !neg);
4167                         break;
4168                 }
4169         }
4170 
4171         /* If no option could be set, test the specific tracer options */
4172         if (!trace_options[i])
4173                 ret = set_tracer_option(tr, cmp, neg);
4174 
4175         mutex_unlock(&trace_types_lock);
4176 
4177         /*
4178          * If the first trailing whitespace is replaced with '\0' by strstrip,
4179          * turn it back into a space.
4180          */
4181         if (orig_len > strlen(option))
4182                 option[strlen(option)] = ' ';
4183 
4184         return ret;
4185 }
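/*
 * Example of what trace_set_options() accepts (a sketch; "print-parent"
 * is one of the generic trace_options[] names, and "funcgraph-proc" is
 * assumed here as a tracer-specific option of function_graph):
 *
 *	echo print-parent > trace_options     # set_tracer_flag(tr, bit, 1)
 *	echo noprint-parent > trace_options   # set_tracer_flag(tr, bit, 0)
 *	echo funcgraph-proc > trace_options   # handled by set_tracer_option()
 */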
4186 
4187 static void __init apply_trace_boot_options(void)
4188 {
4189         char *buf = trace_boot_options_buf;
4190         char *option;
4191 
4192         while (true) {
4193                 option = strsep(&buf, ",");
4194 
4195                 if (!option)
4196                         break;
4197 
4198                 if (*option)
4199                         trace_set_options(&global_trace, option);
4200 
4201                 /* Put back the comma to allow this to be called again */
4202                 if (buf)
4203                         *(buf - 1) = ',';
4204         }
4205 }
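/*
 * This consumes the buffer filled from the kernel command line, e.g.
 * (an assumption about the boot parameter spelling; see the
 * "trace_options=" __setup handler earlier in this file):
 *
 *	trace_options=sym-offset,nooverwrite
 *
 * Each comma-separated token is handed to trace_set_options() above.
 */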
4206 
4207 static ssize_t
4208 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4209                         size_t cnt, loff_t *ppos)
4210 {
4211         struct seq_file *m = filp->private_data;
4212         struct trace_array *tr = m->private;
4213         char buf[64];
4214         int ret;
4215 
4216         if (cnt >= sizeof(buf))
4217                 return -EINVAL;
4218 
4219         if (copy_from_user(buf, ubuf, cnt))
4220                 return -EFAULT;
4221 
4222         buf[cnt] = 0;
4223 
4224         ret = trace_set_options(tr, buf);
4225         if (ret < 0)
4226                 return ret;
4227 
4228         *ppos += cnt;
4229 
4230         return cnt;
4231 }
4232 
4233 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4234 {
4235         struct trace_array *tr = inode->i_private;
4236         int ret;
4237 
4238         if (tracing_disabled)
4239                 return -ENODEV;
4240 
4241         if (trace_array_get(tr) < 0)
4242                 return -ENODEV;
4243 
4244         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4245         if (ret < 0)
4246                 trace_array_put(tr);
4247 
4248         return ret;
4249 }
4250 
4251 static const struct file_operations tracing_iter_fops = {
4252         .open           = tracing_trace_options_open,
4253         .read           = seq_read,
4254         .llseek         = seq_lseek,
4255         .release        = tracing_single_release_tr,
4256         .write          = tracing_trace_options_write,
4257 };
4258 
4259 static const char readme_msg[] =
4260         "tracing mini-HOWTO:\n\n"
4261         "# echo 0 > tracing_on : quick way to disable tracing\n"
4262         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4263         " Important files:\n"
4264         "  trace\t\t\t- The static contents of the buffer\n"
4265         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4266         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4267         "  current_tracer\t- function and latency tracers\n"
4268         "  available_tracers\t- list of configured tracers for current_tracer\n"
4269         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4270         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4271         "  trace_clock\t\t- change the clock used to order events\n"
4272         "       local:   Per cpu clock but may not be synced across CPUs\n"
4273         "      global:   Synced across CPUs but slows tracing down.\n"
4274         "     counter:   Not a clock, but just an increment\n"
4275         "      uptime:   Jiffy counter from time of boot\n"
4276         "        perf:   Same clock that perf events use\n"
4277 #ifdef CONFIG_X86_64
4278         "     x86-tsc:   TSC cycle counter\n"
4279 #endif
4280         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4281         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4282         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4283         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4284         "\t\t\t  Remove sub-buffer with rmdir\n"
4285         "  trace_options\t\t- Set format or modify how tracing happens\n"
4286         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4287         "\t\t\t  option name\n"
4288         "  saved_cmdlines_size\t- echo the number of comm-pid entries to keep in here\n"
4289 #ifdef CONFIG_DYNAMIC_FTRACE
4290         "\n  available_filter_functions - list of functions that can be filtered on\n"
4291         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4292         "\t\t\t  functions\n"
4293         "\t     accepts: func_full_name or glob-matching-pattern\n"
4294         "\t     modules: Can select a group via module\n"
4295         "\t      Format: :mod:<module-name>\n"
4296         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4297         "\t    triggers: a command to perform when function is hit\n"
4298         "\t      Format: <function>:<trigger>[:count]\n"
4299         "\t     trigger: traceon, traceoff\n"
4300         "\t\t      enable_event:<system>:<event>\n"
4301         "\t\t      disable_event:<system>:<event>\n"
4302 #ifdef CONFIG_STACKTRACE
4303         "\t\t      stacktrace\n"
4304 #endif
4305 #ifdef CONFIG_TRACER_SNAPSHOT
4306         "\t\t      snapshot\n"
4307 #endif
4308         "\t\t      dump\n"
4309         "\t\t      cpudump\n"
4310         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4311         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4312         "\t     The first one will disable tracing every time do_fault is hit\n"
4313         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4314         "\t       The first time do_trap is hit and it disables tracing, the\n"
4315         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4316         "\t       the counter will not decrement. It only decrements when the\n"
4317         "\t       trigger did work\n"
4318         "\t     To remove a trigger without a count:\n"
4319         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4320         "\t     To remove a trigger with a count:\n"
4321         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4322         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4323         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4324         "\t    modules: Can select a group via module command :mod:\n"
4325         "\t    Does not accept triggers\n"
4326 #endif /* CONFIG_DYNAMIC_FTRACE */
4327 #ifdef CONFIG_FUNCTION_TRACER
4328         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4329         "\t\t    (function)\n"
4330 #endif
4331 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4332         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4333         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4334         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4335 #endif
4336 #ifdef CONFIG_TRACER_SNAPSHOT
4337         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4338         "\t\t\t  snapshot buffer. Read the contents for more\n"
4339         "\t\t\t  information\n"
4340 #endif
4341 #ifdef CONFIG_STACK_TRACER
4342         "  stack_trace\t\t- Shows the max stack trace when active\n"
4343         "  stack_max_size\t- Shows current max stack size that was traced\n"
4344         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4345         "\t\t\t  new trace)\n"
4346 #ifdef CONFIG_DYNAMIC_FTRACE
4347         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4348         "\t\t\t  traces\n"
4349 #endif
4350 #endif /* CONFIG_STACK_TRACER */
4351 #ifdef CONFIG_KPROBE_EVENT
4352         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4353         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4354 #endif
4355 #ifdef CONFIG_UPROBE_EVENT
4356         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4357         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4358 #endif
4359 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4360         "\t  accepts: event-definitions (one definition per line)\n"
4361         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4362         "\t           -:[<group>/]<event>\n"
4363 #ifdef CONFIG_KPROBE_EVENT
4364         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4365 #endif
4366 #ifdef CONFIG_UPROBE_EVENT
4367         "\t    place: <path>:<offset>\n"
4368 #endif
4369         "\t     args: <name>=fetcharg[:type]\n"
4370         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4371         "\t           $stack<index>, $stack, $retval, $comm\n"
4372         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4373         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4374 #endif
4375         "  events/\t\t- Directory containing all trace event subsystems:\n"
4376         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4377         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4378         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4379         "\t\t\t  events\n"
4380         "      filter\t\t- If set, only events passing filter are traced\n"
4381         "  events/<system>/<event>/\t- Directory containing control files for\n"
4382         "\t\t\t  <event>:\n"
4383         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4384         "      filter\t\t- If set, only events passing filter are traced\n"
4385         "      trigger\t\t- If set, a command to perform when event is hit\n"
4386         "\t    Format: <trigger>[:count][if <filter>]\n"
4387         "\t   trigger: traceon, traceoff\n"
4388         "\t            enable_event:<system>:<event>\n"
4389         "\t            disable_event:<system>:<event>\n"
4390 #ifdef CONFIG_HIST_TRIGGERS
4391         "\t            enable_hist:<system>:<event>\n"
4392         "\t            disable_hist:<system>:<event>\n"
4393 #endif
4394 #ifdef CONFIG_STACKTRACE
4395         "\t\t    stacktrace\n"
4396 #endif
4397 #ifdef CONFIG_TRACER_SNAPSHOT
4398         "\t\t    snapshot\n"
4399 #endif
4400 #ifdef CONFIG_HIST_TRIGGERS
4401         "\t\t    hist (see below)\n"
4402 #endif
4403         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4404         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4405         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4406         "\t                  events/block/block_unplug/trigger\n"
4407         "\t   The first disables tracing every time block_unplug is hit.\n"
4408         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4409         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4410         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4411         "\t   Like function triggers, the counter is only decremented if it\n"
4412         "\t    enabled or disabled tracing.\n"
4413         "\t   To remove a trigger without a count:\n"
4414         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4415         "\t   To remove a trigger with a count:\n"
4416         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4417         "\t   Filters can be ignored when removing a trigger.\n"
4418 #ifdef CONFIG_HIST_TRIGGERS
4419         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4420         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4421         "\t            [:values=<field1[,field2,...]>]\n"
4422         "\t            [:sort=<field1[,field2,...]>]\n"
4423         "\t            [:size=#entries]\n"
4424         "\t            [:pause][:continue][:clear]\n"
4425         "\t            [:name=histname1]\n"
4426         "\t            [if <filter>]\n\n"
4427         "\t    When a matching event is hit, an entry is added to a hash\n"
4428         "\t    table using the key(s) and value(s) named, and the value of a\n"
4429         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4430         "\t    correspond to fields in the event's format description.  Keys\n"
4431         "\t    can be any field, or the special string 'stacktrace'.\n"
4432         "\t    Compound keys consisting of up to two fields can be specified\n"
4433         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4434         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4435         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4436         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4437         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4438         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4439         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4440         "\t    its histogram data will be shared with other triggers of the\n"
4441         "\t    same name, and trigger hits will update this common data.\n\n"
4442         "\t    Reading the 'hist' file for the event will dump the hash\n"
4443         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4444         "\t    triggers attached to an event, there will be a table for each\n"
4445         "\t    trigger in the output.  The table displayed for a named\n"
4446         "\t    trigger will be the same as any other instance having the\n"
4447         "\t    same name.  The default format used to display a given field\n"
4448         "\t    can be modified by appending any of the following modifiers\n"
4449         "\t    to the field name, as applicable:\n\n"
4450         "\t            .hex        display a number as a hex value\n"
4451         "\t            .sym        display an address as a symbol\n"
4452         "\t            .sym-offset display an address as a symbol and offset\n"
4453         "\t            .execname   display a common_pid as a program name\n"
4454         "\t            .syscall    display a syscall id as a syscall name\n\n"
4455         "\t            .log2       display log2 value rather than raw number\n\n"
4456         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4457         "\t    trigger or to start a hist trigger but not log any events\n"
4458         "\t    until told to do so.  'continue' can be used to start or\n"
4459         "\t    restart a paused hist trigger.\n\n"
4460         "\t    The 'clear' parameter will clear the contents of a running\n"
4461         "\t    hist trigger and leave its current paused/active state\n"
4462         "\t    unchanged.\n\n"
4463         "\t    The enable_hist and disable_hist triggers can be used to\n"
4464         "\t    have one event conditionally start and stop another event's\n"
4465         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4466         "\t    the enable_event and disable_event triggers.\n"
4467 #endif
4468 ;
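/*
 * A concrete hist-trigger example matching the format described above
 * (a sketch; the kmem:kmalloc event and its call_site/bytes_req fields
 * are assumed to be available on the running kernel):
 *
 *	echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 *	cat events/kmem/kmalloc/hist
 */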
4469 
4470 static ssize_t
4471 tracing_readme_read(struct file *filp, char __user *ubuf,
4472                        size_t cnt, loff_t *ppos)
4473 {
4474         return simple_read_from_buffer(ubuf, cnt, ppos,
4475                                         readme_msg, strlen(readme_msg));
4476 }
4477 
4478 static const struct file_operations tracing_readme_fops = {
4479         .open           = tracing_open_generic,
4480         .read           = tracing_readme_read,
4481         .llseek         = generic_file_llseek,
4482 };
4483 
4484 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4485 {
4486         unsigned int *ptr = v;
4487 
4488         if (*pos || m->count)
4489                 ptr++;
4490 
4491         (*pos)++;
4492 
4493         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4494              ptr++) {
4495                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4496                         continue;
4497 
4498                 return ptr;
4499         }
4500 
4501         return NULL;
4502 }
4503 
4504 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4505 {
4506         void *v;
4507         loff_t l = 0;
4508 
4509         preempt_disable();
4510         arch_spin_lock(&trace_cmdline_lock);
4511 
4512         v = &savedcmd->map_cmdline_to_pid[0];
4513         while (l <= *pos) {
4514                 v = saved_cmdlines_next(m, v, &l);
4515                 if (!v)
4516                         return NULL;
4517         }
4518 
4519         return v;
4520 }
4521 
4522 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4523 {
4524         arch_spin_unlock(&trace_cmdline_lock);
4525         preempt_enable();
4526 }
4527 
4528 static int saved_cmdlines_show(struct seq_file *m, void *v)
4529 {
4530         char buf[TASK_COMM_LEN];
4531         unsigned int *pid = v;
4532 
4533         __trace_find_cmdline(*pid, buf);
4534         seq_printf(m, "%d %s\n", *pid, buf);
4535         return 0;
4536 }
4537 
4538 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4539         .start          = saved_cmdlines_start,
4540         .next           = saved_cmdlines_next,
4541         .stop           = saved_cmdlines_stop,
4542         .show           = saved_cmdlines_show,
4543 };
4544 
4545 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4546 {
4547         if (tracing_disabled)
4548                 return -ENODEV;
4549 
4550         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4551 }
4552 
4553 static const struct file_operations tracing_saved_cmdlines_fops = {
4554         .open           = tracing_saved_cmdlines_open,
4555         .read           = seq_read,
4556         .llseek         = seq_lseek,
4557         .release        = seq_release,
4558 };
4559 
4560 static ssize_t
4561 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4562                                  size_t cnt, loff_t *ppos)
4563 {
4564         char buf[64];
4565         int r;
4566 
4567         arch_spin_lock(&trace_cmdline_lock);
4568         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4569         arch_spin_unlock(&trace_cmdline_lock);
4570 
4571         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4572 }
4573 
4574 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4575 {
4576         kfree(s->saved_cmdlines);
4577         kfree(s->map_cmdline_to_pid);
4578         kfree(s);
4579 }
4580 
4581 static int tracing_resize_saved_cmdlines(unsigned int val)
4582 {
4583         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4584 
4585         s = kmalloc(sizeof(*s), GFP_KERNEL);
4586         if (!s)
4587                 return -ENOMEM;
4588 
4589         if (allocate_cmdlines_buffer(val, s) < 0) {
4590                 kfree(s);
4591                 return -ENOMEM;
4592         }
4593 
4594         arch_spin_lock(&trace_cmdline_lock);
4595         savedcmd_temp = savedcmd;
4596         savedcmd = s;
4597         arch_spin_unlock(&trace_cmdline_lock);
4598         free_saved_cmdlines_buffer(savedcmd_temp);
4599 
4600         return 0;
4601 }
4602 
4603 static ssize_t
4604 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4605                                   size_t cnt, loff_t *ppos)
4606 {
4607         unsigned long val;
4608         int ret;
4609 
4610         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4611         if (ret)
4612                 return ret;
4613 
4614         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4615         if (!val || val > PID_MAX_DEFAULT)
4616                 return -EINVAL;
4617 
4618         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4619         if (ret < 0)
4620                 return ret;
4621 
4622         *ppos += cnt;
4623 
4624         return cnt;
4625 }
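/*
 * Usage sketch for resizing the saved comm-pid map (bounded to
 * 1..PID_MAX_DEFAULT by the check above):
 *
 *	echo 1024 > saved_cmdlines_size
 *	cat saved_cmdlines_size      # -> 1024
 */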
4626 
4627 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4628         .open           = tracing_open_generic,
4629         .read           = tracing_saved_cmdlines_size_read,
4630         .write          = tracing_saved_cmdlines_size_write,
4631 };
4632 
4633 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4634 static union trace_enum_map_item *
4635 update_enum_map(union trace_enum_map_item *ptr)
4636 {
4637         if (!ptr->map.enum_string) {
4638                 if (ptr->tail.next) {
4639                         ptr = ptr->tail.next;
4640                         /* Set ptr to the next real item (skip head) */
4641                         ptr++;
4642                 } else
4643                         return NULL;
4644         }
4645         return ptr;
4646 }
4647 
4648 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4649 {
4650         union trace_enum_map_item *ptr = v;
4651 
4652         /*
4653          * Paranoid! If ptr points to end, we don't want to increment past it.
4654          * This really should never happen.
4655          */
4656         ptr = update_enum_map(ptr);
4657         if (WARN_ON_ONCE(!ptr))
4658                 return NULL;
4659 
4660         ptr++;
4661 
4662         (*pos)++;
4663 
4664         ptr = update_enum_map(ptr);
4665 
4666         return ptr;
4667 }
4668 
4669 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4670 {
4671         union trace_enum_map_item *v;
4672         loff_t l = 0;
4673 
4674         mutex_lock(&trace_enum_mutex);
4675 
4676         v = trace_enum_maps;
4677         if (v)
4678                 v++;
4679 
4680         while (v && l < *pos) {
4681                 v = enum_map_next(m, v, &l);
4682         }
4683 
4684         return v;
4685 }
4686 
4687 static void enum_map_stop(struct seq_file *m, void *v)
4688 {
4689         mutex_unlock(&trace_enum_mutex);
4690 }
4691 
4692 static int enum_map_show(struct seq_file *m, void *v)
4693 {
4694         union trace_enum_map_item *ptr = v;
4695 
4696         seq_printf(m, "%s %ld (%s)\n",
4697                    ptr->map.enum_string, ptr->map.enum_value,
4698                    ptr->map.system);
4699 
4700         return 0;
4701 }
4702 
4703 static const struct seq_operations tracing_enum_map_seq_ops = {
4704         .start          = enum_map_start,
4705         .next           = enum_map_next,
4706         .stop           = enum_map_stop,
4707         .show           = enum_map_show,
4708 };
4709 
4710 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4711 {
4712         if (tracing_disabled)
4713                 return -ENODEV;
4714 
4715         return seq_open(filp, &tracing_enum_map_seq_ops);
4716 }
4717 
4718 static const struct file_operations tracing_enum_map_fops = {
4719         .open           = tracing_enum_map_open,
4720         .read           = seq_read,
4721         .llseek         = seq_lseek,
4722         .release        = seq_release,
4723 };
4724 
4725 static inline union trace_enum_map_item *
4726 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4727 {
4728         /* Return tail of array given the head */
4729         return ptr + ptr->head.length + 1;
4730 }
4731 
4732 static void
4733 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4734                            int len)
4735 {
4736         struct trace_enum_map **stop;
4737         struct trace_enum_map **map;
4738         union trace_enum_map_item *map_array;
4739         union trace_enum_map_item *ptr;
4740 
4741         stop = start + len;
4742 
4743         /*
4744          * The trace_enum_maps contains the map plus a head and tail item,
4745          * where the head holds the module and length of array, and the
4746          * tail holds a pointer to the next list.
4747          */
4748         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4749         if (!map_array) {
4750                 pr_warn("Unable to allocate trace enum mapping\n");
4751                 return;
4752         }
4753 
4754         mutex_lock(&trace_enum_mutex);
4755 
4756         if (!trace_enum_maps)
4757                 trace_enum_maps = map_array;
4758         else {
4759                 ptr = trace_enum_maps;
4760                 for (;;) {
4761                         ptr = trace_enum_jmp_to_tail(ptr);
4762                         if (!ptr->tail.next)
4763                                 break;
4764                         ptr = ptr->tail.next;
4765 
4766                 }
4767                 ptr->tail.next = map_array;
4768         }
4769         map_array->head.mod = mod;
4770         map_array->head.length = len;
4771         map_array++;
4772 
4773         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4774                 map_array->map = **map;
4775                 map_array++;
4776         }
4777         memset(map_array, 0, sizeof(*map_array));
4778 
4779         mutex_unlock(&trace_enum_mutex);
4780 }
4781 
4782 static void trace_create_enum_file(struct dentry *d_tracer)
4783 {
4784         trace_create_file("enum_map", 0444, d_tracer,
4785                           NULL, &tracing_enum_map_fops);
4786 }
4787 
4788 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4789 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4790 static inline void trace_insert_enum_map_file(struct module *mod,
4791                               struct trace_enum_map **start, int len) { }
4792 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4793 
4794 static void trace_insert_enum_map(struct module *mod,
4795                                   struct trace_enum_map **start, int len)
4796 {
4797         struct trace_enum_map **map;
4798 
4799         if (len <= 0)
4800                 return;
4801 
4802         map = start;
4803 
4804         trace_event_enum_update(map, len);
4805 
4806         trace_insert_enum_map_file(mod, start, len);
4807 }
4808 
4809 static ssize_t
4810 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4811                        size_t cnt, loff_t *ppos)
4812 {
4813         struct trace_array *tr = filp->private_data;
4814         char buf[MAX_TRACER_SIZE+2];
4815         int r;
4816 
4817         mutex_lock(&trace_types_lock);
4818         r = sprintf(buf, "%s\n", tr->current_trace->name);
4819         mutex_unlock(&trace_types_lock);
4820 
4821         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4822 }
4823 
4824 int tracer_init(struct tracer *t, struct trace_array *tr)
4825 {
4826         tracing_reset_online_cpus(&tr->trace_buffer);
4827         return t->init(tr);
4828 }
4829 
4830 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4831 {
4832         int cpu;
4833 
4834         for_each_tracing_cpu(cpu)
4835                 per_cpu_ptr(buf->data, cpu)->entries = val;
4836 }
4837 
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4840 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4841                                         struct trace_buffer *size_buf, int cpu_id)
4842 {
4843         int cpu, ret = 0;
4844 
4845         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4846                 for_each_tracing_cpu(cpu) {
4847                         ret = ring_buffer_resize(trace_buf->buffer,
4848                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4849                         if (ret < 0)
4850                                 break;
4851                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4852                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4853                 }
4854         } else {
4855                 ret = ring_buffer_resize(trace_buf->buffer,
4856                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4857                 if (ret == 0)
4858                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4859                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4860         }
4861 
4862         return ret;
4863 }
4864 #endif /* CONFIG_TRACER_MAX_TRACE */
4865 
4866 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4867                                         unsigned long size, int cpu)
4868 {
4869         int ret;
4870 
4871         /*
4872          * If the kernel or a user changes the size of the ring buffer,
4873          * we use the size that was given, and we can forget about
4874          * expanding it later.
4875          */
4876         ring_buffer_expanded = true;
4877 
4878         /* May be called before buffers are initialized */
4879         if (!tr->trace_buffer.buffer)
4880                 return 0;
4881 
4882         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4883         if (ret < 0)
4884                 return ret;
4885 
4886 #ifdef CONFIG_TRACER_MAX_TRACE
4887         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4888             !tr->current_trace->use_max_tr)
4889                 goto out;
4890 
4891         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4892         if (ret < 0) {
4893                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4894                                                      &tr->trace_buffer, cpu);
4895                 if (r < 0) {
4896                         /*
4897                          * AARGH! We are left with different
4898                          * size max buffer!!!!
4899                          * The max buffer is our "snapshot" buffer.
4900                          * When a tracer needs a snapshot (one of the
4901                          * latency tracers), it swaps the max buffer
4902                          * with the saved snapshot. We succeeded in updating
4903                          * the size of the main buffer, but failed to
4904                          * update the size of the max buffer. But when we tried
4905                          * to reset the main buffer to the original size, we
4906                          * failed there too. This is very unlikely to
4907                          * happen, but if it does, warn and kill all
4908                          * tracing.
4909                          */
4910                         WARN_ON(1);
4911                         tracing_disabled = 1;
4912                 }
4913                 return ret;
4914         }
4915 
4916         if (cpu == RING_BUFFER_ALL_CPUS)
4917                 set_buffer_entries(&tr->max_buffer, size);
4918         else
4919                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4920 
4921  out:
4922 #endif /* CONFIG_TRACER_MAX_TRACE */
4923 
4924         if (cpu == RING_BUFFER_ALL_CPUS)
4925                 set_buffer_entries(&tr->trace_buffer, size);
4926         else
4927                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4928 
4929         return ret;
4930 }
4931 
4932 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4933                                           unsigned long size, int cpu_id)
4934 {
4935         int ret = size;
4936 
4937         mutex_lock(&trace_types_lock);
4938 
4939         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4940                 /* make sure this cpu is enabled in the mask */
4941                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4942                         ret = -EINVAL;
4943                         goto out;
4944                 }
4945         }
4946 
4947         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4948         if (ret < 0)
4949                 ret = -ENOMEM;
4950 
4951 out:
4952         mutex_unlock(&trace_types_lock);
4953 
4954         return ret;
4955 }
4956 
4957 
4958 /**
4959  * tracing_update_buffers - used by tracing facility to expand ring buffers
4960  *
4961  * To save memory on a system where tracing is configured in but never
4962  * used, the ring buffers are set to a minimum size. But once a user
4963  * starts to use the tracing facility, they need to grow to their
4964  * default size.
4965  *
4966  * This function is to be called when a tracer is about to be used.
4967  */
4968 int tracing_update_buffers(void)
4969 {
4970         int ret = 0;
4971 
4972         mutex_lock(&trace_types_lock);
4973         if (!ring_buffer_expanded)
4974                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4975                                                 RING_BUFFER_ALL_CPUS);
4976         mutex_unlock(&trace_types_lock);
4977 
4978         return ret;
4979 }
4980 
4981 struct trace_option_dentry;
4982 
4983 static void
4984 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4985 
4986 /*
4987  * Used to clear out the tracer before deletion of an instance.
4988  * Must have trace_types_lock held.
4989  */
4990 static void tracing_set_nop(struct trace_array *tr)
4991 {
4992         if (tr->current_trace == &nop_trace)
4993                 return;
4994 
4995         tr->current_trace->enabled--;
4996 
4997         if (tr->current_trace->reset)
4998                 tr->current_trace->reset(tr);
4999 
5000         tr->current_trace = &nop_trace;
5001 }
5002 
5003 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5004 {
5005         /* Only enable if the directory has been created already. */
5006         if (!tr->dir)
5007                 return;
5008 
5009         create_trace_option_files(tr, t);
5010 }
5011 
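/*
 * tracing_set_tracer - switch @tr to the tracer registered under @buf
 *
 * Expands the ring buffer to its default size on first use, refuses to switch
 * while trace_pipe readers hold a reference, frees or allocates the snapshot
 * (max) buffer as the new tracer requires (with CONFIG_TRACER_MAX_TRACE), and
 * finally runs the new tracer's init() callback.  Returns 0 on success or a
 * negative errno.
 */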
5012 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5013 {
5014         struct tracer *t;
5015 #ifdef CONFIG_TRACER_MAX_TRACE
5016         bool had_max_tr;
5017 #endif
5018         int ret = 0;
5019 
5020         mutex_lock(&trace_types_lock);
5021 
5022         if (!ring_buffer_expanded) {
5023                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5024                                                 RING_BUFFER_ALL_CPUS);
5025                 if (ret < 0)
5026                         goto out;
5027                 ret = 0;
5028         }
5029 
5030         for (t = trace_types; t; t = t->next) {
5031                 if (strcmp(t->name, buf) == 0)
5032                         break;
5033         }
5034         if (!t) {
5035                 ret = -EINVAL;
5036                 goto out;
5037         }
5038         if (t == tr->current_trace)
5039                 goto out;
5040 
5041         /* Some tracers are only allowed for the top level buffer */
5042         if (!trace_ok_for_array(t, tr)) {
5043                 ret = -EINVAL;
5044                 goto out;
5045         }
5046 
5047         /* If trace pipe files are being read, we can't change the tracer */
5048         if (tr->current_trace->ref) {
5049                 ret = -EBUSY;
5050                 goto out;
5051         }
5052 
5053         trace_branch_disable();
5054 
5055         tr->current_trace->enabled--;
5056 
5057         if (tr->current_trace->reset)
5058                 tr->current_trace->reset(tr);
5059 
5060         /* Current trace needs to be nop_trace before synchronize_sched */
5061         tr->current_trace = &nop_trace;
5062 
5063 #ifdef CONFIG_TRACER_MAX_TRACE
5064         had_max_tr = tr->allocated_snapshot;
5065 
5066         if (had_max_tr && !t->use_max_tr) {
5067                 /*
5068                  * We need to make sure that the update_max_tr sees that
5069                  * current_trace changed to nop_trace to keep it from
5070                  * swapping the buffers after we resize it.
5071                  * The update_max_tr is called with interrupts disabled,
5072                  * so a synchronize_sched() is sufficient.
5073                  */
5074                 synchronize_sched();
5075                 free_snapshot(tr);
5076         }
5077 #endif
5078 
5079 #ifdef CONFIG_TRACER_MAX_TRACE
5080         if (t->use_max_tr && !had_max_tr) {
5081                 ret = alloc_snapshot(tr);
5082                 if (ret < 0)
5083                         goto out;
5084         }
5085 #endif
5086 
5087         if (t->init) {
5088                 ret = tracer_init(t, tr);
5089                 if (ret)
5090                         goto out;
5091         }
5092 
5093         tr->current_trace = t;
5094         tr->current_trace->enabled++;
5095         trace_branch_enable(tr);
5096  out:
5097         mutex_unlock(&trace_types_lock);
5098 
5099         return ret;
5100 }
5101 
5102 static ssize_t
5103 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5104                         size_t cnt, loff_t *ppos)
5105 {
5106         struct trace_array *tr = filp->private_data;
5107         char buf[MAX_TRACER_SIZE+1];
5108         int i;
5109         size_t ret;
5110         int err;
5111 
5112         ret = cnt;
5113 
5114         if (cnt > MAX_TRACER_SIZE)
5115                 cnt = MAX_TRACER_SIZE;
5116 
5117         if (copy_from_user(buf, ubuf, cnt))
5118                 return -EFAULT;
5119 
5120         buf[cnt] = 0;
5121 
5122         /* strip trailing whitespace. */
5123         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5124                 buf[i] = 0;
5125 
5126         err = tracing_set_tracer(tr, buf);
5127         if (err)
5128                 return err;
5129 
5130         *ppos += ret;
5131 
5132         return ret;
5133 }
5134 
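/*
 * Example (user space): a minimal sketch of reaching the handlers above
 * through the "current_tracer" tracefs file.  Not part of trace.c; it assumes
 * tracefs is mounted at /sys/kernel/tracing and relies only on the always
 * present "nop" tracer.
 */
#if 0	/* user-space example only */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path = "/sys/kernel/tracing/current_tracer";
	char name[64] = "";
	FILE *f;

	f = fopen(path, "w");			/* tracing_set_trace_write() */
	if (!f)
		return 1;
	fputs("nop\n", f);			/* trailing whitespace is stripped */
	fclose(f);

	f = fopen(path, "r");			/* tracing_set_trace_read() */
	if (!f || !fgets(name, sizeof(name), f))
		return 1;
	fclose(f);
	printf("current tracer: %s", name);	/* the name already ends in '\n' */
	return strcmp(name, "nop\n") != 0;
}
#endif
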
5135 static ssize_t
5136 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5137                    size_t cnt, loff_t *ppos)
5138 {
5139         char buf[64];
5140         int r;
5141 
5142         r = snprintf(buf, sizeof(buf), "%ld\n",
5143                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5144         if (r > sizeof(buf))
5145                 r = sizeof(buf);
5146         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5147 }
5148 
5149 static ssize_t
5150 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5151                     size_t cnt, loff_t *ppos)
5152 {
5153         unsigned long val;
5154         int ret;
5155 
5156         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5157         if (ret)
5158                 return ret;
5159 
5160         *ptr = val * 1000;
5161 
5162         return cnt;
5163 }
5164 
5165 static ssize_t
5166 tracing_thresh_read(struct file *filp, char __user *ubuf,
5167                     size_t cnt, loff_t *ppos)
5168 {
5169         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5170 }
5171 
5172 static ssize_t
5173 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5174                      size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_array *tr = filp->private_data;
5177         int ret;
5178 
5179         mutex_lock(&trace_types_lock);
5180         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5181         if (ret < 0)
5182                 goto out;
5183 
5184         if (tr->current_trace->update_thresh) {
5185                 ret = tr->current_trace->update_thresh(tr);
5186                 if (ret < 0)
5187                         goto out;
5188         }
5189 
5190         ret = cnt;
5191 out:
5192         mutex_unlock(&trace_types_lock);
5193 
5194         return ret;
5195 }
5196 
5197 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5198 
5199 static ssize_t
5200 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5201                      size_t cnt, loff_t *ppos)
5202 {
5203         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5204 }
5205 
5206 static ssize_t
5207 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5208                       size_t cnt, loff_t *ppos)
5209 {
5210         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5211 }
5212 
5213 #endif
5214 
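/*
 * The helpers above back the "tracing_thresh" file (and, when built in, the
 * "tracing_max_latency" file).  Both values are exposed to user space in
 * microseconds: tracing_nsecs_write() stores val * 1000 as nanoseconds, and
 * tracing_nsecs_read() converts back with nsecs_to_usecs().
 */
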
5215 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5216 {
5217         struct trace_array *tr = inode->i_private;
5218         struct trace_iterator *iter;
5219         int ret = 0;
5220 
5221         if (tracing_disabled)
5222                 return -ENODEV;
5223 
5224         if (trace_array_get(tr) < 0)
5225                 return -ENODEV;
5226 
5227         mutex_lock(&trace_types_lock);
5228 
5229         /* create a buffer to store the information to pass to userspace */
5230         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5231         if (!iter) {
5232                 ret = -ENOMEM;
5233                 __trace_array_put(tr);
5234                 goto out;
5235         }
5236 
5237         trace_seq_init(&iter->seq);
5238         iter->trace = tr->current_trace;
5239 
5240         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5241                 ret = -ENOMEM;
5242                 goto fail;
5243         }
5244 
5245         /* trace pipe does not show start of buffer */
5246         cpumask_setall(iter->started);
5247 
5248         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5249                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5250 
5251         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5252         if (trace_clocks[tr->clock_id].in_ns)
5253                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5254 
5255         iter->tr = tr;
5256         iter->trace_buffer = &tr->trace_buffer;
5257         iter->cpu_file = tracing_get_cpu(inode);
5258         mutex_init(&iter->mutex);
5259         filp->private_data = iter;
5260 
5261         if (iter->trace->pipe_open)
5262                 iter->trace->pipe_open(iter);
5263 
5264         nonseekable_open(inode, filp);
5265 
5266         tr->current_trace->ref++;
5267 out:
5268         mutex_unlock(&trace_types_lock);
5269         return ret;
5270 
5271 fail:
5272         kfree(iter->trace);
5273         kfree(iter);
5274         __trace_array_put(tr);
5275         mutex_unlock(&trace_types_lock);
5276         return ret;
5277 }
5278 
5279 static int tracing_release_pipe(struct inode *inode, struct file *file)
5280 {
5281         struct trace_iterator *iter = file->private_data;
5282         struct trace_array *tr = inode->i_private;
5283 
5284         mutex_lock(&trace_types_lock);
5285 
5286         tr->current_trace->ref--;
5287 
5288         if (iter->trace->pipe_close)
5289                 iter->trace->pipe_close(iter);
5290 
5291         mutex_unlock(&trace_types_lock);
5292 
5293         free_cpumask_var(iter->started);
5294         mutex_destroy(&iter->mutex);
5295         kfree(iter);
5296 
5297         trace_array_put(tr);
5298 
5299         return 0;
5300 }
5301 
5302 static unsigned int
5303 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5304 {
5305         struct trace_array *tr = iter->tr;
5306 
5307         /* Iterators are static; they should be either filled or empty */
5308         if (trace_buffer_iter(iter, iter->cpu_file))
5309                 return POLLIN | POLLRDNORM;
5310 
5311         if (tr->trace_flags & TRACE_ITER_BLOCK)
5312                 /*
5313                  * Always select as readable when in blocking mode
5314                  */
5315                 return POLLIN | POLLRDNORM;
5316         else
5317                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5318                                              filp, poll_table);
5319 }
5320 
5321 static unsigned int
5322 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5323 {
5324         struct trace_iterator *iter = filp->private_data;
5325 
5326         return trace_poll(iter, filp, poll_table);
5327 }
5328 
5329 /* Must be called with iter->mutex held. */
5330 static int tracing_wait_pipe(struct file *filp)
5331 {
5332         struct trace_iterator *iter = filp->private_data;
5333         int ret;
5334 
5335         while (trace_empty(iter)) {
5336 
5337                 if ((filp->f_flags & O_NONBLOCK)) {
5338                         return -EAGAIN;
5339                 }
5340 
5341                 /*
5342                  * We block until we have read something and tracing has been
5343                  * disabled. If tracing is disabled but we have never read
5344                  * anything, we keep blocking; this allows a user to cat this
5345                  * file and then enable tracing. But after we have read
5346                  * something, we give an EOF when tracing is disabled again.
5347                  *
5348                  * iter->pos will be 0 if we haven't read anything.
5349                  */
5350                 if (!tracing_is_on() && iter->pos)
5351                         break;
5352 
5353                 mutex_unlock(&iter->mutex);
5354 
5355                 ret = wait_on_pipe(iter, false);
5356 
5357                 mutex_lock(&iter->mutex);
5358 
5359                 if (ret)
5360                         return ret;
5361         }
5362 
5363         return 1;
5364 }
5365 
5366 /*
5367  * Consumer reader.
5368  */
5369 static ssize_t
5370 tracing_read_pipe(struct file *filp, char __user *ubuf,
5371                   size_t cnt, loff_t *ppos)
5372 {
5373         struct trace_iterator *iter = filp->private_data;
5374         ssize_t sret;
5375 
5376         /*
5377          * Avoid more than one consumer on a single file descriptor.
5378          * This is just a matter of trace coherency; the ring buffer itself
5379          * is protected.
5380          */
5381         mutex_lock(&iter->mutex);
5382 
5383         /* return any leftover data */
5384         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5385         if (sret != -EBUSY)
5386                 goto out;
5387 
5388         trace_seq_init(&iter->seq);
5389 
5390         if (iter->trace->read) {
5391                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5392                 if (sret)
5393                         goto out;
5394         }
5395 
5396 waitagain:
5397         sret = tracing_wait_pipe(filp);
5398         if (sret <= 0)
5399                 goto out;
5400 
5401         /* stop when tracing is finished */
5402         if (trace_empty(iter)) {
5403                 sret = 0;
5404                 goto out;
5405         }
5406 
5407         if (cnt >= PAGE_SIZE)
5408                 cnt = PAGE_SIZE - 1;
5409 
5410         /* reset all but tr, trace, and overruns */
5411         memset(&iter->seq, 0,
5412                sizeof(struct trace_iterator) -
5413                offsetof(struct trace_iterator, seq));
5414         cpumask_clear(iter->started);
5415         iter->pos = -1;
5416 
5417         trace_event_read_lock();
5418         trace_access_lock(iter->cpu_file);
5419         while (trace_find_next_entry_inc(iter) != NULL) {
5420                 enum print_line_t ret;
5421                 int save_len = iter->seq.seq.len;
5422 
5423                 ret = print_trace_line(iter);
5424                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5425                         /* don't print partial lines */
5426                         iter->seq.seq.len = save_len;
5427                         break;
5428                 }
5429                 if (ret != TRACE_TYPE_NO_CONSUME)
5430                         trace_consume(iter);
5431 
5432                 if (trace_seq_used(&iter->seq) >= cnt)
5433                         break;
5434 
5435                 /*
5436                  * If the full flag is set, we reached the end of the trace_seq
5437                  * buffer and should have left via the partial output condition
5438                  * above; one of the trace_seq_* functions is not being used properly.
5439                  */
5440                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5441                           iter->ent->type);
5442         }
5443         trace_access_unlock(iter->cpu_file);
5444         trace_event_read_unlock();
5445 
5446         /* Now copy what we have to the user */
5447         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5448         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5449                 trace_seq_init(&iter->seq);
5450 
5451         /*
5452          * If there was nothing to send to user, in spite of consuming trace
5453          * entries, go back to wait for more entries.
5454          */
5455         if (sret == -EBUSY)
5456                 goto waitagain;
5457 
5458 out:
5459         mutex_unlock(&iter->mutex);
5460 
5461         return sret;
5462 }
5463 
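/*
 * Example (user space): a minimal consuming reader for the "trace_pipe" file
 * backed by the pipe handlers above.  read() blocks until trace data arrives
 * (or fails with EAGAIN under O_NONBLOCK), and entries handed to user space
 * are consumed from the ring buffer.  Not part of trace.c; it assumes tracefs
 * is mounted at /sys/kernel/tracing.
 */
#if 0	/* user-space example only */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0)
		return 1;
	/* Copy formatted trace lines to stdout as they are produced. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		if (write(STDOUT_FILENO, buf, n) < 0)
			break;
	close(fd);
	return 0;
}
#endif
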
5464 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5465                                      unsigned int idx)
5466 {
5467         __free_page(spd->pages[idx]);
5468 }
5469 
5470 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5471         .can_merge              = 0,
5472         .confirm                = generic_pipe_buf_confirm,
5473         .release                = generic_pipe_buf_release,
5474         .steal                  = generic_pipe_buf_steal,
5475         .get                    = generic_pipe_buf_get,
5476 };
5477 
5478 static size_t
5479 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5480 {
5481         size_t count;
5482         int save_len;
5483         int ret;
5484 
5485         /* Seq buffer is page-sized, exactly what we need. */
5486         for (;;) {
5487                 save_len = iter->seq.seq.len;
5488                 ret = print_trace_line(iter);
5489 
5490                 if (trace_seq_has_overflowed(&iter->seq)) {
5491                         iter->seq.seq.len = save_len;
5492                         break;
5493                 }
5494 
5495                 /*
5496                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
5497                  * only be returned if the iter->seq overflowed. But check it
5498                  * anyway to be safe.
5499                  */
5500                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5501                         iter->seq.seq.len = save_len;
5502                         break;
5503                 }
5504 
5505                 count = trace_seq_used(&iter->seq) - save_len;
5506                 if (rem < count) {
5507                         rem = 0;
5508                         iter->seq.seq.len = save_len;
5509                         break;
5510                 }
5511 
5512                 if (ret != TRACE_TYPE_NO_CONSUME)
5513                         trace_consume(iter);
5514                 rem -= count;
5515                 if (!trace_find_next_entry_inc(iter))   {
5516                         rem = 0;
5517                         iter->ent = NULL;
5518                         break;
5519                 }
5520         }
5521 
5522         return rem;
5523 }
5524 
5525 static ssize_t tracing_splice_read_pipe(struct file *filp,
5526                                         loff_t *ppos,
5527                                         struct pipe_inode_info *pipe,
5528                                         size_t len,
5529                                         unsigned int flags)
5530 {
5531         struct page *pages_def[PIPE_DEF_BUFFERS];
5532         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5533         struct trace_iterator *iter = filp->private_data;
5534         struct splice_pipe_desc spd = {
5535                 .pages          = pages_def,
5536                 .partial        = partial_def,
5537                 .nr_pages       = 0, /* This gets updated below. */
5538                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5539                 .flags          = flags,
5540                 .ops            = &tracing_pipe_buf_ops,
5541                 .spd_release    = tracing_spd_release_pipe,
5542         };
5543         ssize_t ret;
5544         size_t rem;
5545         unsigned int i;
5546 
5547         if (splice_grow_spd(pipe, &spd))
5548                 return -ENOMEM;
5549 
5550         mutex_lock(&iter->mutex);
5551 
5552         if (iter->trace->splice_read) {
5553                 ret = iter->trace->splice_read(iter, filp,
5554                                                ppos, pipe, len, flags);
5555                 if (ret)
5556                         goto out_err;
5557         }
5558 
5559         ret = tracing_wait_pipe(filp);
5560         if (ret <= 0)
5561                 goto out_err;
5562 
5563         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5564                 ret = -EFAULT;
5565                 goto out_err;
5566         }
5567 
5568         trace_event_read_lock();
5569         trace_access_lock(iter->cpu_file);
5570 
5571         /* Fill as many pages as possible. */
5572         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5573                 spd.pages[i] = alloc_page(GFP_KERNEL);
5574                 if (!spd.pages[i])
5575                         break;
5576 
5577                 rem = tracing_fill_pipe_page(rem, iter);
5578 
5579                 /* Copy the data into the page, so we can start over. */
5580                 ret = trace_seq_to_buffer(&iter->seq,
5581                                           page_address(spd.pages[i]),
5582                                           trace_seq_used(&iter->seq));
5583                 if (ret < 0) {
5584                         __free_page(spd.pages[i]);
5585                         break;
5586                 }
5587                 spd.partial[i].offset = 0;
5588                 spd.partial[i].len = trace_seq_used(&iter->seq);
5589 
5590                 trace_seq_init(&iter->seq);
5591         }
5592 
5593         trace_access_unlock(iter->cpu_file);
5594         trace_event_read_unlock();
5595         mutex_unlock(&iter->mutex);
5596 
5597         spd.nr_pages = i;
5598 
5599         if (i)
5600                 ret = splice_to_pipe(pipe, &spd);
5601         else
5602                 ret = 0;
5603 out:
5604         splice_shrink_spd(&spd);
5605         return ret;
5606 
5607 out_err:
5608         mutex_unlock(&iter->mutex);
5609         goto out;
5610 }
5611 
5612 static ssize_t
5613 tracing_entries_read(struct file *filp, char __user *ubuf,
5614                      size_t cnt, loff_t *ppos)
5615 {
5616         struct inode *inode = file_inode(filp);
5617         struct trace_array *tr = inode->i_private;
5618         int cpu = tracing_get_cpu(inode);
5619         char buf[64];
5620         int r = 0;
5621         ssize_t ret;
5622 
5623         mutex_lock(&trace_types_lock);
5624 
5625         if (cpu == RING_BUFFER_ALL_CPUS) {
5626                 int cpu, buf_size_same;
5627                 unsigned long size;
5628 
5629                 size = 0;
5630                 buf_size_same = 1;
5631                 /* check if all cpu sizes are same */
5632                 for_each_tracing_cpu(cpu) {
5633                         /* fill in the size from first enabled cpu */
5634                         if (size == 0)
5635                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5636                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5637                                 buf_size_same = 0;
5638                                 break;
5639                         }
5640                 }
5641 
5642                 if (buf_size_same) {
5643                         if (!ring_buffer_expanded)
5644                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5645                                             size >> 10,
5646                                             trace_buf_size >> 10);
5647                         else
5648                                 r = sprintf(buf, "%lu\n", size >> 10);
5649                 } else
5650                         r = sprintf(buf, "X\n");
5651         } else
5652                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5653 
5654         mutex_unlock(&trace_types_lock);
5655 
5656         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5657         return ret;
5658 }
5659 
5660 static ssize_t
5661 tracing_entries_write(struct file *filp, const char __user *ubuf,
5662                       size_t cnt, loff_t *ppos)
5663 {
5664         struct inode *inode = file_inode(filp);
5665         struct trace_array *tr = inode->i_private;
5666         unsigned long val;
5667         int ret;
5668 
5669         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5670         if (ret)
5671                 return ret;
5672 
5673         /* must have at least 1 entry */
5674         if (!val)
5675                 return -EINVAL;
5676 
5677         /* value is in KB */
5678         val <<= 10;
5679         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5680         if (ret < 0)
5681                 return ret;
5682 
5683         *ppos += cnt;
5684 
5685         return cnt;
5686 }
5687 
5688 static ssize_t
5689 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5690                                 size_t cnt, loff_t *ppos)
5691 {
5692         struct trace_array *tr = filp->private_data;
5693         char buf[64];
5694         int r, cpu;
5695         unsigned long size = 0, expanded_size = 0;
5696 
5697         mutex_lock(&trace_types_lock);
5698         for_each_tracing_cpu(cpu) {
5699                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5700                 if (!ring_buffer_expanded)
5701                         expanded_size += trace_buf_size >> 10;
5702         }
5703         if (ring_buffer_expanded)
5704                 r = sprintf(buf, "%lu\n", size);
5705         else
5706                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5707         mutex_unlock(&trace_types_lock);
5708 
5709         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5710 }
5711 
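/*
 * Example (user space): a minimal sketch of resizing the ring buffer through
 * "buffer_size_kb" (a per-cpu size in KiB) and reading the sum back from
 * "buffer_total_size_kb", both served by the entries handlers above.  A single
 * cpu can be resized via per_cpu/cpuN/buffer_size_kb.  Not part of trace.c;
 * it assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0	/* user-space example only */
#include <stdio.h>

int main(void)
{
	unsigned long total_kb = 0;
	FILE *f;

	f = fopen("/sys/kernel/tracing/buffer_size_kb", "w");
	if (!f)
		return 1;
	fprintf(f, "4096\n");			/* 4 MiB per cpu */
	fclose(f);

	f = fopen("/sys/kernel/tracing/buffer_total_size_kb", "r");
	if (!f || fscanf(f, "%lu", &total_kb) != 1)
		return 1;
	fclose(f);
	printf("total ring buffer size: %lu KiB\n", total_kb);
	return 0;
}
#endif
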
5712 static ssize_t
5713 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5714                           size_t cnt, loff_t *ppos)
5715 {
5716         /*
5717          * There is no need to read what the user has written; this function
5718          * just makes sure that there is no error when "echo" is used.
5719          */
5720 
5721         *ppos += cnt;
5722 
5723         return cnt;
5724 }
5725 
5726 static int
5727 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5728 {
5729         struct trace_array *tr = inode->i_private;
5730 
5731         /* disable tracing? */
5732         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5733                 tracer_tracing_off(tr);
5734         /* resize the ring buffer to 0 */
5735         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5736 
5737         trace_array_put(tr);
5738 
5739         return 0;
5740 }
5741 
5742 static ssize_t
5743 tracing_mark_write(struct file *filp, const char __user *ubuf,
5744                                         size_t cnt, loff_t *fpos)
5745 {
5746         struct trace_array *tr = filp->private_data;
5747         struct ring_buffer_event *event;
5748         struct ring_buffer *buffer;
5749         struct print_entry *entry;
5750         unsigned long irq_flags;
5751         const char faulted[] = "<faulted>";
5752         ssize_t written;
5753         int size;
5754         int len;
5755 
5756 /* Used in tracing_mark_raw_write() as well */
5757 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5758 
5759         if (tracing_disabled)
5760                 return -EINVAL;
5761 
5762         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5763                 return -EINVAL;
5764 
5765         if (cnt > TRACE_BUF_SIZE)
5766                 cnt = TRACE_BUF_SIZE;
5767 
5768         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5769 
5770         local_save_flags(irq_flags);
5771         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5772 
5773         /* If cnt is less than "<faulted>", make sure we can still add that string */
5774         if (cnt < FAULTED_SIZE)
5775                 size += FAULTED_SIZE - cnt;
5776 
5777         buffer = tr->trace_buffer.buffer;
5778         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5779                                             irq_flags, preempt_count());
5780         if (unlikely(!event))
5781                 /* Ring buffer disabled, return as if not open for write */
5782                 return -EBADF;
5783 
5784         entry = ring_buffer_event_data(event);
5785         entry->ip = _THIS_IP_;
5786 
5787         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5788         if (len) {
5789                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5790                 cnt = FAULTED_SIZE;
5791                 written = -EFAULT;
5792         } else
5793                 written = cnt;
5794         len = cnt;
5795 
5796         if (entry->buf[cnt - 1] != '\n') {
5797                 entry->buf[cnt] = '\n';
5798                 entry->buf[cnt + 1] = '\0';
5799         } else
5800                 entry->buf[cnt] = '\0';
5801 
5802         __buffer_unlock_commit(buffer, event);
5803 
5804         if (written > 0)
5805                 *fpos += written;
5806 
5807         return written;
5808 }
5809 
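/*
 * Example (user space): a minimal sketch of writing an annotation through the
 * "trace_marker" file, which lands in tracing_mark_write() above as a
 * TRACE_PRINT event (a '\n' is appended when missing and writes are truncated
 * to TRACE_BUF_SIZE).  Not part of trace.c; it assumes tracefs is mounted at
 * /sys/kernel/tracing.
 */
#if 0	/* user-space example only */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello from user space";
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, msg, strlen(msg)) < 0)	/* shows up in the trace output */
		return 1;
	close(fd);
	return 0;
}
#endif
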
5810 /* Limit it for now to 3K (including tag) */
5811 #define RAW_DATA_MAX_SIZE (1024*3)
5812 
5813 static ssize_t
5814 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5815                                         size_t cnt, loff_t *fpos)
5816 {
5817         struct trace_array *tr = filp->private_data;
5818         struct ring_buffer_event *event;
5819         struct ring_buffer *buffer;
5820         struct raw_data_entry *entry;
5821         const char faulted[] = "<faulted>";
5822         unsigned long irq_flags;
5823         ssize_t written;
5824         int size;
5825         int len;
5826 
5827 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5828 
5829         if (tracing_disabled)
5830                 return -EINVAL;
5831 
5832         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5833                 return -EINVAL;
5834 
5835         /* The marker must at least have a tag id */
5836         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5837                 return -EINVAL;
5838 
5839         if (cnt > TRACE_BUF_SIZE)
5840                 cnt = TRACE_BUF_SIZE;
5841 
5842         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5843 
5844         local_save_flags(irq_flags);
5845         size = sizeof(*entry) + cnt;
5846         if (cnt < FAULT_SIZE_ID)
5847                 size += FAULT_SIZE_ID - cnt;
5848 
5849         buffer = tr->trace_buffer.buffer;
5850         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5851                                             irq_flags, preempt_count());
5852         if (!event)
5853                 /* Ring buffer disabled, return as if not open for write */
5854                 return -EBADF;
5855 
5856         entry = ring_buffer_event_data(event);
5857 
5858         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5859         if (len) {
5860                 entry->id = -1;
5861                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5862                 written = -EFAULT;
5863         } else
5864                 written = cnt;
5865 
5866         __buffer_unlock_commit(buffer, event);
5867 
5868         if (written > 0)
5869                 *fpos += written;
5870 
5871         return written;
5872 }
5873 
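/*
 * Example (user space): a minimal sketch of the binary format expected by
 * tracing_mark_raw_write() above via the "trace_marker_raw" file: the payload
 * must start with an unsigned int tag id (copied into entry->id), may be
 * followed by opaque data, and is rejected if larger than RAW_DATA_MAX_SIZE.
 * Not part of trace.c; it assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0	/* user-space example only */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct {
		unsigned int id;	/* tag id consumed into entry->id */
		char data[32];		/* opaque payload for entry->buf */
	} rec = { .id = 42 };
	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

	if (fd < 0)
		return 1;
	strcpy(rec.data, "raw payload");
	if (write(fd, &rec, sizeof(rec.id) + strlen(rec.data)) < 0)
		return 1;
	close(fd);
	return 0;
}
#endif
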
5874 static int tracing_clock_show(struct seq_file *m, void *v)
5875 {
5876         struct trace_array *tr = m->private;
5877         int i;
5878 
5879         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5880                 seq_printf(m,
5881                         "%s%s%s%s", i ? " " : "",
5882                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5883                         i == tr->clock_id ? "]" : "");
5884         seq_putc(m, '\n');
5885 
5886         return 0;
5887 }
5888 
5889 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5890 {
5891         int i;
5892 
5893         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5894                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5895                         break;
5896         }
5897         if (i == ARRAY_SIZE(trace_clocks))
5898                 return -EINVAL;
5899 
5900         mutex_lock(&trace_types_lock);
5901 
5902         tr->clock_id = i;
5903 
5904         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5905 
5906         /*
5907          * New clock may not be consistent with the previous clock.
5908          * Reset the buffer so that it doesn't have incomparable timestamps.
5909          */
5910         tracing_reset_online_cpus(&tr->trace_buffer);
5911 
5912 #ifdef CONFIG_TRACER_MAX_TRACE
5913         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5914                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5915         tracing_reset_online_cpus(&tr->max_buffer);
5916 #endif
5917 
5918         mutex_unlock(&trace_types_lock);
5919 
5920         return 0;
5921 }
5922 
5923 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5924                                    size_t cnt, loff_t *fpos)
5925 {
5926         struct seq_file *m = filp->private_data;
5927         struct trace_array *tr = m->private;
5928         char buf[64];
5929         const char *clockstr;
5930         int ret;
5931 
5932         if (cnt >= sizeof(buf))
5933                 return -EINVAL;
5934 
5935         if (copy_from_user(buf, ubuf, cnt))
5936                 return -EFAULT;
5937 
5938         buf[cnt] = 0;
5939 
5940         clockstr = strstrip(buf);
5941 
5942         ret = tracing_set_clock(tr, clockstr);
5943         if (ret)
5944                 return ret;
5945 
5946         *fpos += cnt;
5947 
5948         return cnt;
5949 }
5950 
5951 static int tracing_clock_open(struct inode *inode, struct file *file)
5952 {
5953         struct trace_array *tr = inode->i_private;
5954         int ret;
5955 
5956         if (tracing_disabled)
5957                 return -ENODEV;
5958 
5959         if (trace_array_get(tr))
5960                 return -ENODEV;
5961 
5962         ret = single_open(file, tracing_clock_show, inode->i_private);
5963         if (ret < 0)
5964                 trace_array_put(tr);
5965 
5966         return ret;
5967 }
5968 
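/*
 * The clock handlers above back the "trace_clock" tracefs file: reading it
 * lists the clocks from trace_clocks[] with the current one in brackets, and
 * writing a clock name switches the trace clock and resets the per-cpu
 * buffers so that timestamps remain comparable.
 */
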
5969 struct ftrace_buffer_info {
5970         struct trace_iterator   iter;
5971         void                    *spare;
5972         unsigned int            read;
5973 };
5974 
5975 #ifdef CONFIG_TRACER_SNAPSHOT
5976 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5977 {
5978         struct trace_array *tr = inode->i_private;
5979         struct trace_iterator *iter;
5980         struct seq_file *m;
5981         int ret = 0;
5982 
5983         if (trace_array_get(tr) < 0)