TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c


  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * ring buffer based function tracer
  4  *
  5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  7  *
  8  * Originally taken from the RT patch by:
  9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
 10  *
 11  * Based on code from the latency_tracer, that is:
 12  *  Copyright (C) 2004-2006 Ingo Molnar
 13  *  Copyright (C) 2004 Nadia Yvette Chambers
 14  */
 15 #include <linux/ring_buffer.h>
 16 #include <generated/utsrelease.h>
 17 #include <linux/stacktrace.h>
 18 #include <linux/writeback.h>
 19 #include <linux/kallsyms.h>
 20 #include <linux/seq_file.h>
 21 #include <linux/notifier.h>
 22 #include <linux/irqflags.h>
 23 #include <linux/debugfs.h>
 24 #include <linux/tracefs.h>
 25 #include <linux/pagemap.h>
 26 #include <linux/hardirq.h>
 27 #include <linux/linkage.h>
 28 #include <linux/uaccess.h>
 29 #include <linux/vmalloc.h>
 30 #include <linux/ftrace.h>
 31 #include <linux/module.h>
 32 #include <linux/percpu.h>
 33 #include <linux/splice.h>
 34 #include <linux/kdebug.h>
 35 #include <linux/string.h>
 36 #include <linux/mount.h>
 37 #include <linux/rwsem.h>
 38 #include <linux/slab.h>
 39 #include <linux/ctype.h>
 40 #include <linux/init.h>
 41 #include <linux/poll.h>
 42 #include <linux/nmi.h>
 43 #include <linux/fs.h>
 44 #include <linux/trace.h>
 45 #include <linux/sched/clock.h>
 46 #include <linux/sched/rt.h>
 47 
 48 #include "trace.h"
 49 #include "trace_output.h"
 50 
 51 /*
 52  * On boot up, the ring buffer is set to the minimum size, so that
 53  * we do not waste memory on systems that are not using tracing.
 54  */
 55 bool ring_buffer_expanded;
 56 
 57 /*
 58  * We need to change this state when a selftest is running.
 59  * A selftest will look into the ring buffer to count the
 60  * entries inserted during the selftest, although concurrent
 61  * insertions into the ring buffer, such as trace_printk(), could
 62  * occur at the same time, giving false positive or negative results.
 63  */
 64 static bool __read_mostly tracing_selftest_running;
 65 
 66 /*
 67  * If a tracer is running, we do not want to run SELFTEST.
 68  */
 69 bool __read_mostly tracing_selftest_disabled;
 70 
 71 /* Pipe tracepoints to printk */
 72 struct trace_iterator *tracepoint_print_iter;
 73 int tracepoint_printk;
 74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 75 
 76 /* For tracers that don't implement custom flags */
 77 static struct tracer_opt dummy_tracer_opt[] = {
 78         { }
 79 };
 80 
 81 static int
 82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 83 {
 84         return 0;
 85 }
 86 
 87 /*
 88  * To prevent the comm cache from being overwritten when no
 89  * tracing is active, only save the comm when a trace event
 90  * occurred.
 91  */
 92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 93 
 94 /*
 95  * Kill all tracing for good (never come back).
 96  * It is initialized to 1 but will turn to zero if the initialization
 97  * of the tracer is successful. But that is the only place that sets
 98  * this back to zero.
 99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * capturing traces that lead to crashes and outputting them to a
112  * serial console.
113  *
114  * It is off by default, but you can enable it either by specifying
115  * "ftrace_dump_on_oops" on the kernel command line or by setting
116  * /proc/sys/kernel/ftrace_dump_on_oops. Set it to 1 to dump the buffers
117  * of all CPUs, or to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163                                    unsigned long flags, int pc);
164 
165 #define MAX_TRACER_SIZE         100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174         default_bootup_tracer = bootup_tracer_buf;
175         /* We are using ftrace early, expand it */
176         ring_buffer_expanded = true;
177         return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183         if (*str++ != '=' || !*str) {
184                 ftrace_dump_on_oops = DUMP_ALL;
185                 return 1;
186         }
187 
188         if (!strcmp("orig_cpu", str)) {
189                 ftrace_dump_on_oops = DUMP_ORIG;
190                 return 1;
191         }
192 
193         return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
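/*
 * Illustrative note (not part of the original source): with the parser above,
 * booting with "ftrace_dump_on_oops" selects DUMP_ALL (dump every CPU's
 * buffer), while "ftrace_dump_on_oops=orig_cpu" selects DUMP_ORIG (dump only
 * the buffer of the CPU that oopsed); any other "=value" is rejected.
 */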
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200                 __disable_trace_on_warning = 1;
201         return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207         allocate_snapshot = true;
208         /* We also need the main ring buffer expanded */
209         ring_buffer_expanded = true;
210         return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220         return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230         trace_boot_clock = trace_boot_clock_buf;
231         return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238                 tracepoint_printk = 1;
239         return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(u64 nsec)
244 {
245         nsec += 500;
246         do_div(nsec, 1000);
247         return nsec;
248 }
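/*
 * Illustrative note (not part of the original source): the "+ 500" rounds to
 * the nearest microsecond before the divide, e.g. ns2usecs(1499) == 1 and
 * ns2usecs(1501) == 2.
 */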
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS                                             \
252         (FUNCTION_DEFAULT_FLAGS |                                       \
253          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
254          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
255          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
256          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
260                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265 
266 /*
267  * The global_trace is the descriptor that holds the top-level tracing
268  * buffers for the live tracing.
269  */
270 static struct trace_array global_trace = {
271         .trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273 
274 LIST_HEAD(ftrace_trace_arrays);
275 
276 int trace_array_get(struct trace_array *this_tr)
277 {
278         struct trace_array *tr;
279         int ret = -ENODEV;
280 
281         mutex_lock(&trace_types_lock);
282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283                 if (tr == this_tr) {
284                         tr->ref++;
285                         ret = 0;
286                         break;
287                 }
288         }
289         mutex_unlock(&trace_types_lock);
290 
291         return ret;
292 }
293 
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296         WARN_ON(!this_tr->ref);
297         this_tr->ref--;
298 }
299 
300 void trace_array_put(struct trace_array *this_tr)
301 {
302         mutex_lock(&trace_types_lock);
303         __trace_array_put(this_tr);
304         mutex_unlock(&trace_types_lock);
305 }
306 
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308                               struct ring_buffer *buffer,
309                               struct ring_buffer_event *event)
310 {
311         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312             !filter_match_preds(call->filter, rec)) {
313                 __trace_event_discard_commit(buffer, event);
314                 return 1;
315         }
316 
317         return 0;
318 }
319 
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322         vfree(pid_list->pids);
323         kfree(pid_list);
324 }
325 
326 /**
327  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328  * @filtered_pids: The list of pids to check
329  * @search_pid: The PID to find in @filtered_pids
330  *
 331  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
332  */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336         /*
337          * If pid_max changed after filtered_pids was created, we
338          * by default ignore all pids greater than the previous pid_max.
339          */
340         if (search_pid >= filtered_pids->pid_max)
341                 return false;
342 
343         return test_bit(search_pid, filtered_pids->pids);
344 }
345 
346 /**
347  * trace_ignore_this_task - should a task be ignored for tracing
348  * @filtered_pids: The list of pids to check
349  * @task: The task that should be ignored if not filtered
350  *
351  * Checks if @task should be traced or not from @filtered_pids.
352  * Returns true if @task should *NOT* be traced.
353  * Returns false if @task should be traced.
354  */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358         /*
359          * Return false, because if filtered_pids does not exist,
360          * all pids are good to trace.
361          */
362         if (!filtered_pids)
363                 return false;
364 
365         return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367 
368 /**
369  * trace_filter_add_remove_task - Add or remove a task from a pid_list
370  * @pid_list: The list to modify
371  * @self: The current task for fork or NULL for exit
372  * @task: The task to add or remove
373  *
374  * If adding a task, if @self is defined, the task is only added if @self
375  * is also included in @pid_list. This happens on fork and tasks should
376  * only be added when the parent is listed. If @self is NULL, then the
377  * @task pid will be removed from the list, which would happen on exit
378  * of a task.
379  */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381                                   struct task_struct *self,
382                                   struct task_struct *task)
383 {
384         if (!pid_list)
385                 return;
386 
387         /* For forks, we only add if the forking task is listed */
388         if (self) {
389                 if (!trace_find_filtered_pid(pid_list, self->pid))
390                         return;
391         }
392 
393         /* Sorry, but we don't support pid_max changing after setting */
394         if (task->pid >= pid_list->pid_max)
395                 return;
396 
397         /* "self" is set for forks, and NULL for exits */
398         if (self)
399                 set_bit(task->pid, pid_list->pids);
400         else
401                 clear_bit(task->pid, pid_list->pids);
402 }
403 
404 /**
405  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406  * @pid_list: The pid list to show
 407  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
408  * @pos: The position of the file
409  *
410  * This is used by the seq_file "next" operation to iterate the pids
411  * listed in a trace_pid_list structure.
412  *
413  * Returns the pid+1 as we want to display pid of zero, but NULL would
414  * stop the iteration.
415  */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418         unsigned long pid = (unsigned long)v;
419 
420         (*pos)++;
421 
 422         /* pid already is +1 of the actual previous bit */
423         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424 
425         /* Return pid + 1 to allow zero to be represented */
426         if (pid < pid_list->pid_max)
427                 return (void *)(pid + 1);
428 
429         return NULL;
430 }
431 
432 /**
433  * trace_pid_start - Used for seq_file to start reading pid lists
434  * @pid_list: The pid list to show
435  * @pos: The position of the file
436  *
437  * This is used by seq_file "start" operation to start the iteration
438  * of listing pids.
439  *
440  * Returns the pid+1 as we want to display pid of zero, but NULL would
441  * stop the iteration.
442  */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445         unsigned long pid;
446         loff_t l = 0;
447 
448         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449         if (pid >= pid_list->pid_max)
450                 return NULL;
451 
452         /* Return pid + 1 so that zero can be the exit value */
453         for (pid++; pid && l < *pos;
454              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455                 ;
456         return (void *)pid;
457 }
458 
459 /**
460  * trace_pid_show - show the current pid in seq_file processing
461  * @m: The seq_file structure to write into
462  * @v: A void pointer of the pid (+1) value to display
463  *
464  * Can be directly used by seq_file operations to display the current
465  * pid value.
466  */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469         unsigned long pid = (unsigned long)v - 1;
470 
471         seq_printf(m, "%lu\n", pid);
472         return 0;
473 }
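/*
 * Illustrative sketch (not part of the original source): trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are meant to back a seq_file
 * iterator. A minimal, hypothetical wiring (ignoring the locking and RCU
 * handling the real users add) could look like this:
 */
#if 0	/* example only */
static struct trace_pid_list *example_pid_list;	/* hypothetical */

static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};
#endif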
474 
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE            127
477 
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479                     struct trace_pid_list **new_pid_list,
480                     const char __user *ubuf, size_t cnt)
481 {
482         struct trace_pid_list *pid_list;
483         struct trace_parser parser;
484         unsigned long val;
485         int nr_pids = 0;
486         ssize_t read = 0;
487         ssize_t ret = 0;
488         loff_t pos;
489         pid_t pid;
490 
491         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492                 return -ENOMEM;
493 
494         /*
 495          * Always create a new array; the write is an all-or-nothing
 496          * operation. A new array is built whenever the user adds new
 497          * pids, so that if the operation fails, the current list is
 498          * not modified.
499          */
500         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501         if (!pid_list) {
502                 trace_parser_put(&parser);
503                 return -ENOMEM;
504         }
505 
506         pid_list->pid_max = READ_ONCE(pid_max);
507 
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511 
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 trace_parser_put(&parser);
515                 kfree(pid_list);
516                 return -ENOMEM;
517         }
518 
519         if (filtered_pids) {
520                 /* copy the current bits to the new max */
521                 for_each_set_bit(pid, filtered_pids->pids,
522                                  filtered_pids->pid_max) {
523                         set_bit(pid, pid_list->pids);
524                         nr_pids++;
525                 }
526         }
527 
528         while (cnt > 0) {
529 
530                 pos = 0;
531 
532                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
533                 if (ret < 0 || !trace_parser_loaded(&parser))
534                         break;
535 
536                 read += ret;
537                 ubuf += ret;
538                 cnt -= ret;
539 
540                 ret = -EINVAL;
541                 if (kstrtoul(parser.buffer, 0, &val))
542                         break;
543                 if (val >= pid_list->pid_max)
544                         break;
545 
546                 pid = (pid_t)val;
547 
548                 set_bit(pid, pid_list->pids);
549                 nr_pids++;
550 
551                 trace_parser_clear(&parser);
552                 ret = 0;
553         }
554         trace_parser_put(&parser);
555 
556         if (ret < 0) {
557                 trace_free_pid_list(pid_list);
558                 return ret;
559         }
560 
561         if (!nr_pids) {
562                 /* Cleared the list of pids */
563                 trace_free_pid_list(pid_list);
564                 read = ret;
565                 pid_list = NULL;
566         }
567 
568         *new_pid_list = pid_list;
569 
570         return read;
571 }
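/*
 * Illustrative note (not part of the original source): this is the backend
 * for the tracefs pid-filter files (for example set_event_pid), so a write
 * such as "echo 123 456 > set_event_pid" builds a brand new list; if
 * anything in the write fails, the previously installed list is left
 * untouched.
 */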
572 
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575         u64 ts;
576 
577         /* Early boot up does not have a buffer yet */
578         if (!buf->buffer)
579                 return trace_clock_local();
580 
581         ts = ring_buffer_time_stamp(buf->buffer, cpu);
582         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583 
584         return ts;
585 }
586 
587 u64 ftrace_now(int cpu)
588 {
589         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591 
592 /**
593  * tracing_is_enabled - Show if global_trace has been disabled
594  *
595  * Shows if the global trace has been enabled or not. It uses the
 596  * mirror flag "buffer_disabled" so it can be used in fast paths such as
 597  * by the irqsoff tracer, but it may be inaccurate due to races. If you
598  * need to know the accurate state, use tracing_is_on() which is a little
599  * slower, but accurate.
600  */
601 int tracing_is_enabled(void)
602 {
603         /*
604          * For quick access (irqsoff uses this in fast path), just
605          * return the mirror variable of the state of the ring buffer.
606          * It's a little racy, but we don't really care.
607          */
608         smp_rmb();
609         return !global_trace.buffer_disabled;
610 }
611 
612 /*
613  * trace_buf_size is the size in bytes that is allocated
614  * for a buffer. Note, the number of bytes is always rounded
615  * to page size.
616  *
617  * This number is purposely set to a low number of 16384.
 618  * If a dump on oops happens, it is much appreciated not to have
 619  * to wait for all that output. In any case, this can be configured
 620  * at both boot time and run time.
621  */
622 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
623 
624 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625 
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer            *trace_types __read_mostly;
628 
629 /*
630  * trace_types_lock is used to protect the trace_types list.
631  */
632 DEFINE_MUTEX(trace_types_lock);
633 
634 /*
 635  * Serialize access to the ring buffer.
 636  *
 637  * The ring buffer serializes readers, but that is only low level protection.
 638  * The validity of the events (returned by ring_buffer_peek() etc.)
 639  * is not protected by the ring buffer.
 640  *
 641  * The content of events may become garbage if we allow other processes to
 642  * consume these events concurrently:
 643  *   A) the page of the consumed events may become a normal page
 644  *      (not a reader page) in the ring buffer, and this page will be
 645  *      rewritten by the event producer.
 646  *   B) the page of the consumed events may become a page for splice_read,
 647  *      and this page will be returned to the system.
 648  *
 649  * These primitives allow multiple processes to access different cpu ring
 650  * buffers concurrently.
 651  *
 652  * These primitives don't distinguish read-only and read-consume access.
 653  * Multiple read-only accesses are also serialized.
654  */
655 
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659 
660 static inline void trace_access_lock(int cpu)
661 {
662         if (cpu == RING_BUFFER_ALL_CPUS) {
663                 /* gain it for accessing the whole ring buffer. */
664                 down_write(&all_cpu_access_lock);
665         } else {
666                 /* gain it for accessing a cpu ring buffer. */
667 
668                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669                 down_read(&all_cpu_access_lock);
670 
671                 /* Secondly block other access to this @cpu ring buffer. */
672                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
673         }
674 }
675 
676 static inline void trace_access_unlock(int cpu)
677 {
678         if (cpu == RING_BUFFER_ALL_CPUS) {
679                 up_write(&all_cpu_access_lock);
680         } else {
681                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682                 up_read(&all_cpu_access_lock);
683         }
684 }
685 
686 static inline void trace_access_lock_init(void)
687 {
688         int cpu;
689 
690         for_each_possible_cpu(cpu)
691                 mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693 
694 #else
695 
696 static DEFINE_MUTEX(access_lock);
697 
698 static inline void trace_access_lock(int cpu)
699 {
700         (void)cpu;
701         mutex_lock(&access_lock);
702 }
703 
704 static inline void trace_access_unlock(int cpu)
705 {
706         (void)cpu;
707         mutex_unlock(&access_lock);
708 }
709 
710 static inline void trace_access_lock_init(void)
711 {
712 }
713 
714 #endif
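/*
 * Illustrative sketch (not part of the original source): a per-cpu reader is
 * expected to bracket its buffer accesses like this (hypothetical function,
 * error handling omitted):
 */
#if 0	/* example only */
static void example_read_cpu_buffer(struct trace_array *tr, int cpu)
{
	trace_access_lock(cpu);		/* also excludes RING_BUFFER_ALL_CPUS readers */
	/* ... consume events from tr->trace_buffer.buffer for @cpu ... */
	trace_access_unlock(cpu);
}
#endif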
715 
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718                                  unsigned long flags,
719                                  int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721                                       struct ring_buffer *buffer,
722                                       unsigned long flags,
723                                       int skip, int pc, struct pt_regs *regs);
724 
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727                                         unsigned long flags,
728                                         int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732                                       struct ring_buffer *buffer,
733                                       unsigned long flags,
734                                       int skip, int pc, struct pt_regs *regs)
735 {
736 }
737 
738 #endif
739 
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742                   int type, unsigned long flags, int pc)
743 {
744         struct trace_entry *ent = ring_buffer_event_data(event);
745 
746         tracing_generic_entry_update(ent, type, flags, pc);
747 }
748 
749 static __always_inline struct ring_buffer_event *
750 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
751                           int type,
752                           unsigned long len,
753                           unsigned long flags, int pc)
754 {
755         struct ring_buffer_event *event;
756 
757         event = ring_buffer_lock_reserve(buffer, len);
758         if (event != NULL)
759                 trace_event_setup(event, type, flags, pc);
760 
761         return event;
762 }
763 
764 void tracer_tracing_on(struct trace_array *tr)
765 {
766         if (tr->trace_buffer.buffer)
767                 ring_buffer_record_on(tr->trace_buffer.buffer);
768         /*
769          * This flag is looked at when buffers haven't been allocated
770          * yet, or by some tracers (like irqsoff), that just want to
771          * know if the ring buffer has been disabled, but it can handle
772          * races of where it gets disabled but we still do a record.
773          * As the check is in the fast path of the tracers, it is more
774          * important to be fast than accurate.
775          */
776         tr->buffer_disabled = 0;
777         /* Make the flag seen by readers */
778         smp_wmb();
779 }
780 
781 /**
782  * tracing_on - enable tracing buffers
783  *
784  * This function enables tracing buffers that may have been
785  * disabled with tracing_off.
786  */
787 void tracing_on(void)
788 {
789         tracer_tracing_on(&global_trace);
790 }
791 EXPORT_SYMBOL_GPL(tracing_on);
792 
793 
794 static __always_inline void
795 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
796 {
797         __this_cpu_write(trace_taskinfo_save, true);
798 
799         /* If this is the temp buffer, we need to commit fully */
800         if (this_cpu_read(trace_buffered_event) == event) {
801                 /* Length is in event->array[0] */
802                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
803                 /* Release the temp buffer */
804                 this_cpu_dec(trace_buffered_event_cnt);
805         } else
806                 ring_buffer_unlock_commit(buffer, event);
807 }
808 
809 /**
810  * __trace_puts - write a constant string into the trace buffer.
811  * @ip:    The address of the caller
812  * @str:   The constant string to write
813  * @size:  The size of the string.
814  */
815 int __trace_puts(unsigned long ip, const char *str, int size)
816 {
817         struct ring_buffer_event *event;
818         struct ring_buffer *buffer;
819         struct print_entry *entry;
820         unsigned long irq_flags;
821         int alloc;
822         int pc;
823 
824         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
825                 return 0;
826 
827         pc = preempt_count();
828 
829         if (unlikely(tracing_selftest_running || tracing_disabled))
830                 return 0;
831 
832         alloc = sizeof(*entry) + size + 2; /* possible \n added */
833 
834         local_save_flags(irq_flags);
835         buffer = global_trace.trace_buffer.buffer;
836         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
837                                             irq_flags, pc);
838         if (!event)
839                 return 0;
840 
841         entry = ring_buffer_event_data(event);
842         entry->ip = ip;
843 
844         memcpy(&entry->buf, str, size);
845 
846         /* Add a newline if necessary */
847         if (entry->buf[size - 1] != '\n') {
848                 entry->buf[size] = '\n';
849                 entry->buf[size + 1] = '\0';
850         } else
851                 entry->buf[size] = '\0';
852 
853         __buffer_unlock_commit(buffer, event);
854         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
855 
856         return size;
857 }
858 EXPORT_SYMBOL_GPL(__trace_puts);
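/*
 * Illustrative note (not part of the original source): callers normally use
 * the trace_puts() macro (defined in include/linux/kernel.h in this kernel
 * series) rather than calling __trace_puts() directly; the macro picks
 * __trace_bputs() for compile-time constant strings and __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */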
859 
860 /**
861  * __trace_bputs - write the pointer to a constant string into trace buffer
862  * @ip:    The address of the caller
863  * @str:   The constant string to write to the buffer to
864  */
865 int __trace_bputs(unsigned long ip, const char *str)
866 {
867         struct ring_buffer_event *event;
868         struct ring_buffer *buffer;
869         struct bputs_entry *entry;
870         unsigned long irq_flags;
871         int size = sizeof(struct bputs_entry);
872         int pc;
873 
874         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
875                 return 0;
876 
877         pc = preempt_count();
878 
879         if (unlikely(tracing_selftest_running || tracing_disabled))
880                 return 0;
881 
882         local_save_flags(irq_flags);
883         buffer = global_trace.trace_buffer.buffer;
884         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885                                             irq_flags, pc);
886         if (!event)
887                 return 0;
888 
889         entry = ring_buffer_event_data(event);
890         entry->ip                       = ip;
891         entry->str                      = str;
892 
893         __buffer_unlock_commit(buffer, event);
894         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
895 
896         return 1;
897 }
898 EXPORT_SYMBOL_GPL(__trace_bputs);
899 
900 #ifdef CONFIG_TRACER_SNAPSHOT
901 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
902 {
903         struct tracer *tracer = tr->current_trace;
904         unsigned long flags;
905 
906         if (in_nmi()) {
907                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
908                 internal_trace_puts("*** snapshot is being ignored        ***\n");
909                 return;
910         }
911 
912         if (!tr->allocated_snapshot) {
913                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
914                 internal_trace_puts("*** stopping trace here!   ***\n");
915                 tracing_off();
916                 return;
917         }
918 
919         /* Note, snapshot can not be used when the tracer uses it */
920         if (tracer->use_max_tr) {
921                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
922                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
923                 return;
924         }
925 
926         local_irq_save(flags);
927         update_max_tr(tr, current, smp_processor_id(), cond_data);
928         local_irq_restore(flags);
929 }
930 
931 void tracing_snapshot_instance(struct trace_array *tr)
932 {
933         tracing_snapshot_instance_cond(tr, NULL);
934 }
935 
936 /**
937  * tracing_snapshot - take a snapshot of the current buffer.
938  *
939  * This causes a swap between the snapshot buffer and the current live
940  * tracing buffer. You can use this to take snapshots of the live
941  * trace when some condition is triggered, but continue to trace.
942  *
943  * Note, make sure to allocate the snapshot with either
944  * a tracing_snapshot_alloc(), or by doing it manually
945  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
946  *
947  * If the snapshot buffer is not allocated, it will stop tracing.
948  * Basically making a permanent snapshot.
949  */
950 void tracing_snapshot(void)
951 {
952         struct trace_array *tr = &global_trace;
953 
954         tracing_snapshot_instance(tr);
955 }
956 EXPORT_SYMBOL_GPL(tracing_snapshot);
957 
958 /**
959  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
960  * @tr:         The tracing instance to snapshot
961  * @cond_data:  The data to be tested conditionally, and possibly saved
962  *
963  * This is the same as tracing_snapshot() except that the snapshot is
964  * conditional - the snapshot will only happen if the
965  * cond_snapshot.update() implementation receiving the cond_data
966  * returns true, which means that the trace array's cond_snapshot
967  * update() operation used the cond_data to determine whether the
968  * snapshot should be taken, and if it was, presumably saved it along
969  * with the snapshot.
970  */
971 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
972 {
973         tracing_snapshot_instance_cond(tr, cond_data);
974 }
975 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
976 
977 /**
978  * tracing_snapshot_cond_data - get the user data associated with a snapshot
979  * @tr:         The tracing instance
980  *
981  * When the user enables a conditional snapshot using
982  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
983  * with the snapshot.  This accessor is used to retrieve it.
984  *
985  * Should not be called from cond_snapshot.update(), since it takes
986  * the tr->max_lock lock, which the code calling
987  * cond_snapshot.update() has already done.
988  *
989  * Returns the cond_data associated with the trace array's snapshot.
990  */
991 void *tracing_cond_snapshot_data(struct trace_array *tr)
992 {
993         void *cond_data = NULL;
994 
995         arch_spin_lock(&tr->max_lock);
996 
997         if (tr->cond_snapshot)
998                 cond_data = tr->cond_snapshot->cond_data;
999 
1000         arch_spin_unlock(&tr->max_lock);
1001 
1002         return cond_data;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1005 
1006 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1007                                         struct trace_buffer *size_buf, int cpu_id);
1008 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1009 
1010 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1011 {
1012         int ret;
1013 
1014         if (!tr->allocated_snapshot) {
1015 
1016                 /* allocate spare buffer */
1017                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1018                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1019                 if (ret < 0)
1020                         return ret;
1021 
1022                 tr->allocated_snapshot = true;
1023         }
1024 
1025         return 0;
1026 }
1027 
1028 static void free_snapshot(struct trace_array *tr)
1029 {
1030         /*
1031          * We don't free the ring buffer; instead, we resize it because
1032          * the max_tr ring buffer has some state (e.g. ring->clock) and
1033          * we want to preserve it.
1034          */
1035         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1036         set_buffer_entries(&tr->max_buffer, 1);
1037         tracing_reset_online_cpus(&tr->max_buffer);
1038         tr->allocated_snapshot = false;
1039 }
1040 
1041 /**
1042  * tracing_alloc_snapshot - allocate snapshot buffer.
1043  *
1044  * This only allocates the snapshot buffer if it isn't already
1045  * allocated - it doesn't also take a snapshot.
1046  *
1047  * This is meant to be used in cases where the snapshot buffer needs
1048  * to be set up for events that can't sleep but need to be able to
1049  * trigger a snapshot.
1050  */
1051 int tracing_alloc_snapshot(void)
1052 {
1053         struct trace_array *tr = &global_trace;
1054         int ret;
1055 
1056         ret = tracing_alloc_snapshot_instance(tr);
1057         WARN_ON(ret < 0);
1058 
1059         return ret;
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1062 
1063 /**
1064  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1065  *
1066  * This is similar to tracing_snapshot(), but it will allocate the
1067  * snapshot buffer if it isn't already allocated. Use this only
1068  * where it is safe to sleep, as the allocation may sleep.
1069  *
1070  * This causes a swap between the snapshot buffer and the current live
1071  * tracing buffer. You can use this to take snapshots of the live
1072  * trace when some condition is triggered, but continue to trace.
1073  */
1074 void tracing_snapshot_alloc(void)
1075 {
1076         int ret;
1077 
1078         ret = tracing_alloc_snapshot();
1079         if (ret < 0)
1080                 return;
1081 
1082         tracing_snapshot();
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
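/*
 * Illustrative sketch (not part of the original source): a caller that wants
 * to capture the trace around an interesting event could use the exported
 * helpers above roughly like this (hypothetical, may sleep):
 */
#if 0	/* example only */
static void example_capture(void)
{
	if (tracing_alloc_snapshot() < 0)	/* make sure the spare buffer exists */
		return;
	/* ... run the interesting workload ... */
	tracing_snapshot();			/* swap the live buffer into the snapshot */
}
#endif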
1085 
1086 /**
1087  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1088  * @tr:         The tracing instance
1089  * @cond_data:  User data to associate with the snapshot
1090  * @update:     Implementation of the cond_snapshot update function
1091  *
1092  * Check whether the conditional snapshot for the given instance has
1093  * already been enabled, or if the current tracer is already using a
1094  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1095  * save the cond_data and update function inside.
1096  *
1097  * Returns 0 if successful, error otherwise.
1098  */
1099 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1100                                  cond_update_fn_t update)
1101 {
1102         struct cond_snapshot *cond_snapshot;
1103         int ret = 0;
1104 
1105         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1106         if (!cond_snapshot)
1107                 return -ENOMEM;
1108 
1109         cond_snapshot->cond_data = cond_data;
1110         cond_snapshot->update = update;
1111 
1112         mutex_lock(&trace_types_lock);
1113 
1114         ret = tracing_alloc_snapshot_instance(tr);
1115         if (ret)
1116                 goto fail_unlock;
1117 
1118         if (tr->current_trace->use_max_tr) {
1119                 ret = -EBUSY;
1120                 goto fail_unlock;
1121         }
1122 
1123         /*
1124          * The cond_snapshot can only change to NULL without the
1125          * trace_types_lock. We don't care if we race with it going
1126          * to NULL, but we want to make sure that it's not set to
1127          * something other than NULL when we get here, which we can
1128          * do safely with only holding the trace_types_lock and not
1129          * having to take the max_lock.
1130          */
1131         if (tr->cond_snapshot) {
1132                 ret = -EBUSY;
1133                 goto fail_unlock;
1134         }
1135 
1136         arch_spin_lock(&tr->max_lock);
1137         tr->cond_snapshot = cond_snapshot;
1138         arch_spin_unlock(&tr->max_lock);
1139 
1140         mutex_unlock(&trace_types_lock);
1141 
1142         return ret;
1143 
1144  fail_unlock:
1145         mutex_unlock(&trace_types_lock);
1146         kfree(cond_snapshot);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
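/*
 * Illustrative sketch (not part of the original source): the @update
 * callback decides, per call to tracing_snapshot_cond(), whether the swap
 * should happen; it receives the cond_data passed to tracing_snapshot_cond(),
 * while the cond_data registered here is later retrievable with
 * tracing_cond_snapshot_data(). A hypothetical example:
 */
#if 0	/* example only */
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long latency = (unsigned long)cond_data; /* hypothetical encoding */

	/* only take the snapshot when the reported latency exceeds 1000 */
	return latency > 1000;
}

static int example_enable(struct trace_array *tr)
{
	/* later, hot paths call: tracing_snapshot_cond(tr, (void *)latency); */
	return tracing_snapshot_cond_enable(tr, NULL, example_cond_update);
}
#endif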
1150 
1151 /**
1152  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1153  * @tr:         The tracing instance
1154  *
1155  * Check whether the conditional snapshot for the given instance is
1156  * enabled; if so, free the cond_snapshot associated with it,
1157  * otherwise return -EINVAL.
1158  *
1159  * Returns 0 if successful, error otherwise.
1160  */
1161 int tracing_snapshot_cond_disable(struct trace_array *tr)
1162 {
1163         int ret = 0;
1164 
1165         arch_spin_lock(&tr->max_lock);
1166 
1167         if (!tr->cond_snapshot)
1168                 ret = -EINVAL;
1169         else {
1170                 kfree(tr->cond_snapshot);
1171                 tr->cond_snapshot = NULL;
1172         }
1173 
1174         arch_spin_unlock(&tr->max_lock);
1175 
1176         return ret;
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1179 #else
1180 void tracing_snapshot(void)
1181 {
1182         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1183 }
1184 EXPORT_SYMBOL_GPL(tracing_snapshot);
1185 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1186 {
1187         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1190 int tracing_alloc_snapshot(void)
1191 {
1192         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1193         return -ENODEV;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1196 void tracing_snapshot_alloc(void)
1197 {
1198         /* Give warning */
1199         tracing_snapshot();
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204         return NULL;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1208 {
1209         return -ENODEV;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1212 int tracing_snapshot_cond_disable(struct trace_array *tr)
1213 {
1214         return false;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1217 #endif /* CONFIG_TRACER_SNAPSHOT */
1218 
1219 void tracer_tracing_off(struct trace_array *tr)
1220 {
1221         if (tr->trace_buffer.buffer)
1222                 ring_buffer_record_off(tr->trace_buffer.buffer);
1223         /*
1224          * This flag is looked at when buffers haven't been allocated
1225          * yet, or by some tracers (like irqsoff), that just want to
1226          * know if the ring buffer has been disabled, but it can handle
1227          * races of where it gets disabled but we still do a record.
1228          * As the check is in the fast path of the tracers, it is more
1229          * important to be fast than accurate.
1230          */
1231         tr->buffer_disabled = 1;
1232         /* Make the flag seen by readers */
1233         smp_wmb();
1234 }
1235 
1236 /**
1237  * tracing_off - turn off tracing buffers
1238  *
1239  * This function stops the tracing buffers from recording data.
1240  * It does not disable any overhead the tracers themselves may
1241  * be causing. This function simply causes all recording to
1242  * the ring buffers to fail.
1243  */
1244 void tracing_off(void)
1245 {
1246         tracer_tracing_off(&global_trace);
1247 }
1248 EXPORT_SYMBOL_GPL(tracing_off);
1249 
1250 void disable_trace_on_warning(void)
1251 {
1252         if (__disable_trace_on_warning)
1253                 tracing_off();
1254 }
1255 
1256 /**
 1257  * tracer_tracing_is_on - show the real state of the ring buffer
 1258  * @tr : the trace array whose ring buffer state is queried
 1259  *
 1260  * Shows the real state of the ring buffer, whether it is enabled or not.
1261  */
1262 bool tracer_tracing_is_on(struct trace_array *tr)
1263 {
1264         if (tr->trace_buffer.buffer)
1265                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1266         return !tr->buffer_disabled;
1267 }
1268 
1269 /**
1270  * tracing_is_on - show state of ring buffers enabled
1271  */
1272 int tracing_is_on(void)
1273 {
1274         return tracer_tracing_is_on(&global_trace);
1275 }
1276 EXPORT_SYMBOL_GPL(tracing_is_on);
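/*
 * Illustrative sketch (not part of the original source): tracing_off(),
 * tracing_on() and tracing_is_on() let kernel code freeze the ring buffer
 * around a point of interest, e.g. (hypothetical condition):
 */
#if 0	/* example only */
	if (detected_bad_state && tracing_is_on())
		tracing_off();	/* keep the events that led up to this point */
#endif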
1277 
1278 static int __init set_buf_size(char *str)
1279 {
1280         unsigned long buf_size;
1281 
1282         if (!str)
1283                 return 0;
1284         buf_size = memparse(str, &str);
1285         /* nr_entries can not be zero */
1286         if (buf_size == 0)
1287                 return 0;
1288         trace_buf_size = buf_size;
1289         return 1;
1290 }
1291 __setup("trace_buf_size=", set_buf_size);
1292 
1293 static int __init set_tracing_thresh(char *str)
1294 {
1295         unsigned long threshold;
1296         int ret;
1297 
1298         if (!str)
1299                 return 0;
1300         ret = kstrtoul(str, 0, &threshold);
1301         if (ret < 0)
1302                 return 0;
1303         tracing_thresh = threshold * 1000;
1304         return 1;
1305 }
1306 __setup("tracing_thresh=", set_tracing_thresh);
1307 
1308 unsigned long nsecs_to_usecs(unsigned long nsecs)
1309 {
1310         return nsecs / 1000;
1311 }
1312 
1313 /*
1314  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1315  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1316  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1317  * of strings in the order that the evals (enum) were defined.
1318  */
1319 #undef C
1320 #define C(a, b) b
1321 
1322 /* These must match the bit positions in trace_iterator_flags */
1323 static const char *trace_options[] = {
1324         TRACE_FLAGS
1325         NULL
1326 };
1327 
1328 static struct {
1329         u64 (*func)(void);
1330         const char *name;
1331         int in_ns;              /* is this clock in nanoseconds? */
1332 } trace_clocks[] = {
1333         { trace_clock_local,            "local",        1 },
1334         { trace_clock_global,           "global",       1 },
1335         { trace_clock_counter,          "counter",      0 },
1336         { trace_clock_jiffies,          "uptime",       0 },
1337         { trace_clock,                  "perf",         1 },
1338         { ktime_get_mono_fast_ns,       "mono",         1 },
1339         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1340         { ktime_get_boot_fast_ns,       "boot",         1 },
1341         ARCH_TRACE_CLOCKS
1342 };
1343 
1344 bool trace_clock_in_ns(struct trace_array *tr)
1345 {
1346         if (trace_clocks[tr->clock_id].in_ns)
1347                 return true;
1348 
1349         return false;
1350 }
1351 
1352 /*
1353  * trace_parser_get_init - gets the buffer for trace parser
1354  */
1355 int trace_parser_get_init(struct trace_parser *parser, int size)
1356 {
1357         memset(parser, 0, sizeof(*parser));
1358 
1359         parser->buffer = kmalloc(size, GFP_KERNEL);
1360         if (!parser->buffer)
1361                 return 1;
1362 
1363         parser->size = size;
1364         return 0;
1365 }
1366 
1367 /*
1368  * trace_parser_put - frees the buffer for trace parser
1369  */
1370 void trace_parser_put(struct trace_parser *parser)
1371 {
1372         kfree(parser->buffer);
1373         parser->buffer = NULL;
1374 }
1375 
1376 /*
1377  * trace_get_user - reads the user input string separated by space
1378  * (matched by isspace(ch))
1379  *
1380  * For each string found the 'struct trace_parser' is updated,
1381  * and the function returns.
1382  *
1383  * Returns number of bytes read.
1384  *
1385  * See kernel/trace/trace.h for 'struct trace_parser' details.
1386  */
1387 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1388         size_t cnt, loff_t *ppos)
1389 {
1390         char ch;
1391         size_t read = 0;
1392         ssize_t ret;
1393 
1394         if (!*ppos)
1395                 trace_parser_clear(parser);
1396 
1397         ret = get_user(ch, ubuf++);
1398         if (ret)
1399                 goto out;
1400 
1401         read++;
1402         cnt--;
1403 
1404         /*
1405          * The parser is not finished with the last write,
1406          * continue reading the user input without skipping spaces.
1407          */
1408         if (!parser->cont) {
1409                 /* skip white space */
1410                 while (cnt && isspace(ch)) {
1411                         ret = get_user(ch, ubuf++);
1412                         if (ret)
1413                                 goto out;
1414                         read++;
1415                         cnt--;
1416                 }
1417 
1418                 parser->idx = 0;
1419 
1420                 /* only spaces were written */
1421                 if (isspace(ch) || !ch) {
1422                         *ppos += read;
1423                         ret = read;
1424                         goto out;
1425                 }
1426         }
1427 
1428         /* read the non-space input */
1429         while (cnt && !isspace(ch) && ch) {
1430                 if (parser->idx < parser->size - 1)
1431                         parser->buffer[parser->idx++] = ch;
1432                 else {
1433                         ret = -EINVAL;
1434                         goto out;
1435                 }
1436                 ret = get_user(ch, ubuf++);
1437                 if (ret)
1438                         goto out;
1439                 read++;
1440                 cnt--;
1441         }
1442 
1443         /* We either got finished input or we have to wait for another call. */
1444         if (isspace(ch) || !ch) {
1445                 parser->buffer[parser->idx] = 0;
1446                 parser->cont = false;
1447         } else if (parser->idx < parser->size - 1) {
1448                 parser->cont = true;
1449                 parser->buffer[parser->idx++] = ch;
1450                 /* Make sure the parsed string always terminates with '\0'. */
1451                 parser->buffer[parser->idx] = 0;
1452         } else {
1453                 ret = -EINVAL;
1454                 goto out;
1455         }
1456 
1457         *ppos += read;
1458         ret = read;
1459 
1460 out:
1461         return ret;
1462 }
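/*
 * Illustrative note (not part of the original source): each successful call
 * returns exactly one whitespace-delimited token in parser->buffer (or sets
 * parser->cont when a token continues into the next read); trace_pid_write()
 * above shows the typical "loop until trace_parser_loaded() fails" usage.
 */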
1463 
1464 /* TODO add a seq_buf_to_buffer() */
1465 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1466 {
1467         int len;
1468 
1469         if (trace_seq_used(s) <= s->seq.readpos)
1470                 return -EBUSY;
1471 
1472         len = trace_seq_used(s) - s->seq.readpos;
1473         if (cnt > len)
1474                 cnt = len;
1475         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1476 
1477         s->seq.readpos += cnt;
1478         return cnt;
1479 }
1480 
1481 unsigned long __read_mostly     tracing_thresh;
1482 
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 /*
1485  * Copy the new maximum trace into the separate maximum-trace
1486  * structure. (this way the maximum trace is permanently saved,
1487  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1488  */
1489 static void
1490 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1491 {
1492         struct trace_buffer *trace_buf = &tr->trace_buffer;
1493         struct trace_buffer *max_buf = &tr->max_buffer;
1494         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1495         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1496 
1497         max_buf->cpu = cpu;
1498         max_buf->time_start = data->preempt_timestamp;
1499 
1500         max_data->saved_latency = tr->max_latency;
1501         max_data->critical_start = data->critical_start;
1502         max_data->critical_end = data->critical_end;
1503 
1504         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1505         max_data->pid = tsk->pid;
1506         /*
1507          * If tsk == current, then use current_uid(), as that does not use
1508          * RCU. The irq tracer can be called out of RCU scope.
1509          */
1510         if (tsk == current)
1511                 max_data->uid = current_uid();
1512         else
1513                 max_data->uid = task_uid(tsk);
1514 
1515         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1516         max_data->policy = tsk->policy;
1517         max_data->rt_priority = tsk->rt_priority;
1518 
1519         /* record this task's comm */
1520         tracing_record_cmdline(tsk);
1521 }
1522 
1523 /**
1524  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1525  * @tr: tracer
1526  * @tsk: the task with the latency
1527  * @cpu: The cpu that initiated the trace.
1528  * @cond_data: User data associated with a conditional snapshot
1529  *
1530  * Flip the buffers between the @tr and the max_tr and record information
1531  * about which task was the cause of this latency.
1532  */
1533 void
1534 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1535               void *cond_data)
1536 {
1537         if (tr->stop_count)
1538                 return;
1539 
1540         WARN_ON_ONCE(!irqs_disabled());
1541 
1542         if (!tr->allocated_snapshot) {
1543                 /* Only the nop tracer should hit this when disabling */
1544                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1545                 return;
1546         }
1547 
1548         arch_spin_lock(&tr->max_lock);
1549 
1550         /* Inherit the recordable setting from trace_buffer */
1551         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1552                 ring_buffer_record_on(tr->max_buffer.buffer);
1553         else
1554                 ring_buffer_record_off(tr->max_buffer.buffer);
1555 
1556 #ifdef CONFIG_TRACER_SNAPSHOT
1557         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1558                 goto out_unlock;
1559 #endif
1560         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1561 
1562         __update_max_tr(tr, tsk, cpu);
1563 
1564  out_unlock:
1565         arch_spin_unlock(&tr->max_lock);
1566 }
1567 
1568 /**
1569  * update_max_tr_single - only copy one trace over, and reset the rest
1570  * @tr: tracer
1571  * @tsk: task with the latency
1572  * @cpu: the cpu of the buffer to copy.
1573  *
1574  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1575  */
1576 void
1577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1578 {
1579         int ret;
1580 
1581         if (tr->stop_count)
1582                 return;
1583 
1584         WARN_ON_ONCE(!irqs_disabled());
1585         if (!tr->allocated_snapshot) {
1586                 /* Only the nop tracer should hit this when disabling */
1587                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1588                 return;
1589         }
1590 
1591         arch_spin_lock(&tr->max_lock);
1592 
1593         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1594 
1595         if (ret == -EBUSY) {
1596                 /*
1597                  * We failed to swap the buffer due to a commit taking
1598                  * place on this CPU. We fail to record, but we reset
1599                  * the max trace buffer (no one writes directly to it)
1600                  * and flag that it failed.
1601                  */
1602                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1603                         "Failed to swap buffers due to commit in progress\n");
1604         }
1605 
1606         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1607 
1608         __update_max_tr(tr, tsk, cpu);
1609         arch_spin_unlock(&tr->max_lock);
1610 }
1611 #endif /* CONFIG_TRACER_MAX_TRACE */
1612 
1613 static int wait_on_pipe(struct trace_iterator *iter, int full)
1614 {
1615         /* Iterators are static, they should be filled or empty */
1616         if (trace_buffer_iter(iter, iter->cpu_file))
1617                 return 0;
1618 
1619         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1620                                 full);
1621 }
1622 
1623 #ifdef CONFIG_FTRACE_STARTUP_TEST
1624 static bool selftests_can_run;
1625 
1626 struct trace_selftests {
1627         struct list_head                list;
1628         struct tracer                   *type;
1629 };
1630 
1631 static LIST_HEAD(postponed_selftests);
1632 
1633 static int save_selftest(struct tracer *type)
1634 {
1635         struct trace_selftests *selftest;
1636 
1637         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1638         if (!selftest)
1639                 return -ENOMEM;
1640 
1641         selftest->type = type;
1642         list_add(&selftest->list, &postponed_selftests);
1643         return 0;
1644 }
1645 
1646 static int run_tracer_selftest(struct tracer *type)
1647 {
1648         struct trace_array *tr = &global_trace;
1649         struct tracer *saved_tracer = tr->current_trace;
1650         int ret;
1651 
1652         if (!type->selftest || tracing_selftest_disabled)
1653                 return 0;
1654 
1655         /*
1656          * If a tracer registers early in boot up (before scheduling is
1657          * initialized and such), then do not run its selftests yet.
1658          * Instead, run it a little later in the boot process.
1659          */
1660         if (!selftests_can_run)
1661                 return save_selftest(type);
1662 
1663         /*
1664          * Run a selftest on this tracer.
1665          * Here we reset the trace buffer, and set the current
1666          * tracer to be this tracer. The tracer can then run some
1667          * internal tracing to verify that everything is in order.
1668          * If we fail, we do not register this tracer.
1669          */
1670         tracing_reset_online_cpus(&tr->trace_buffer);
1671 
1672         tr->current_trace = type;
1673 
1674 #ifdef CONFIG_TRACER_MAX_TRACE
1675         if (type->use_max_tr) {
1676                 /* If we expanded the buffers, make sure the max is expanded too */
1677                 if (ring_buffer_expanded)
1678                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1679                                            RING_BUFFER_ALL_CPUS);
1680                 tr->allocated_snapshot = true;
1681         }
1682 #endif
1683 
1684         /* the test is responsible for initializing and enabling */
1685         pr_info("Testing tracer %s: ", type->name);
1686         ret = type->selftest(type, tr);
1687         /* the test is responsible for resetting too */
1688         tr->current_trace = saved_tracer;
1689         if (ret) {
1690                 printk(KERN_CONT "FAILED!\n");
1691                 /* Add the warning after printing 'FAILED' */
1692                 WARN_ON(1);
1693                 return -1;
1694         }
1695         /* Only reset on passing, to avoid touching corrupted buffers */
1696         tracing_reset_online_cpus(&tr->trace_buffer);
1697 
1698 #ifdef CONFIG_TRACER_MAX_TRACE
1699         if (type->use_max_tr) {
1700                 tr->allocated_snapshot = false;
1701 
1702                 /* Shrink the max buffer again */
1703                 if (ring_buffer_expanded)
1704                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1705                                            RING_BUFFER_ALL_CPUS);
1706         }
1707 #endif
1708 
1709         printk(KERN_CONT "PASSED\n");
1710         return 0;
1711 }
1712 
1713 static __init int init_trace_selftests(void)
1714 {
1715         struct trace_selftests *p, *n;
1716         struct tracer *t, **last;
1717         int ret;
1718 
1719         selftests_can_run = true;
1720 
1721         mutex_lock(&trace_types_lock);
1722 
1723         if (list_empty(&postponed_selftests))
1724                 goto out;
1725 
1726         pr_info("Running postponed tracer tests:\n");
1727 
1728         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1729                 /* This loop can take minutes when sanitizers are enabled, so
1730                  * let's make sure we allow RCU processing.
1731                  */
1732                 cond_resched();
1733                 ret = run_tracer_selftest(p->type);
1734                 /* If the test fails, then warn and remove from available_tracers */
1735                 if (ret < 0) {
1736                         WARN(1, "tracer: %s failed selftest, disabling\n",
1737                              p->type->name);
1738                         last = &trace_types;
1739                         for (t = trace_types; t; t = t->next) {
1740                                 if (t == p->type) {
1741                                         *last = t->next;
1742                                         break;
1743                                 }
1744                                 last = &t->next;
1745                         }
1746                 }
1747                 list_del(&p->list);
1748                 kfree(p);
1749         }
1750 
1751  out:
1752         mutex_unlock(&trace_types_lock);
1753 
1754         return 0;
1755 }
1756 core_initcall(init_trace_selftests);
1757 #else
1758 static inline int run_tracer_selftest(struct tracer *type)
1759 {
1760         return 0;
1761 }
1762 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1763 
1764 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1765 
1766 static void __init apply_trace_boot_options(void);
1767 
1768 /**
1769  * register_tracer - register a tracer with the ftrace system.
1770  * @type: the plugin for the tracer
1771  *
1772  * Register a new plugin tracer.
1773  */
1774 int __init register_tracer(struct tracer *type)
1775 {
1776         struct tracer *t;
1777         int ret = 0;
1778 
1779         if (!type->name) {
1780                 pr_info("Tracer must have a name\n");
1781                 return -1;
1782         }
1783 
1784         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1785                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1786                 return -1;
1787         }
1788 
1789         mutex_lock(&trace_types_lock);
1790 
1791         tracing_selftest_running = true;
1792 
1793         for (t = trace_types; t; t = t->next) {
1794                 if (strcmp(type->name, t->name) == 0) {
1795                         /* already found */
1796                         pr_info("Tracer %s already registered\n",
1797                                 type->name);
1798                         ret = -1;
1799                         goto out;
1800                 }
1801         }
1802 
1803         if (!type->set_flag)
1804                 type->set_flag = &dummy_set_flag;
1805         if (!type->flags) {
1806                 /* Allocate a dummy tracer_flags */
1807                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1808                 if (!type->flags) {
1809                         ret = -ENOMEM;
1810                         goto out;
1811                 }
1812                 type->flags->val = 0;
1813                 type->flags->opts = dummy_tracer_opt;
1814         } else
1815                 if (!type->flags->opts)
1816                         type->flags->opts = dummy_tracer_opt;
1817 
1818         /* store the tracer for __set_tracer_option */
1819         type->flags->trace = type;
1820 
1821         ret = run_tracer_selftest(type);
1822         if (ret < 0)
1823                 goto out;
1824 
1825         type->next = trace_types;
1826         trace_types = type;
1827         add_tracer_options(&global_trace, type);
1828 
1829  out:
1830         tracing_selftest_running = false;
1831         mutex_unlock(&trace_types_lock);
1832 
1833         if (ret || !default_bootup_tracer)
1834                 goto out_unlock;
1835 
1836         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1837                 goto out_unlock;
1838 
1839         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1840         /* Do we want this tracer to start on bootup? */
1841         tracing_set_tracer(&global_trace, type->name);
1842         default_bootup_tracer = NULL;
1843 
1844         apply_trace_boot_options();
1845 
1846         /* Disable other selftests, since this tracer will break them. */
1847         tracing_selftest_disabled = true;
1848 #ifdef CONFIG_FTRACE_STARTUP_TEST
1849         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1850                type->name);
1851 #endif
1852 
1853  out_unlock:
1854         return ret;
1855 }
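
/*
 * A minimal registration sketch (illustrative only; "mytrace" and its
 * callbacks are hypothetical, not part of this file). A tracer plugin
 * fills in a struct tracer and registers it from an initcall:
 *
 *	static int mytrace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void mytrace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer mytrace __read_mostly = {
 *		.name	= "mytrace",
 *		.init	= mytrace_init,
 *		.reset	= mytrace_reset,
 *	};
 *
 *	static __init int init_mytrace(void)
 *	{
 *		return register_tracer(&mytrace);
 *	}
 *	core_initcall(init_mytrace);
 */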
1856 
1857 void tracing_reset(struct trace_buffer *buf, int cpu)
1858 {
1859         struct ring_buffer *buffer = buf->buffer;
1860 
1861         if (!buffer)
1862                 return;
1863 
1864         ring_buffer_record_disable(buffer);
1865 
1866         /* Make sure all commits have finished */
1867         synchronize_rcu();
1868         ring_buffer_reset_cpu(buffer, cpu);
1869 
1870         ring_buffer_record_enable(buffer);
1871 }
1872 
1873 void tracing_reset_online_cpus(struct trace_buffer *buf)
1874 {
1875         struct ring_buffer *buffer = buf->buffer;
1876         int cpu;
1877 
1878         if (!buffer)
1879                 return;
1880 
1881         ring_buffer_record_disable(buffer);
1882 
1883         /* Make sure all commits have finished */
1884         synchronize_rcu();
1885 
1886         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1887 
1888         for_each_online_cpu(cpu)
1889                 ring_buffer_reset_cpu(buffer, cpu);
1890 
1891         ring_buffer_record_enable(buffer);
1892 }
1893 
1894 /* Must have trace_types_lock held */
1895 void tracing_reset_all_online_cpus(void)
1896 {
1897         struct trace_array *tr;
1898 
1899         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1900                 if (!tr->clear_trace)
1901                         continue;
1902                 tr->clear_trace = false;
1903                 tracing_reset_online_cpus(&tr->trace_buffer);
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905                 tracing_reset_online_cpus(&tr->max_buffer);
1906 #endif
1907         }
1908 }
1909 
1910 static int *tgid_map;
1911 
1912 #define SAVED_CMDLINES_DEFAULT 128
1913 #define NO_CMDLINE_MAP UINT_MAX
1914 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1915 struct saved_cmdlines_buffer {
1916         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1917         unsigned *map_cmdline_to_pid;
1918         unsigned cmdline_num;
1919         int cmdline_idx;
1920         char *saved_cmdlines;
1921 };
1922 static struct saved_cmdlines_buffer *savedcmd;
1923 
1924 /* temporary disable recording */
1925 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1926 
1927 static inline char *get_saved_cmdlines(int idx)
1928 {
1929         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1930 }
1931 
1932 static inline void set_cmdline(int idx, const char *cmdline)
1933 {
1934         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1935 }
1936 
1937 static int allocate_cmdlines_buffer(unsigned int val,
1938                                     struct saved_cmdlines_buffer *s)
1939 {
1940         s->map_cmdline_to_pid = kmalloc_array(val,
1941                                               sizeof(*s->map_cmdline_to_pid),
1942                                               GFP_KERNEL);
1943         if (!s->map_cmdline_to_pid)
1944                 return -ENOMEM;
1945 
1946         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1947         if (!s->saved_cmdlines) {
1948                 kfree(s->map_cmdline_to_pid);
1949                 return -ENOMEM;
1950         }
1951 
1952         s->cmdline_idx = 0;
1953         s->cmdline_num = val;
1954         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1955                sizeof(s->map_pid_to_cmdline));
1956         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1957                val * sizeof(*s->map_cmdline_to_pid));
1958 
1959         return 0;
1960 }
1961 
1962 static int trace_create_savedcmd(void)
1963 {
1964         int ret;
1965 
1966         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1967         if (!savedcmd)
1968                 return -ENOMEM;
1969 
1970         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1971         if (ret < 0) {
1972                 kfree(savedcmd);
1973                 savedcmd = NULL;
1974                 return -ENOMEM;
1975         }
1976 
1977         return 0;
1978 }
1979 
1980 int is_tracing_stopped(void)
1981 {
1982         return global_trace.stop_count;
1983 }
1984 
1985 /**
1986  * tracing_start - quick start of the tracer
1987  *
1988  * If tracing is enabled but was stopped by tracing_stop,
1989  * this will start the tracer back up.
1990  */
1991 void tracing_start(void)
1992 {
1993         struct ring_buffer *buffer;
1994         unsigned long flags;
1995 
1996         if (tracing_disabled)
1997                 return;
1998 
1999         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2000         if (--global_trace.stop_count) {
2001                 if (global_trace.stop_count < 0) {
2002                         /* Someone screwed up their debugging */
2003                         WARN_ON_ONCE(1);
2004                         global_trace.stop_count = 0;
2005                 }
2006                 goto out;
2007         }
2008 
2009         /* Prevent the buffers from switching */
2010         arch_spin_lock(&global_trace.max_lock);
2011 
2012         buffer = global_trace.trace_buffer.buffer;
2013         if (buffer)
2014                 ring_buffer_record_enable(buffer);
2015 
2016 #ifdef CONFIG_TRACER_MAX_TRACE
2017         buffer = global_trace.max_buffer.buffer;
2018         if (buffer)
2019                 ring_buffer_record_enable(buffer);
2020 #endif
2021 
2022         arch_spin_unlock(&global_trace.max_lock);
2023 
2024  out:
2025         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2026 }
2027 
2028 static void tracing_start_tr(struct trace_array *tr)
2029 {
2030         struct ring_buffer *buffer;
2031         unsigned long flags;
2032 
2033         if (tracing_disabled)
2034                 return;
2035 
2036         /* If global, we need to also start the max tracer */
2037         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2038                 return tracing_start();
2039 
2040         raw_spin_lock_irqsave(&tr->start_lock, flags);
2041 
2042         if (--tr->stop_count) {
2043                 if (tr->stop_count < 0) {
2044                         /* Someone screwed up their debugging */
2045                         WARN_ON_ONCE(1);
2046                         tr->stop_count = 0;
2047                 }
2048                 goto out;
2049         }
2050 
2051         buffer = tr->trace_buffer.buffer;
2052         if (buffer)
2053                 ring_buffer_record_enable(buffer);
2054 
2055  out:
2056         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2057 }
2058 
2059 /**
2060  * tracing_stop - quick stop of the tracer
2061  *
2062  * Light weight way to stop tracing. Use in conjunction with
2063  * tracing_start.
2064  */
2065 void tracing_stop(void)
2066 {
2067         struct ring_buffer *buffer;
2068         unsigned long flags;
2069 
2070         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071         if (global_trace.stop_count++)
2072                 goto out;
2073 
2074         /* Prevent the buffers from switching */
2075         arch_spin_lock(&global_trace.max_lock);
2076 
2077         buffer = global_trace.trace_buffer.buffer;
2078         if (buffer)
2079                 ring_buffer_record_disable(buffer);
2080 
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082         buffer = global_trace.max_buffer.buffer;
2083         if (buffer)
2084                 ring_buffer_record_disable(buffer);
2085 #endif
2086 
2087         arch_spin_unlock(&global_trace.max_lock);
2088 
2089  out:
2090         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2091 }
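
/*
 * Illustrative pairing (the debug site is hypothetical): stop_count
 * nests, so a debugging hack can freeze the ring buffers around a
 * suspect region and resume afterwards:
 *
 *	tracing_stop();
 *	dump_suspect_state();		// hypothetical helper
 *	tracing_start();
 */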
2092 
2093 static void tracing_stop_tr(struct trace_array *tr)
2094 {
2095         struct ring_buffer *buffer;
2096         unsigned long flags;
2097 
2098         /* If global, we need to also stop the max tracer */
2099         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2100                 return tracing_stop();
2101 
2102         raw_spin_lock_irqsave(&tr->start_lock, flags);
2103         if (tr->stop_count++)
2104                 goto out;
2105 
2106         buffer = tr->trace_buffer.buffer;
2107         if (buffer)
2108                 ring_buffer_record_disable(buffer);
2109 
2110  out:
2111         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2112 }
2113 
2114 static int trace_save_cmdline(struct task_struct *tsk)
2115 {
2116         unsigned pid, idx;
2117 
2118         /* treat recording of idle task as a success */
2119         if (!tsk->pid)
2120                 return 1;
2121 
2122         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2123                 return 0;
2124 
2125         /*
2126          * It's not the end of the world if we don't get
2127          * the lock, but we also don't want to spin
2128          * nor do we want to disable interrupts,
2129          * so if we miss here, then better luck next time.
2130          */
2131         if (!arch_spin_trylock(&trace_cmdline_lock))
2132                 return 0;
2133 
2134         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2135         if (idx == NO_CMDLINE_MAP) {
2136                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2137 
2138                 /*
2139                  * Check whether the cmdline buffer at idx has a pid
2140                  * mapped. We are going to overwrite that entry so we
2141                  * need to clear the map_pid_to_cmdline. Otherwise we
2142                  * would read the new comm for the old pid.
2143                  */
2144                 pid = savedcmd->map_cmdline_to_pid[idx];
2145                 if (pid != NO_CMDLINE_MAP)
2146                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2147 
2148                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2149                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2150 
2151                 savedcmd->cmdline_idx = idx;
2152         }
2153 
2154         set_cmdline(idx, tsk->comm);
2155 
2156         arch_spin_unlock(&trace_cmdline_lock);
2157 
2158         return 1;
2159 }
2160 
2161 static void __trace_find_cmdline(int pid, char comm[])
2162 {
2163         unsigned map;
2164 
2165         if (!pid) {
2166                 strcpy(comm, "<idle>");
2167                 return;
2168         }
2169 
2170         if (WARN_ON_ONCE(pid < 0)) {
2171                 strcpy(comm, "<XXX>");
2172                 return;
2173         }
2174 
2175         if (pid > PID_MAX_DEFAULT) {
2176                 strcpy(comm, "<...>");
2177                 return;
2178         }
2179 
2180         map = savedcmd->map_pid_to_cmdline[pid];
2181         if (map != NO_CMDLINE_MAP)
2182                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2183         else
2184                 strcpy(comm, "<...>");
2185 }
2186 
2187 void trace_find_cmdline(int pid, char comm[])
2188 {
2189         preempt_disable();
2190         arch_spin_lock(&trace_cmdline_lock);
2191 
2192         __trace_find_cmdline(pid, comm);
2193 
2194         arch_spin_unlock(&trace_cmdline_lock);
2195         preempt_enable();
2196 }
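
/*
 * Usage sketch: callers pass a TASK_COMM_LEN sized buffer, typically
 * while formatting an event for output:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%8.8s-%d", comm, entry->pid);
 */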
2197 
2198 int trace_find_tgid(int pid)
2199 {
2200         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2201                 return 0;
2202 
2203         return tgid_map[pid];
2204 }
2205 
2206 static int trace_save_tgid(struct task_struct *tsk)
2207 {
2208         /* treat recording of idle task as a success */
2209         if (!tsk->pid)
2210                 return 1;
2211 
2212         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2213                 return 0;
2214 
2215         tgid_map[tsk->pid] = tsk->tgid;
2216         return 1;
2217 }
2218 
2219 static bool tracing_record_taskinfo_skip(int flags)
2220 {
2221         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2222                 return true;
2223         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2224                 return true;
2225         if (!__this_cpu_read(trace_taskinfo_save))
2226                 return true;
2227         return false;
2228 }
2229 
2230 /**
2231  * tracing_record_taskinfo - record the task info of a task
2232  *
2233  * @task:  task to record
2234  * @flags: TRACE_RECORD_CMDLINE for recording comm
2235  *         TRACE_RECORD_TGID for recording tgid
2236  */
2237 void tracing_record_taskinfo(struct task_struct *task, int flags)
2238 {
2239         bool done;
2240 
2241         if (tracing_record_taskinfo_skip(flags))
2242                 return;
2243 
2244         /*
2245          * Record as much task information as possible. If some fail, continue
2246          * to try to record the others.
2247          */
2248         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2249         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2250 
2251         /* If recording any information failed, retry again soon. */
2252         if (!done)
2253                 return;
2254 
2255         __this_cpu_write(trace_taskinfo_save, false);
2256 }
2257 
2258 /**
2259  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2260  *
2261  * @prev: previous task during sched_switch
2262  * @next: next task during sched_switch
2263  * @flags: TRACE_RECORD_CMDLINE for recording comm
2264  *         TRACE_RECORD_TGID for recording tgid
2265  */
2266 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2267                                           struct task_struct *next, int flags)
2268 {
2269         bool done;
2270 
2271         if (tracing_record_taskinfo_skip(flags))
2272                 return;
2273 
2274         /*
2275          * Record as much task information as possible. If some fail, continue
2276          * to try to record the others.
2277          */
2278         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2279         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2280         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2281         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2282 
2283         /* If recording any information failed, retry again soon. */
2284         if (!done)
2285                 return;
2286 
2287         __this_cpu_write(trace_taskinfo_save, false);
2288 }
2289 
2290 /* Helpers to record a specific task information */
2291 void tracing_record_cmdline(struct task_struct *task)
2292 {
2293         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2294 }
2295 
2296 void tracing_record_tgid(struct task_struct *task)
2297 {
2298         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2299 }
2300 
2301 /*
2302  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2303  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2304  * simplifies those functions and keeps them in sync.
2305  */
2306 enum print_line_t trace_handle_return(struct trace_seq *s)
2307 {
2308         return trace_seq_has_overflowed(s) ?
2309                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_handle_return);
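
/*
 * Typical use in an event's output callback (sketch; trace_foo_print()
 * is hypothetical): write into iter->seq and let trace_handle_return()
 * map a trace_seq overflow to TRACE_TYPE_PARTIAL_LINE:
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */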
2312 
2313 void
2314 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2315                              unsigned long flags, int pc)
2316 {
2317         struct task_struct *tsk = current;
2318 
2319         entry->preempt_count            = pc & 0xff;
2320         entry->pid                      = (tsk) ? tsk->pid : 0;
2321         entry->type                     = type;
2322         entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326                 TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2329                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
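
/*
 * Sketch of the reserve-and-fill pattern this helper supports: once a
 * ring buffer event has been reserved, the generic header is filled
 * first and the event-specific payload after it (the surrounding
 * reservation code is assumed, not shown):
 *
 *	struct trace_entry *ent = ring_buffer_event_data(event);
 *
 *	tracing_generic_entry_update(ent, type, flags, pc);
 *	// ...then fill in the fields that follow the header...
 */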
2335 
2336 struct ring_buffer_event *
2337 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2338                           int type,
2339                           unsigned long len,
2340                           unsigned long flags, int pc)
2341 {
2342         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2343 }
2344 
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348 
2349 /**
2350  * trace_buffered_event_enable - enable buffering events
2351  *
2352  * When events are being filtered, it is quicker to write the event
2353  * data into a temporary buffer if there is a good chance that the
2354  * event will not be committed. Discarding an event from the ring
2355  * buffer is not as fast as committing one, and is much slower than
2356  * simply dropping a temporary copy.
2357  *
2358  * When an event is to be filtered, allocate per-CPU buffers to
2359  * write the event data into; if the event is filtered and discarded,
2360  * the copy is simply dropped, otherwise the entire event is
2361  * committed in one shot.
2362  */
2363 void trace_buffered_event_enable(void)
2364 {
2365         struct ring_buffer_event *event;
2366         struct page *page;
2367         int cpu;
2368 
2369         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370 
2371         if (trace_buffered_event_ref++)
2372                 return;
2373 
2374         for_each_tracing_cpu(cpu) {
2375                 page = alloc_pages_node(cpu_to_node(cpu),
2376                                         GFP_KERNEL | __GFP_NORETRY, 0);
2377                 if (!page)
2378                         goto failed;
2379 
2380                 event = page_address(page);
2381                 memset(event, 0, sizeof(*event));
2382 
2383                 per_cpu(trace_buffered_event, cpu) = event;
2384 
2385                 preempt_disable();
2386                 if (cpu == smp_processor_id() &&
2387                     this_cpu_read(trace_buffered_event) !=
2388                     per_cpu(trace_buffered_event, cpu))
2389                         WARN_ON_ONCE(1);
2390                 preempt_enable();
2391         }
2392 
2393         return;
2394  failed:
2395         trace_buffered_event_disable();
2396 }
2397 
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400         /* Probably not needed, but do it anyway */
2401         smp_rmb();
2402         this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404 
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407         this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409 
2410 /**
2411  * trace_buffered_event_disable - disable buffering events
2412  *
2413  * When a filter is removed, it is faster to not use the buffered
2414  * events, and to commit directly into the ring buffer. Free up
2415  * the temp buffers when there are no more users. This requires
2416  * special synchronization with current events.
2417  */
2418 void trace_buffered_event_disable(void)
2419 {
2420         int cpu;
2421 
2422         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423 
2424         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425                 return;
2426 
2427         if (--trace_buffered_event_ref)
2428                 return;
2429 
2430         preempt_disable();
2431         /* For each CPU, set the buffer as used. */
2432         smp_call_function_many(tracing_buffer_mask,
2433                                disable_trace_buffered_event, NULL, 1);
2434         preempt_enable();
2435 
2436         /* Wait for all current users to finish */
2437         synchronize_rcu();
2438 
2439         for_each_tracing_cpu(cpu) {
2440                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441                 per_cpu(trace_buffered_event, cpu) = NULL;
2442         }
2443         /*
2444          * Make sure trace_buffered_event is NULL before clearing
2445          * trace_buffered_event_cnt.
2446          */
2447         smp_wmb();
2448 
2449         preempt_disable();
2450         /* Do the work on each cpu */
2451         smp_call_function_many(tracing_buffer_mask,
2452                                enable_trace_buffered_event, NULL, 1);
2453         preempt_enable();
2454 }
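
/*
 * Illustrative pairing (the caller is hypothetical): both functions
 * must run under event_mutex and are reference counted, so each
 * enable needs a matching disable:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ...install an event filter...
 *	mutex_unlock(&event_mutex);
 *
 *	// later, when the filter is removed:
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */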
2455 
2456 static struct ring_buffer *temp_buffer;
2457 
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460                           struct trace_event_file *trace_file,
2461                           int type, unsigned long len,
2462                           unsigned long flags, int pc)
2463 {
2464         struct ring_buffer_event *entry;
2465         int val;
2466 
2467         *current_rb = trace_file->tr->trace_buffer.buffer;
2468 
2469         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471             (entry = this_cpu_read(trace_buffered_event))) {
2472                 /* Try to use the per cpu buffer first */
2473                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2474                 if (val == 1) {
2475                         trace_event_setup(entry, type, flags, pc);
2476                         entry->array[0] = len;
2477                         return entry;
2478                 }
2479                 this_cpu_dec(trace_buffered_event_cnt);
2480         }
2481 
2482         entry = __trace_buffer_lock_reserve(*current_rb,
2483                                             type, len, flags, pc);
2484         /*
2485          * If tracing is off, but we have triggers enabled,
2486          * we still need to look at the event data. Use the temp_buffer
2487          * to store the trace event for the trigger to use. It's recursion
2488          * safe and will not be recorded anywhere.
2489          */
2490         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491                 *current_rb = temp_buffer;
2492                 entry = __trace_buffer_lock_reserve(*current_rb,
2493                                                     type, len, flags, pc);
2494         }
2495         return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2498 
2499 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2500 static DEFINE_MUTEX(tracepoint_printk_mutex);
2501 
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504         struct trace_event_call *event_call;
2505         struct trace_event *event;
2506         unsigned long flags;
2507         struct trace_iterator *iter = tracepoint_print_iter;
2508 
2509         /* We should never get here if iter is NULL */
2510         if (WARN_ON_ONCE(!iter))
2511                 return;
2512 
2513         event_call = fbuffer->trace_file->event_call;
2514         if (!event_call || !event_call->event.funcs ||
2515             !event_call->event.funcs->trace)
2516                 return;
2517 
2518         event = &fbuffer->trace_file->event_call->event;
2519 
2520         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521         trace_seq_init(&iter->seq);
2522         iter->ent = fbuffer->entry;
2523         event_call->event.funcs->trace(iter, 0, event);
2524         trace_seq_putc(&iter->seq, 0);
2525         printk("%s", iter->seq.buffer);
2526 
2527         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529 
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531                              void __user *buffer, size_t *lenp,
2532                              loff_t *ppos)
2533 {
2534         int save_tracepoint_printk;
2535         int ret;
2536 
2537         mutex_lock(&tracepoint_printk_mutex);
2538         save_tracepoint_printk = tracepoint_printk;
2539 
2540         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541 
2542         /*
2543          * This will force exiting early, as tracepoint_printk
2544          * is always zero when tracepoint_print_iter is not allocated
2545          */
2546         if (!tracepoint_print_iter)
2547                 tracepoint_printk = 0;
2548 
2549         if (save_tracepoint_printk == tracepoint_printk)
2550                 goto out;
2551 
2552         if (tracepoint_printk)
2553                 static_key_enable(&tracepoint_printk_key.key);
2554         else
2555                 static_key_disable(&tracepoint_printk_key.key);
2556 
2557  out:
2558         mutex_unlock(&tracepoint_printk_mutex);
2559 
2560         return ret;
2561 }
2562 
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565         if (static_key_false(&tracepoint_printk_key.key))
2566                 output_printk(fbuffer);
2567 
2568         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569                                     fbuffer->event, fbuffer->entry,
2570                                     fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573 
2574 /*
2575  * Skip 3:
2576  *
2577  *   trace_buffer_unlock_commit_regs()
2578  *   trace_event_buffer_commit()
2579  *   trace_event_raw_event_xxx()
2580  */
2581 # define STACK_SKIP 3
2582 
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584                                      struct ring_buffer *buffer,
2585                                      struct ring_buffer_event *event,
2586                                      unsigned long flags, int pc,
2587                                      struct pt_regs *regs)
2588 {
2589         __buffer_unlock_commit(buffer, event);
2590 
2591         /*
2592          * If regs is not set, then skip the necessary functions.
2593          * Note, we can still get here via blktrace, wakeup tracer
2594          * and mmiotrace, but that's ok if they lose a function or
2595          * two. They are not that meaningful.
2596          */
2597         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598         ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600 
2601 /*
2602  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2603  */
2604 void
2605 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2606                                    struct ring_buffer_event *event)
2607 {
2608         __buffer_unlock_commit(buffer, event);
2609 }
2610 
2611 static void
2612 trace_process_export(struct trace_export *export,
2613                struct ring_buffer_event *event)
2614 {
2615         struct trace_entry *entry;
2616         unsigned int size = 0;
2617 
2618         entry = ring_buffer_event_data(event);
2619         size = ring_buffer_event_length(event);
2620         export->write(export, entry, size);
2621 }
2622 
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624 
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626 
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628 
2629 static inline void ftrace_exports_enable(void)
2630 {
2631         static_branch_enable(&ftrace_exports_enabled);
2632 }
2633 
2634 static inline void ftrace_exports_disable(void)
2635 {
2636         static_branch_disable(&ftrace_exports_enabled);
2637 }
2638 
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641         struct trace_export *export;
2642 
2643         preempt_disable_notrace();
2644 
2645         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2646         while (export) {
2647                 trace_process_export(export, event);
2648                 export = rcu_dereference_raw_notrace(export->next);
2649         }
2650 
2651         preempt_enable_notrace();
2652 }
2653 
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657         rcu_assign_pointer(export->next, *list);
2658         /*
2659          * We are entering export into the list but another
2660          * CPU might be walking that list. We need to make sure
2661          * the export->next pointer is valid before another CPU sees
2662          * the export pointer included into the list.
2663          */
2664         rcu_assign_pointer(*list, export);
2665 }
2666 
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670         struct trace_export **p;
2671 
2672         for (p = list; *p != NULL; p = &(*p)->next)
2673                 if (*p == export)
2674                         break;
2675 
2676         if (*p != export)
2677                 return -1;
2678 
2679         rcu_assign_pointer(*p, (*p)->next);
2680 
2681         return 0;
2682 }
2683 
2684 static inline void
2685 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687         if (*list == NULL)
2688                 ftrace_exports_enable();
2689 
2690         add_trace_export(list, export);
2691 }
2692 
2693 static inline int
2694 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2695 {
2696         int ret;
2697 
2698         ret = rm_trace_export(list, export);
2699         if (*list == NULL)
2700                 ftrace_exports_disable();
2701 
2702         return ret;
2703 }
2704 
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707         if (WARN_ON_ONCE(!export->write))
2708                 return -1;
2709 
2710         mutex_lock(&ftrace_export_lock);
2711 
2712         add_ftrace_export(&ftrace_exports_list, export);
2713 
2714         mutex_unlock(&ftrace_export_lock);
2715 
2716         return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719 
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722         int ret;
2723 
2724         mutex_lock(&ftrace_export_lock);
2725 
2726         ret = rm_ftrace_export(&ftrace_exports_list, export);
2727 
2728         mutex_unlock(&ftrace_export_lock);
2729 
2730         return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
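
/*
 * Registration sketch (my_export and my_export_write are hypothetical):
 * a trace_export only needs a ->write() callback, which receives the
 * raw entry and its length for every exported function trace event:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// forward the raw entry, e.g. over an external transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	// ...
 *	unregister_ftrace_export(&my_export);
 */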
2733 
2734 void
2735 trace_function(struct trace_array *tr,
2736                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737                int pc)
2738 {
2739         struct trace_event_call *call = &event_function;
2740         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741         struct ring_buffer_event *event;
2742         struct ftrace_entry *entry;
2743 
2744         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745                                             flags, pc);
2746         if (!event)
2747                 return;
2748         entry   = ring_buffer_event_data(event);
2749         entry->ip                       = ip;
2750         entry->parent_ip                = parent_ip;
2751 
2752         if (!call_filter_check_discard(call, entry, buffer, event)) {
2753                 if (static_branch_unlikely(&ftrace_exports_enabled))
2754                         ftrace_exports(event);
2755                 __buffer_unlock_commit(buffer, event);
2756         }
2757 }
2758 
2759 #ifdef CONFIG_STACKTRACE
2760 
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING   4
2763 
2764 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765 
2766 struct ftrace_stack {
2767         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769 
2770 
2771 struct ftrace_stacks {
2772         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2773 };
2774 
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777 
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779                                  unsigned long flags,
2780                                  int skip, int pc, struct pt_regs *regs)
2781 {
2782         struct trace_event_call *call = &event_kernel_stack;
2783         struct ring_buffer_event *event;
2784         unsigned int size, nr_entries;
2785         struct ftrace_stack *fstack;
2786         struct stack_entry *entry;
2787         int stackidx;
2788 
2789         /*
2790          * Add one, for this function and the call to stack_trace_save().
2791          * If regs is set, then these functions will not be in the way.
2792          */
2793 #ifndef CONFIG_UNWINDER_ORC
2794         if (!regs)
2795                 skip++;
2796 #endif
2797 
2798         /*
2799          * Since events can happen in NMIs there's no safe way to
2800          * use a single per-cpu stack. Instead, reserve one nesting level
2801          * of the per-cpu ftrace_stacks; an interrupt or NMI that comes in
2802          * will simply use the next level for its own stack trace.
2803          */
2804         preempt_disable_notrace();
2805 
2806         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807 
2808         /* This should never happen. If it does, yell once and skip */
2809         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2810                 goto out;
2811 
2812         /*
2813          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814          * interrupt will either see the value pre increment or post
2815          * increment. If the interrupt happens pre increment it will have
2816          * restored the counter when it returns.  We just need a barrier to
2817          * keep gcc from moving things around.
2818          */
2819         barrier();
2820 
2821         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822         size = ARRAY_SIZE(fstack->calls);
2823 
2824         if (regs) {
2825                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826                                                    size, skip);
2827         } else {
2828                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2829         }
2830 
2831         size = nr_entries * sizeof(unsigned long);
2832         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833                                             sizeof(*entry) + size, flags, pc);
2834         if (!event)
2835                 goto out;
2836         entry = ring_buffer_event_data(event);
2837 
2838         memcpy(&entry->caller, fstack->calls, size);
2839         entry->size = nr_entries;
2840 
2841         if (!call_filter_check_discard(call, entry, buffer, event))
2842                 __buffer_unlock_commit(buffer, event);
2843 
2844  out:
2845         /* Again, don't let gcc optimize things here */
2846         barrier();
2847         __this_cpu_dec(ftrace_stack_reserve);
2848         preempt_enable_notrace();
2849 
2850 }
2851 
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853                                       struct ring_buffer *buffer,
2854                                       unsigned long flags,
2855                                       int skip, int pc, struct pt_regs *regs)
2856 {
2857         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858                 return;
2859 
2860         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862 
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864                    int pc)
2865 {
2866         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867 
2868         if (rcu_is_watching()) {
2869                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870                 return;
2871         }
2872 
2873         /*
2874          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875          * but if the above rcu_is_watching() failed, then the NMI
2876          * triggered someplace critical, and rcu_irq_enter() should
2877          * not be called from NMI.
2878          */
2879         if (unlikely(in_nmi()))
2880                 return;
2881 
2882         rcu_irq_enter_irqson();
2883         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884         rcu_irq_exit_irqson();
2885 }
2886 
2887 /**
2888  * trace_dump_stack - record a stack back trace in the trace buffer
2889  * @skip: Number of functions to skip (helper handlers)
2890  */
2891 void trace_dump_stack(int skip)
2892 {
2893         unsigned long flags;
2894 
2895         if (tracing_disabled || tracing_selftest_running)
2896                 return;
2897 
2898         local_save_flags(flags);
2899 
2900 #ifndef CONFIG_UNWINDER_ORC
2901         /* Skip 1 to skip this function. */
2902         skip++;
2903 #endif
2904         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905                              flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
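
/*
 * Usage sketch (the condition is hypothetical): a debug site can drop
 * a kernel backtrace into the trace buffer without stopping the box:
 *
 *	if (unlikely(saw_bad_state))
 *		trace_dump_stack(0);
 */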
2908 
2909 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911 
2912 static void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915         struct trace_event_call *call = &event_user_stack;
2916         struct ring_buffer_event *event;
2917         struct userstack_entry *entry;
2918 
2919         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2920                 return;
2921 
2922         /*
2923          * NMIs can not handle page faults, even with fix ups.
2924          * The save user stack can (and often does) fault.
2925          */
2926         if (unlikely(in_nmi()))
2927                 return;
2928 
2929         /*
2930          * prevent recursion, since the user stack tracing may
2931          * trigger other kernel events.
2932          */
2933         preempt_disable();
2934         if (__this_cpu_read(user_stack_count))
2935                 goto out;
2936 
2937         __this_cpu_inc(user_stack_count);
2938 
2939         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2940                                             sizeof(*entry), flags, pc);
2941         if (!event)
2942                 goto out_drop_count;
2943         entry   = ring_buffer_event_data(event);
2944 
2945         entry->tgid             = current->tgid;
2946         memset(&entry->caller, 0, sizeof(entry->caller));
2947 
2948         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2949         if (!call_filter_check_discard(call, entry, buffer, event))
2950                 __buffer_unlock_commit(buffer, event);
2951 
2952  out_drop_count:
2953         __this_cpu_dec(user_stack_count);
2954  out:
2955         preempt_enable();
2956 }
2957 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2958 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2959                                    unsigned long flags, int pc)
2960 {
2961 }
2962 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963 
2964 #endif /* CONFIG_STACKTRACE */
2965 
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968         int nesting;
2969         char buffer[4][TRACE_BUF_SIZE];
2970 };
2971 
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973 
2974 /*
2975  * This allows for lockless recording.  If we're nested too deeply, then
2976  * this returns NULL.
2977  */
2978 static char *get_trace_buf(void)
2979 {
2980         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981 
2982         if (!buffer || buffer->nesting >= 4)
2983                 return NULL;
2984 
2985         buffer->nesting++;
2986 
2987         /* Interrupts must see nesting incremented before we use the buffer */
2988         barrier();
2989         return &buffer->buffer[buffer->nesting][0];
2990 }
2991 
2992 static void put_trace_buf(void)
2993 {
2994         /* Don't let the decrement of nesting leak before this */
2995         barrier();
2996         this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
2998 
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001         struct trace_buffer_struct *buffers;
3002 
3003         buffers = alloc_percpu(struct trace_buffer_struct);
3004         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005                 return -ENOMEM;
3006 
3007         trace_percpu_buffer = buffers;
3008         return 0;
3009 }
3010 
3011 static int buffers_allocated;
3012 
3013 void trace_printk_init_buffers(void)
3014 {
3015         if (buffers_allocated)
3016                 return;
3017 
3018         if (alloc_percpu_trace_buffer())
3019                 return;
3020 
3021         /* trace_printk() is for debug use only. Don't use it in production. */
3022 
3023         pr_warn("\n");
3024         pr_warn("**********************************************************\n");
3025         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3026         pr_warn("**                                                      **\n");
3027         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3028         pr_warn("**                                                      **\n");
3029         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3030         pr_warn("** unsafe for production use.                           **\n");
3031         pr_warn("**                                                      **\n");
3032         pr_warn("** If you see this message and you are not debugging    **\n");
3033         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3034         pr_warn("**                                                      **\n");
3035         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3036         pr_warn("**********************************************************\n");
3037 
3038         /* Expand the buffers to set size */
3039         tracing_update_buffers();
3040 
3041         buffers_allocated = 1;
3042 
3043         /*
3044          * trace_printk_init_buffers() can be called by modules.
3045          * If that happens, then we need to start cmdline recording
3046          * directly here. If the global_trace.buffer is already
3047          * allocated here, then this was called by module code.
3048          */
3049         if (global_trace.trace_buffer.buffer)
3050                 tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053 
3054 void trace_printk_start_comm(void)
3055 {
3056         /* Start tracing comms if trace printk is set */
3057         if (!buffers_allocated)
3058                 return;
3059         tracing_start_cmdline_record();
3060 }
3061 
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064         if (!buffers_allocated)
3065                 return;
3066 
3067         if (enabled)
3068                 tracing_start_cmdline_record();
3069         else
3070                 tracing_stop_cmdline_record();
3071 }
3072 
3073 /**
3074  * trace_vbprintk - write binary msg to tracing buffer
3075  * @ip:    The address of the caller
3076  * @fmt:   The string format to write to the buffer
3077  * @args:  Arguments for @fmt
3078  */
3079 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3080 {
3081         struct trace_event_call *call = &event_bprint;
3082         struct ring_buffer_event *event;
3083         struct ring_buffer *buffer;
3084         struct trace_array *tr = &global_trace;
3085         struct bprint_entry *entry;
3086         unsigned long flags;
3087         char *tbuffer;
3088         int len = 0, size, pc;
3089 
3090         if (unlikely(tracing_selftest_running || tracing_disabled))
3091                 return 0;
3092 
3093         /* Don't pollute graph traces with trace_vprintk internals */
3094         pause_graph_tracing();
3095 
3096         pc = preempt_count();
3097         preempt_disable_notrace();
3098 
3099         tbuffer = get_trace_buf();
3100         if (!tbuffer) {
3101                 len = 0;
3102                 goto out_nobuffer;
3103         }
3104 
3105         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3106 
3107         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3108                 goto out;
3109 
3110         local_save_flags(flags);
3111         size = sizeof(*entry) + sizeof(u32) * len;
3112         buffer = tr->trace_buffer.buffer;
3113         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3114                                             flags, pc);
3115         if (!event)
3116                 goto out;
3117         entry = ring_buffer_event_data(event);
3118         entry->ip                       = ip;
3119         entry->fmt                      = fmt;
3120 
3121         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3122         if (!call_filter_check_discard(call, entry, buffer, event)) {
3123                 __buffer_unlock_commit(buffer, event);
3124                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3125         }
3126 
3127 out:
3128         put_trace_buf();
3129 
3130 out_nobuffer:
3131         preempt_enable_notrace();
3132         unpause_graph_tracing();
3133 
3134         return len;
3135 }
3136 EXPORT_SYMBOL_GPL(trace_vbprintk);
3137 
3138 __printf(3, 0)
3139 static int
3140 __trace_array_vprintk(struct ring_buffer *buffer,
3141                       unsigned long ip, const char *fmt, va_list args)
3142 {
3143         struct trace_event_call *call = &event_print;
3144         struct ring_buffer_event *event;
3145         int len = 0, size, pc;
3146         struct print_entry *entry;
3147         unsigned long flags;
3148         char *tbuffer;
3149 
3150         if (tracing_disabled || tracing_selftest_running)
3151                 return 0;
3152 
3153         /* Don't pollute graph traces with trace_vprintk internals */
3154         pause_graph_tracing();
3155 
3156         pc = preempt_count();
3157         preempt_disable_notrace();
3158 
3159 
3160         tbuffer = get_trace_buf();
3161         if (!tbuffer) {
3162                 len = 0;
3163                 goto out_nobuffer;
3164         }
3165 
3166         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3167 
3168         local_save_flags(flags);
3169         size = sizeof(*entry) + len + 1;
3170         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3171                                             flags, pc);
3172         if (!event)
3173                 goto out;
3174         entry = ring_buffer_event_data(event);
3175         entry->ip = ip;
3176 
3177         memcpy(&entry->buf, tbuffer, len + 1);
3178         if (!call_filter_check_discard(call, entry, buffer, event)) {
3179                 __buffer_unlock_commit(buffer, event);
3180                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3181         }
3182 
3183 out:
3184         put_trace_buf();
3185 
3186 out_nobuffer:
3187         preempt_enable_notrace();
3188         unpause_graph_tracing();
3189 
3190         return len;
3191 }
3192 
3193 __printf(3, 0)
3194 int trace_array_vprintk(struct trace_array *tr,
3195                         unsigned long ip, const char *fmt, va_list args)
3196 {
3197         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3198 }
3199 
3200 __printf(3, 0)
3201 int trace_array_printk(struct trace_array *tr,
3202                        unsigned long ip, const char *fmt, ...)
3203 {
3204         int ret;
3205         va_list ap;
3206 
3207         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3208                 return 0;
3209 
3210         va_start(ap, fmt);
3211         ret = trace_array_vprintk(tr, ip, fmt, ap);
3212         va_end(ap);
3213         return ret;
3214 }
3215 EXPORT_SYMBOL_GPL(trace_array_printk);
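
/*
 * Usage sketch (assumes the caller already holds a struct trace_array
 * for the instance it writes to; the message is hypothetical):
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 *
 * Note that the TRACE_ITER_PRINTK check above gates whether anything
 * is written at all.
 */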
3216 
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219                            unsigned long ip, const char *fmt, ...)
3220 {
3221         int ret;
3222         va_list ap;
3223 
3224         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225                 return 0;
3226 
3227         va_start(ap, fmt);
3228         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229         va_end(ap);
3230         return ret;
3231 }
3232 
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236         return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
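
/*
 * Editor's note (not part of the original source): the printk-style
 * paths above are what the trace_printk() family of helpers ends up
 * in.  A hedged usage sketch from instrumented kernel code follows;
 * "my_fast_path" and "my_tr" are hypothetical names used only for
 * illustration.
 */
#if 0	/* illustrative sketch, never compiled */
static void my_fast_path(int budget)
{
	/* Constant format: goes through the cheaper bprintk path. */
	trace_printk("entered with budget=%d\n", budget);

	/* Instance-aware variant writing into a specific trace_array. */
	trace_array_printk(my_tr, _THIS_IP_, "budget=%d\n", budget);
}
#endif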
3239 
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243 
3244         iter->idx++;
3245         if (buf_iter)
3246                 ring_buffer_read(buf_iter, NULL);
3247 }
3248 
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251                 unsigned long *lost_events)
3252 {
3253         struct ring_buffer_event *event;
3254         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255 
3256         if (buf_iter)
3257                 event = ring_buffer_iter_peek(buf_iter, ts);
3258         else
3259                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260                                          lost_events);
3261 
3262         if (event) {
3263                 iter->ent_size = ring_buffer_event_length(event);
3264                 return ring_buffer_event_data(event);
3265         }
3266         iter->ent_size = 0;
3267         return NULL;
3268 }
3269 
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272                   unsigned long *missing_events, u64 *ent_ts)
3273 {
3274         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275         struct trace_entry *ent, *next = NULL;
3276         unsigned long lost_events = 0, next_lost = 0;
3277         int cpu_file = iter->cpu_file;
3278         u64 next_ts = 0, ts;
3279         int next_cpu = -1;
3280         int next_size = 0;
3281         int cpu;
3282 
3283         /*
3284          * If we are in a per_cpu trace file, don't bother iterating over
3285          * all cpus; just peek at that cpu directly.
3286          */
3287         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3289                         return NULL;
3290                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291                 if (ent_cpu)
3292                         *ent_cpu = cpu_file;
3293 
3294                 return ent;
3295         }
3296 
3297         for_each_tracing_cpu(cpu) {
3298 
3299                 if (ring_buffer_empty_cpu(buffer, cpu))
3300                         continue;
3301 
3302                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303 
3304                 /*
3305                  * Pick the entry with the smallest timestamp:
3306                  */
3307                 if (ent && (!next || ts < next_ts)) {
3308                         next = ent;
3309                         next_cpu = cpu;
3310                         next_ts = ts;
3311                         next_lost = lost_events;
3312                         next_size = iter->ent_size;
3313                 }
3314         }
3315 
3316         iter->ent_size = next_size;
3317 
3318         if (ent_cpu)
3319                 *ent_cpu = next_cpu;
3320 
3321         if (ent_ts)
3322                 *ent_ts = next_ts;
3323 
3324         if (missing_events)
3325                 *missing_events = next_lost;
3326 
3327         return next;
3328 }
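
/*
 * Editor's sketch (not part of the original source): __find_next_entry()
 * above is essentially a "pick the smallest timestamp across the
 * per-cpu streams" scan.  A minimal stand-alone version of the same
 * idea, using the hypothetical names sample_event/pick_oldest:
 */
#if 0	/* illustrative sketch, never compiled */
struct sample_event {
	u64	ts;	/* timestamp used for ordering */
	int	data;
};

static struct sample_event *pick_oldest(struct sample_event **head, int nr)
{
	struct sample_event *next = NULL;
	int i;

	for (i = 0; i < nr; i++) {
		/* Skip empty streams; keep the smallest timestamp seen. */
		if (head[i] && (!next || head[i]->ts < next->ts))
			next = head[i];
	}
	return next;
}
#endif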
3329 
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332                                           int *ent_cpu, u64 *ent_ts)
3333 {
3334         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336 
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340         iter->ent = __find_next_entry(iter, &iter->cpu,
3341                                       &iter->lost_events, &iter->ts);
3342 
3343         if (iter->ent)
3344                 trace_iterator_increment(iter);
3345 
3346         return iter->ent ? iter : NULL;
3347 }
3348 
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352                             &iter->lost_events);
3353 }
3354 
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357         struct trace_iterator *iter = m->private;
3358         int i = (int)*pos;
3359         void *ent;
3360 
3361         WARN_ON_ONCE(iter->leftover);
3362 
3363         (*pos)++;
3364 
3365         /* can't go backwards */
3366         if (iter->idx > i)
3367                 return NULL;
3368 
3369         if (iter->idx < 0)
3370                 ent = trace_find_next_entry_inc(iter);
3371         else
3372                 ent = iter;
3373 
3374         while (ent && iter->idx < i)
3375                 ent = trace_find_next_entry_inc(iter);
3376 
3377         iter->pos = *pos;
3378 
3379         return ent;
3380 }
3381 
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384         struct ring_buffer_event *event;
3385         struct ring_buffer_iter *buf_iter;
3386         unsigned long entries = 0;
3387         u64 ts;
3388 
3389         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390 
3391         buf_iter = trace_buffer_iter(iter, cpu);
3392         if (!buf_iter)
3393                 return;
3394 
3395         ring_buffer_iter_reset(buf_iter);
3396 
3397         /*
3398          * With the max latency tracers we can hit the case where a
3399          * reset never took place on a cpu. This is evident from the
3400          * timestamp being before the start of the buffer.
3401          */
3402         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403                 if (ts >= iter->trace_buffer->time_start)
3404                         break;
3405                 entries++;
3406                 ring_buffer_read(buf_iter, NULL);
3407         }
3408 
3409         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411 
3412 /*
3413  * The current tracer is copied to avoid taking a global
3414  * lock all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418         struct trace_iterator *iter = m->private;
3419         struct trace_array *tr = iter->tr;
3420         int cpu_file = iter->cpu_file;
3421         void *p = NULL;
3422         loff_t l = 0;
3423         int cpu;
3424 
3425         /*
3426          * Copy the tracer to avoid using a global lock all around.
3427          * iter->trace is a copy of current_trace, so the name pointer
3428          * may be compared instead of doing a strcmp(), as iter->trace->name
3429          * will point to the same string as current_trace->name.
3430          */
3431         mutex_lock(&trace_types_lock);
3432         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433                 *iter->trace = *tr->current_trace;
3434         mutex_unlock(&trace_types_lock);
3435 
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437         if (iter->snapshot && iter->trace->use_max_tr)
3438                 return ERR_PTR(-EBUSY);
3439 #endif
3440 
3441         if (!iter->snapshot)
3442                 atomic_inc(&trace_record_taskinfo_disabled);
3443 
3444         if (*pos != iter->pos) {
3445                 iter->ent = NULL;
3446                 iter->cpu = 0;
3447                 iter->idx = -1;
3448 
3449                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450                         for_each_tracing_cpu(cpu)
3451                                 tracing_iter_reset(iter, cpu);
3452                 } else
3453                         tracing_iter_reset(iter, cpu_file);
3454 
3455                 iter->leftover = 0;
3456                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457                         ;
3458 
3459         } else {
3460                 /*
3461                  * If we overflowed the seq_file before, then we want
3462                  * to just reuse the trace_seq buffer again.
3463                  */
3464                 if (iter->leftover)
3465                         p = iter;
3466                 else {
3467                         l = *pos - 1;
3468                         p = s_next(m, p, &l);
3469                 }
3470         }
3471 
3472         trace_event_read_lock();
3473         trace_access_lock(cpu_file);
3474         return p;
3475 }
3476 
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479         struct trace_iterator *iter = m->private;
3480 
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482         if (iter->snapshot && iter->trace->use_max_tr)
3483                 return;
3484 #endif
3485 
3486         if (!iter->snapshot)
3487                 atomic_dec(&trace_record_taskinfo_disabled);
3488 
3489         trace_access_unlock(iter->cpu_file);
3490         trace_event_read_unlock();
3491 }
3492 
3493 static void
3494 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3495                       unsigned long *entries, int cpu)
3496 {
3497         unsigned long count;
3498 
3499         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3500         /*
3501          * If this buffer has skipped entries, then we hold all
3502          * entries for the trace and we need to ignore the
3503          * ones before the time stamp.
3504          */
3505         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3506                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3507                 /* total is the same as the entries */
3508                 *total = count;
3509         } else
3510                 *total = count +
3511                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3512         *entries = count;
3513 }
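
/*
 * Editor's worked example for the accounting above (not part of the
 * original source): if a cpu buffer currently holds 800 events and the
 * ring buffer reports an overrun of 200, then *entries = 800 and
 * *total = 1000.  If instead 100 entries were marked as skipped (the
 * max-latency reset case), both counts become 700, because the skipped
 * entries are still held in the buffer and no overrun is added on top.
 */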
3514 
3515 static void
3516 get_total_entries(struct trace_buffer *buf,
3517                   unsigned long *total, unsigned long *entries)
3518 {
3519         unsigned long t, e;
3520         int cpu;
3521 
3522         *total = 0;
3523         *entries = 0;
3524 
3525         for_each_tracing_cpu(cpu) {
3526                 get_total_entries_cpu(buf, &t, &e, cpu);
3527                 *total += t;
3528                 *entries += e;
3529         }
3530 }
3531 
3532 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3533 {
3534         unsigned long total, entries;
3535 
3536         if (!tr)
3537                 tr = &global_trace;
3538 
3539         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3540 
3541         return entries;
3542 }
3543 
3544 unsigned long trace_total_entries(struct trace_array *tr)
3545 {
3546         unsigned long total, entries;
3547 
3548         if (!tr)
3549                 tr = &global_trace;
3550 
3551         get_total_entries(&tr->trace_buffer, &total, &entries);
3552 
3553         return entries;
3554 }
3555 
3556 static void print_lat_help_header(struct seq_file *m)
3557 {
3558         seq_puts(m, "#                  _------=> CPU#            \n"
3559                     "#                 / _-----=> irqs-off        \n"
3560                     "#                | / _----=> need-resched    \n"
3561                     "#                || / _---=> hardirq/softirq \n"
3562                     "#                ||| / _--=> preempt-depth   \n"
3563                     "#                |||| /     delay            \n"
3564                     "#  cmd     pid   ||||| time  |   caller      \n"
3565                     "#     \\   /      |||||  \\    |   /         \n");
3566 }
3567 
3568 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3569 {
3570         unsigned long total;
3571         unsigned long entries;
3572 
3573         get_total_entries(buf, &total, &entries);
3574         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3575                    entries, total, num_online_cpus());
3576         seq_puts(m, "#\n");
3577 }
3578 
3579 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3580                                    unsigned int flags)
3581 {
3582         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3583 
3584         print_event_info(buf, m);
3585 
3586         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3587         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3588 }
3589 
3590 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3591                                        unsigned int flags)
3592 {
3593         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3594         const char *space = "          ";
3595         int prec = tgid ? 10 : 2;
3596 
3597         print_event_info(buf, m);
3598 
3599         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3600         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3601         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3602         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3603         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3604         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3605         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3606 }
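
/*
 * Editor's sketch (not part of the original source): the headers above
 * rely on the "%.*s" precision argument to print either a narrow or a
 * wide filler, so the same format lines grow a TGID column only when
 * it is recorded.  Illustrative only:
 */
#if 0	/* illustrative sketch, never compiled */
	seq_printf(m, "#%.*s|\n",  2, "          ");	/* "#  |"          */
	seq_printf(m, "#%.*s|\n", 10, "          ");	/* "#          |"  */
#endif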
3607 
3608 void
3609 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3610 {
3611         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3612         struct trace_buffer *buf = iter->trace_buffer;
3613         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3614         struct tracer *type = iter->trace;
3615         unsigned long entries;
3616         unsigned long total;
3617         const char *name = "preemption";
3618 
3619         name = type->name;
3620 
3621         get_total_entries(buf, &total, &entries);
3622 
3623         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3624                    name, UTS_RELEASE);
3625         seq_puts(m, "# -----------------------------------"
3626                  "---------------------------------\n");
3627         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3628                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3629                    nsecs_to_usecs(data->saved_latency),
3630                    entries,
3631                    total,
3632                    buf->cpu,
3633 #if defined(CONFIG_PREEMPT_NONE)
3634                    "server",
3635 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3636                    "desktop",
3637 #elif defined(CONFIG_PREEMPT)
3638                    "preempt",
3639 #else
3640                    "unknown",
3641 #endif
3642                    /* These are reserved for later use */
3643                    0, 0, 0, 0);
3644 #ifdef CONFIG_SMP
3645         seq_printf(m, " #P:%d)\n", num_online_cpus());
3646 #else
3647         seq_puts(m, ")\n");
3648 #endif
3649         seq_puts(m, "#    -----------------\n");
3650         seq_printf(m, "#    | task: %.16s-%d "
3651                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3652                    data->comm, data->pid,
3653                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3654                    data->policy, data->rt_priority);
3655         seq_puts(m, "#    -----------------\n");
3656 
3657         if (data->critical_start) {
3658                 seq_puts(m, "#  => started at: ");
3659                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3660                 trace_print_seq(m, &iter->seq);
3661                 seq_puts(m, "\n#  => ended at:   ");
3662                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3663                 trace_print_seq(m, &iter->seq);
3664                 seq_puts(m, "\n#\n");
3665         }
3666 
3667         seq_puts(m, "#\n");
3668 }
3669 
3670 static void test_cpu_buff_start(struct trace_iterator *iter)
3671 {
3672         struct trace_seq *s = &iter->seq;
3673         struct trace_array *tr = iter->tr;
3674 
3675         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3676                 return;
3677 
3678         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3679                 return;
3680 
3681         if (cpumask_available(iter->started) &&
3682             cpumask_test_cpu(iter->cpu, iter->started))
3683                 return;
3684 
3685         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3686                 return;
3687 
3688         if (cpumask_available(iter->started))
3689                 cpumask_set_cpu(iter->cpu, iter->started);
3690 
3691         /* Don't print started cpu buffer for the first entry of the trace */
3692         if (iter->idx > 1)
3693                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3694                                 iter->cpu);
3695 }
3696 
3697 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3698 {
3699         struct trace_array *tr = iter->tr;
3700         struct trace_seq *s = &iter->seq;
3701         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3702         struct trace_entry *entry;
3703         struct trace_event *event;
3704 
3705         entry = iter->ent;
3706 
3707         test_cpu_buff_start(iter);
3708 
3709         event = ftrace_find_event(entry->type);
3710 
3711         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3712                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3713                         trace_print_lat_context(iter);
3714                 else
3715                         trace_print_context(iter);
3716         }
3717 
3718         if (trace_seq_has_overflowed(s))
3719                 return TRACE_TYPE_PARTIAL_LINE;
3720 
3721         if (event)
3722                 return event->funcs->trace(iter, sym_flags, event);
3723 
3724         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3725 
3726         return trace_handle_return(s);
3727 }
3728 
3729 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3730 {
3731         struct trace_array *tr = iter->tr;
3732         struct trace_seq *s = &iter->seq;
3733         struct trace_entry *entry;
3734         struct trace_event *event;
3735 
3736         entry = iter->ent;
3737 
3738         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3739                 trace_seq_printf(s, "%d %d %llu ",
3740                                  entry->pid, iter->cpu, iter->ts);
3741 
3742         if (trace_seq_has_overflowed(s))
3743                 return TRACE_TYPE_PARTIAL_LINE;
3744 
3745         event = ftrace_find_event(entry->type);
3746         if (event)
3747                 return event->funcs->raw(iter, 0, event);
3748 
3749         trace_seq_printf(s, "%d ?\n", entry->type);
3750 
3751         return trace_handle_return(s);
3752 }
3753 
3754 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3755 {
3756         struct trace_array *tr = iter->tr;
3757         struct trace_seq *s = &iter->seq;
3758         unsigned char newline = '\n';
3759         struct trace_entry *entry;
3760         struct trace_event *event;
3761 
3762         entry = iter->ent;
3763 
3764         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3765                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3766                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3767                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3768                 if (trace_seq_has_overflowed(s))
3769                         return TRACE_TYPE_PARTIAL_LINE;
3770         }
3771 
3772         event = ftrace_find_event(entry->type);
3773         if (event) {
3774                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3775                 if (ret != TRACE_TYPE_HANDLED)
3776                         return ret;
3777         }
3778 
3779         SEQ_PUT_FIELD(s, newline);
3780 
3781         return trace_handle_return(s);
3782 }
3783 
3784 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3785 {
3786         struct trace_array *tr = iter->tr;
3787         struct trace_seq *s = &iter->seq;
3788         struct trace_entry *entry;
3789         struct trace_event *event;
3790 
3791         entry = iter->ent;
3792 
3793         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3794                 SEQ_PUT_FIELD(s, entry->pid);
3795                 SEQ_PUT_FIELD(s, iter->cpu);
3796                 SEQ_PUT_FIELD(s, iter->ts);
3797                 if (trace_seq_has_overflowed(s))
3798                         return TRACE_TYPE_PARTIAL_LINE;
3799         }
3800 
3801         event = ftrace_find_event(entry->type);
3802         return event ? event->funcs->binary(iter, 0, event) :
3803                 TRACE_TYPE_HANDLED;
3804 }
3805 
3806 int trace_empty(struct trace_iterator *iter)
3807 {
3808         struct ring_buffer_iter *buf_iter;
3809         int cpu;
3810 
3811         /* If we are looking at one CPU buffer, only check that one */
3812         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3813                 cpu = iter->cpu_file;
3814                 buf_iter = trace_buffer_iter(iter, cpu);
3815                 if (buf_iter) {
3816                         if (!ring_buffer_iter_empty(buf_iter))
3817                                 return 0;
3818                 } else {
3819                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3820                                 return 0;
3821                 }
3822                 return 1;
3823         }
3824 
3825         for_each_tracing_cpu(cpu) {
3826                 buf_iter = trace_buffer_iter(iter, cpu);
3827                 if (buf_iter) {
3828                         if (!ring_buffer_iter_empty(buf_iter))
3829                                 return 0;
3830                 } else {
3831                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3832                                 return 0;
3833                 }
3834         }
3835 
3836         return 1;
3837 }
3838 
3839 /*  Called with trace_event_read_lock() held. */
3840 enum print_line_t print_trace_line(struct trace_iterator *iter)
3841 {
3842         struct trace_array *tr = iter->tr;
3843         unsigned long trace_flags = tr->trace_flags;
3844         enum print_line_t ret;
3845 
3846         if (iter->lost_events) {
3847                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3848                                  iter->cpu, iter->lost_events);
3849                 if (trace_seq_has_overflowed(&iter->seq))
3850                         return TRACE_TYPE_PARTIAL_LINE;
3851         }
3852 
3853         if (iter->trace && iter->trace->print_line) {
3854                 ret = iter->trace->print_line(iter);
3855                 if (ret != TRACE_TYPE_UNHANDLED)
3856                         return ret;
3857         }
3858 
3859         if (iter->ent->type == TRACE_BPUTS &&
3860                         trace_flags & TRACE_ITER_PRINTK &&
3861                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3862                 return trace_print_bputs_msg_only(iter);
3863 
3864         if (iter->ent->type == TRACE_BPRINT &&
3865                         trace_flags & TRACE_ITER_PRINTK &&
3866                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3867                 return trace_print_bprintk_msg_only(iter);
3868 
3869         if (iter->ent->type == TRACE_PRINT &&
3870                         trace_flags & TRACE_ITER_PRINTK &&
3871                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3872                 return trace_print_printk_msg_only(iter);
3873 
3874         if (trace_flags & TRACE_ITER_BIN)
3875                 return print_bin_fmt(iter);
3876 
3877         if (trace_flags & TRACE_ITER_HEX)
3878                 return print_hex_fmt(iter);
3879 
3880         if (trace_flags & TRACE_ITER_RAW)
3881                 return print_raw_fmt(iter);
3882 
3883         return print_trace_fmt(iter);
3884 }
3885 
3886 void trace_latency_header(struct seq_file *m)
3887 {
3888         struct trace_iterator *iter = m->private;
3889         struct trace_array *tr = iter->tr;
3890 
3891         /* print nothing if the buffers are empty */
3892         if (trace_empty(iter))
3893                 return;
3894 
3895         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3896                 print_trace_header(m, iter);
3897 
3898         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3899                 print_lat_help_header(m);
3900 }
3901 
3902 void trace_default_header(struct seq_file *m)
3903 {
3904         struct trace_iterator *iter = m->private;
3905         struct trace_array *tr = iter->tr;
3906         unsigned long trace_flags = tr->trace_flags;
3907 
3908         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3909                 return;
3910 
3911         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3912                 /* print nothing if the buffers are empty */
3913                 if (trace_empty(iter))
3914                         return;
3915                 print_trace_header(m, iter);
3916                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3917                         print_lat_help_header(m);
3918         } else {
3919                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3920                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3921                                 print_func_help_header_irq(iter->trace_buffer,
3922                                                            m, trace_flags);
3923                         else
3924                                 print_func_help_header(iter->trace_buffer, m,
3925                                                        trace_flags);
3926                 }
3927         }
3928 }
3929 
3930 static void test_ftrace_alive(struct seq_file *m)
3931 {
3932         if (!ftrace_is_dead())
3933                 return;
3934         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3935                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3936 }
3937 
3938 #ifdef CONFIG_TRACER_MAX_TRACE
3939 static void show_snapshot_main_help(struct seq_file *m)
3940 {
3941         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3942                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3943                     "#                      Takes a snapshot of the main buffer.\n"
3944                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3945                     "#                      (Doesn't have to be '2' works with any number that\n"
3946                     "#                       is not a '0' or '1')\n");
3947 }
3948 
3949 static void show_snapshot_percpu_help(struct seq_file *m)
3950 {
3951         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3952 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3953         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3954                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3955 #else
3956         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3957                     "#                     Must use main snapshot file to allocate.\n");
3958 #endif
3959         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3960                     "#                      (Doesn't have to be '2' works with any number that\n"
3961                     "#                       is not a '0' or '1')\n");
3962 }
3963 
3964 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3965 {
3966         if (iter->tr->allocated_snapshot)
3967                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3968         else
3969                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3970 
3971         seq_puts(m, "# Snapshot commands:\n");
3972         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3973                 show_snapshot_main_help(m);
3974         else
3975                 show_snapshot_percpu_help(m);
3976 }
3977 #else
3978 /* Should never be called */
3979 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3980 #endif
3981 
3982 static int s_show(struct seq_file *m, void *v)
3983 {
3984         struct trace_iterator *iter = v;
3985         int ret;
3986 
3987         if (iter->ent == NULL) {
3988                 if (iter->tr) {
3989                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3990                         seq_puts(m, "#\n");
3991                         test_ftrace_alive(m);
3992                 }
3993                 if (iter->snapshot && trace_empty(iter))
3994                         print_snapshot_help(m, iter);
3995                 else if (iter->trace && iter->trace->print_header)
3996                         iter->trace->print_header(m);
3997                 else
3998                         trace_default_header(m);
3999 
4000         } else if (iter->leftover) {
4001                 /*
4002                  * If we filled the seq_file buffer earlier, we
4003                  * want to just show it now.
4004                  */
4005                 ret = trace_print_seq(m, &iter->seq);
4006 
4007                 /* ret should this time be zero, but you never know */
4008                 iter->leftover = ret;
4009 
4010         } else {
4011                 print_trace_line(iter);
4012                 ret = trace_print_seq(m, &iter->seq);
4013                 /*
4014                  * If we overflow the seq_file buffer, then it will
4015                  * ask us for this data again at start up.
4016                  * Use that instead.
4017                  *  ret is 0 if seq_file write succeeded.
4018                  *        -1 otherwise.
4019                  */
4020                 iter->leftover = ret;
4021         }
4022 
4023         return 0;
4024 }
4025 
4026 /*
4027  * Should be used after trace_array_get(), trace_types_lock
4028  * Should be used after trace_array_get(); the trace_types_lock it
4029  * takes ensures that i_cdev was already initialized.
4030 static inline int tracing_get_cpu(struct inode *inode)
4031 {
4032         if (inode->i_cdev) /* See trace_create_cpu_file() */
4033                 return (long)inode->i_cdev - 1;
4034         return RING_BUFFER_ALL_CPUS;
4035 }
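
/*
 * Editor's note (not part of the original source): the decode above
 * assumes the per-cpu files store "cpu + 1" in inode->i_cdev when they
 * are created (see trace_create_cpu_file() elsewhere in this file), so
 * a NULL i_cdev naturally maps to RING_BUFFER_ALL_CPUS.  A hedged
 * sketch of that presumed encode/decode pair:
 */
#if 0	/* illustrative sketch, never compiled */
	/* encode, at file-creation time */
	d_inode(dentry)->i_cdev = (void *)(cpu + 1);

	/* decode, at open time (what tracing_get_cpu() does above) */
	cpu = inode->i_cdev ? (long)inode->i_cdev - 1 : RING_BUFFER_ALL_CPUS;
#endif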
4036 
4037 static const struct seq_operations tracer_seq_ops = {
4038         .start          = s_start,
4039         .next           = s_next,
4040         .stop           = s_stop,
4041         .show           = s_show,
4042 };
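
/*
 * Editor's sketch (not part of the original source): s_start()/s_next()/
 * s_stop()/s_show() above implement the standard seq_file contract:
 * ->start positions the iterator at *pos (taking any locks), ->show
 * emits one record, ->next advances, and ->stop releases the locks.
 * A minimal stand-alone instance with hypothetical demo_* names:
 */
#if 0	/* illustrative sketch, never compiled */
static int demo_vals[] = { 1, 2, 3 };

static void *demo_start(struct seq_file *m, loff_t *pos)
{
	return *pos < ARRAY_SIZE(demo_vals) ? &demo_vals[*pos] : NULL;
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return *pos < ARRAY_SIZE(demo_vals) ? &demo_vals[*pos] : NULL;
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", *(int *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start	= demo_start,
	.next	= demo_next,
	.stop	= demo_stop,
	.show	= demo_show,
};
#endif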
4043 
4044 static struct trace_iterator *
4045 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4046 {
4047         struct trace_array *tr = inode->i_private;
4048         struct trace_iterator *iter;
4049         int cpu;
4050 
4051         if (tracing_disabled)
4052                 return ERR_PTR(-ENODEV);
4053 
4054         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4055         if (!iter)
4056                 return ERR_PTR(-ENOMEM);
4057 
4058         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4059                                     GFP_KERNEL);
4060         if (!iter->buffer_iter)
4061                 goto release;
4062 
4063         /*
4064          * We make a copy of the current tracer to avoid concurrent
4065          * changes on it while we are reading.
4066          */
4067         mutex_lock(&trace_types_lock);
4068         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4069         if (!iter->trace)
4070                 goto fail;
4071 
4072         *iter->trace = *tr->current_trace;
4073 
4074         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4075                 goto fail;
4076 
4077         iter->tr = tr;
4078 
4079 #ifdef CONFIG_TRACER_MAX_TRACE
4080         /* Currently only the top directory has a snapshot */
4081         if (tr->current_trace->print_max || snapshot)
4082                 iter->trace_buffer = &tr->max_buffer;
4083         else
4084 #endif
4085                 iter->trace_buffer = &tr->trace_buffer;
4086         iter->snapshot = snapshot;
4087         iter->pos = -1;
4088         iter->cpu_file = tracing_get_cpu(inode);
4089         mutex_init(&iter->mutex);
4090 
4091         /* Notify the tracer early; before we stop tracing. */
4092         if (iter->trace && iter->trace->open)
4093                 iter->trace->open(iter);
4094 
4095         /* Annotate start of buffers if we had overruns */
4096         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4097                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4098 
4099         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4100         if (trace_clocks[tr->clock_id].in_ns)
4101                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4102 
4103         /* stop the trace while dumping if we are not opening "snapshot" */
4104         if (!iter->snapshot)
4105                 tracing_stop_tr(tr);
4106 
4107         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4108                 for_each_tracing_cpu(cpu) {
4109                         iter->buffer_iter[cpu] =
4110                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4111                                                          cpu, GFP_KERNEL);
4112                 }
4113                 ring_buffer_read_prepare_sync();
4114                 for_each_tracing_cpu(cpu) {
4115                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4116                         tracing_iter_reset(iter, cpu);
4117                 }
4118         } else {
4119                 cpu = iter->cpu_file;
4120                 iter->buffer_iter[cpu] =
4121                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4122                                                  cpu, GFP_KERNEL);
4123                 ring_buffer_read_prepare_sync();
4124                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4125                 tracing_iter_reset(iter, cpu);
4126         }
4127 
4128         mutex_unlock(&trace_types_lock);
4129 
4130         return iter;
4131 
4132  fail:
4133         mutex_unlock(&trace_types_lock);
4134         kfree(iter->trace);
4135         kfree(iter->buffer_iter);
4136 release:
4137         seq_release_private(inode, file);
4138         return ERR_PTR(-ENOMEM);
4139 }
4140 
4141 int tracing_open_generic(struct inode *inode, struct file *filp)
4142 {
4143         if (tracing_disabled)
4144                 return -ENODEV;
4145 
4146         filp->private_data = inode->i_private;
4147         return 0;
4148 }
4149 
4150 bool tracing_is_disabled(void)
4151 {
4152         return (tracing_disabled) ? true: false;
4153 }
4154 
4155 /*
4156  * Open and update trace_array ref count.
4157  * Must have the current trace_array passed to it.
4158  */
4159 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4160 {
4161         struct trace_array *tr = inode->i_private;
4162 
4163         if (tracing_disabled)
4164                 return -ENODEV;
4165 
4166         if (trace_array_get(tr) < 0)
4167                 return -ENODEV;
4168 
4169         filp->private_data = inode->i_private;
4170 
4171         return 0;
4172 }
4173 
4174 static int tracing_release(struct inode *inode, struct file *file)
4175 {
4176         struct trace_array *tr = inode->i_private;
4177         struct seq_file *m = file->private_data;
4178         struct trace_iterator *iter;
4179         int cpu;
4180 
4181         if (!(file->f_mode & FMODE_READ)) {
4182                 trace_array_put(tr);
4183                 return 0;
4184         }
4185 
4186         /* Writes do not use seq_file */
4187         iter = m->private;
4188         mutex_lock(&trace_types_lock);
4189 
4190         for_each_tracing_cpu(cpu) {
4191                 if (iter->buffer_iter[cpu])
4192                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4193         }
4194 
4195         if (iter->trace && iter->trace->close)
4196                 iter->trace->close(iter);
4197 
4198         if (!iter->snapshot)
4199                 /* reenable tracing if it was previously enabled */
4200                 tracing_start_tr(tr);
4201 
4202         __trace_array_put(tr);
4203 
4204         mutex_unlock(&trace_types_lock);
4205 
4206         mutex_destroy(&iter->mutex);
4207         free_cpumask_var(iter->started);
4208         kfree(iter->trace);
4209         kfree(iter->buffer_iter);
4210         seq_release_private(inode, file);
4211 
4212         return 0;
4213 }
4214 
4215 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4216 {
4217         struct trace_array *tr = inode->i_private;
4218 
4219         trace_array_put(tr);
4220         return 0;
4221 }
4222 
4223 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4224 {
4225         struct trace_array *tr = inode->i_private;
4226 
4227         trace_array_put(tr);
4228 
4229         return single_release(inode, file);
4230 }
4231 
4232 static int tracing_open(struct inode *inode, struct file *file)
4233 {
4234         struct trace_array *tr = inode->i_private;
4235         struct trace_iterator *iter;
4236         int ret = 0;
4237 
4238         if (trace_array_get(tr) < 0)
4239                 return -ENODEV;
4240 
4241         /* If this file was open for write, then erase contents */
4242         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4243                 int cpu = tracing_get_cpu(inode);
4244                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4245 
4246 #ifdef CONFIG_TRACER_MAX_TRACE
4247                 if (tr->current_trace->print_max)
4248                         trace_buf = &tr->max_buffer;
4249 #endif
4250 
4251                 if (cpu == RING_BUFFER_ALL_CPUS)
4252                         tracing_reset_online_cpus(trace_buf);
4253                 else
4254                         tracing_reset(trace_buf, cpu);
4255         }
4256 
4257         if (file->f_mode & FMODE_READ) {
4258                 iter = __tracing_open(inode, file, false);
4259                 if (IS_ERR(iter))
4260                         ret = PTR_ERR(iter);
4261                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4262                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4263         }
4264 
4265         if (ret < 0)
4266                 trace_array_put(tr);
4267 
4268         return ret;
4269 }
4270 
4271 /*
4272  * Some tracers are not suitable for instance buffers.
4273  * A tracer is always available for the global array (toplevel)
4274  * or if it explicitly states that it is.
4275  */
4276 static bool
4277 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4278 {
4279         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4280 }
4281 
4282 /* Find the next tracer that this trace array may use */
4283 static struct tracer *
4284 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4285 {
4286         while (t && !trace_ok_for_array(t, tr))
4287                 t = t->next;
4288 
4289         return t;
4290 }
4291 
4292 static void *
4293 t_next(struct seq_file *m, void *v, loff_t *pos)
4294 {
4295         struct trace_array *tr = m->private;
4296         struct tracer *t = v;
4297 
4298         (*pos)++;
4299 
4300         if (t)
4301                 t = get_tracer_for_array(tr, t->next);
4302 
4303         return t;
4304 }
4305 
4306 static void *t_start(struct seq_file *m, loff_t *pos)
4307 {
4308         struct trace_array *tr = m->private;
4309         struct tracer *t;
4310         loff_t l = 0;
4311 
4312         mutex_lock(&trace_types_lock);
4313 
4314         t = get_tracer_for_array(tr, trace_types);
4315         for (; t && l < *pos; t = t_next(m, t, &l))
4316                         ;
4317 
4318         return t;
4319 }
4320 
4321 static void t_stop(struct seq_file *m, void *p)
4322 {
4323         mutex_unlock(&trace_types_lock);
4324 }
4325 
4326 static int t_show(struct seq_file *m, void *v)
4327 {
4328         struct tracer *t = v;
4329 
4330         if (!t)
4331                 return 0;
4332 
4333         seq_puts(m, t->name);
4334         if (t->next)
4335                 seq_putc(m, ' ');
4336         else
4337                 seq_putc(m, '\n');
4338 
4339         return 0;
4340 }
4341 
4342 static const struct seq_operations show_traces_seq_ops = {
4343         .start          = t_start,
4344         .next           = t_next,
4345         .stop           = t_stop,
4346         .show           = t_show,
4347 };
4348 
4349 static int show_traces_open(struct inode *inode, struct file *file)
4350 {
4351         struct trace_array *tr = inode->i_private;
4352         struct seq_file *m;
4353         int ret;
4354 
4355         if (tracing_disabled)
4356                 return -ENODEV;
4357 
4358         if (trace_array_get(tr) < 0)
4359                 return -ENODEV;
4360 
4361         ret = seq_open(file, &show_traces_seq_ops);
4362         if (ret) {
4363                 trace_array_put(tr);
4364                 return ret;
4365         }
4366 
4367         m = file->private_data;
4368         m->private = tr;
4369 
4370         return 0;
4371 }
4372 
4373 static int show_traces_release(struct inode *inode, struct file *file)
4374 {
4375         struct trace_array *tr = inode->i_private;
4376 
4377         trace_array_put(tr);
4378         return seq_release(inode, file);
4379 }
4380 
4381 static ssize_t
4382 tracing_write_stub(struct file *filp, const char __user *ubuf,
4383                    size_t count, loff_t *ppos)
4384 {
4385         return count;
4386 }
4387 
4388 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4389 {
4390         int ret;
4391 
4392         if (file->f_mode & FMODE_READ)
4393                 ret = seq_lseek(file, offset, whence);
4394         else
4395                 file->f_pos = ret = 0;
4396 
4397         return ret;
4398 }
4399 
4400 static const struct file_operations tracing_fops = {
4401         .open           = tracing_open,
4402         .read           = seq_read,
4403         .write          = tracing_write_stub,
4404         .llseek         = tracing_lseek,
4405         .release        = tracing_release,
4406 };
4407 
4408 static const struct file_operations show_traces_fops = {
4409         .open           = show_traces_open,
4410         .read           = seq_read,
4411         .llseek         = seq_lseek,
4412         .release        = show_traces_release,
4413 };
4414 
4415 static ssize_t
4416 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4417                      size_t count, loff_t *ppos)
4418 {
4419         struct trace_array *tr = file_inode(filp)->i_private;
4420         char *mask_str;
4421         int len;
4422 
4423         len = snprintf(NULL, 0, "%*pb\n",
4424                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4425         mask_str = kmalloc(len, GFP_KERNEL);
4426         if (!mask_str)
4427                 return -ENOMEM;
4428 
4429         len = snprintf(mask_str, len, "%*pb\n",
4430                        cpumask_pr_args(tr->tracing_cpumask));
4431         if (len >= count) {
4432                 count = -EINVAL;
4433                 goto out_err;
4434         }
4435         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4436 
4437 out_err:
4438         kfree(mask_str);
4439 
4440         return count;
4441 }
4442 
4443 static ssize_t
4444 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4445                       size_t count, loff_t *ppos)
4446 {
4447         struct trace_array *tr = file_inode(filp)->i_private;
4448         cpumask_var_t tracing_cpumask_new;
4449         int err, cpu;
4450 
4451         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4452                 return -ENOMEM;
4453 
4454         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4455         if (err)
4456                 goto err_unlock;
4457 
4458         local_irq_disable();
4459         arch_spin_lock(&tr->max_lock);
4460         for_each_tracing_cpu(cpu) {
4461                 /*
4462                  * Increase/decrease the disabled counter if we are
4463                  * about to flip a bit in the cpumask:
4464                  */
4465                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4466                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4467                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4468                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4469                 }
4470                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4471                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4472                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4473                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4474                 }
4475         }
4476         arch_spin_unlock(&tr->max_lock);
4477         local_irq_enable();
4478 
4479         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4480         free_cpumask_var(tracing_cpumask_new);
4481 
4482         return count;
4483 
4484 err_unlock:
4485         free_cpumask_var(tracing_cpumask_new);
4486 
4487         return err;
4488 }
4489 
4490 static const struct file_operations tracing_cpumask_fops = {
4491         .open           = tracing_open_generic_tr,
4492         .read           = tracing_cpumask_read,
4493         .write          = tracing_cpumask_write,
4494         .release        = tracing_release_generic_tr,
4495         .llseek         = generic_file_llseek,
4496 };
4497 
4498 static int tracing_trace_options_show(struct seq_file *m, void *v)
4499 {
4500         struct tracer_opt *trace_opts;
4501         struct trace_array *tr = m->private;
4502         u32 tracer_flags;
4503         int i;
4504 
4505         mutex_lock(&trace_types_lock);
4506         tracer_flags = tr->current_trace->flags->val;
4507         trace_opts = tr->current_trace->flags->opts;
4508 
4509         for (i = 0; trace_options[i]; i++) {
4510                 if (tr->trace_flags & (1 << i))
4511                         seq_printf(m, "%s\n", trace_options[i]);
4512                 else
4513                         seq_printf(m, "no%s\n", trace_options[i]);
4514         }
4515 
4516         for (i = 0; trace_opts[i].name; i++) {
4517                 if (tracer_flags & trace_opts[i].bit)
4518                         seq_printf(m, "%s\n", trace_opts[i].name);
4519                 else
4520                         seq_printf(m, "no%s\n", trace_opts[i].name);
4521         }
4522         mutex_unlock(&trace_types_lock);
4523 
4524         return 0;
4525 }
4526 
4527 static int __set_tracer_option(struct trace_array *tr,
4528                                struct tracer_flags *tracer_flags,
4529                                struct tracer_opt *opts, int neg)
4530 {
4531         struct tracer *trace = tracer_flags->trace;
4532         int ret;
4533 
4534         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4535         if (ret)
4536                 return ret;
4537 
4538         if (neg)
4539                 tracer_flags->val &= ~opts->bit;
4540         else
4541                 tracer_flags->val |= opts->bit;
4542         return 0;
4543 }
4544 
4545 /* Try to assign a tracer specific option */
4546 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4547 {
4548         struct tracer *trace = tr->current_trace;
4549         struct tracer_flags *tracer_flags = trace->flags;
4550         struct tracer_opt *opts = NULL;
4551         int i;
4552 
4553         for (i = 0; tracer_flags->opts[i].name; i++) {
4554                 opts = &tracer_flags->opts[i];
4555 
4556                 if (strcmp(cmp, opts->name) == 0)
4557                         return __set_tracer_option(tr, trace->flags, opts, neg);
4558         }
4559 
4560         return -EINVAL;
4561 }
4562 
4563 /* Some tracers require overwrite to stay enabled */
4564 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4565 {
4566         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4567                 return -1;
4568 
4569         return 0;
4570 }
4571 
4572 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4573 {
4574         /* do nothing if flag is already set */
4575         if (!!(tr->trace_flags & mask) == !!enabled)
4576                 return 0;
4577 
4578         /* Give the tracer a chance to approve the change */
4579         if (tr->current_trace->flag_changed)
4580                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4581                         return -EINVAL;
4582 
4583         if (enabled)
4584                 tr->trace_flags |= mask;
4585         else
4586                 tr->trace_flags &= ~mask;
4587 
4588         if (mask == TRACE_ITER_RECORD_CMD)
4589                 trace_event_enable_cmd_record(enabled);
4590 
4591         if (mask == TRACE_ITER_RECORD_TGID) {
4592                 if (!tgid_map)
4593                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4594                                            sizeof(*tgid_map),
4595                                            GFP_KERNEL);
4596                 if (!tgid_map) {
4597                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4598                         return -ENOMEM;
4599                 }
4600 
4601                 trace_event_enable_tgid_record(enabled);
4602         }
4603 
4604         if (mask == TRACE_ITER_EVENT_FORK)
4605                 trace_event_follow_fork(tr, enabled);
4606 
4607         if (mask == TRACE_ITER_FUNC_FORK)
4608                 ftrace_pid_follow_fork(tr, enabled);
4609 
4610         if (mask == TRACE_ITER_OVERWRITE) {
4611                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4612 #ifdef CONFIG_TRACER_MAX_TRACE
4613                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4614 #endif
4615         }
4616 
4617         if (mask == TRACE_ITER_PRINTK) {
4618                 trace_printk_start_stop_comm(enabled);
4619                 trace_printk_control(enabled);
4620         }
4621 
4622         return 0;
4623 }
4624 
4625 static int trace_set_options(struct trace_array *tr, char *option)
4626 {
4627         char *cmp;
4628         int neg = 0;
4629         int ret;
4630         size_t orig_len = strlen(option);
4631         int len;
4632 
4633         cmp = strstrip(option);
4634 
4635         len = str_has_prefix(cmp, "no");
4636         if (len)
4637                 neg = 1;
4638 
4639         cmp += len;
4640 
4641         mutex_lock(&trace_types_lock);
4642 
4643         ret = match_string(trace_options, -1, cmp);
4644         /* If no option could be set, test the specific tracer options */
4645         if (ret < 0)
4646                 ret = set_tracer_option(tr, cmp, neg);
4647         else
4648                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4649 
4650         mutex_unlock(&trace_types_lock);
4651 
4652         /*
4653          * If the first trailing whitespace is replaced with '\0' by strstrip,
4654          * turn it back into a space.
4655          */
4656         if (orig_len > strlen(option))
4657                 option[strlen(option)] = ' ';
4658 
4659         return ret;
4660 }
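
/*
 * Editor's usage note (not part of the original source): the parsing
 * above is what a write to the trace_options file goes through.  From
 * user space an option is toggled by writing its name, or "no" plus
 * its name, e.g. (illustrative shell; tracefs is commonly mounted at
 * /sys/kernel/debug/tracing or /sys/kernel/tracing):
 *
 *	echo stacktrace   > /sys/kernel/debug/tracing/trace_options
 *	echo nostacktrace > /sys/kernel/debug/tracing/trace_options
 */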
4661 
4662 static void __init apply_trace_boot_options(void)
4663 {
4664         char *buf = trace_boot_options_buf;
4665         char *option;
4666 
4667         while (true) {
4668                 option = strsep(&buf, ",");
4669 
4670                 if (!option)
4671                         break;
4672 
4673                 if (*option)
4674                         trace_set_options(&global_trace, option);
4675 
4676                 /* Put back the comma to allow this to be called again */
4677                 if (buf)
4678                         *(buf - 1) = ',';
4679         }
4680 }
4681 
4682 static ssize_t
4683 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4684                         size_t cnt, loff_t *ppos)
4685 {
4686         struct seq_file *m = filp->private_data;
4687         struct trace_array *tr = m->private;
4688         char buf[64];
4689         int ret;
4690 
4691         if (cnt >= sizeof(buf))
4692                 return -EINVAL;
4693 
4694         if (copy_from_user(buf, ubuf, cnt))
4695                 return -EFAULT;
4696 
4697         buf[cnt] = 0;
4698 
4699         ret = trace_set_options(tr, buf);
4700         if (ret < 0)
4701                 return ret;
4702 
4703         *ppos += cnt;
4704 
4705         return cnt;
4706 }
4707 
4708 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4709 {
4710         struct trace_array *tr = inode->i_private;
4711         int ret;
4712 
4713         if (tracing_disabled)
4714                 return -ENODEV;
4715 
4716         if (trace_array_get(tr) < 0)
4717                 return -ENODEV;
4718 
4719         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4720         if (ret < 0)
4721                 trace_array_put(tr);
4722 
4723         return ret;
4724 }
4725 
4726 static const struct file_operations tracing_iter_fops = {
4727         .open           = tracing_trace_options_open,
4728         .read           = seq_read,
4729         .llseek         = seq_lseek,
4730         .release        = tracing_single_release_tr,
4731         .write          = tracing_trace_options_write,
4732 };
4733 
4734 static const char readme_msg[] =
4735         "tracing mini-HOWTO:\n\n"
4736         "# echo 0 > tracing_on : quick way to disable tracing\n"
4737         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4738         " Important files:\n"
4739         "  trace\t\t\t- The static contents of the buffer\n"
4740         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4741         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4742         "  current_tracer\t- function and latency tracers\n"
4743         "  available_tracers\t- list of configured tracers for current_tracer\n"
4744         "  error_log\t- error log for failed commands (that support it)\n"
4745         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4746         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4747         "  trace_clock\t\t-change the clock used to order events\n"
4748         "       local:   Per cpu clock but may not be synced across CPUs\n"
4749         "      global:   Synced across CPUs but slows tracing down.\n"
4750         "     counter:   Not a clock, but just an increment\n"
4751         "      uptime:   Jiffy counter from time of boot\n"
4752         "        perf:   Same clock that perf events use\n"
4753 #ifdef CONFIG_X86_64
4754         "     x86-tsc:   TSC cycle counter\n"
4755 #endif
4756         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4757         "       delta:   Delta difference against a buffer-wide timestamp\n"
4758         "    absolute:   Absolute (standalone) timestamp\n"
4759         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4760         "\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
4761         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4762         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4763         "\t\t\t  Remove sub-buffer with rmdir\n"
4764         "  trace_options\t\t- Set format or modify how tracing happens\n"
4765         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4766         "\t\t\t  option name\n"
4767         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4768 #ifdef CONFIG_DYNAMIC_FTRACE
4769         "\n  available_filter_functions - list of functions that can be filtered on\n"
4770         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4771         "\t\t\t  functions\n"
4772         "\t     accepts: func_full_name or glob-matching-pattern\n"
4773         "\t     modules: Can select a group via module\n"
4774         "\t      Format: :mod:<module-name>\n"
4775         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4776         "\t    triggers: a command to perform when function is hit\n"
4777         "\t      Format: <function>:<trigger>[:count]\n"
4778         "\t     trigger: traceon, traceoff\n"
4779         "\t\t      enable_event:<system>:<event>\n"
4780         "\t\t      disable_event:<system>:<event>\n"
4781 #ifdef CONFIG_STACKTRACE
4782         "\t\t      stacktrace\n"
4783 #endif
4784 #ifdef CONFIG_TRACER_SNAPSHOT
4785         "\t\t      snapshot\n"
4786 #endif
4787         "\t\t      dump\n"
4788         "\t\t      cpudump\n"
4789         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4790         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4791         "\t     The first one will disable tracing every time do_fault is hit\n"
4792         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4793         "\t       The first time do_trap is hit and it disables tracing, the\n"
4794         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4795         "\t       the counter will not decrement. It only decrements when the\n"
4796         "\t       trigger did work\n"
4797         "\t     To remove trigger without count:\n"
4798         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4799         "\t     To remove trigger with a count:\n"
4800         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4801         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4802         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4803         "\t    modules: Can select a group via module command :mod:\n"
4804         "\t    Does not accept triggers\n"
4805 #endif /* CONFIG_DYNAMIC_FTRACE */
4806 #ifdef CONFIG_FUNCTION_TRACER
4807         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4808         "\t\t    (function)\n"
4809 #endif
4810 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4811         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4812         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4813         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4814 #endif
4815 #ifdef CONFIG_TRACER_SNAPSHOT
4816         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4817         "\t\t\t  snapshot buffer. Read the contents for more\n"
4818         "\t\t\t  information\n"
4819 #endif
4820 #ifdef CONFIG_STACK_TRACER
4821         "  stack_trace\t\t- Shows the max stack trace when active\n"
4822         "  stack_max_size\t- Shows current max stack size that was traced\n"
4823         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4824         "\t\t\t  new trace)\n"
4825 #ifdef CONFIG_DYNAMIC_FTRACE
4826         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4827         "\t\t\t  traces\n"
4828 #endif
4829 #endif /* CONFIG_STACK_TRACER */
4830 #ifdef CONFIG_DYNAMIC_EVENTS
4831         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4832         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4833 #endif
4834 #ifdef CONFIG_KPROBE_EVENTS
4835         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4836         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4837 #endif
4838 #ifdef CONFIG_UPROBE_EVENTS
4839         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4840         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4841 #endif
4842 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4843         "\t  accepts: event-definitions (one definition per line)\n"
4844         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4845         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4846 #ifdef CONFIG_HIST_TRIGGERS
4847         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4848 #endif
4849         "\t           -:[<group>/]<event>\n"
4850 #ifdef CONFIG_KPROBE_EVENTS
4851         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4852   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4853 #endif
4854 #ifdef CONFIG_UPROBE_EVENTS
4855   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4856 #endif
4857         "\t     args: <name>=fetcharg[:type]\n"
4858         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4859 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4860         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4861 #else
4862         "\t           $stack<index>, $stack, $retval, $comm,\n"
4863 #endif
4864         "\t           +|-[u]<offset>(<fetcharg>)\n"
4865         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4866         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4867         "\t           <type>\\[<array-size>\\]\n"
4868 #ifdef CONFIG_HIST_TRIGGERS
4869         "\t    field: <stype> <name>;\n"
4870         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4871         "\t           [unsigned] char/int/long\n"
4872 #endif
4873 #endif
4874         "  events/\t\t- Directory containing all trace event subsystems:\n"
4875         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4876         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4877         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4878         "\t\t\t  events\n"
4879         "      filter\t\t- If set, only events passing filter are traced\n"
4880         "  events/<system>/<event>/\t- Directory containing control files for\n"
4881         "\t\t\t  <event>:\n"
4882         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4883         "      filter\t\t- If set, only events passing filter are traced\n"
4884         "      trigger\t\t- If set, a command to perform when event is hit\n"
4885         "\t    Format: <trigger>[:count][if <filter>]\n"
4886         "\t   trigger: traceon, traceoff\n"
4887         "\t            enable_event:<system>:<event>\n"
4888         "\t            disable_event:<system>:<event>\n"
4889 #ifdef CONFIG_HIST_TRIGGERS
4890         "\t            enable_hist:<system>:<event>\n"
4891         "\t            disable_hist:<system>:<event>\n"
4892 #endif
4893 #ifdef CONFIG_STACKTRACE
4894         "\t\t    stacktrace\n"
4895 #endif
4896 #ifdef CONFIG_TRACER_SNAPSHOT
4897         "\t\t    snapshot\n"
4898 #endif
4899 #ifdef CONFIG_HIST_TRIGGERS
4900         "\t\t    hist (see below)\n"
4901 #endif
4902         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4903         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4904         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4905         "\t                  events/block/block_unplug/trigger\n"
4906         "\t   The first disables tracing every time block_unplug is hit.\n"
4907         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4908         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4909         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4910         "\t   Like function triggers, the counter is only decremented if it\n"
4911         "\t    enabled or disabled tracing.\n"
4912         "\t   To remove a trigger without a count:\n"
4913         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4914         "\t   To remove a trigger with a count:\n"
4915         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4916         "\t   Filters can be ignored when removing a trigger.\n"
4917 #ifdef CONFIG_HIST_TRIGGERS
4918         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4919         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4920         "\t            [:values=<field1[,field2,...]>]\n"
4921         "\t            [:sort=<field1[,field2,...]>]\n"
4922         "\t            [:size=#entries]\n"
4923         "\t            [:pause][:continue][:clear]\n"
4924         "\t            [:name=histname1]\n"
4925         "\t            [:<handler>.<action>]\n"
4926         "\t            [if <filter>]\n\n"
4927         "\t    When a matching event is hit, an entry is added to a hash\n"
4928         "\t    table using the key(s) and value(s) named, and the value of a\n"
4929         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4930         "\t    correspond to fields in the event's format description.  Keys\n"
4931         "\t    can be any field, or the special string 'stacktrace'.\n"
4932         "\t    Compound keys consisting of up to two fields can be specified\n"
4933         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4934         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4935         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4936         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4937         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4938         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4939         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4940         "\t    its histogram data will be shared with other triggers of the\n"
4941         "\t    same name, and trigger hits will update this common data.\n\n"
4942         "\t    Reading the 'hist' file for the event will dump the hash\n"
4943         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4944         "\t    triggers attached to an event, there will be a table for each\n"
4945         "\t    trigger in the output.  The table displayed for a named\n"
4946         "\t    trigger will be the same as any other instance having the\n"
4947         "\t    same name.  The default format used to display a given field\n"
4948         "\t    can be modified by appending any of the following modifiers\n"
4949         "\t    to the field name, as applicable:\n\n"
4950         "\t            .hex        display a number as a hex value\n"
4951         "\t            .sym        display an address as a symbol\n"
4952         "\t            .sym-offset display an address as a symbol and offset\n"
4953         "\t            .execname   display a common_pid as a program name\n"
4954         "\t            .syscall    display a syscall id as a syscall name\n"
4955         "\t            .log2       display log2 value rather than raw number\n"
4956         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4957         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4958         "\t    trigger or to start a hist trigger but not log any events\n"
4959         "\t    until told to do so.  'continue' can be used to start or\n"
4960         "\t    restart a paused hist trigger.\n\n"
4961         "\t    The 'clear' parameter will clear the contents of a running\n"
4962         "\t    hist trigger and leave its current paused/active state\n"
4963         "\t    unchanged.\n\n"
4964         "\t    The enable_hist and disable_hist triggers can be used to\n"
4965         "\t    have one event conditionally start and stop another event's\n"
4966         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4967         "\t    the enable_event and disable_event triggers.\n\n"
4968         "\t    Hist trigger handlers and actions are executed whenever a\n"
4969         "\t    histogram entry is added or updated.  They take the form:\n\n"
4970         "\t        <handler>.<action>\n\n"
4971         "\t    The available handlers are:\n\n"
4972         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4973         "\t        onmax(var)               - invoke if var exceeds current max\n"
4974         "\t        onchange(var)            - invoke action if var changes\n\n"
4975         "\t    The available actions are:\n\n"
4976         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4977         "\t        save(field,...)                      - save current event fields\n"
4978 #ifdef CONFIG_TRACER_SNAPSHOT
4979         "\t        snapshot()                           - snapshot the trace buffer\n"
4980 #endif
4981 #endif
4982 ;
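
     /*
      * For illustration only (not part of the readme_msg text above): a
      * hist trigger is attached by writing its definition into an event's
      * "trigger" file.  The event and field names below are just one
      * example, borrowed from the histogram documentation; any event with
      * numeric fields works the same way:
      *
      *   # cd /sys/kernel/tracing
      *   # echo 'hist:keys=common_pid.execname:values=bytes_req' > \
      *           events/kmem/kmalloc/trigger
      *   # cat events/kmem/kmalloc/hist
      *
      * Reading the "hist" file then dumps the aggregated table described
      * above.
      */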
4983 
4984 static ssize_t
4985 tracing_readme_read(struct file *filp, char __user *ubuf,
4986                        size_t cnt, loff_t *ppos)
4987 {
4988         return simple_read_from_buffer(ubuf, cnt, ppos,
4989                                         readme_msg, strlen(readme_msg));
4990 }
4991 
4992 static const struct file_operations tracing_readme_fops = {
4993         .open           = tracing_open_generic,
4994         .read           = tracing_readme_read,
4995         .llseek         = generic_file_llseek,
4996 };
4997 
4998 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4999 {
5000         int *ptr = v;
5001 
5002         if (*pos || m->count)
5003                 ptr++;
5004 
5005         (*pos)++;
5006 
5007         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5008                 if (trace_find_tgid(*ptr))
5009                         return ptr;
5010         }
5011 
5012         return NULL;
5013 }
5014 
5015 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5016 {
5017         void *v;
5018         loff_t l = 0;
5019 
5020         if (!tgid_map)
5021                 return NULL;
5022 
5023         v = &tgid_map[0];
5024         while (l <= *pos) {
5025                 v = saved_tgids_next(m, v, &l);
5026                 if (!v)
5027                         return NULL;
5028         }
5029 
5030         return v;
5031 }
5032 
5033 static void saved_tgids_stop(struct seq_file *m, void *v)
5034 {
5035 }
5036 
5037 static int saved_tgids_show(struct seq_file *m, void *v)
5038 {
5039         int pid = (int *)v - tgid_map;
5040 
5041         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5042         return 0;
5043 }
5044 
5045 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5046         .start          = saved_tgids_start,
5047         .stop           = saved_tgids_stop,
5048         .next           = saved_tgids_next,
5049         .show           = saved_tgids_show,
5050 };
5051 
5052 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5053 {
5054         if (tracing_disabled)
5055                 return -ENODEV;
5056 
5057         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5058 }
5059 
5060 
5061 static const struct file_operations tracing_saved_tgids_fops = {
5062         .open           = tracing_saved_tgids_open,
5063         .read           = seq_read,
5064         .llseek         = seq_lseek,
5065         .release        = seq_release,
5066 };
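
     /*
      * Illustration (assumes tracefs is mounted at /sys/kernel/tracing):
      * tgids are only recorded while the "record-tgid" option is set, so
      * a typical use of this file looks like:
      *
      *   # echo 1 > /sys/kernel/tracing/options/record-tgid
      *   # cat /sys/kernel/tracing/saved_tgids
      *   1048 1048        (sample output: "<pid> <tgid>" per line, as
      *   1052 1050         printed by saved_tgids_show() above)
      */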
5067 
5068 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5069 {
5070         unsigned int *ptr = v;
5071 
5072         if (*pos || m->count)
5073                 ptr++;
5074 
5075         (*pos)++;
5076 
5077         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5078              ptr++) {
5079                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5080                         continue;
5081 
5082                 return ptr;
5083         }
5084 
5085         return NULL;
5086 }
5087 
5088 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5089 {
5090         void *v;
5091         loff_t l = 0;
5092 
5093         preempt_disable();
5094         arch_spin_lock(&trace_cmdline_lock);
5095 
5096         v = &savedcmd->map_cmdline_to_pid[0];
5097         while (l <= *pos) {
5098                 v = saved_cmdlines_next(m, v, &l);
5099                 if (!v)
5100                         return NULL;
5101         }
5102 
5103         return v;
5104 }
5105 
5106 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5107 {
5108         arch_spin_unlock(&trace_cmdline_lock);
5109         preempt_enable();
5110 }
5111 
5112 static int saved_cmdlines_show(struct seq_file *m, void *v)
5113 {
5114         char buf[TASK_COMM_LEN];
5115         unsigned int *pid = v;
5116 
5117         __trace_find_cmdline(*pid, buf);
5118         seq_printf(m, "%d %s\n", *pid, buf);
5119         return 0;
5120 }
5121 
5122 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5123         .start          = saved_cmdlines_start,
5124         .next           = saved_cmdlines_next,
5125         .stop           = saved_cmdlines_stop,
5126         .show           = saved_cmdlines_show,
5127 };
5128 
5129 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5130 {
5131         if (tracing_disabled)
5132                 return -ENODEV;
5133 
5134         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5135 }
5136 
5137 static const struct file_operations tracing_saved_cmdlines_fops = {
5138         .open           = tracing_saved_cmdlines_open,
5139         .read           = seq_read,
5140         .llseek         = seq_lseek,
5141         .release        = seq_release,
5142 };
5143 
5144 static ssize_t
5145 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5146                                  size_t cnt, loff_t *ppos)
5147 {
5148         char buf[64];
5149         int r;
5150 
5151         arch_spin_lock(&trace_cmdline_lock);
5152         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5153         arch_spin_unlock(&trace_cmdline_lock);
5154 
5155         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5156 }
5157 
5158 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5159 {
5160         kfree(s->saved_cmdlines);
5161         kfree(s->map_cmdline_to_pid);
5162         kfree(s);
5163 }
5164 
5165 static int tracing_resize_saved_cmdlines(unsigned int val)
5166 {
5167         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5168 
5169         s = kmalloc(sizeof(*s), GFP_KERNEL);
5170         if (!s)
5171                 return -ENOMEM;
5172 
5173         if (allocate_cmdlines_buffer(val, s) < 0) {
5174                 kfree(s);
5175                 return -ENOMEM;
5176         }
5177 
5178         arch_spin_lock(&trace_cmdline_lock);
5179         savedcmd_temp = savedcmd;
5180         savedcmd = s;
5181         arch_spin_unlock(&trace_cmdline_lock);
5182         free_saved_cmdlines_buffer(savedcmd_temp);
5183 
5184         return 0;
5185 }
5186 
5187 static ssize_t
5188 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5189                                   size_t cnt, loff_t *ppos)
5190 {
5191         unsigned long val;
5192         int ret;
5193 
5194         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5195         if (ret)
5196                 return ret;
5197 
5198         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5199         if (!val || val > PID_MAX_DEFAULT)
5200                 return -EINVAL;
5201 
5202         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5203         if (ret < 0)
5204                 return ret;
5205 
5206         *ppos += cnt;
5207 
5208         return cnt;
5209 }
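
     /*
      * Example (illustrative): reading the file shows the current number
      * of saved entries, and writing a new count reallocates the map via
      * tracing_resize_saved_cmdlines():
      *
      *   # cat /sys/kernel/tracing/saved_cmdlines_size
      *   128
      *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
      *
      * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL above.
      */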
5210 
5211 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5212         .open           = tracing_open_generic,
5213         .read           = tracing_saved_cmdlines_size_read,
5214         .write          = tracing_saved_cmdlines_size_write,
5215 };
5216 
5217 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5218 static union trace_eval_map_item *
5219 update_eval_map(union trace_eval_map_item *ptr)
5220 {
5221         if (!ptr->map.eval_string) {
5222                 if (ptr->tail.next) {
5223                         ptr = ptr->tail.next;
5224                         /* Set ptr to the next real item (skip head) */
5225                         ptr++;
5226                 } else
5227                         return NULL;
5228         }
5229         return ptr;
5230 }
5231 
5232 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5233 {
5234         union trace_eval_map_item *ptr = v;
5235 
5236         /*
5237          * Paranoid! If ptr points to end, we don't want to increment past it.
5238          * This really should never happen.
5239          */
5240         ptr = update_eval_map(ptr);
5241         if (WARN_ON_ONCE(!ptr))
5242                 return NULL;
5243 
5244         ptr++;
5245 
5246         (*pos)++;
5247 
5248         ptr = update_eval_map(ptr);
5249 
5250         return ptr;
5251 }
5252 
5253 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5254 {
5255         union trace_eval_map_item *v;
5256         loff_t l = 0;
5257 
5258         mutex_lock(&trace_eval_mutex);
5259 
5260         v = trace_eval_maps;
5261         if (v)
5262                 v++;
5263 
5264         while (v && l < *pos) {
5265                 v = eval_map_next(m, v, &l);
5266         }
5267 
5268         return v;
5269 }
5270 
5271 static void eval_map_stop(struct seq_file *m, void *v)
5272 {
5273         mutex_unlock(&trace_eval_mutex);
5274 }
5275 
5276 static int eval_map_show(struct seq_file *m, void *v)
5277 {
5278         union trace_eval_map_item *ptr = v;
5279 
5280         seq_printf(m, "%s %ld (%s)\n",
5281                    ptr->map.eval_string, ptr->map.eval_value,
5282                    ptr->map.system);
5283 
5284         return 0;
5285 }
5286 
5287 static const struct seq_operations tracing_eval_map_seq_ops = {
5288         .start          = eval_map_start,
5289         .next           = eval_map_next,
5290         .stop           = eval_map_stop,
5291         .show           = eval_map_show,
5292 };
5293 
5294 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5295 {
5296         if (tracing_disabled)
5297                 return -ENODEV;
5298 
5299         return seq_open(filp, &tracing_eval_map_seq_ops);
5300 }
5301 
5302 static const struct file_operations tracing_eval_map_fops = {
5303         .open           = tracing_eval_map_open,
5304         .read           = seq_read,
5305         .llseek         = seq_lseek,
5306         .release        = seq_release,
5307 };
5308 
5309 static inline union trace_eval_map_item *
5310 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5311 {
5312         /* Return tail of array given the head */
5313         return ptr + ptr->head.length + 1;
5314 }
5315 
5316 static void
5317 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5318                            int len)
5319 {
5320         struct trace_eval_map **stop;
5321         struct trace_eval_map **map;
5322         union trace_eval_map_item *map_array;
5323         union trace_eval_map_item *ptr;
5324 
5325         stop = start + len;
5326 
5327         /*
5328          * The trace_eval_maps list contains the maps plus a head and a tail
5329          * item, where the head holds the module and the length of the array,
5330          * and the tail holds a pointer to the next list.
5331          */
5332         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5333         if (!map_array) {
5334                 pr_warn("Unable to allocate trace eval mapping\n");
5335                 return;
5336         }
5337 
5338         mutex_lock(&trace_eval_mutex);
5339 
5340         if (!trace_eval_maps)
5341                 trace_eval_maps = map_array;
5342         else {
5343                 ptr = trace_eval_maps;
5344                 for (;;) {
5345                         ptr = trace_eval_jmp_to_tail(ptr);
5346                         if (!ptr->tail.next)
5347                                 break;
5348                         ptr = ptr->tail.next;
5349 
5350                 }
5351                 ptr->tail.next = map_array;
5352         }
5353         map_array->head.mod = mod;
5354         map_array->head.length = len;
5355         map_array++;
5356 
5357         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5358                 map_array->map = **map;
5359                 map_array++;
5360         }
5361         memset(map_array, 0, sizeof(*map_array));
5362 
5363         mutex_unlock(&trace_eval_mutex);
5364 }
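
     /*
      * Resulting layout of one map_array block, as built above
      * (illustration):
      *
      *   index 0         head  (head.mod, head.length = len)
      *   index 1..len    the copied trace_eval_map entries
      *   index len + 1   tail  (zeroed; tail.next links the next block)
      *
      * trace_eval_jmp_to_tail() relies on this layout when it computes
      * "ptr + ptr->head.length + 1".
      */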
5365 
5366 static void trace_create_eval_file(struct dentry *d_tracer)
5367 {
5368         trace_create_file("eval_map", 0444, d_tracer,
5369                           NULL, &tracing_eval_map_fops);
5370 }
5371 
5372 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5373 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5374 static inline void trace_insert_eval_map_file(struct module *mod,
5375                               struct trace_eval_map **start, int len) { }
5376 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5377 
5378 static void trace_insert_eval_map(struct module *mod,
5379                                   struct trace_eval_map **start, int len)
5380 {
5381         struct trace_eval_map **map;
5382 
5383         if (len <= 0)
5384                 return;
5385 
5386         map = start;
5387 
5388         trace_event_eval_update(map, len);
5389 
5390         trace_insert_eval_map_file(mod, start, len);
5391 }
5392 
5393 static ssize_t
5394 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5395                        size_t cnt, loff_t *ppos)
5396 {
5397         struct trace_array *tr = filp->private_data;
5398         char buf[MAX_TRACER_SIZE+2];
5399         int r;
5400 
5401         mutex_lock(&trace_types_lock);
5402         r = sprintf(buf, "%s\n", tr->current_trace->name);
5403         mutex_unlock(&trace_types_lock);
5404 
5405         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5406 }
5407 
5408 int tracer_init(struct tracer *t, struct trace_array *tr)
5409 {
5410         tracing_reset_online_cpus(&tr->trace_buffer);
5411         return t->init(tr);
5412 }
5413 
5414 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5415 {
5416         int cpu;
5417 
5418         for_each_tracing_cpu(cpu)
5419                 per_cpu_ptr(buf->data, cpu)->entries = val;
5420 }
5421 
5422 #ifdef CONFIG_TRACER_MAX_TRACE
5423 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5424 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5425                                         struct trace_buffer *size_buf, int cpu_id)
5426 {
5427         int cpu, ret = 0;
5428 
5429         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5430                 for_each_tracing_cpu(cpu) {
5431                         ret = ring_buffer_resize(trace_buf->buffer,
5432                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5433                         if (ret < 0)
5434                                 break;
5435                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5436                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5437                 }
5438         } else {
5439                 ret = ring_buffer_resize(trace_buf->buffer,
5440                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5441                 if (ret == 0)
5442                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5443                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5444         }
5445 
5446         return ret;
5447 }
5448 #endif /* CONFIG_TRACER_MAX_TRACE */
5449 
5450 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5451                                         unsigned long size, int cpu)
5452 {
5453         int ret;
5454 
5455         /*
5456          * If kernel or user changes the size of the ring buffer
5457          * we use the size that was given, and we can forget about
5458          * expanding it later.
5459          */
5460         ring_buffer_expanded = true;
5461 
5462         /* May be called before buffers are initialized */
5463         if (!tr->trace_buffer.buffer)
5464                 return 0;
5465 
5466         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5467         if (ret < 0)
5468                 return ret;
5469 
5470 #ifdef CONFIG_TRACER_MAX_TRACE
5471         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5472             !tr->current_trace->use_max_tr)
5473                 goto out;
5474 
5475         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5476         if (ret < 0) {
5477                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5478                                                      &tr->trace_buffer, cpu);
5479                 if (r < 0) {
5480                         /*
5481                          * AARGH! We are left with different
5482                          * size max buffer!!!!
5483                          * The max buffer is our "snapshot" buffer.
5484                          * When a tracer needs a snapshot (one of the
5485                          * latency tracers), it swaps the max buffer
5486                          * with the saved snapshot. We succeeded in updating
5487                          * the size of the main buffer, but failed to update
5488                          * the size of the max buffer. But when we tried
5489                          * to reset the main buffer to the original size, we
5490                          * failed there too. This is very unlikely to
5491                          * happen, but if it does, warn and kill all
5492                          * tracing.
5493                          */
5494                         WARN_ON(1);
5495                         tracing_disabled = 1;
5496                 }
5497                 return ret;
5498         }
5499 
5500         if (cpu == RING_BUFFER_ALL_CPUS)
5501                 set_buffer_entries(&tr->max_buffer, size);
5502         else
5503                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5504 
5505  out:
5506 #endif /* CONFIG_TRACER_MAX_TRACE */
5507 
5508         if (cpu == RING_BUFFER_ALL_CPUS)
5509                 set_buffer_entries(&tr->trace_buffer, size);
5510         else
5511                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5512 
5513         return ret;
5514 }
5515 
5516 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5517                                           unsigned long size, int cpu_id)
5518 {
5519         int ret = size;
5520 
5521         mutex_lock(&trace_types_lock);
5522 
5523         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5524                 /* make sure this cpu is enabled in the mask */
5525                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5526                         ret = -EINVAL;
5527                         goto out;
5528                 }
5529         }
5530 
5531         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5532         if (ret < 0)
5533                 ret = -ENOMEM;
5534 
5535 out:
5536         mutex_unlock(&trace_types_lock);
5537 
5538         return ret;
5539 }
5540 
5541 
5542 /**
5543  * tracing_update_buffers - used by tracing facility to expand ring buffers
5544  *
5545  * To save memory when tracing is never used on a system that has it
5546  * configured in, the ring buffers are initially set to a minimum size.
5547  * Once a user starts to use the tracing facility, they need to grow
5548  * to their default size.
5549  *
5550  * This function is to be called when a tracer is about to be used.
5551  */
5552 int tracing_update_buffers(void)
5553 {
5554         int ret = 0;
5555 
5556         mutex_lock(&trace_types_lock);
5557         if (!ring_buffer_expanded)
5558                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5559                                                 RING_BUFFER_ALL_CPUS);
5560         mutex_unlock(&trace_types_lock);
5561 
5562         return ret;
5563 }
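
     /*
      * Note (illustrative): the "default size" the buffers expand to is
      * trace_buf_size, which can be changed on the kernel command line,
      * e.g. booting with
      *
      *   trace_buf_size=2048k
      *
      * makes the first expansion grow each per-CPU buffer to about 2 MB
      * instead of the compiled-in default.
      */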
5564 
5565 struct trace_option_dentry;
5566 
5567 static void
5568 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5569 
5570 /*
5571  * Used to clear out the tracer before deletion of an instance.
5572  * Must have trace_types_lock held.
5573  */
5574 static void tracing_set_nop(struct trace_array *tr)
5575 {
5576         if (tr->current_trace == &nop_trace)
5577                 return;
5578
5579         tr->current_trace->enabled--;
5580 
5581         if (tr->current_trace->reset)
5582                 tr->current_trace->reset(tr);
5583 
5584         tr->current_trace = &nop_trace;
5585 }
5586 
5587 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5588 {
5589         /* Only enable if the directory has been created already. */
5590         if (!tr->dir)
5591                 return;
5592 
5593         create_trace_option_files(tr, t);
5594 }
5595 
5596 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5597 {
5598         struct tracer *t;
5599 #ifdef CONFIG_TRACER_MAX_TRACE
5600         bool had_max_tr;
5601 #endif
5602         int ret = 0;
5603 
5604         mutex_lock(&trace_types_lock);
5605 
5606         if (!ring_buffer_expanded) {
5607                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5608                                                 RING_BUFFER_ALL_CPUS);
5609                 if (ret < 0)
5610                         goto out;
5611                 ret = 0;
5612         }
5613 
5614         for (t = trace_types; t; t = t->next) {
5615                 if (strcmp(t->name, buf) == 0)
5616                         break;
5617         }
5618         if (!t) {
5619                 ret = -EINVAL;
5620                 goto out;
5621         }
5622         if (t == tr->current_trace)
5623                 goto out;
5624 
5625 #ifdef CONFIG_TRACER_SNAPSHOT
5626         if (t->use_max_tr) {
5627                 arch_spin_lock(&tr->max_lock);
5628                 if (tr->cond_snapshot)
5629                         ret = -EBUSY;
5630                 arch_spin_unlock(&tr->max_lock);
5631                 if (ret)
5632                         goto out;
5633         }
5634 #endif
5635         /* Some tracers won't work on kernel command line */
5636         if (system_state < SYSTEM_RUNNING && t->noboot) {
5637                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5638                         t->name);
5639                 goto out;
5640         }
5641 
5642         /* Some tracers are only allowed for the top level buffer */
5643         if (!trace_ok_for_array(t, tr)) {
5644                 ret = -EINVAL;
5645                 goto out;
5646         }
5647 
5648         /* If trace pipe files are being read, we can't change the tracer */
5649         if (tr->current_trace->ref) {
5650                 ret = -EBUSY;
5651                 goto out;
5652         }
5653 
5654         trace_branch_disable();
5655 
5656         tr->current_trace->enabled--;
5657 
5658         if (tr->current_trace->reset)
5659                 tr->current_trace->reset(tr);
5660 
5661         /* Current trace needs to be nop_trace before synchronize_rcu */
5662         tr->current_trace = &nop_trace;
5663 
5664 #ifdef CONFIG_TRACER_MAX_TRACE
5665         had_max_tr = tr->allocated_snapshot;
5666 
5667         if (had_max_tr && !t->use_max_tr) {
5668                 /*
5669                  * We need to make sure that the update_max_tr sees that
5670                  * current_trace changed to nop_trace to keep it from
5671                  * swapping the buffers after we resize it.
5672                  * The update_max_tr is called with interrupts disabled,
5673                  * so a synchronize_rcu() is sufficient.
5674                  */
5675                 synchronize_rcu();
5676                 free_snapshot(tr);
5677         }
5678 #endif
5679 
5680 #ifdef CONFIG_TRACER_MAX_TRACE
5681         if (t->use_max_tr && !had_max_tr) {
5682                 ret = tracing_alloc_snapshot_instance(tr);
5683                 if (ret < 0)
5684                         goto out;
5685         }
5686 #endif
5687 
5688         if (t->init) {
5689                 ret = tracer_init(t, tr);
5690                 if (ret)
5691                         goto out;
5692         }
5693 
5694         tr->current_trace = t;
5695         tr->current_trace->enabled++;
5696         trace_branch_enable(tr);
5697  out:
5698         mutex_unlock(&trace_types_lock);
5699 
5700         return ret;
5701 }
5702 
5703 static ssize_t
5704 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5705                         size_t cnt, loff_t *ppos)
5706 {
5707         struct trace_array *tr = filp->private_data;
5708         char buf[MAX_TRACER_SIZE+1];
5709         int i;
5710         size_t ret;
5711         int err;
5712 
5713         ret = cnt;
5714 
5715         if (cnt > MAX_TRACER_SIZE)
5716                 cnt = MAX_TRACER_SIZE;
5717 
5718         if (copy_from_user(buf, ubuf, cnt))
5719                 return -EFAULT;
5720 
5721         buf[cnt] = 0;
5722 
5723         /* strip trailing whitespace. */
5724         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5725                 buf[i] = 0;
5726 
5727         err = tracing_set_tracer(tr, buf);
5728         if (err)
5729                 return err;
5730 
5731         *ppos += ret;
5732 
5733         return ret;
5734 }
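
     /*
      * Illustration of the interface implemented above (paths assume
      * tracefs is mounted at /sys/kernel/tracing):
      *
      *   # cat /sys/kernel/tracing/available_tracers
      *   function_graph function nop
      *   # echo function > /sys/kernel/tracing/current_tracer
      *   # cat /sys/kernel/tracing/current_tracer
      *   function
      *
      * Writing a name that is not in available_tracers makes
      * tracing_set_tracer() fail with -EINVAL.
      */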
5735 
5736 static ssize_t
5737 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5738                    size_t cnt, loff_t *ppos)
5739 {
5740         char buf[64];
5741         int r;
5742 
5743         r = snprintf(buf, sizeof(buf), "%ld\n",
5744                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5745         if (r > sizeof(buf))
5746                 r = sizeof(buf);
5747         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5748 }
5749 
5750 static ssize_t
5751 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5752                     size_t cnt, loff_t *ppos)
5753 {
5754         unsigned long val;
5755         int ret;
5756 
5757         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5758         if (ret)
5759                 return ret;
5760 
5761         *ptr = val * 1000;
5762 
5763         return cnt;
5764 }
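
     /*
      * Worked example: these helpers expose a nanosecond variable in
      * microseconds.  Writing "500" stores 500 * 1000 = 500000 ns, and
      * reading it back prints 500 again via nsecs_to_usecs().  A stored
      * value of (unsigned long)-1 is reported as -1 instead of being
      * converted.
      */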
5765 
5766 static ssize_t
5767 tracing_thresh_read(struct file *filp, char __user *ubuf,
5768                     size_t cnt, loff_t *ppos)
5769 {
5770         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5771 }
5772 
5773 static ssize_t
5774 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5775                      size_t cnt, loff_t *ppos)
5776 {
5777         struct trace_array *tr = filp->private_data;
5778         int ret;
5779 
5780         mutex_lock(&trace_types_lock);
5781         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5782         if (ret < 0)
5783                 goto out;
5784 
5785         if (tr->current_trace->update_thresh) {
5786                 ret = tr->current_trace->update_thresh(tr);
5787                 if (ret < 0)
5788                         goto out;
5789         }
5790 
5791         ret = cnt;
5792 out:
5793         mutex_unlock(&trace_types_lock);
5794 
5795         return ret;
5796 }
5797 
5798 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5799 
5800 static ssize_t
5801 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5802                      size_t cnt, loff_t *ppos)
5803 {
5804         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5805 }
5806 
5807 static ssize_t
5808 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5809                       size_t cnt, loff_t *ppos)
5810 {
5811         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5812 }
5813 
5814 #endif
5815 
5816 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5817 {
5818         struct trace_array *tr = inode->i_private;
5819         struct trace_iterator *iter;
5820         int ret = 0;
5821 
5822         if (tracing_disabled)
5823                 return -ENODEV;
5824 
5825         if (trace_array_get(tr) < 0)
5826                 return -ENODEV;
5827 
5828         mutex_lock(&trace_types_lock);
5829 
5830         /* create a buffer to store the information to pass to userspace */
5831         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5832         if (!iter) {
5833                 ret = -ENOMEM;
5834                 __trace_array_put(tr);
5835                 goto out;
5836         }
5837 
5838         trace_seq_init(&iter->seq);
5839         iter->trace = tr->current_trace;
5840 
5841         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5842                 ret = -ENOMEM;
5843                 goto fail;
5844         }
5845 
5846         /* trace pipe does not show start of buffer */
5847         cpumask_setall(iter->started);
5848 
5849         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5850                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5851 
5852         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5853         if (trace_clocks[tr->clock_id].in_ns)
5854                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5855 
5856         iter->tr = tr;
5857         iter->trace_buffer = &tr->trace_buffer;
5858         iter->cpu_file = tracing_get_cpu(inode);
5859         mutex_init(&iter->mutex);
5860         filp->private_data = iter;
5861 
5862         if (iter->trace->pipe_open)
5863                 iter->trace->pipe_open(iter);
5864 
5865         nonseekable_open(inode, filp);
5866 
5867         tr->current_trace->ref++;
5868 out:
5869         mutex_unlock(&trace_types_lock);
5870         return ret;
5871 
5872 fail:
5873         kfree(iter);
5874         __trace_array_put(tr);
5875         mutex_unlock(&trace_types_lock);
5876         return ret;
5877 }
5878 
5879 static int tracing_release_pipe(struct inode *inode, struct file *file)
5880 {
5881         struct trace_iterator *iter = file->private_data;
5882         struct trace_array *tr = inode->i_private;
5883 
5884         mutex_lock(&trace_types_lock);
5885 
5886         tr->current_trace->ref--;
5887 
5888         if (iter->trace->pipe_close)
5889                 iter->trace->pipe_close(iter);
5890 
5891         mutex_unlock(&trace_types_lock);
5892 
5893         free_cpumask_var(iter->started);
5894         mutex_destroy(&iter->mutex);
5895         kfree(iter);
5896 
5897         trace_array_put(tr);
5898 
5899         return 0;
5900 }
5901 
5902 static __poll_t
5903 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5904 {
5905         struct trace_array *tr = iter->tr;
5906 
5907         /* Iterators are static, they should be filled or empty */
5908         if (trace_buffer_iter(iter, iter->cpu_file))
5909                 return EPOLLIN | EPOLLRDNORM;
5910 
5911         if (tr->trace_flags & TRACE_ITER_BLOCK)
5912                 /*
5913                  * Always select as readable when in blocking mode
5914                  */
5915                 return EPOLLIN | EPOLLRDNORM;
5916         else
5917                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5918                                              filp, poll_table);
5919 }
5920 
5921 static __poll_t
5922 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5923 {
5924         struct trace_iterator *iter = filp->private_data;
5925 
5926         return trace_poll(iter, filp, poll_table);
5927 }
5928 
5929 /* Must be called with iter->mutex held. */
5930 static int tracing_wait_pipe(struct file *filp)
5931 {
5932         struct trace_iterator *iter = filp->private_data;
5933         int ret;
5934 
5935         while (trace_empty(iter)) {
5936 
5937                 if ((filp->f_flags & O_NONBLOCK)) {
5938                         return -EAGAIN;
5939                 }
5940 
5941                 /*
5942                  * We block until we read something and tracing is disabled.
5943                  * We still block if tracing is disabled, but we have never
5944                  * read anything. This allows a user to cat this file, and
5945                  * then enable tracing. But after we have read something,
5946                  * we give an EOF when tracing is again disabled.
5947                  *
5948                  * iter->pos will be 0 if we haven't read anything.
5949                  */
5950                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5951                         break;
5952 
5953                 mutex_unlock(&iter->mutex);
5954 
5955                 ret = wait_on_pipe(iter, 0);
5956 
5957                 mutex_lock(&iter->mutex);
5958 
5959                 if (ret)
5960                         return ret;
5961         }
5962 
5963         return 1;
5964 }
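
     /*
      * In practice this is what makes a plain "cat trace_pipe" block until
      * new entries arrive (or until tracing is disabled after something
      * has been read), while a reader that opened the file with O_NONBLOCK
      * gets -EAGAIN immediately whenever the buffer is empty.
      */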
5965 
5966 /*
5967  * Consumer reader.
5968  */
5969 static ssize_t
5970 tracing_read_pipe(struct file *filp, char __user *ubuf,
5971                   size_t cnt, loff_t *ppos)
5972 {
5973         struct trace_iterator *iter = filp->private_data;
5974         ssize_t sret;
5975 
5976         /*
5977          * Avoid more than one consumer on a single file descriptor.
5978          * This is just a matter of trace coherency; the ring buffer itself
5979          * is protected.
5980          */
5981         mutex_lock(&iter->mutex);
5982 
5983         /* return any leftover data */
5984         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5985         if (sret != -EBUSY)
5986                 goto out;
5987 
5988         trace_seq_init(&iter->seq);
5989 
5990         if (iter->trace->read) {
5991                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5992                 if (sret)
5993                         goto out;
5994         }
5995 
5996 waitagain:
5997         sret = tracing_wait_pipe(filp);
5998         if (sret <= 0)
5999                 goto out;
6000 
6001         /* stop when tracing is finished */
6002         if (trace_empty(iter)) {
6003                 sret = 0;
6004                 goto out;
6005         }
6006 
6007         if (cnt >= PAGE_SIZE)
6008                 cnt = PAGE_SIZE - 1;
6009 
6010         /* reset all but tr, trace, and overruns */
6011         memset(&iter->seq, 0,
6012                sizeof(struct trace_iterator) -
6013                offsetof(struct trace_iterator, seq));
6014         cpumask_clear(iter->started);
6015         trace_seq_init(&iter->seq);
6016         iter->pos = -1;
6017 
6018         trace_event_read_lock();
6019         trace_access_lock(iter->cpu_file);
6020         while (trace_find_next_entry_inc(iter) != NULL) {
6021                 enum print_line_t ret;
6022                 int save_len = iter->seq.seq.len;
6023 
6024                 ret = print_trace_line(iter);
6025                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6026                         /* don't print partial lines */
6027                         iter->seq.seq.len = save_len;
6028                         break;
6029                 }
6030                 if (ret != TRACE_TYPE_NO_CONSUME)
6031                         trace_consume(iter);
6032 
6033                 if (trace_seq_used(&iter->seq) >= cnt)
6034                         break;
6035 
6036                 /*
6037                  * Setting the full flag means we reached the trace_seq buffer
6038                  * size and we should have left via the partial-output check above.
6039                  * One of the trace_seq_* functions is not used properly.
6040                  */
6041                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6042                           iter->ent->type);
6043         }
6044         trace_access_unlock(iter->cpu_file);
6045         trace_event_read_unlock();
6046 
6047         /* Now copy what we have to the user */
6048         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6049         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6050                 trace_seq_init(&iter->seq);
6051 
6052         /*
6053          * If there was nothing to send to user, in spite of consuming trace
6054          * entries, go back to wait for more entries.
6055          */
6056         if (sret == -EBUSY)
6057                 goto waitagain;
6058 
6059 out:
6060         mutex_unlock(&iter->mutex);
6061 
6062         return sret;
6063 }
6064 
6065 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6066                                      unsigned int idx)
6067 {
6068         __free_page(spd->pages[idx]);
6069 }
6070 
6071 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6072         .confirm                = generic_pipe_buf_confirm,
6073         .release                = generic_pipe_buf_release,
6074         .steal                  = generic_pipe_buf_steal,
6075         .get                    = generic_pipe_buf_get,
6076 };
6077 
6078 static size_t
6079 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6080 {
6081         size_t count;
6082         int save_len;
6083         int ret;
6084 
6085         /* Seq buffer is page-sized, exactly what we need. */
6086         for (;;) {
6087                 save_len = iter->seq.seq.len;
6088                 ret = print_trace_line(iter);
6089 
6090                 if (trace_seq_has_overflowed(&iter->seq)) {
6091                         iter->seq.seq.len = save_len;
6092                         break;
6093                 }
6094 
6095                 /*
6096                  * This should not be hit, because it should only
6097                  * be set if the iter->seq overflowed. But check it
6098                  * anyway to be safe.
6099                  */
6100                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6101                         iter->seq.seq.len = save_len;
6102                         break;
6103                 }
6104 
6105                 count = trace_seq_used(&iter->seq) - save_len;
6106                 if (rem < count) {
6107                         rem = 0;
6108                         iter->seq.seq.len = save_len;
6109                         break;
6110                 }
6111 
6112                 if (ret != TRACE_TYPE_NO_CONSUME)
6113                         trace_consume(iter);
6114                 rem -= count;
6115                 if (!trace_find_next_entry_inc(iter))   {
6116                         rem = 0;
6117                         iter->ent = NULL;
6118                         break;
6119                 }
6120         }
6121 
6122         return rem;
6123 }
6124 
6125 static ssize_t tracing_splice_read_pipe(struct file *filp,
6126                                         loff_t *ppos,
6127                                         struct pipe_inode_info *pipe,
6128                                         size_t len,
6129                                         unsigned int flags)
6130 {
6131         struct page *pages_def[PIPE_DEF_BUFFERS];
6132         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6133         struct trace_iterator *iter = filp->private_data;
6134         struct splice_pipe_desc spd = {
6135                 .pages          = pages_def,
6136                 .partial        = partial_def,
6137                 .nr_pages       = 0, /* This gets updated below. */
6138                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6139                 .ops            = &tracing_pipe_buf_ops,
6140                 .spd_release    = tracing_spd_release_pipe,
6141         };
6142         ssize_t ret;
6143         size_t rem;
6144         unsigned int i;
6145 
6146         if (splice_grow_spd(pipe, &spd))
6147                 return -ENOMEM;
6148 
6149         mutex_lock(&iter->mutex);
6150 
6151         if (iter->trace->splice_read) {
6152                 ret = iter->trace->splice_read(iter, filp,
6153                                                ppos, pipe, len, flags);
6154                 if (ret)
6155                         goto out_err;
6156         }
6157 
6158         ret = tracing_wait_pipe(filp);
6159         if (ret <= 0)
6160                 goto out_err;
6161 
6162         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6163                 ret = -EFAULT;
6164                 goto out_err;
6165         }
6166 
6167         trace_event_read_lock();
6168         trace_access_lock(iter->cpu_file);
6169 
6170         /* Fill as many pages as possible. */
6171         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6172                 spd.pages[i] = alloc_page(GFP_KERNEL);
6173                 if (!spd.pages[i])
6174                         break;
6175 
6176                 rem = tracing_fill_pipe_page(rem, iter);
6177 
6178                 /* Copy the data into the page, so we can start over. */
6179                 ret = trace_seq_to_buffer(&iter->seq,
6180                                           page_address(spd.pages[i]),
6181                                           trace_seq_used(&iter->seq));
6182                 if (ret < 0) {
6183                         __free_page(spd.pages[i]);
6184                         break;
6185                 }
6186                 spd.partial[i].offset = 0;
6187                 spd.partial[i].len = trace_seq_used(&iter->seq);
6188 
6189                 trace_seq_init(&iter->seq);
6190         }
6191 
6192         trace_access_unlock(iter->cpu_file);
6193         trace_event_read_unlock();
6194         mutex_unlock(&iter->mutex);
6195 
6196         spd.nr_pages = i;
6197 
6198         if (i)
6199                 ret = splice_to_pipe(pipe, &spd);
6200         else
6201                 ret = 0;
6202 out:
6203         splice_shrink_spd(&spd);
6204         return ret;
6205 
6206 out_err:
6207         mutex_unlock(&iter->mutex);
6208         goto out;
6209 }
6210 
6211 static ssize_t
6212 tracing_entries_read(struct file *filp, char __user *ubuf,
6213                      size_t cnt, loff_t *ppos)
6214 {
6215         struct inode *inode = file_inode(filp);
6216         struct trace_array *tr = inode->i_private;
6217         int cpu = tracing_get_cpu(inode);
6218         char buf[64];
6219         int r = 0;
6220         ssize_t ret;
6221 
6222         mutex_lock(&trace_types_lock);
6223 
6224         if (cpu == RING_BUFFER_ALL_CPUS) {
6225                 int cpu, buf_size_same;
6226                 unsigned long size;
6227 
6228                 size = 0;
6229                 buf_size_same = 1;
6230                 /* check if all cpu sizes are the same */
6231                 for_each_tracing_cpu(cpu) {
6232                         /* fill in the size from first enabled cpu */
6233                         if (size == 0)
6234                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6235                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6236                                 buf_size_same = 0;
6237                                 break;
6238                         }
6239                 }
6240 
6241                 if (buf_size_same) {
6242                         if (!ring_buffer_expanded)
6243                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6244                                             size >> 10,
6245                                             trace_buf_size >> 10);
6246                         else
6247                                 r = sprintf(buf, "%lu\n", size >> 10);
6248                 } else
6249                         r = sprintf(buf, "X\n");
6250         } else
6251                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6252 
6253         mutex_unlock(&trace_types_lock);
6254 
6255         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6256         return ret;
6257 }
6258 
6259 static ssize_t
6260 tracing_entries_write(struct file *filp, const char __user *ubuf,
6261                       size_t cnt, loff_t *ppos)
6262 {
6263         struct inode *inode = file_inode(filp);
6264         struct trace_array *tr = inode->i_private;
6265         unsigned long val;
6266         int ret;
6267 
6268         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6269         if (ret)
6270                 return ret;
6271 
6272         /* must have at least 1 entry */
6273         if (!val)
6274                 return -EINVAL;
6275 
6276         /* value is in KB */
6277         val <<= 10;
6278         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6279         if (ret < 0)
6280                 return ret;
6281 
6282         *ppos += cnt;
6283 
6284         return cnt;
6285 }
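
/*
 * Illustrative user-space sketch (not part of this file): writes to
 * buffer_size_kb are parsed as a decimal KB count, shifted into bytes and
 * handed to tracing_resize_ring_buffer() for either one CPU or all of them,
 * depending on which tracefs file was opened.  The per_cpu path below
 * targets a single CPU; the size value is arbitrary.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Ask the kernel to resize CPU 0's ring buffer to 4096 KB. */
int main(void)
{
        const char *kb = "4096\n";
        int fd = open("/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, kb, strlen(kb)) < 0) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}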
6286 
6287 static ssize_t
6288 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6289                                 size_t cnt, loff_t *ppos)
6290 {
6291         struct trace_array *tr = filp->private_data;
6292         char buf[64];
6293         int r, cpu;
6294         unsigned long size = 0, expanded_size = 0;
6295 
6296         mutex_lock(&trace_types_lock);
6297         for_each_tracing_cpu(cpu) {
6298                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6299                 if (!ring_buffer_expanded)
6300                         expanded_size += trace_buf_size >> 10;
6301         }
6302         if (ring_buffer_expanded)
6303                 r = sprintf(buf, "%lu\n", size);
6304         else
6305                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6306         mutex_unlock(&trace_types_lock);
6307 
6308         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6309 }
6310 
6311 static ssize_t
6312 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6313                           size_t cnt, loff_t *ppos)
6314 {
6315         /*
6316          * There is no need to read what the user has written, this function
6317          * is just to make sure that there is no error when "echo" is used
6318          */
6319 
6320         *ppos += cnt;
6321 
6322         return cnt;
6323 }
6324 
6325 static int
6326 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6327 {
6328         struct trace_array *tr = inode->i_private;
6329 
6330         /* disable tracing? */
6331         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6332                 tracer_tracing_off(tr);
6333         /* resize the ring buffer to 0 */
6334         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6335 
6336         trace_array_put(tr);
6337 
6338         return 0;
6339 }
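
/*
 * Illustrative user-space sketch (not part of this file): whatever is
 * written to free_buffer is ignored; the work happens on release, which
 * shrinks the ring buffer to zero and, if the disable_on_free trace option
 * (TRACE_ITER_STOP_ON_FREE) is set, turns tracing off first.
 */
#include <fcntl.h>
#include <unistd.h>

/* Touch free_buffer: closing the file frees the ring buffer pages. */
int main(void)
{
        int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, "1", 1) < 0)      /* contents are ignored by the kernel */
                return 1;
        close(fd);                      /* triggers the release-time resize to 0 */
        return 0;
}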
6340 
6341 static ssize_t
6342 tracing_mark_write(struct file *filp, const char __user *ubuf,
6343                                         size_t cnt, loff_t *fpos)
6344 {
6345         struct trace_array *tr = filp->private_data;
6346         struct ring_buffer_event *event;
6347         enum event_trigger_type tt = ETT_NONE;
6348         struct ring_buffer *buffer;
6349         struct print_entry *entry;
6350         unsigned long irq_flags;
6351         ssize_t written;
6352         int size;
6353         int len;
6354 
6355 /* Used in tracing_mark_raw_write() as well */
6356 #define FAULTED_STR "<faulted>"
6357 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6358 
6359         if (tracing_disabled)
6360                 return -EINVAL;
6361 
6362         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6363                 return -EINVAL;
6364 
6365         if (cnt > TRACE_BUF_SIZE)
6366                 cnt = TRACE_BUF_SIZE;
6367 
6368         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6369 
6370         local_save_flags(irq_flags);
6371         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6372 
6373         /* If less than "<faulted>", then make sure we can still add that */
6374         if (cnt < FAULTED_SIZE)
6375                 size += FAULTED_SIZE - cnt;
6376 
6377         buffer = tr->trace_buffer.buffer;
6378         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6379                                             irq_flags, preempt_count());
6380         if (unlikely(!event))
6381                 /* Ring buffer disabled, return as if not open for write */
6382                 return -EBADF;
6383 
6384         entry = ring_buffer_event_data(event);
6385         entry->ip = _THIS_IP_;
6386 
6387         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6388         if (len) {
6389                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6390                 cnt = FAULTED_SIZE;
6391                 written = -EFAULT;
6392         } else
6393                 written = cnt;
6394         len = cnt;
6395 
6396         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6397                 /* do not add \n before testing triggers, but add \0 */
6398                 entry->buf[cnt] = '\0';
6399                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6400         }
6401 
6402         if (entry->buf[cnt - 1] != '\n') {
6403                 entry->buf[cnt] = '\n';
6404                 entry->buf[cnt + 1] = '\0';
6405         } else
6406                 entry->buf[cnt] = '\0';
6407 
6408         __buffer_unlock_commit(buffer, event);
6409 
6410         if (tt)
6411                 event_triggers_post_call(tr->trace_marker_file, tt);
6412 
6413         if (written > 0)
6414                 *fpos += written;
6415 
6416         return written;
6417 }
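
/*
 * Illustrative user-space sketch (not part of this file): trace_marker
 * turns one write() into one TRACE_PRINT event; if the copy from user
 * space faults, "<faulted>" is logged and -EFAULT is returned instead.
 * The mount point is an assumption; some systems use
 * /sys/kernel/debug/tracing.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Drop a text annotation into the ftrace ring buffer. */
static int trace_mark(const char *msg)
{
        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
        ssize_t ret;

        if (fd < 0)
                return -1;
        ret = write(fd, msg, strlen(msg));      /* one write == one event */
        close(fd);
        return ret < 0 ? -1 : 0;
}

int main(void)
{
        return trace_mark("app: checkpoint reached\n");
}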
6418 
6419 /* Limit it for now to 3K (including tag) */
6420 #define RAW_DATA_MAX_SIZE (1024*3)
6421 
6422 static ssize_t
6423 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6424                                         size_t cnt, loff_t *fpos)
6425 {
6426         struct trace_array *tr = filp->private_data;
6427         struct ring_buffer_event *event;
6428         struct ring_buffer *buffer;
6429         struct raw_data_entry *entry;
6430         unsigned long irq_flags;
6431         ssize_t written;
6432         int size;
6433         int len;
6434 
6435 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6436 
6437         if (tracing_disabled)
6438                 return -EINVAL;
6439 
6440         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6441                 return -EINVAL;
6442 
6443         /* The marker must at least have a tag id */
6444         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6445                 return -EINVAL;
6446 
6447         if (cnt > TRACE_BUF_SIZE)
6448                 cnt = TRACE_BUF_SIZE;
6449 
6450         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6451 
6452         local_save_flags(irq_flags);
6453         size = sizeof(*entry) + cnt;
6454         if (cnt < FAULT_SIZE_ID)
6455                 size += FAULT_SIZE_ID - cnt;
6456 
6457         buffer = tr->trace_buffer.buffer;
6458         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6459                                             irq_flags, preempt_count());
6460         if (!event)
6461                 /* Ring buffer disabled, return as if not open for write */
6462                 return -EBADF;
6463 
6464         entry = ring_buffer_event_data(event);
6465 
6466         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6467         if (len) {
6468                 entry->id = -1;
6469                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6470                 written = -EFAULT;
6471         } else
6472                 written = cnt;
6473 
6474         __buffer_unlock_commit(buffer, event);
6475 
6476         if (written > 0)
6477                 *fpos += written;
6478 
6479         return written;
6480 }
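
/*
 * Illustrative user-space sketch (not part of this file): trace_marker_raw
 * expects a binary payload whose first four bytes are an application-chosen
 * tag id, which is why writes shorter than sizeof(unsigned int) are
 * rejected and the id is set to -1 on a faulted copy.  The payload layout
 * after the id is purely hypothetical.
 */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

/* Binary marker: a tag id followed by caller-defined payload bytes. */
struct raw_marker {
        uint32_t id;            /* consumed as entry->id by the kernel */
        uint32_t payload[2];    /* hypothetical application data */
};

int main(void)
{
        struct raw_marker m = { .id = 42, .payload = { 1, 2 } };
        int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, &m, sizeof(m)) < 0) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}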
6481 
6482 static int tracing_clock_show(struct seq_file *m, void *v)
6483 {
6484         struct trace_array *tr = m->private;
6485         int i;
6486 
6487         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6488                 seq_printf(m,
6489                         "%s%s%s%s", i ? " " : "",
6490                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6491                         i == tr->clock_id ? "]" : "");
6492         seq_putc(m, '\n');
6493 
6494         return 0;
6495 }
6496 
6497 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6498 {
6499         int i;
6500 
6501         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6502                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6503                         break;
6504         }
6505         if (i == ARRAY_SIZE(trace_clocks))
6506                 return -EINVAL;
6507 
6508         mutex_lock(&trace_types_lock);
6509 
6510         tr->clock_id = i;
6511 
6512         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6513 
6514         /*
6515          * New clock may not be consistent with the previous clock.
6516          * Reset the buffer so that it doesn't have incomparable timestamps.
6517          */
6518         tracing_reset_online_cpus(&tr->trace_buffer);
6519 
6520 #ifdef CONFIG_TRACER_MAX_TRACE
6521         if (tr->max_buffer.buffer)
6522                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6523         tracing_reset_online_cpus(&tr->max_buffer);
6524 #endif
6525 
6526         mutex_unlock(&trace_types_lock);
6527 
6528         return 0;
6529 }
6530 
6531 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6532                                    size_t cnt, loff_t *fpos)
6533 {
6534         struct seq_file *m = filp->private_data;
6535         struct trace_array *tr = m->private;
6536         char buf[64];
6537         const char *clockstr;
6538         int ret;
6539 
6540         if (cnt >= sizeof(buf))
6541                 return -EINVAL;
6542 
6543         if (copy_from_user(buf, ubuf, cnt))
6544                 return -EFAULT;
6545 
6546         buf[cnt] = 0;
6547 
6548         clockstr = strstrip(buf);
6549 
6550         ret = tracing_set_clock(tr, clockstr);
6551         if (ret)
6552                 return ret;
6553 
6554         *fpos += cnt;
6555 
6556         return cnt;
6557 }
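
/*
 * Illustrative user-space sketch (not part of this file): writing one of
 * the trace_clocks[] names (e.g. "mono") switches the ring buffer's
 * timestamp source and resets the buffers, since timestamps taken from
 * different clocks cannot be compared.  Reading the file lists the clocks
 * with the active one in brackets.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Switch the trace clock; note that this implicitly clears the buffers. */
int main(void)
{
        const char *clk = "mono\n";
        int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, clk, strlen(clk)) < 0) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}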
6558 
6559 static int tracing_clock_open(struct inode *inode, struct file *file)
6560 {
6561         struct trace_array *tr = inode->i_private;
6562         int ret;
6563 
6564         if (tracing_disabled)
6565                 return -ENODEV;
6566 
6567         if (trace_array_get(tr))
6568                 return -ENODEV;
6569 
6570         ret = single_open(file, tracing_clock_show, inode->i_private);
6571         if (ret < 0)
6572                 trace_array_put(tr);
6573 
6574         return ret;
6575 }
6576 
6577 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6578 {
6579         struct trace_array *tr = m->private;
6580 
6581         mutex_lock(&trace_types_lock);
6582 
6583         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6584                 seq_puts(m, "delta [absolute]\n");
6585         else
6586                 seq_puts(m, "[delta] absolute\n");
6587 
6588         mutex_unlock(&trace_types_lock);
6589 
6590         return 0;
6591 }
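
/*
 * Illustrative user-space sketch (not part of this file): the
 * timestamp_mode file uses the same bracket convention as trace_clock,
 * reporting "[delta] absolute" or "delta [absolute]" depending on whether
 * the ring buffer is using absolute timestamps.  The file name and path
 * reflect the usual tracefs layout and are assumptions here.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Print the ring buffer timestamp mode, e.g. "[delta] absolute". */
int main(void)
{
        char buf[32];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/timestamp_mode", O_RDONLY);

        if (fd < 0)
                return 1;
        n = read(fd, buf, sizeof(buf) - 1);
        close(fd);
        if (n <= 0)
                return 1;
        buf[n] = '\0';
        fputs(buf, stdout);
        return 0;
}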
6592 
6593 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6594 {
6595         struct trace_array *tr = inode->i_private;
6596         int ret;
6597 
6598         if (tracing_disabled)
6599                 return -ENODEV;
6600 
6601         if (trace_array_get(tr))
6602                 return -ENODEV;
6603 
6604         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6605         if (ret < 0)
6606                 trace_array_put(tr);
6607