~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c

Version: ~ [ linux-5.16-rc3 ] ~ [ linux-5.15.5 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.82 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.162 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.218 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.256 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.291 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.293 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * ring buffer based function tracer
  3  *
  4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  6  *
  7  * Originally taken from the RT patch by:
  8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
  9  *
 10  * Based on code from the latency_tracer, that is:
 11  *  Copyright (C) 2004-2006 Ingo Molnar
 12  *  Copyright (C) 2004 Nadia Yvette Chambers
 13  */
 14 #include <linux/ring_buffer.h>
 15 #include <generated/utsrelease.h>
 16 #include <linux/stacktrace.h>
 17 #include <linux/writeback.h>
 18 #include <linux/kallsyms.h>
 19 #include <linux/seq_file.h>
 20 #include <linux/notifier.h>
 21 #include <linux/irqflags.h>
 22 #include <linux/debugfs.h>
 23 #include <linux/tracefs.h>
 24 #include <linux/pagemap.h>
 25 #include <linux/hardirq.h>
 26 #include <linux/linkage.h>
 27 #include <linux/uaccess.h>
 28 #include <linux/vmalloc.h>
 29 #include <linux/ftrace.h>
 30 #include <linux/module.h>
 31 #include <linux/percpu.h>
 32 #include <linux/splice.h>
 33 #include <linux/kdebug.h>
 34 #include <linux/string.h>
 35 #include <linux/mount.h>
 36 #include <linux/rwsem.h>
 37 #include <linux/slab.h>
 38 #include <linux/ctype.h>
 39 #include <linux/init.h>
 40 #include <linux/poll.h>
 41 #include <linux/nmi.h>
 42 #include <linux/fs.h>
 43 #include <linux/trace.h>
 44 #include <linux/sched/rt.h>
 45 
 46 #include "trace.h"
 47 #include "trace_output.h"
 48 
 49 /*
 50  * On boot up, the ring buffer is set to the minimum size, so that
 51  * we do not waste memory on systems that are not using tracing.
 52  */
 53 bool ring_buffer_expanded;
 54 
 55 /*
 56  * We need to change this state when a selftest is running.
 57  * A selftest will lurk into the ring-buffer to count the
 58  * entries inserted during the selftest although some concurrent
 59  * insertions into the ring-buffer such as trace_printk could occurred
 60  * at the same time, giving false positive or negative results.
 61  */
 62 static bool __read_mostly tracing_selftest_running;
 63 
 64 /*
 65  * If a tracer is running, we do not want to run SELFTEST.
 66  */
 67 bool __read_mostly tracing_selftest_disabled;
 68 
 69 /* Pipe tracepoints to printk */
 70 struct trace_iterator *tracepoint_print_iter;
 71 int tracepoint_printk;
 72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 73 
 74 /* For tracers that don't implement custom flags */
 75 static struct tracer_opt dummy_tracer_opt[] = {
 76         { }
 77 };
 78 
 79 static int
 80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 81 {
 82         return 0;
 83 }
 84 
 85 /*
 86  * To prevent the comm cache from being overwritten when no
 87  * tracing is active, only save the comm when a trace event
 88  * occurred.
 89  */
 90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 91 
 92 /*
 93  * Kill all tracing for good (never come back).
 94  * It is initialized to 1 but will turn to zero if the initialization
 95  * of the tracer is successful. But that is the only place that sets
 96  * this back to zero.
 97  */
 98 static int tracing_disabled = 1;
 99 
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputing it to a
109  * serial console.
110  *
111  * It is default off, but you can enable it with either specifying
112  * "ftrace_dump_on_oops" in the kernel command line, or setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered oops
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129 
130 union trace_enum_map_item;
131 
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_enum_mutex);
142 
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155 
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183 
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188 
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         TRACE_ITER_EVENT_FORK
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276 
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286 
287         return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302 
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312 
313         return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338 
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360 
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363 
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382 
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388 
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392 
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415 
416         (*pos)++;
417 
418         /* pid already is +1 of the actual prevous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424 
425         return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443 
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447 
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466 
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
470 
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473 
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486 
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489 
490         /*
491          * Always recreate a new array. The write is an all or nothing
492          * operation. Always create a new array when adding new pids by
493          * the user. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499 
500         pid_list->pid_max = READ_ONCE(pid_max);
501 
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505 
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511 
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520 
521         while (cnt > 0) {
522 
523                 pos = 0;
524 
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528 
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532 
533                 parser.buffer[parser.idx] = 0;
534 
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540 
541                 pid = (pid_t)val;
542 
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545 
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550 
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555 
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562 
563         *new_pid_list = pid_list;
564 
565         return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571 
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575 
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579         return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it will be much appreciated
614  * to not have to wait for all that output. Anyway this can be
615  * boot time and run time configurable.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * ring buffer serializes readers, but it is low level protection.
633  * The validity of the events (which returns by ring_buffer_peek() ..etc)
634  * are not protected by ring buffer.
635  *
636  * The content of events may become garbage if we allow other process consumes
637  * these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not reader page) in ring buffer, and this page will be rewrited
640  *      by events producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to system.
643  *
644  * These primitives allow multi process access to different cpu ring buffer
645  * concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multi read-only access are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662 
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665 
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684 
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740 
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752 
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756 
757         return event;
758 }
759 
760 static void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794 
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819 
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822 
823         pc = preempt_count();
824 
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827 
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836 
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839 
840         memcpy(&entry->buf, str, size);
841 
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848 
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869 
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872 
873         pc = preempt_count();
874 
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877 
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884 
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888 
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 /**
898  * trace_snapshot - take a snapshot of the current buffer.
899  *
900  * This causes a swap between the snapshot buffer and the current live
901  * tracing buffer. You can use this to take snapshots of the live
902  * trace when some condition is triggered, but continue to trace.
903  *
904  * Note, make sure to allocate the snapshot with either
905  * a tracing_snapshot_alloc(), or by doing it manually
906  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
907  *
908  * If the snapshot buffer is not allocated, it will stop tracing.
909  * Basically making a permanent snapshot.
910  */
911 void tracing_snapshot(void)
912 {
913         struct trace_array *tr = &global_trace;
914         struct tracer *tracer = tr->current_trace;
915         unsigned long flags;
916 
917         if (in_nmi()) {
918                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
919                 internal_trace_puts("*** snapshot is being ignored        ***\n");
920                 return;
921         }
922 
923         if (!tr->allocated_snapshot) {
924                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
925                 internal_trace_puts("*** stopping trace here!   ***\n");
926                 tracing_off();
927                 return;
928         }
929 
930         /* Note, snapshot can not be used when the tracer uses it */
931         if (tracer->use_max_tr) {
932                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
933                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
934                 return;
935         }
936 
937         local_irq_save(flags);
938         update_max_tr(tr, current, smp_processor_id());
939         local_irq_restore(flags);
940 }
941 EXPORT_SYMBOL_GPL(tracing_snapshot);
942 
943 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
944                                         struct trace_buffer *size_buf, int cpu_id);
945 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
946 
947 static int alloc_snapshot(struct trace_array *tr)
948 {
949         int ret;
950 
951         if (!tr->allocated_snapshot) {
952 
953                 /* allocate spare buffer */
954                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
955                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
956                 if (ret < 0)
957                         return ret;
958 
959                 tr->allocated_snapshot = true;
960         }
961 
962         return 0;
963 }
964 
965 static void free_snapshot(struct trace_array *tr)
966 {
967         /*
968          * We don't free the ring buffer. instead, resize it because
969          * The max_tr ring buffer has some state (e.g. ring->clock) and
970          * we want preserve it.
971          */
972         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
973         set_buffer_entries(&tr->max_buffer, 1);
974         tracing_reset_online_cpus(&tr->max_buffer);
975         tr->allocated_snapshot = false;
976 }
977 
978 /**
979  * tracing_alloc_snapshot - allocate snapshot buffer.
980  *
981  * This only allocates the snapshot buffer if it isn't already
982  * allocated - it doesn't also take a snapshot.
983  *
984  * This is meant to be used in cases where the snapshot buffer needs
985  * to be set up for events that can't sleep but need to be able to
986  * trigger a snapshot.
987  */
988 int tracing_alloc_snapshot(void)
989 {
990         struct trace_array *tr = &global_trace;
991         int ret;
992 
993         ret = alloc_snapshot(tr);
994         WARN_ON(ret < 0);
995 
996         return ret;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 
1000 /**
1001  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1002  *
1003  * This is similar to trace_snapshot(), but it will allocate the
1004  * snapshot buffer if it isn't already allocated. Use this only
1005  * where it is safe to sleep, as the allocation may sleep.
1006  *
1007  * This causes a swap between the snapshot buffer and the current live
1008  * tracing buffer. You can use this to take snapshots of the live
1009  * trace when some condition is triggered, but continue to trace.
1010  */
1011 void tracing_snapshot_alloc(void)
1012 {
1013         int ret;
1014 
1015         ret = tracing_alloc_snapshot();
1016         if (ret < 0)
1017                 return;
1018 
1019         tracing_snapshot();
1020 }
1021 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1022 #else
1023 void tracing_snapshot(void)
1024 {
1025         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot);
1028 int tracing_alloc_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1031         return -ENODEV;
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1034 void tracing_snapshot_alloc(void)
1035 {
1036         /* Give warning */
1037         tracing_snapshot();
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1040 #endif /* CONFIG_TRACER_SNAPSHOT */
1041 
1042 static void tracer_tracing_off(struct trace_array *tr)
1043 {
1044         if (tr->trace_buffer.buffer)
1045                 ring_buffer_record_off(tr->trace_buffer.buffer);
1046         /*
1047          * This flag is looked at when buffers haven't been allocated
1048          * yet, or by some tracers (like irqsoff), that just want to
1049          * know if the ring buffer has been disabled, but it can handle
1050          * races of where it gets disabled but we still do a record.
1051          * As the check is in the fast path of the tracers, it is more
1052          * important to be fast than accurate.
1053          */
1054         tr->buffer_disabled = 1;
1055         /* Make the flag seen by readers */
1056         smp_wmb();
1057 }
1058 
1059 /**
1060  * tracing_off - turn off tracing buffers
1061  *
1062  * This function stops the tracing buffers from recording data.
1063  * It does not disable any overhead the tracers themselves may
1064  * be causing. This function simply causes all recording to
1065  * the ring buffers to fail.
1066  */
1067 void tracing_off(void)
1068 {
1069         tracer_tracing_off(&global_trace);
1070 }
1071 EXPORT_SYMBOL_GPL(tracing_off);
1072 
1073 void disable_trace_on_warning(void)
1074 {
1075         if (__disable_trace_on_warning)
1076                 tracing_off();
1077 }
1078 
1079 /**
1080  * tracer_tracing_is_on - show real state of ring buffer enabled
1081  * @tr : the trace array to know if ring buffer is enabled
1082  *
1083  * Shows real state of the ring buffer if it is enabled or not.
1084  */
1085 int tracer_tracing_is_on(struct trace_array *tr)
1086 {
1087         if (tr->trace_buffer.buffer)
1088                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1089         return !tr->buffer_disabled;
1090 }
1091 
1092 /**
1093  * tracing_is_on - show state of ring buffers enabled
1094  */
1095 int tracing_is_on(void)
1096 {
1097         return tracer_tracing_is_on(&global_trace);
1098 }
1099 EXPORT_SYMBOL_GPL(tracing_is_on);
1100 
1101 static int __init set_buf_size(char *str)
1102 {
1103         unsigned long buf_size;
1104 
1105         if (!str)
1106                 return 0;
1107         buf_size = memparse(str, &str);
1108         /* nr_entries can not be zero */
1109         if (buf_size == 0)
1110                 return 0;
1111         trace_buf_size = buf_size;
1112         return 1;
1113 }
1114 __setup("trace_buf_size=", set_buf_size);
1115 
1116 static int __init set_tracing_thresh(char *str)
1117 {
1118         unsigned long threshold;
1119         int ret;
1120 
1121         if (!str)
1122                 return 0;
1123         ret = kstrtoul(str, 0, &threshold);
1124         if (ret < 0)
1125                 return 0;
1126         tracing_thresh = threshold * 1000;
1127         return 1;
1128 }
1129 __setup("tracing_thresh=", set_tracing_thresh);
1130 
1131 unsigned long nsecs_to_usecs(unsigned long nsecs)
1132 {
1133         return nsecs / 1000;
1134 }
1135 
1136 /*
1137  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1138  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1139  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1140  * of strings in the order that the enums were defined.
1141  */
1142 #undef C
1143 #define C(a, b) b
1144 
1145 /* These must match the bit postions in trace_iterator_flags */
1146 static const char *trace_options[] = {
1147         TRACE_FLAGS
1148         NULL
1149 };
1150 
1151 static struct {
1152         u64 (*func)(void);
1153         const char *name;
1154         int in_ns;              /* is this clock in nanoseconds? */
1155 } trace_clocks[] = {
1156         { trace_clock_local,            "local",        1 },
1157         { trace_clock_global,           "global",       1 },
1158         { trace_clock_counter,          "counter",      0 },
1159         { trace_clock_jiffies,          "uptime",       0 },
1160         { trace_clock,                  "perf",         1 },
1161         { ktime_get_mono_fast_ns,       "mono",         1 },
1162         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1163         { ktime_get_boot_fast_ns,       "boot",         1 },
1164         ARCH_TRACE_CLOCKS
1165 };
1166 
1167 /*
1168  * trace_parser_get_init - gets the buffer for trace parser
1169  */
1170 int trace_parser_get_init(struct trace_parser *parser, int size)
1171 {
1172         memset(parser, 0, sizeof(*parser));
1173 
1174         parser->buffer = kmalloc(size, GFP_KERNEL);
1175         if (!parser->buffer)
1176                 return 1;
1177 
1178         parser->size = size;
1179         return 0;
1180 }
1181 
1182 /*
1183  * trace_parser_put - frees the buffer for trace parser
1184  */
1185 void trace_parser_put(struct trace_parser *parser)
1186 {
1187         kfree(parser->buffer);
1188         parser->buffer = NULL;
1189 }
1190 
1191 /*
1192  * trace_get_user - reads the user input string separated by  space
1193  * (matched by isspace(ch))
1194  *
1195  * For each string found the 'struct trace_parser' is updated,
1196  * and the function returns.
1197  *
1198  * Returns number of bytes read.
1199  *
1200  * See kernel/trace/trace.h for 'struct trace_parser' details.
1201  */
1202 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1203         size_t cnt, loff_t *ppos)
1204 {
1205         char ch;
1206         size_t read = 0;
1207         ssize_t ret;
1208 
1209         if (!*ppos)
1210                 trace_parser_clear(parser);
1211 
1212         ret = get_user(ch, ubuf++);
1213         if (ret)
1214                 goto out;
1215 
1216         read++;
1217         cnt--;
1218 
1219         /*
1220          * The parser is not finished with the last write,
1221          * continue reading the user input without skipping spaces.
1222          */
1223         if (!parser->cont) {
1224                 /* skip white space */
1225                 while (cnt && isspace(ch)) {
1226                         ret = get_user(ch, ubuf++);
1227                         if (ret)
1228                                 goto out;
1229                         read++;
1230                         cnt--;
1231                 }
1232 
1233                 /* only spaces were written */
1234                 if (isspace(ch)) {
1235                         *ppos += read;
1236                         ret = read;
1237                         goto out;
1238                 }
1239 
1240                 parser->idx = 0;
1241         }
1242 
1243         /* read the non-space input */
1244         while (cnt && !isspace(ch)) {
1245                 if (parser->idx < parser->size - 1)
1246                         parser->buffer[parser->idx++] = ch;
1247                 else {
1248                         ret = -EINVAL;
1249                         goto out;
1250                 }
1251                 ret = get_user(ch, ubuf++);
1252                 if (ret)
1253                         goto out;
1254                 read++;
1255                 cnt--;
1256         }
1257 
1258         /* We either got finished input or we have to wait for another call. */
1259         if (isspace(ch)) {
1260                 parser->buffer[parser->idx] = 0;
1261                 parser->cont = false;
1262         } else if (parser->idx < parser->size - 1) {
1263                 parser->cont = true;
1264                 parser->buffer[parser->idx++] = ch;
1265         } else {
1266                 ret = -EINVAL;
1267                 goto out;
1268         }
1269 
1270         *ppos += read;
1271         ret = read;
1272 
1273 out:
1274         return ret;
1275 }
1276 
1277 /* TODO add a seq_buf_to_buffer() */
1278 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1279 {
1280         int len;
1281 
1282         if (trace_seq_used(s) <= s->seq.readpos)
1283                 return -EBUSY;
1284 
1285         len = trace_seq_used(s) - s->seq.readpos;
1286         if (cnt > len)
1287                 cnt = len;
1288         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1289 
1290         s->seq.readpos += cnt;
1291         return cnt;
1292 }
1293 
1294 unsigned long __read_mostly     tracing_thresh;
1295 
1296 #ifdef CONFIG_TRACER_MAX_TRACE
1297 /*
1298  * Copy the new maximum trace into the separate maximum-trace
1299  * structure. (this way the maximum trace is permanently saved,
1300  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1301  */
1302 static void
1303 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1304 {
1305         struct trace_buffer *trace_buf = &tr->trace_buffer;
1306         struct trace_buffer *max_buf = &tr->max_buffer;
1307         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1308         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1309 
1310         max_buf->cpu = cpu;
1311         max_buf->time_start = data->preempt_timestamp;
1312 
1313         max_data->saved_latency = tr->max_latency;
1314         max_data->critical_start = data->critical_start;
1315         max_data->critical_end = data->critical_end;
1316 
1317         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1318         max_data->pid = tsk->pid;
1319         /*
1320          * If tsk == current, then use current_uid(), as that does not use
1321          * RCU. The irq tracer can be called out of RCU scope.
1322          */
1323         if (tsk == current)
1324                 max_data->uid = current_uid();
1325         else
1326                 max_data->uid = task_uid(tsk);
1327 
1328         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1329         max_data->policy = tsk->policy;
1330         max_data->rt_priority = tsk->rt_priority;
1331 
1332         /* record this tasks comm */
1333         tracing_record_cmdline(tsk);
1334 }
1335 
1336 /**
1337  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1338  * @tr: tracer
1339  * @tsk: the task with the latency
1340  * @cpu: The cpu that initiated the trace.
1341  *
1342  * Flip the buffers between the @tr and the max_tr and record information
1343  * about which task was the cause of this latency.
1344  */
1345 void
1346 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1347 {
1348         struct ring_buffer *buf;
1349 
1350         if (tr->stop_count)
1351                 return;
1352 
1353         WARN_ON_ONCE(!irqs_disabled());
1354 
1355         if (!tr->allocated_snapshot) {
1356                 /* Only the nop tracer should hit this when disabling */
1357                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358                 return;
1359         }
1360 
1361         arch_spin_lock(&tr->max_lock);
1362 
1363         buf = tr->trace_buffer.buffer;
1364         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1365         tr->max_buffer.buffer = buf;
1366 
1367         __update_max_tr(tr, tsk, cpu);
1368         arch_spin_unlock(&tr->max_lock);
1369 }
1370 
1371 /**
1372  * update_max_tr_single - only copy one trace over, and reset the rest
1373  * @tr - tracer
1374  * @tsk - task with the latency
1375  * @cpu - the cpu of the buffer to copy.
1376  *
1377  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1378  */
1379 void
1380 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1381 {
1382         int ret;
1383 
1384         if (tr->stop_count)
1385                 return;
1386 
1387         WARN_ON_ONCE(!irqs_disabled());
1388         if (!tr->allocated_snapshot) {
1389                 /* Only the nop tracer should hit this when disabling */
1390                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1391                 return;
1392         }
1393 
1394         arch_spin_lock(&tr->max_lock);
1395 
1396         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1397 
1398         if (ret == -EBUSY) {
1399                 /*
1400                  * We failed to swap the buffer due to a commit taking
1401                  * place on this CPU. We fail to record, but we reset
1402                  * the max trace buffer (no one writes directly to it)
1403                  * and flag that it failed.
1404                  */
1405                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1406                         "Failed to swap buffers due to commit in progress\n");
1407         }
1408 
1409         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1410 
1411         __update_max_tr(tr, tsk, cpu);
1412         arch_spin_unlock(&tr->max_lock);
1413 }
1414 #endif /* CONFIG_TRACER_MAX_TRACE */
1415 
1416 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1417 {
1418         /* Iterators are static, they should be filled or empty */
1419         if (trace_buffer_iter(iter, iter->cpu_file))
1420                 return 0;
1421 
1422         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1423                                 full);
1424 }
1425 
1426 #ifdef CONFIG_FTRACE_STARTUP_TEST
1427 static int run_tracer_selftest(struct tracer *type)
1428 {
1429         struct trace_array *tr = &global_trace;
1430         struct tracer *saved_tracer = tr->current_trace;
1431         int ret;
1432 
1433         if (!type->selftest || tracing_selftest_disabled)
1434                 return 0;
1435 
1436         /*
1437          * Run a selftest on this tracer.
1438          * Here we reset the trace buffer, and set the current
1439          * tracer to be this tracer. The tracer can then run some
1440          * internal tracing to verify that everything is in order.
1441          * If we fail, we do not register this tracer.
1442          */
1443         tracing_reset_online_cpus(&tr->trace_buffer);
1444 
1445         tr->current_trace = type;
1446 
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         if (type->use_max_tr) {
1449                 /* If we expanded the buffers, make sure the max is expanded too */
1450                 if (ring_buffer_expanded)
1451                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1452                                            RING_BUFFER_ALL_CPUS);
1453                 tr->allocated_snapshot = true;
1454         }
1455 #endif
1456 
1457         /* the test is responsible for initializing and enabling */
1458         pr_info("Testing tracer %s: ", type->name);
1459         ret = type->selftest(type, tr);
1460         /* the test is responsible for resetting too */
1461         tr->current_trace = saved_tracer;
1462         if (ret) {
1463                 printk(KERN_CONT "FAILED!\n");
1464                 /* Add the warning after printing 'FAILED' */
1465                 WARN_ON(1);
1466                 return -1;
1467         }
1468         /* Only reset on passing, to avoid touching corrupted buffers */
1469         tracing_reset_online_cpus(&tr->trace_buffer);
1470 
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472         if (type->use_max_tr) {
1473                 tr->allocated_snapshot = false;
1474 
1475                 /* Shrink the max buffer again */
1476                 if (ring_buffer_expanded)
1477                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1478                                            RING_BUFFER_ALL_CPUS);
1479         }
1480 #endif
1481 
1482         printk(KERN_CONT "PASSED\n");
1483         return 0;
1484 }
1485 #else
1486 static inline int run_tracer_selftest(struct tracer *type)
1487 {
1488         return 0;
1489 }
1490 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1491 
1492 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1493 
1494 static void __init apply_trace_boot_options(void);
1495 
1496 /**
1497  * register_tracer - register a tracer with the ftrace system.
1498  * @type - the plugin for the tracer
1499  *
1500  * Register a new plugin tracer.
1501  */
1502 int __init register_tracer(struct tracer *type)
1503 {
1504         struct tracer *t;
1505         int ret = 0;
1506 
1507         if (!type->name) {
1508                 pr_info("Tracer must have a name\n");
1509                 return -1;
1510         }
1511 
1512         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1513                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1514                 return -1;
1515         }
1516 
1517         mutex_lock(&trace_types_lock);
1518 
1519         tracing_selftest_running = true;
1520 
1521         for (t = trace_types; t; t = t->next) {
1522                 if (strcmp(type->name, t->name) == 0) {
1523                         /* already found */
1524                         pr_info("Tracer %s already registered\n",
1525                                 type->name);
1526                         ret = -1;
1527                         goto out;
1528                 }
1529         }
1530 
1531         if (!type->set_flag)
1532                 type->set_flag = &dummy_set_flag;
1533         if (!type->flags) {
1534                 /*allocate a dummy tracer_flags*/
1535                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1536                 if (!type->flags) {
1537                         ret = -ENOMEM;
1538                         goto out;
1539                 }
1540                 type->flags->val = 0;
1541                 type->flags->opts = dummy_tracer_opt;
1542         } else
1543                 if (!type->flags->opts)
1544                         type->flags->opts = dummy_tracer_opt;
1545 
1546         /* store the tracer for __set_tracer_option */
1547         type->flags->trace = type;
1548 
1549         ret = run_tracer_selftest(type);
1550         if (ret < 0)
1551                 goto out;
1552 
1553         type->next = trace_types;
1554         trace_types = type;
1555         add_tracer_options(&global_trace, type);
1556 
1557  out:
1558         tracing_selftest_running = false;
1559         mutex_unlock(&trace_types_lock);
1560 
1561         if (ret || !default_bootup_tracer)
1562                 goto out_unlock;
1563 
1564         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1565                 goto out_unlock;
1566 
1567         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1568         /* Do we want this tracer to start on bootup? */
1569         tracing_set_tracer(&global_trace, type->name);
1570         default_bootup_tracer = NULL;
1571 
1572         apply_trace_boot_options();
1573 
1574         /* disable other selftests, since this will break it. */
1575         tracing_selftest_disabled = true;
1576 #ifdef CONFIG_FTRACE_STARTUP_TEST
1577         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1578                type->name);
1579 #endif
1580 
1581  out_unlock:
1582         return ret;
1583 }
1584 
1585 void tracing_reset(struct trace_buffer *buf, int cpu)
1586 {
1587         struct ring_buffer *buffer = buf->buffer;
1588 
1589         if (!buffer)
1590                 return;
1591 
1592         ring_buffer_record_disable(buffer);
1593 
1594         /* Make sure all commits have finished */
1595         synchronize_sched();
1596         ring_buffer_reset_cpu(buffer, cpu);
1597 
1598         ring_buffer_record_enable(buffer);
1599 }
1600 
1601 void tracing_reset_online_cpus(struct trace_buffer *buf)
1602 {
1603         struct ring_buffer *buffer = buf->buffer;
1604         int cpu;
1605 
1606         if (!buffer)
1607                 return;
1608 
1609         ring_buffer_record_disable(buffer);
1610 
1611         /* Make sure all commits have finished */
1612         synchronize_sched();
1613 
1614         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1615 
1616         for_each_online_cpu(cpu)
1617                 ring_buffer_reset_cpu(buffer, cpu);
1618 
1619         ring_buffer_record_enable(buffer);
1620 }
1621 
1622 /* Must have trace_types_lock held */
1623 void tracing_reset_all_online_cpus(void)
1624 {
1625         struct trace_array *tr;
1626 
1627         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1628                 tracing_reset_online_cpus(&tr->trace_buffer);
1629 #ifdef CONFIG_TRACER_MAX_TRACE
1630                 tracing_reset_online_cpus(&tr->max_buffer);
1631 #endif
1632         }
1633 }
1634 
1635 #define SAVED_CMDLINES_DEFAULT 128
1636 #define NO_CMDLINE_MAP UINT_MAX
1637 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1638 struct saved_cmdlines_buffer {
1639         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1640         unsigned *map_cmdline_to_pid;
1641         unsigned cmdline_num;
1642         int cmdline_idx;
1643         char *saved_cmdlines;
1644 };
1645 static struct saved_cmdlines_buffer *savedcmd;
1646 
1647 /* temporary disable recording */
1648 static atomic_t trace_record_cmdline_disabled __read_mostly;
1649 
1650 static inline char *get_saved_cmdlines(int idx)
1651 {
1652         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1653 }
1654 
1655 static inline void set_cmdline(int idx, const char *cmdline)
1656 {
1657         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1658 }
1659 
1660 static int allocate_cmdlines_buffer(unsigned int val,
1661                                     struct saved_cmdlines_buffer *s)
1662 {
1663         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1664                                         GFP_KERNEL);
1665         if (!s->map_cmdline_to_pid)
1666                 return -ENOMEM;
1667 
1668         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1669         if (!s->saved_cmdlines) {
1670                 kfree(s->map_cmdline_to_pid);
1671                 return -ENOMEM;
1672         }
1673 
1674         s->cmdline_idx = 0;
1675         s->cmdline_num = val;
1676         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1677                sizeof(s->map_pid_to_cmdline));
1678         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1679                val * sizeof(*s->map_cmdline_to_pid));
1680 
1681         return 0;
1682 }
1683 
1684 static int trace_create_savedcmd(void)
1685 {
1686         int ret;
1687 
1688         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1689         if (!savedcmd)
1690                 return -ENOMEM;
1691 
1692         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1693         if (ret < 0) {
1694                 kfree(savedcmd);
1695                 savedcmd = NULL;
1696                 return -ENOMEM;
1697         }
1698 
1699         return 0;
1700 }
1701 
1702 int is_tracing_stopped(void)
1703 {
1704         return global_trace.stop_count;
1705 }
1706 
1707 /**
1708  * tracing_start - quick start of the tracer
1709  *
1710  * If tracing is enabled but was stopped by tracing_stop,
1711  * this will start the tracer back up.
1712  */
1713 void tracing_start(void)
1714 {
1715         struct ring_buffer *buffer;
1716         unsigned long flags;
1717 
1718         if (tracing_disabled)
1719                 return;
1720 
1721         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1722         if (--global_trace.stop_count) {
1723                 if (global_trace.stop_count < 0) {
1724                         /* Someone screwed up their debugging */
1725                         WARN_ON_ONCE(1);
1726                         global_trace.stop_count = 0;
1727                 }
1728                 goto out;
1729         }
1730 
1731         /* Prevent the buffers from switching */
1732         arch_spin_lock(&global_trace.max_lock);
1733 
1734         buffer = global_trace.trace_buffer.buffer;
1735         if (buffer)
1736                 ring_buffer_record_enable(buffer);
1737 
1738 #ifdef CONFIG_TRACER_MAX_TRACE
1739         buffer = global_trace.max_buffer.buffer;
1740         if (buffer)
1741                 ring_buffer_record_enable(buffer);
1742 #endif
1743 
1744         arch_spin_unlock(&global_trace.max_lock);
1745 
1746  out:
1747         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1748 }
1749 
1750 static void tracing_start_tr(struct trace_array *tr)
1751 {
1752         struct ring_buffer *buffer;
1753         unsigned long flags;
1754 
1755         if (tracing_disabled)
1756                 return;
1757 
1758         /* If global, we need to also start the max tracer */
1759         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1760                 return tracing_start();
1761 
1762         raw_spin_lock_irqsave(&tr->start_lock, flags);
1763 
1764         if (--tr->stop_count) {
1765                 if (tr->stop_count < 0) {
1766                         /* Someone screwed up their debugging */
1767                         WARN_ON_ONCE(1);
1768                         tr->stop_count = 0;
1769                 }
1770                 goto out;
1771         }
1772 
1773         buffer = tr->trace_buffer.buffer;
1774         if (buffer)
1775                 ring_buffer_record_enable(buffer);
1776 
1777  out:
1778         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1779 }
1780 
1781 /**
1782  * tracing_stop - quick stop of the tracer
1783  *
1784  * Light weight way to stop tracing. Use in conjunction with
1785  * tracing_start.
1786  */
1787 void tracing_stop(void)
1788 {
1789         struct ring_buffer *buffer;
1790         unsigned long flags;
1791 
1792         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1793         if (global_trace.stop_count++)
1794                 goto out;
1795 
1796         /* Prevent the buffers from switching */
1797         arch_spin_lock(&global_trace.max_lock);
1798 
1799         buffer = global_trace.trace_buffer.buffer;
1800         if (buffer)
1801                 ring_buffer_record_disable(buffer);
1802 
1803 #ifdef CONFIG_TRACER_MAX_TRACE
1804         buffer = global_trace.max_buffer.buffer;
1805         if (buffer)
1806                 ring_buffer_record_disable(buffer);
1807 #endif
1808 
1809         arch_spin_unlock(&global_trace.max_lock);
1810 
1811  out:
1812         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1813 }
1814 
1815 static void tracing_stop_tr(struct trace_array *tr)
1816 {
1817         struct ring_buffer *buffer;
1818         unsigned long flags;
1819 
1820         /* If global, we need to also stop the max tracer */
1821         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1822                 return tracing_stop();
1823 
1824         raw_spin_lock_irqsave(&tr->start_lock, flags);
1825         if (tr->stop_count++)
1826                 goto out;
1827 
1828         buffer = tr->trace_buffer.buffer;
1829         if (buffer)
1830                 ring_buffer_record_disable(buffer);
1831 
1832  out:
1833         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1834 }
1835 
1836 void trace_stop_cmdline_recording(void);
1837 
1838 static int trace_save_cmdline(struct task_struct *tsk)
1839 {
1840         unsigned pid, idx;
1841 
1842         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1843                 return 0;
1844 
1845         /*
1846          * It's not the end of the world if we don't get
1847          * the lock, but we also don't want to spin
1848          * nor do we want to disable interrupts,
1849          * so if we miss here, then better luck next time.
1850          */
1851         if (!arch_spin_trylock(&trace_cmdline_lock))
1852                 return 0;
1853 
1854         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1855         if (idx == NO_CMDLINE_MAP) {
1856                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1857 
1858                 /*
1859                  * Check whether the cmdline buffer at idx has a pid
1860                  * mapped. We are going to overwrite that entry so we
1861                  * need to clear the map_pid_to_cmdline. Otherwise we
1862                  * would read the new comm for the old pid.
1863                  */
1864                 pid = savedcmd->map_cmdline_to_pid[idx];
1865                 if (pid != NO_CMDLINE_MAP)
1866                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1867 
1868                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1869                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1870 
1871                 savedcmd->cmdline_idx = idx;
1872         }
1873 
1874         set_cmdline(idx, tsk->comm);
1875 
1876         arch_spin_unlock(&trace_cmdline_lock);
1877 
1878         return 1;
1879 }
1880 
1881 static void __trace_find_cmdline(int pid, char comm[])
1882 {
1883         unsigned map;
1884 
1885         if (!pid) {
1886                 strcpy(comm, "<idle>");
1887                 return;
1888         }
1889 
1890         if (WARN_ON_ONCE(pid < 0)) {
1891                 strcpy(comm, "<XXX>");
1892                 return;
1893         }
1894 
1895         if (pid > PID_MAX_DEFAULT) {
1896                 strcpy(comm, "<...>");
1897                 return;
1898         }
1899 
1900         map = savedcmd->map_pid_to_cmdline[pid];
1901         if (map != NO_CMDLINE_MAP)
1902                 strcpy(comm, get_saved_cmdlines(map));
1903         else
1904                 strcpy(comm, "<...>");
1905 }
1906 
1907 void trace_find_cmdline(int pid, char comm[])
1908 {
1909         preempt_disable();
1910         arch_spin_lock(&trace_cmdline_lock);
1911 
1912         __trace_find_cmdline(pid, comm);
1913 
1914         arch_spin_unlock(&trace_cmdline_lock);
1915         preempt_enable();
1916 }
1917 
1918 void tracing_record_cmdline(struct task_struct *tsk)
1919 {
1920         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1921                 return;
1922 
1923         if (!__this_cpu_read(trace_cmdline_save))
1924                 return;
1925 
1926         if (trace_save_cmdline(tsk))
1927                 __this_cpu_write(trace_cmdline_save, false);
1928 }
1929 
1930 void
1931 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1932                              int pc)
1933 {
1934         struct task_struct *tsk = current;
1935 
1936         entry->preempt_count            = pc & 0xff;
1937         entry->pid                      = (tsk) ? tsk->pid : 0;
1938         entry->flags =
1939 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1940                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1941 #else
1942                 TRACE_FLAG_IRQS_NOSUPPORT |
1943 #endif
1944                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1945                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1946                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1947                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1948                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1949 }
1950 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1951 
1952 struct ring_buffer_event *
1953 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1954                           int type,
1955                           unsigned long len,
1956                           unsigned long flags, int pc)
1957 {
1958         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1959 }
1960 
1961 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1962 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1963 static int trace_buffered_event_ref;
1964 
1965 /**
1966  * trace_buffered_event_enable - enable buffering events
1967  *
1968  * When events are being filtered, it is quicker to use a temporary
1969  * buffer to write the event data into if there's a likely chance
1970  * that it will not be committed. The discard of the ring buffer
1971  * is not as fast as committing, and is much slower than copying
1972  * a commit.
1973  *
1974  * When an event is to be filtered, allocate per cpu buffers to
1975  * write the event data into, and if the event is filtered and discarded
1976  * it is simply dropped, otherwise, the entire data is to be committed
1977  * in one shot.
1978  */
1979 void trace_buffered_event_enable(void)
1980 {
1981         struct ring_buffer_event *event;
1982         struct page *page;
1983         int cpu;
1984 
1985         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1986 
1987         if (trace_buffered_event_ref++)
1988                 return;
1989 
1990         for_each_tracing_cpu(cpu) {
1991                 page = alloc_pages_node(cpu_to_node(cpu),
1992                                         GFP_KERNEL | __GFP_NORETRY, 0);
1993                 if (!page)
1994                         goto failed;
1995 
1996                 event = page_address(page);
1997                 memset(event, 0, sizeof(*event));
1998 
1999                 per_cpu(trace_buffered_event, cpu) = event;
2000 
2001                 preempt_disable();
2002                 if (cpu == smp_processor_id() &&
2003                     this_cpu_read(trace_buffered_event) !=
2004                     per_cpu(trace_buffered_event, cpu))
2005                         WARN_ON_ONCE(1);
2006                 preempt_enable();
2007         }
2008 
2009         return;
2010  failed:
2011         trace_buffered_event_disable();
2012 }
2013 
2014 static void enable_trace_buffered_event(void *data)
2015 {
2016         /* Probably not needed, but do it anyway */
2017         smp_rmb();
2018         this_cpu_dec(trace_buffered_event_cnt);
2019 }
2020 
2021 static void disable_trace_buffered_event(void *data)
2022 {
2023         this_cpu_inc(trace_buffered_event_cnt);
2024 }
2025 
2026 /**
2027  * trace_buffered_event_disable - disable buffering events
2028  *
2029  * When a filter is removed, it is faster to not use the buffered
2030  * events, and to commit directly into the ring buffer. Free up
2031  * the temp buffers when there are no more users. This requires
2032  * special synchronization with current events.
2033  */
2034 void trace_buffered_event_disable(void)
2035 {
2036         int cpu;
2037 
2038         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2039 
2040         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2041                 return;
2042 
2043         if (--trace_buffered_event_ref)
2044                 return;
2045 
2046         preempt_disable();
2047         /* For each CPU, set the buffer as used. */
2048         smp_call_function_many(tracing_buffer_mask,
2049                                disable_trace_buffered_event, NULL, 1);
2050         preempt_enable();
2051 
2052         /* Wait for all current users to finish */
2053         synchronize_sched();
2054 
2055         for_each_tracing_cpu(cpu) {
2056                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2057                 per_cpu(trace_buffered_event, cpu) = NULL;
2058         }
2059         /*
2060          * Make sure trace_buffered_event is NULL before clearing
2061          * trace_buffered_event_cnt.
2062          */
2063         smp_wmb();
2064 
2065         preempt_disable();
2066         /* Do the work on each cpu */
2067         smp_call_function_many(tracing_buffer_mask,
2068                                enable_trace_buffered_event, NULL, 1);
2069         preempt_enable();
2070 }
2071 
2072 static struct ring_buffer *temp_buffer;
2073 
2074 struct ring_buffer_event *
2075 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2076                           struct trace_event_file *trace_file,
2077                           int type, unsigned long len,
2078                           unsigned long flags, int pc)
2079 {
2080         struct ring_buffer_event *entry;
2081         int val;
2082 
2083         *current_rb = trace_file->tr->trace_buffer.buffer;
2084 
2085         if ((trace_file->flags &
2086              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2087             (entry = this_cpu_read(trace_buffered_event))) {
2088                 /* Try to use the per cpu buffer first */
2089                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2090                 if (val == 1) {
2091                         trace_event_setup(entry, type, flags, pc);
2092                         entry->array[0] = len;
2093                         return entry;
2094                 }
2095                 this_cpu_dec(trace_buffered_event_cnt);
2096         }
2097 
2098         entry = __trace_buffer_lock_reserve(*current_rb,
2099                                             type, len, flags, pc);
2100         /*
2101          * If tracing is off, but we have triggers enabled
2102          * we still need to look at the event data. Use the temp_buffer
2103          * to store the trace event for the tigger to use. It's recusive
2104          * safe and will not be recorded anywhere.
2105          */
2106         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2107                 *current_rb = temp_buffer;
2108                 entry = __trace_buffer_lock_reserve(*current_rb,
2109                                                     type, len, flags, pc);
2110         }
2111         return entry;
2112 }
2113 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2114 
2115 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2116 static DEFINE_MUTEX(tracepoint_printk_mutex);
2117 
2118 static void output_printk(struct trace_event_buffer *fbuffer)
2119 {
2120         struct trace_event_call *event_call;
2121         struct trace_event *event;
2122         unsigned long flags;
2123         struct trace_iterator *iter = tracepoint_print_iter;
2124 
2125         /* We should never get here if iter is NULL */
2126         if (WARN_ON_ONCE(!iter))
2127                 return;
2128 
2129         event_call = fbuffer->trace_file->event_call;
2130         if (!event_call || !event_call->event.funcs ||
2131             !event_call->event.funcs->trace)
2132                 return;
2133 
2134         event = &fbuffer->trace_file->event_call->event;
2135 
2136         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2137         trace_seq_init(&iter->seq);
2138         iter->ent = fbuffer->entry;
2139         event_call->event.funcs->trace(iter, 0, event);
2140         trace_seq_putc(&iter->seq, 0);
2141         printk("%s", iter->seq.buffer);
2142 
2143         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2144 }
2145 
2146 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2147                              void __user *buffer, size_t *lenp,
2148                              loff_t *ppos)
2149 {
2150         int save_tracepoint_printk;
2151         int ret;
2152 
2153         mutex_lock(&tracepoint_printk_mutex);
2154         save_tracepoint_printk = tracepoint_printk;
2155 
2156         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2157 
2158         /*
2159          * This will force exiting early, as tracepoint_printk
2160          * is always zero when tracepoint_printk_iter is not allocated
2161          */
2162         if (!tracepoint_print_iter)
2163                 tracepoint_printk = 0;
2164 
2165         if (save_tracepoint_printk == tracepoint_printk)
2166                 goto out;
2167 
2168         if (tracepoint_printk)
2169                 static_key_enable(&tracepoint_printk_key.key);
2170         else
2171                 static_key_disable(&tracepoint_printk_key.key);
2172 
2173  out:
2174         mutex_unlock(&tracepoint_printk_mutex);
2175 
2176         return ret;
2177 }
2178 
2179 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2180 {
2181         if (static_key_false(&tracepoint_printk_key.key))
2182                 output_printk(fbuffer);
2183 
2184         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2185                                     fbuffer->event, fbuffer->entry,
2186                                     fbuffer->flags, fbuffer->pc);
2187 }
2188 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2189 
2190 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2191                                      struct ring_buffer *buffer,
2192                                      struct ring_buffer_event *event,
2193                                      unsigned long flags, int pc,
2194                                      struct pt_regs *regs)
2195 {
2196         __buffer_unlock_commit(buffer, event);
2197 
2198         /*
2199          * If regs is not set, then skip the following callers:
2200          *   trace_buffer_unlock_commit_regs
2201          *   event_trigger_unlock_commit
2202          *   trace_event_buffer_commit
2203          *   trace_event_raw_event_sched_switch
2204          * Note, we can still get here via blktrace, wakeup tracer
2205          * and mmiotrace, but that's ok if they lose a function or
2206          * two. They are that meaningful.
2207          */
2208         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2209         ftrace_trace_userstack(buffer, flags, pc);
2210 }
2211 
2212 /*
2213  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2214  */
2215 void
2216 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2217                                    struct ring_buffer_event *event)
2218 {
2219         __buffer_unlock_commit(buffer, event);
2220 }
2221 
2222 static void
2223 trace_process_export(struct trace_export *export,
2224                struct ring_buffer_event *event)
2225 {
2226         struct trace_entry *entry;
2227         unsigned int size = 0;
2228 
2229         entry = ring_buffer_event_data(event);
2230         size = ring_buffer_event_length(event);
2231         export->write(entry, size);
2232 }
2233 
2234 static DEFINE_MUTEX(ftrace_export_lock);
2235 
2236 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2237 
2238 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2239 
2240 static inline void ftrace_exports_enable(void)
2241 {
2242         static_branch_enable(&ftrace_exports_enabled);
2243 }
2244 
2245 static inline void ftrace_exports_disable(void)
2246 {
2247         static_branch_disable(&ftrace_exports_enabled);
2248 }
2249 
2250 void ftrace_exports(struct ring_buffer_event *event)
2251 {
2252         struct trace_export *export;
2253 
2254         preempt_disable_notrace();
2255 
2256         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2257         while (export) {
2258                 trace_process_export(export, event);
2259                 export = rcu_dereference_raw_notrace(export->next);
2260         }
2261 
2262         preempt_enable_notrace();
2263 }
2264 
2265 static inline void
2266 add_trace_export(struct trace_export **list, struct trace_export *export)
2267 {
2268         rcu_assign_pointer(export->next, *list);
2269         /*
2270          * We are entering export into the list but another
2271          * CPU might be walking that list. We need to make sure
2272          * the export->next pointer is valid before another CPU sees
2273          * the export pointer included into the list.
2274          */
2275         rcu_assign_pointer(*list, export);
2276 }
2277 
2278 static inline int
2279 rm_trace_export(struct trace_export **list, struct trace_export *export)
2280 {
2281         struct trace_export **p;
2282 
2283         for (p = list; *p != NULL; p = &(*p)->next)
2284                 if (*p == export)
2285                         break;
2286 
2287         if (*p != export)
2288                 return -1;
2289 
2290         rcu_assign_pointer(*p, (*p)->next);
2291 
2292         return 0;
2293 }
2294 
2295 static inline void
2296 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2297 {
2298         if (*list == NULL)
2299                 ftrace_exports_enable();
2300 
2301         add_trace_export(list, export);
2302 }
2303 
2304 static inline int
2305 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2306 {
2307         int ret;
2308 
2309         ret = rm_trace_export(list, export);
2310         if (*list == NULL)
2311                 ftrace_exports_disable();
2312 
2313         return ret;
2314 }
2315 
2316 int register_ftrace_export(struct trace_export *export)
2317 {
2318         if (WARN_ON_ONCE(!export->write))
2319                 return -1;
2320 
2321         mutex_lock(&ftrace_export_lock);
2322 
2323         add_ftrace_export(&ftrace_exports_list, export);
2324 
2325         mutex_unlock(&ftrace_export_lock);
2326 
2327         return 0;
2328 }
2329 EXPORT_SYMBOL_GPL(register_ftrace_export);
2330 
2331 int unregister_ftrace_export(struct trace_export *export)
2332 {
2333         int ret;
2334 
2335         mutex_lock(&ftrace_export_lock);
2336 
2337         ret = rm_ftrace_export(&ftrace_exports_list, export);
2338 
2339         mutex_unlock(&ftrace_export_lock);
2340 
2341         return ret;
2342 }
2343 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2344 
2345 void
2346 trace_function(struct trace_array *tr,
2347                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2348                int pc)
2349 {
2350         struct trace_event_call *call = &event_function;
2351         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2352         struct ring_buffer_event *event;
2353         struct ftrace_entry *entry;
2354 
2355         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2356                                             flags, pc);
2357         if (!event)
2358                 return;
2359         entry   = ring_buffer_event_data(event);
2360         entry->ip                       = ip;
2361         entry->parent_ip                = parent_ip;
2362 
2363         if (!call_filter_check_discard(call, entry, buffer, event)) {
2364                 if (static_branch_unlikely(&ftrace_exports_enabled))
2365                         ftrace_exports(event);
2366                 __buffer_unlock_commit(buffer, event);
2367         }
2368 }
2369 
2370 #ifdef CONFIG_STACKTRACE
2371 
2372 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2373 struct ftrace_stack {
2374         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2375 };
2376 
2377 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2378 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2379 
2380 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2381                                  unsigned long flags,
2382                                  int skip, int pc, struct pt_regs *regs)
2383 {
2384         struct trace_event_call *call = &event_kernel_stack;
2385         struct ring_buffer_event *event;
2386         struct stack_entry *entry;
2387         struct stack_trace trace;
2388         int use_stack;
2389         int size = FTRACE_STACK_ENTRIES;
2390 
2391         trace.nr_entries        = 0;
2392         trace.skip              = skip;
2393 
2394         /*
2395          * Add two, for this function and the call to save_stack_trace()
2396          * If regs is set, then these functions will not be in the way.
2397          */
2398         if (!regs)
2399                 trace.skip += 2;
2400 
2401         /*
2402          * Since events can happen in NMIs there's no safe way to
2403          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2404          * or NMI comes in, it will just have to use the default
2405          * FTRACE_STACK_SIZE.
2406          */
2407         preempt_disable_notrace();
2408 
2409         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2410         /*
2411          * We don't need any atomic variables, just a barrier.
2412          * If an interrupt comes in, we don't care, because it would
2413          * have exited and put the counter back to what we want.
2414          * We just need a barrier to keep gcc from moving things
2415          * around.
2416          */
2417         barrier();
2418         if (use_stack == 1) {
2419                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2420                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2421 
2422                 if (regs)
2423                         save_stack_trace_regs(regs, &trace);
2424                 else
2425                         save_stack_trace(&trace);
2426 
2427                 if (trace.nr_entries > size)
2428                         size = trace.nr_entries;
2429         } else
2430                 /* From now on, use_stack is a boolean */
2431                 use_stack = 0;
2432 
2433         size *= sizeof(unsigned long);
2434 
2435         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2436                                             sizeof(*entry) + size, flags, pc);
2437         if (!event)
2438                 goto out;
2439         entry = ring_buffer_event_data(event);
2440 
2441         memset(&entry->caller, 0, size);
2442 
2443         if (use_stack)
2444                 memcpy(&entry->caller, trace.entries,
2445                        trace.nr_entries * sizeof(unsigned long));
2446         else {
2447                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2448                 trace.entries           = entry->caller;
2449                 if (regs)
2450                         save_stack_trace_regs(regs, &trace);
2451                 else
2452                         save_stack_trace(&trace);
2453         }
2454 
2455         entry->size = trace.nr_entries;
2456 
2457         if (!call_filter_check_discard(call, entry, buffer, event))
2458                 __buffer_unlock_commit(buffer, event);
2459 
2460  out:
2461         /* Again, don't let gcc optimize things here */
2462         barrier();
2463         __this_cpu_dec(ftrace_stack_reserve);
2464         preempt_enable_notrace();
2465 
2466 }
2467 
2468 static inline void ftrace_trace_stack(struct trace_array *tr,
2469                                       struct ring_buffer *buffer,
2470                                       unsigned long flags,
2471                                       int skip, int pc, struct pt_regs *regs)
2472 {
2473         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2474                 return;
2475 
2476         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2477 }
2478 
2479 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2480                    int pc)
2481 {
2482         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2483 }
2484 
2485 /**
2486  * trace_dump_stack - record a stack back trace in the trace buffer
2487  * @skip: Number of functions to skip (helper handlers)
2488  */
2489 void trace_dump_stack(int skip)
2490 {
2491         unsigned long flags;
2492 
2493         if (tracing_disabled || tracing_selftest_running)
2494                 return;
2495 
2496         local_save_flags(flags);
2497 
2498         /*
2499          * Skip 3 more, seems to get us at the caller of
2500          * this function.
2501          */
2502         skip += 3;
2503         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2504                              flags, skip, preempt_count(), NULL);
2505 }
2506 
2507 static DEFINE_PER_CPU(int, user_stack_count);
2508 
2509 void
2510 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2511 {
2512         struct trace_event_call *call = &event_user_stack;
2513         struct ring_buffer_event *event;
2514         struct userstack_entry *entry;
2515         struct stack_trace trace;
2516 
2517         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2518                 return;
2519 
2520         /*
2521          * NMIs can not handle page faults, even with fix ups.
2522          * The save user stack can (and often does) fault.
2523          */
2524         if (unlikely(in_nmi()))
2525                 return;
2526 
2527         /*
2528          * prevent recursion, since the user stack tracing may
2529          * trigger other kernel events.
2530          */
2531         preempt_disable();
2532         if (__this_cpu_read(user_stack_count))
2533                 goto out;
2534 
2535         __this_cpu_inc(user_stack_count);
2536 
2537         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2538                                             sizeof(*entry), flags, pc);
2539         if (!event)
2540                 goto out_drop_count;
2541         entry   = ring_buffer_event_data(event);
2542 
2543         entry->tgid             = current->tgid;
2544         memset(&entry->caller, 0, sizeof(entry->caller));
2545 
2546         trace.nr_entries        = 0;
2547         trace.max_entries       = FTRACE_STACK_ENTRIES;
2548         trace.skip              = 0;
2549         trace.entries           = entry->caller;
2550 
2551         save_stack_trace_user(&trace);
2552         if (!call_filter_check_discard(call, entry, buffer, event))
2553                 __buffer_unlock_commit(buffer, event);
2554 
2555  out_drop_count:
2556         __this_cpu_dec(user_stack_count);
2557  out:
2558         preempt_enable();
2559 }
2560 
2561 #ifdef UNUSED
2562 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2563 {
2564         ftrace_trace_userstack(tr, flags, preempt_count());
2565 }
2566 #endif /* UNUSED */
2567 
2568 #endif /* CONFIG_STACKTRACE */
2569 
2570 /* created for use with alloc_percpu */
2571 struct trace_buffer_struct {
2572         int nesting;
2573         char buffer[4][TRACE_BUF_SIZE];
2574 };
2575 
2576 static struct trace_buffer_struct *trace_percpu_buffer;
2577 
2578 /*
2579  * Thise allows for lockless recording.  If we're nested too deeply, then
2580  * this returns NULL.
2581  */
2582 static char *get_trace_buf(void)
2583 {
2584         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2585 
2586         if (!buffer || buffer->nesting >= 4)
2587                 return NULL;
2588 
2589         return &buffer->buffer[buffer->nesting++][0];
2590 }
2591 
2592 static void put_trace_buf(void)
2593 {
2594         this_cpu_dec(trace_percpu_buffer->nesting);
2595 }
2596 
2597 static int alloc_percpu_trace_buffer(void)
2598 {
2599         struct trace_buffer_struct *buffers;
2600 
2601         buffers = alloc_percpu(struct trace_buffer_struct);
2602         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2603                 return -ENOMEM;
2604 
2605         trace_percpu_buffer = buffers;
2606         return 0;
2607 }
2608 
2609 static int buffers_allocated;
2610 
2611 void trace_printk_init_buffers(void)
2612 {
2613         if (buffers_allocated)
2614                 return;
2615 
2616         if (alloc_percpu_trace_buffer())
2617                 return;
2618 
2619         /* trace_printk() is for debug use only. Don't use it in production. */
2620 
2621         pr_warn("\n");
2622         pr_warn("**********************************************************\n");
2623         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2624         pr_warn("**                                                      **\n");
2625         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2626         pr_warn("**                                                      **\n");
2627         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2628         pr_warn("** unsafe for production use.                           **\n");
2629         pr_warn("**                                                      **\n");
2630         pr_warn("** If you see this message and you are not debugging    **\n");
2631         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2632         pr_warn("**                                                      **\n");
2633         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2634         pr_warn("**********************************************************\n");
2635 
2636         /* Expand the buffers to set size */
2637         tracing_update_buffers();
2638 
2639         buffers_allocated = 1;
2640 
2641         /*
2642          * trace_printk_init_buffers() can be called by modules.
2643          * If that happens, then we need to start cmdline recording
2644          * directly here. If the global_trace.buffer is already
2645          * allocated here, then this was called by module code.
2646          */
2647         if (global_trace.trace_buffer.buffer)
2648                 tracing_start_cmdline_record();
2649 }
2650 
2651 void trace_printk_start_comm(void)
2652 {
2653         /* Start tracing comms if trace printk is set */
2654         if (!buffers_allocated)
2655                 return;
2656         tracing_start_cmdline_record();
2657 }
2658 
2659 static void trace_printk_start_stop_comm(int enabled)
2660 {
2661         if (!buffers_allocated)
2662                 return;
2663 
2664         if (enabled)
2665                 tracing_start_cmdline_record();
2666         else
2667                 tracing_stop_cmdline_record();
2668 }
2669 
2670 /**
2671  * trace_vbprintk - write binary msg to tracing buffer
2672  *
2673  */
2674 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2675 {
2676         struct trace_event_call *call = &event_bprint;
2677         struct ring_buffer_event *event;
2678         struct ring_buffer *buffer;
2679         struct trace_array *tr = &global_trace;
2680         struct bprint_entry *entry;
2681         unsigned long flags;
2682         char *tbuffer;
2683         int len = 0, size, pc;
2684 
2685         if (unlikely(tracing_selftest_running || tracing_disabled))
2686                 return 0;
2687 
2688         /* Don't pollute graph traces with trace_vprintk internals */
2689         pause_graph_tracing();
2690 
2691         pc = preempt_count();
2692         preempt_disable_notrace();
2693 
2694         tbuffer = get_trace_buf();
2695         if (!tbuffer) {
2696                 len = 0;
2697                 goto out_nobuffer;
2698         }
2699 
2700         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2701 
2702         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2703                 goto out;
2704 
2705         local_save_flags(flags);
2706         size = sizeof(*entry) + sizeof(u32) * len;
2707         buffer = tr->trace_buffer.buffer;
2708         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2709                                             flags, pc);
2710         if (!event)
2711                 goto out;
2712         entry = ring_buffer_event_data(event);
2713         entry->ip                       = ip;
2714         entry->fmt                      = fmt;
2715 
2716         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2717         if (!call_filter_check_discard(call, entry, buffer, event)) {
2718                 __buffer_unlock_commit(buffer, event);
2719                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2720         }
2721 
2722 out:
2723         put_trace_buf();
2724 
2725 out_nobuffer:
2726         preempt_enable_notrace();
2727         unpause_graph_tracing();
2728 
2729         return len;
2730 }
2731 EXPORT_SYMBOL_GPL(trace_vbprintk);
2732 
2733 static int
2734 __trace_array_vprintk(struct ring_buffer *buffer,
2735                       unsigned long ip, const char *fmt, va_list args)
2736 {
2737         struct trace_event_call *call = &event_print;
2738         struct ring_buffer_event *event;
2739         int len = 0, size, pc;
2740         struct print_entry *entry;
2741         unsigned long flags;
2742         char *tbuffer;
2743 
2744         if (tracing_disabled || tracing_selftest_running)
2745                 return 0;
2746 
2747         /* Don't pollute graph traces with trace_vprintk internals */
2748         pause_graph_tracing();
2749 
2750         pc = preempt_count();
2751         preempt_disable_notrace();
2752 
2753 
2754         tbuffer = get_trace_buf();
2755         if (!tbuffer) {
2756                 len = 0;
2757                 goto out_nobuffer;
2758         }
2759 
2760         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2761 
2762         local_save_flags(flags);
2763         size = sizeof(*entry) + len + 1;
2764         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2765                                             flags, pc);
2766         if (!event)
2767                 goto out;
2768         entry = ring_buffer_event_data(event);
2769         entry->ip = ip;
2770 
2771         memcpy(&entry->buf, tbuffer, len + 1);
2772         if (!call_filter_check_discard(call, entry, buffer, event)) {
2773                 __buffer_unlock_commit(buffer, event);
2774                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2775         }
2776 
2777 out:
2778         put_trace_buf();
2779 
2780 out_nobuffer:
2781         preempt_enable_notrace();
2782         unpause_graph_tracing();
2783 
2784         return len;
2785 }
2786 
2787 int trace_array_vprintk(struct trace_array *tr,
2788                         unsigned long ip, const char *fmt, va_list args)
2789 {
2790         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2791 }
2792 
2793 int trace_array_printk(struct trace_array *tr,
2794                        unsigned long ip, const char *fmt, ...)
2795 {
2796         int ret;
2797         va_list ap;
2798 
2799         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2800                 return 0;
2801 
2802         va_start(ap, fmt);
2803         ret = trace_array_vprintk(tr, ip, fmt, ap);
2804         va_end(ap);
2805         return ret;
2806 }
2807 
2808 int trace_array_printk_buf(struct ring_buffer *buffer,
2809                            unsigned long ip, const char *fmt, ...)
2810 {
2811         int ret;
2812         va_list ap;
2813 
2814         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2815                 return 0;
2816 
2817         va_start(ap, fmt);
2818         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2819         va_end(ap);
2820         return ret;
2821 }
2822 
2823 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2824 {
2825         return trace_array_vprintk(&global_trace, ip, fmt, args);
2826 }
2827 EXPORT_SYMBOL_GPL(trace_vprintk);
2828 
2829 static void trace_iterator_increment(struct trace_iterator *iter)
2830 {
2831         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2832 
2833         iter->idx++;
2834         if (buf_iter)
2835                 ring_buffer_read(buf_iter, NULL);
2836 }
2837 
2838 static struct trace_entry *
2839 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2840                 unsigned long *lost_events)
2841 {
2842         struct ring_buffer_event *event;
2843         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2844 
2845         if (buf_iter)
2846                 event = ring_buffer_iter_peek(buf_iter, ts);
2847         else
2848                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2849                                          lost_events);
2850 
2851         if (event) {
2852                 iter->ent_size = ring_buffer_event_length(event);
2853                 return ring_buffer_event_data(event);
2854         }
2855         iter->ent_size = 0;
2856         return NULL;
2857 }
2858 
2859 static struct trace_entry *
2860 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2861                   unsigned long *missing_events, u64 *ent_ts)
2862 {
2863         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2864         struct trace_entry *ent, *next = NULL;
2865         unsigned long lost_events = 0, next_lost = 0;
2866         int cpu_file = iter->cpu_file;
2867         u64 next_ts = 0, ts;
2868         int next_cpu = -1;
2869         int next_size = 0;
2870         int cpu;
2871 
2872         /*
2873          * If we are in a per_cpu trace file, don't bother by iterating over
2874          * all cpu and peek directly.
2875          */
2876         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2877                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2878                         return NULL;
2879                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2880                 if (ent_cpu)
2881                         *ent_cpu = cpu_file;
2882 
2883                 return ent;
2884         }
2885 
2886         for_each_tracing_cpu(cpu) {
2887 
2888                 if (ring_buffer_empty_cpu(buffer, cpu))
2889                         continue;
2890 
2891                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2892 
2893                 /*
2894                  * Pick the entry with the smallest timestamp:
2895                  */
2896                 if (ent && (!next || ts < next_ts)) {
2897                         next = ent;
2898                         next_cpu = cpu;
2899                         next_ts = ts;
2900                         next_lost = lost_events;
2901                         next_size = iter->ent_size;
2902                 }
2903         }
2904 
2905         iter->ent_size = next_size;
2906 
2907         if (ent_cpu)
2908                 *ent_cpu = next_cpu;
2909 
2910         if (ent_ts)
2911                 *ent_ts = next_ts;
2912 
2913         if (missing_events)
2914                 *missing_events = next_lost;
2915 
2916         return next;
2917 }
2918 
2919 /* Find the next real entry, without updating the iterator itself */
2920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2921                                           int *ent_cpu, u64 *ent_ts)
2922 {
2923         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2924 }
2925 
2926 /* Find the next real entry, and increment the iterator to the next entry */
2927 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2928 {
2929         iter->ent = __find_next_entry(iter, &iter->cpu,
2930                                       &iter->lost_events, &iter->ts);
2931 
2932         if (iter->ent)
2933                 trace_iterator_increment(iter);
2934 
2935         return iter->ent ? iter : NULL;
2936 }
2937 
2938 static void trace_consume(struct trace_iterator *iter)
2939 {
2940         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2941                             &iter->lost_events);
2942 }
2943 
2944 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2945 {
2946         struct trace_iterator *iter = m->private;
2947         int i = (int)*pos;
2948         void *ent;
2949 
2950         WARN_ON_ONCE(iter->leftover);
2951 
2952         (*pos)++;
2953 
2954         /* can't go backwards */
2955         if (iter->idx > i)
2956                 return NULL;
2957 
2958         if (iter->idx < 0)
2959                 ent = trace_find_next_entry_inc(iter);
2960         else
2961                 ent = iter;
2962 
2963         while (ent && iter->idx < i)
2964                 ent = trace_find_next_entry_inc(iter);
2965 
2966         iter->pos = *pos;
2967 
2968         return ent;
2969 }
2970 
2971 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2972 {
2973         struct ring_buffer_event *event;
2974         struct ring_buffer_iter *buf_iter;
2975         unsigned long entries = 0;
2976         u64 ts;
2977 
2978         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2979 
2980         buf_iter = trace_buffer_iter(iter, cpu);
2981         if (!buf_iter)
2982                 return;
2983 
2984         ring_buffer_iter_reset(buf_iter);
2985 
2986         /*
2987          * We could have the case with the max latency tracers
2988          * that a reset never took place on a cpu. This is evident
2989          * by the timestamp being before the start of the buffer.
2990          */
2991         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2992                 if (ts >= iter->trace_buffer->time_start)
2993                         break;
2994                 entries++;
2995                 ring_buffer_read(buf_iter, NULL);
2996         }
2997 
2998         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2999 }
3000 
3001 /*
3002  * The current tracer is copied to avoid a global locking
3003  * all around.
3004  */
3005 static void *s_start(struct seq_file *m, loff_t *pos)
3006 {
3007         struct trace_iterator *iter = m->private;
3008         struct trace_array *tr = iter->tr;
3009         int cpu_file = iter->cpu_file;
3010         void *p = NULL;
3011         loff_t l = 0;
3012         int cpu;
3013 
3014         /*
3015          * copy the tracer to avoid using a global lock all around.
3016          * iter->trace is a copy of current_trace, the pointer to the
3017          * name may be used instead of a strcmp(), as iter->trace->name
3018          * will point to the same string as current_trace->name.
3019          */
3020         mutex_lock(&trace_types_lock);
3021         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3022                 *iter->trace = *tr->current_trace;
3023         mutex_unlock(&trace_types_lock);
3024 
3025 #ifdef CONFIG_TRACER_MAX_TRACE
3026         if (iter->snapshot && iter->trace->use_max_tr)
3027                 return ERR_PTR(-EBUSY);
3028 #endif
3029 
3030         if (!iter->snapshot)
3031                 atomic_inc(&trace_record_cmdline_disabled);
3032 
3033         if (*pos != iter->pos) {
3034                 iter->ent = NULL;
3035                 iter->cpu = 0;
3036                 iter->idx = -1;
3037 
3038                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3039                         for_each_tracing_cpu(cpu)
3040                                 tracing_iter_reset(iter, cpu);
3041                 } else
3042                         tracing_iter_reset(iter, cpu_file);
3043 
3044                 iter->leftover = 0;
3045                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3046                         ;
3047 
3048         } else {
3049                 /*
3050                  * If we overflowed the seq_file before, then we want
3051                  * to just reuse the trace_seq buffer again.
3052                  */
3053                 if (iter->leftover)
3054                         p = iter;
3055                 else {
3056                         l = *pos - 1;
3057                         p = s_next(m, p, &l);
3058                 }
3059         }
3060 
3061         trace_event_read_lock();
3062         trace_access_lock(cpu_file);
3063         return p;
3064 }
3065 
3066 static void s_stop(struct seq_file *m, void *p)
3067 {
3068         struct trace_iterator *iter = m->private;
3069 
3070 #ifdef CONFIG_TRACER_MAX_TRACE
3071         if (iter->snapshot && iter->trace->use_max_tr)
3072                 return;
3073 #endif
3074 
3075         if (!iter->snapshot)
3076                 atomic_dec(&trace_record_cmdline_disabled);
3077 
3078         trace_access_unlock(iter->cpu_file);
3079         trace_event_read_unlock();
3080 }
3081 
3082 static void
3083 get_total_entries(struct trace_buffer *buf,
3084                   unsigned long *total, unsigned long *entries)
3085 {
3086         unsigned long count;
3087         int cpu;
3088 
3089         *total = 0;
3090         *entries = 0;
3091 
3092         for_each_tracing_cpu(cpu) {
3093                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3094                 /*
3095                  * If this buffer has skipped entries, then we hold all
3096                  * entries for the trace and we need to ignore the
3097                  * ones before the time stamp.
3098                  */
3099                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3100                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3101                         /* total is the same as the entries */
3102                         *total += count;
3103                 } else
3104                         *total += count +
3105                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3106                 *entries += count;
3107         }
3108 }
3109 
3110 static void print_lat_help_header(struct seq_file *m)
3111 {
3112         seq_puts(m, "#                  _------=> CPU#            \n"
3113                     "#                 / _-----=> irqs-off        \n"
3114                     "#                | / _----=> need-resched    \n"
3115                     "#                || / _---=> hardirq/softirq \n"
3116                     "#                ||| / _--=> preempt-depth   \n"
3117                     "#                |||| /     delay            \n"
3118                     "#  cmd     pid   ||||| time  |   caller      \n"
3119                     "#     \\   /      |||||  \\    |   /         \n");
3120 }
3121 
3122 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3123 {
3124         unsigned long total;
3125         unsigned long entries;
3126 
3127         get_total_entries(buf, &total, &entries);
3128         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3129                    entries, total, num_online_cpus());
3130         seq_puts(m, "#\n");
3131 }
3132 
3133 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3134 {
3135         print_event_info(buf, m);
3136         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3137                     "#              | |       |          |         |\n");
3138 }
3139 
3140 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142         print_event_info(buf, m);
3143         seq_puts(m, "#                              _-----=> irqs-off\n"
3144                     "#                             / _----=> need-resched\n"
3145                     "#                            | / _---=> hardirq/softirq\n"
3146                     "#                            || / _--=> preempt-depth\n"
3147                     "#                            ||| /     delay\n"
3148                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3149                     "#              | |       |   ||||       |         |\n");
3150 }
3151 
3152 void
3153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3154 {
3155         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3156         struct trace_buffer *buf = iter->trace_buffer;
3157         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3158         struct tracer *type = iter->trace;
3159         unsigned long entries;
3160         unsigned long total;
3161         const char *name = "preemption";
3162 
3163         name = type->name;
3164 
3165         get_total_entries(buf, &total, &entries);
3166 
3167         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3168                    name, UTS_RELEASE);
3169         seq_puts(m, "# -----------------------------------"
3170                  "---------------------------------\n");
3171         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3172                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3173                    nsecs_to_usecs(data->saved_latency),
3174                    entries,
3175                    total,
3176                    buf->cpu,
3177 #if defined(CONFIG_PREEMPT_NONE)
3178                    "server",
3179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3180                    "desktop",
3181 #elif defined(CONFIG_PREEMPT)
3182                    "preempt",
3183 #else
3184                    "unknown",
3185 #endif
3186                    /* These are reserved for later use */
3187                    0, 0, 0, 0);
3188 #ifdef CONFIG_SMP
3189         seq_printf(m, " #P:%d)\n", num_online_cpus());
3190 #else
3191         seq_puts(m, ")\n");
3192 #endif
3193         seq_puts(m, "#    -----------------\n");
3194         seq_printf(m, "#    | task: %.16s-%d "
3195                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3196                    data->comm, data->pid,
3197                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3198                    data->policy, data->rt_priority);
3199         seq_puts(m, "#    -----------------\n");
3200 
3201         if (data->critical_start) {
3202                 seq_puts(m, "#  => started at: ");
3203                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3204                 trace_print_seq(m, &iter->seq);
3205                 seq_puts(m, "\n#  => ended at:   ");
3206                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3207                 trace_print_seq(m, &iter->seq);
3208                 seq_puts(m, "\n#\n");
3209         }
3210 
3211         seq_puts(m, "#\n");
3212 }
3213 
3214 static void test_cpu_buff_start(struct trace_iterator *iter)
3215 {
3216         struct trace_seq *s = &iter->seq;
3217         struct trace_array *tr = iter->tr;
3218 
3219         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3220                 return;
3221 
3222         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3223                 return;
3224 
3225         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3226                 return;
3227 
3228         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3229                 return;
3230 
3231         if (iter->started)
3232                 cpumask_set_cpu(iter->cpu, iter->started);
3233 
3234         /* Don't print started cpu buffer for the first entry of the trace */
3235         if (iter->idx > 1)
3236                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3237                                 iter->cpu);
3238 }
3239 
3240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3241 {
3242         struct trace_array *tr = iter->tr;
3243         struct trace_seq *s = &iter->seq;
3244         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_entry *entry;
3246         struct trace_event *event;
3247 
3248         entry = iter->ent;
3249 
3250         test_cpu_buff_start(iter);
3251 
3252         event = ftrace_find_event(entry->type);
3253 
3254         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3255                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3256                         trace_print_lat_context(iter);
3257                 else
3258                         trace_print_context(iter);
3259         }
3260 
3261         if (trace_seq_has_overflowed(s))
3262                 return TRACE_TYPE_PARTIAL_LINE;
3263 
3264         if (event)
3265                 return event->funcs->trace(iter, sym_flags, event);
3266 
3267         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3268 
3269         return trace_handle_return(s);
3270 }
3271 
3272 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3273 {
3274         struct trace_array *tr = iter->tr;
3275         struct trace_seq *s = &iter->seq;
3276         struct trace_entry *entry;
3277         struct trace_event *event;
3278 
3279         entry = iter->ent;
3280 
3281         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3282                 trace_seq_printf(s, "%d %d %llu ",
3283                                  entry->pid, iter->cpu, iter->ts);
3284 
3285         if (trace_seq_has_overflowed(s))
3286                 return TRACE_TYPE_PARTIAL_LINE;
3287 
3288         event = ftrace_find_event(entry->type);
3289         if (event)
3290                 return event->funcs->raw(iter, 0, event);
3291 
3292         trace_seq_printf(s, "%d ?\n", entry->type);
3293 
3294         return trace_handle_return(s);
3295 }
3296 
3297 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3298 {
3299         struct trace_array *tr = iter->tr;
3300         struct trace_seq *s = &iter->seq;
3301         unsigned char newline = '\n';
3302         struct trace_entry *entry;
3303         struct trace_event *event;
3304 
3305         entry = iter->ent;
3306 
3307         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3308                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3309                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3310                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3311                 if (trace_seq_has_overflowed(s))
3312                         return TRACE_TYPE_PARTIAL_LINE;
3313         }
3314 
3315         event = ftrace_find_event(entry->type);
3316         if (event) {
3317                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3318                 if (ret != TRACE_TYPE_HANDLED)
3319                         return ret;
3320         }
3321 
3322         SEQ_PUT_FIELD(s, newline);
3323 
3324         return trace_handle_return(s);
3325 }
3326 
3327 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3328 {
3329         struct trace_array *tr = iter->tr;
3330         struct trace_seq *s = &iter->seq;
3331         struct trace_entry *entry;
3332         struct trace_event *event;
3333 
3334         entry = iter->ent;
3335 
3336         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3337                 SEQ_PUT_FIELD(s, entry->pid);
3338                 SEQ_PUT_FIELD(s, iter->cpu);
3339                 SEQ_PUT_FIELD(s, iter->ts);
3340                 if (trace_seq_has_overflowed(s))
3341                         return TRACE_TYPE_PARTIAL_LINE;
3342         }
3343 
3344         event = ftrace_find_event(entry->type);
3345         return event ? event->funcs->binary(iter, 0, event) :
3346                 TRACE_TYPE_HANDLED;
3347 }
3348 
3349 int trace_empty(struct trace_iterator *iter)
3350 {
3351         struct ring_buffer_iter *buf_iter;
3352         int cpu;
3353 
3354         /* If we are looking at one CPU buffer, only check that one */
3355         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3356                 cpu = iter->cpu_file;
3357                 buf_iter = trace_buffer_iter(iter, cpu);
3358                 if (buf_iter) {
3359                         if (!ring_buffer_iter_empty(buf_iter))
3360                                 return 0;
3361                 } else {
3362                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3363                                 return 0;
3364                 }
3365                 return 1;
3366         }
3367 
3368         for_each_tracing_cpu(cpu) {
3369                 buf_iter = trace_buffer_iter(iter, cpu);
3370                 if (buf_iter) {
3371                         if (!ring_buffer_iter_empty(buf_iter))
3372                                 return 0;
3373                 } else {
3374                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3375                                 return 0;
3376                 }
3377         }
3378 
3379         return 1;
3380 }
3381 
3382 /*  Called with trace_event_read_lock() held. */
3383 enum print_line_t print_trace_line(struct trace_iterator *iter)
3384 {
3385         struct trace_array *tr = iter->tr;
3386         unsigned long trace_flags = tr->trace_flags;
3387         enum print_line_t ret;
3388 
3389         if (iter->lost_events) {
3390                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3391                                  iter->cpu, iter->lost_events);
3392                 if (trace_seq_has_overflowed(&iter->seq))
3393                         return TRACE_TYPE_PARTIAL_LINE;
3394         }
3395 
3396         if (iter->trace && iter->trace->print_line) {
3397                 ret = iter->trace->print_line(iter);
3398                 if (ret != TRACE_TYPE_UNHANDLED)
3399                         return ret;
3400         }
3401 
3402         if (iter->ent->type == TRACE_BPUTS &&
3403                         trace_flags & TRACE_ITER_PRINTK &&
3404                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3405                 return trace_print_bputs_msg_only(iter);
3406 
3407         if (iter->ent->type == TRACE_BPRINT &&
3408                         trace_flags & TRACE_ITER_PRINTK &&
3409                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3410                 return trace_print_bprintk_msg_only(iter);
3411 
3412         if (iter->ent->type == TRACE_PRINT &&
3413                         trace_flags & TRACE_ITER_PRINTK &&
3414                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3415                 return trace_print_printk_msg_only(iter);
3416 
3417         if (trace_flags & TRACE_ITER_BIN)
3418                 return print_bin_fmt(iter);
3419 
3420         if (trace_flags & TRACE_ITER_HEX)
3421                 return print_hex_fmt(iter);
3422 
3423         if (trace_flags & TRACE_ITER_RAW)
3424                 return print_raw_fmt(iter);
3425 
3426         return print_trace_fmt(iter);
3427 }
3428 
3429 void trace_latency_header(struct seq_file *m)
3430 {
3431         struct trace_iterator *iter = m->private;
3432         struct trace_array *tr = iter->tr;
3433 
3434         /* print nothing if the buffers are empty */
3435         if (trace_empty(iter))
3436                 return;
3437 
3438         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3439                 print_trace_header(m, iter);
3440 
3441         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3442                 print_lat_help_header(m);
3443 }
3444 
3445 void trace_default_header(struct seq_file *m)
3446 {
3447         struct trace_iterator *iter = m->private;
3448         struct trace_array *tr = iter->tr;
3449         unsigned long trace_flags = tr->trace_flags;
3450 
3451         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3452                 return;
3453 
3454         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3455                 /* print nothing if the buffers are empty */
3456                 if (trace_empty(iter))
3457                         return;
3458                 print_trace_header(m, iter);
3459                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3460                         print_lat_help_header(m);
3461         } else {
3462                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3463                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3464                                 print_func_help_header_irq(iter->trace_buffer, m);
3465                         else
3466                                 print_func_help_header(iter->trace_buffer, m);
3467                 }
3468         }
3469 }
3470 
3471 static void test_ftrace_alive(struct seq_file *m)
3472 {
3473         if (!ftrace_is_dead())
3474                 return;
3475         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3476                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3477 }
3478 
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 static void show_snapshot_main_help(struct seq_file *m)
3481 {
3482         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3483                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3484                     "#                      Takes a snapshot of the main buffer.\n"
3485                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3486                     "#                      (Doesn't have to be '2' works with any number that\n"
3487                     "#                       is not a '' or '1')\n");
3488 }
3489 
3490 static void show_snapshot_percpu_help(struct seq_file *m)
3491 {
3492         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3494         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3495                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3496 #else
3497         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3498                     "#                     Must use main snapshot file to allocate.\n");
3499 #endif
3500         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3501                     "#                      (Doesn't have to be '2' works with any number that\n"
3502                     "#                       is not a '' or '1')\n");
3503 }
3504 
3505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3506 {
3507         if (iter->tr->allocated_snapshot)
3508                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3509         else
3510                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3511 
3512         seq_puts(m, "# Snapshot commands:\n");
3513         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3514                 show_snapshot_main_help(m);
3515         else
3516                 show_snapshot_percpu_help(m);
3517 }
3518 #else
3519 /* Should never be called */
3520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3521 #endif
3522 
3523 static int s_show(struct seq_file *m, void *v)
3524 {
3525         struct trace_iterator *iter = v;
3526         int ret;
3527 
3528         if (iter->ent == NULL) {
3529                 if (iter->tr) {
3530                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3531                         seq_puts(m, "#\n");
3532                         test_ftrace_alive(m);
3533                 }
3534                 if (iter->snapshot && trace_empty(iter))
3535                         print_snapshot_help(m, iter);
3536                 else if (iter->trace && iter->trace->print_header)
3537                         iter->trace->print_header(m);
3538                 else
3539                         trace_default_header(m);
3540 
3541         } else if (iter->leftover) {
3542                 /*
3543                  * If we filled the seq_file buffer earlier, we
3544                  * want to just show it now.
3545                  */
3546                 ret = trace_print_seq(m, &iter->seq);
3547 
3548                 /* ret should this time be zero, but you never know */
3549                 iter->leftover = ret;
3550 
3551         } else {
3552                 print_trace_line(iter);
3553                 ret = trace_print_seq(m, &iter->seq);
3554                 /*
3555                  * If we overflow the seq_file buffer, then it will
3556                  * ask us for this data again at start up.
3557                  * Use that instead.
3558                  *  ret is 0 if seq_file write succeeded.
3559                  *        -1 otherwise.
3560                  */
3561                 iter->leftover = ret;
3562         }
3563 
3564         return 0;
3565 }
3566 
3567 /*
3568  * Should be used after trace_array_get(), trace_types_lock
3569  * ensures that i_cdev was already initialized.
3570  */
3571 static inline int tracing_get_cpu(struct inode *inode)
3572 {
3573         if (inode->i_cdev) /* See trace_create_cpu_file() */
3574                 return (long)inode->i_cdev - 1;
3575         return RING_BUFFER_ALL_CPUS;
3576 }
3577 
3578 static const struct seq_operations tracer_seq_ops = {
3579         .start          = s_start,
3580         .next           = s_next,
3581         .stop           = s_stop,
3582         .show           = s_show,
3583 };
3584 
3585 static struct trace_iterator *
3586 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3587 {
3588         struct trace_array *tr = inode->i_private;
3589         struct trace_iterator *iter;
3590         int cpu;
3591 
3592         if (tracing_disabled)
3593                 return ERR_PTR(-ENODEV);
3594 
3595         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3596         if (!iter)
3597                 return ERR_PTR(-ENOMEM);
3598 
3599         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3600                                     GFP_KERNEL);
3601         if (!iter->buffer_iter)
3602                 goto release;
3603 
3604         /*
3605          * We make a copy of the current tracer to avoid concurrent
3606          * changes on it while we are reading.
3607          */
3608         mutex_lock(&trace_types_lock);
3609         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3610         if (!iter->trace)
3611                 goto fail;
3612 
3613         *iter->trace = *tr->current_trace;
3614 
3615         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3616                 goto fail;
3617 
3618         iter->tr = tr;
3619 
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621         /* Currently only the top directory has a snapshot */
3622         if (tr->current_trace->print_max || snapshot)
3623                 iter->trace_buffer = &tr->max_buffer;
3624         else
3625 #endif
3626                 iter->trace_buffer = &tr->trace_buffer;
3627         iter->snapshot = snapshot;
3628         iter->pos = -1;
3629         iter->cpu_file = tracing_get_cpu(inode);
3630         mutex_init(&iter->mutex);
3631 
3632         /* Notify the tracer early; before we stop tracing. */
3633         if (iter->trace && iter->trace->open)
3634                 iter->trace->open(iter);
3635 
3636         /* Annotate start of buffers if we had overruns */
3637         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3638                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3639 
3640         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3641         if (trace_clocks[tr->clock_id].in_ns)
3642                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3643 
3644         /* stop the trace while dumping if we are not opening "snapshot" */
3645         if (!iter->snapshot)
3646                 tracing_stop_tr(tr);
3647 
3648         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3649                 for_each_tracing_cpu(cpu) {
3650                         iter->buffer_iter[cpu] =
3651                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3652                 }
3653                 ring_buffer_read_prepare_sync();
3654                 for_each_tracing_cpu(cpu) {
3655                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3656                         tracing_iter_reset(iter, cpu);
3657                 }
3658         } else {
3659                 cpu = iter->cpu_file;
3660                 iter->buffer_iter[cpu] =
3661                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3662                 ring_buffer_read_prepare_sync();
3663                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3664                 tracing_iter_reset(iter, cpu);
3665         }
3666 
3667         mutex_unlock(&trace_types_lock);
3668 
3669         return iter;
3670 
3671  fail:
3672         mutex_unlock(&trace_types_lock);
3673         kfree(iter->trace);
3674         kfree(iter->buffer_iter);
3675 release:
3676         seq_release_private(inode, file);
3677         return ERR_PTR(-ENOMEM);
3678 }
3679 
3680 int tracing_open_generic(struct inode *inode, struct file *filp)
3681 {
3682         if (tracing_disabled)
3683                 return -ENODEV;
3684 
3685         filp->private_data = inode->i_private;
3686         return 0;
3687 }
3688 
3689 bool tracing_is_disabled(void)
3690 {
3691         return (tracing_disabled) ? true: false;
3692 }
3693 
3694 /*
3695  * Open and update trace_array ref count.
3696  * Must have the current trace_array passed to it.
3697  */
3698 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3699 {
3700         struct trace_array *tr = inode->i_private;
3701 
3702         if (tracing_disabled)
3703                 return -ENODEV;
3704 
3705         if (trace_array_get(tr) < 0)
3706                 return -ENODEV;
3707 
3708         filp->private_data = inode->i_private;
3709 
3710         return 0;
3711 }
3712 
3713 static int tracing_release(struct inode *inode, struct file *file)
3714 {
3715         struct trace_array *tr = inode->i_private;
3716         struct seq_file *m = file->private_data;
3717         struct trace_iterator *iter;
3718         int cpu;
3719 
3720         if (!(file->f_mode & FMODE_READ)) {
3721                 trace_array_put(tr);
3722                 return 0;
3723         }
3724 
3725         /* Writes do not use seq_file */
3726         iter = m->private;
3727         mutex_lock(&trace_types_lock);
3728 
3729         for_each_tracing_cpu(cpu) {
3730                 if (iter->buffer_iter[cpu])
3731                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3732         }
3733 
3734         if (iter->trace && iter->trace->close)
3735                 iter->trace->close(iter);
3736 
3737         if (!iter->snapshot)
3738                 /* reenable tracing if it was previously enabled */
3739                 tracing_start_tr(tr);
3740 
3741         __trace_array_put(tr);
3742 
3743         mutex_unlock(&trace_types_lock);
3744 
3745         mutex_destroy(&iter->mutex);
3746         free_cpumask_var(iter->started);
3747         kfree(iter->trace);
3748         kfree(iter->buffer_iter);
3749         seq_release_private(inode, file);
3750 
3751         return 0;
3752 }
3753 
3754 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3755 {
3756         struct trace_array *tr = inode->i_private;
3757 
3758         trace_array_put(tr);
3759         return 0;
3760 }
3761 
3762 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3763 {
3764         struct trace_array *tr = inode->i_private;
3765 
3766         trace_array_put(tr);
3767 
3768         return single_release(inode, file);
3769 }
3770 
3771 static int tracing_open(struct inode *inode, struct file *file)
3772 {
3773         struct trace_array *tr = inode->i_private;
3774         struct trace_iterator *iter;
3775         int ret = 0;
3776 
3777         if (trace_array_get(tr) < 0)
3778                 return -ENODEV;
3779 
3780         /* If this file was open for write, then erase contents */
3781         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3782                 int cpu = tracing_get_cpu(inode);
3783 
3784                 if (cpu == RING_BUFFER_ALL_CPUS)
3785                         tracing_reset_online_cpus(&tr->trace_buffer);
3786                 else
3787                         tracing_reset(&tr->trace_buffer, cpu);
3788         }
3789 
3790         if (file->f_mode & FMODE_READ) {
3791                 iter = __tracing_open(inode, file, false);
3792                 if (IS_ERR(iter))
3793                         ret = PTR_ERR(iter);
3794                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3795                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3796         }
3797 
3798         if (ret < 0)
3799                 trace_array_put(tr);
3800 
3801         return ret;
3802 }
3803 
3804 /*
3805  * Some tracers are not suitable for instance buffers.
3806  * A tracer is always available for the global array (toplevel)
3807  * or if it explicitly states that it is.
3808  */
3809 static bool
3810 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3811 {
3812         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3813 }
3814 
3815 /* Find the next tracer that this trace array may use */
3816 static struct tracer *
3817 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3818 {
3819         while (t && !trace_ok_for_array(t, tr))
3820                 t = t->next;
3821 
3822         return t;
3823 }
3824 
3825 static void *
3826 t_next(struct seq_file *m, void *v, loff_t *pos)
3827 {
3828         struct trace_array *tr = m->private;
3829         struct tracer *t = v;
3830 
3831         (*pos)++;
3832 
3833         if (t)
3834                 t = get_tracer_for_array(tr, t->next);
3835 
3836         return t;
3837 }
3838 
3839 static void *t_start(struct seq_file *m, loff_t *pos)
3840 {
3841         struct trace_array *tr = m->private;
3842         struct tracer *t;
3843         loff_t l = 0;
3844 
3845         mutex_lock(&trace_types_lock);
3846 
3847         t = get_tracer_for_array(tr, trace_types);
3848         for (; t && l < *pos; t = t_next(m, t, &l))
3849                         ;
3850 
3851         return t;
3852 }
3853 
3854 static void t_stop(struct seq_file *m, void *p)
3855 {
3856         mutex_unlock(&trace_types_lock);
3857 }
3858 
3859 static int t_show(struct seq_file *m, void *v)
3860 {
3861         struct tracer *t = v;
3862 
3863         if (!t)
3864                 return 0;
3865 
3866         seq_puts(m, t->name);
3867         if (t->next)
3868                 seq_putc(m, ' ');
3869         else
3870                 seq_putc(m, '\n');
3871 
3872         return 0;
3873 }
3874 
3875 static const struct seq_operations show_traces_seq_ops = {
3876         .start          = t_start,
3877         .next           = t_next,
3878         .stop           = t_stop,
3879         .show           = t_show,
3880 };
3881 
3882 static int show_traces_open(struct inode *inode, struct file *file)
3883 {
3884         struct trace_array *tr = inode->i_private;
3885         struct seq_file *m;
3886         int ret;
3887 
3888         if (tracing_disabled)
3889                 return -ENODEV;
3890 
3891         ret = seq_open(file, &show_traces_seq_ops);
3892         if (ret)
3893                 return ret;
3894 
3895         m = file->private_data;
3896         m->private = tr;
3897 
3898         return 0;
3899 }
3900 
3901 static ssize_t
3902 tracing_write_stub(struct file *filp, const char __user *ubuf,
3903                    size_t count, loff_t *ppos)
3904 {
3905         return count;
3906 }
3907 
3908 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3909 {
3910         int ret;
3911 
3912         if (file->f_mode & FMODE_READ)
3913                 ret = seq_lseek(file, offset, whence);
3914         else
3915                 file->f_pos = ret = 0;
3916 
3917         return ret;
3918 }
3919 
3920 static const struct file_operations tracing_fops = {
3921         .open           = tracing_open,
3922         .read           = seq_read,
3923         .write          = tracing_write_stub,
3924         .llseek         = tracing_lseek,
3925         .release        = tracing_release,
3926 };
3927 
3928 static const struct file_operations show_traces_fops = {
3929         .open           = show_traces_open,
3930         .read           = seq_read,
3931         .release        = seq_release,
3932         .llseek         = seq_lseek,
3933 };
3934 
3935 /*
3936  * The tracer itself will not take this lock, but still we want
3937  * to provide a consistent cpumask to user-space:
3938  */
3939 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3940 
3941 /*
3942  * Temporary storage for the character representation of the
3943  * CPU bitmask (and one more byte for the newline):
3944  */
3945 static char mask_str[NR_CPUS + 1];
3946 
3947 static ssize_t
3948 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3949                      size_t count, loff_t *ppos)
3950 {
3951         struct trace_array *tr = file_inode(filp)->i_private;
3952         int len;
3953 
3954         mutex_lock(&tracing_cpumask_update_lock);
3955 
3956         len = snprintf(mask_str, count, "%*pb\n",
3957                        cpumask_pr_args(tr->tracing_cpumask));
3958         if (len >= count) {
3959                 count = -EINVAL;
3960                 goto out_err;
3961         }
3962         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3963 
3964 out_err:
3965         mutex_unlock(&tracing_cpumask_update_lock);
3966 
3967         return count;
3968 }
3969 
3970 static ssize_t
3971 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3972                       size_t count, loff_t *ppos)
3973 {
3974         struct trace_array *tr = file_inode(filp)->i_private;
3975         cpumask_var_t tracing_cpumask_new;
3976         int err, cpu;
3977 
3978         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3979                 return -ENOMEM;
3980 
3981         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3982         if (err)
3983                 goto err_unlock;
3984 
3985         mutex_lock(&tracing_cpumask_update_lock);
3986 
3987         local_irq_disable();
3988         arch_spin_lock(&tr->max_lock);
3989         for_each_tracing_cpu(cpu) {
3990                 /*
3991                  * Increase/decrease the disabled counter if we are
3992                  * about to flip a bit in the cpumask:
3993                  */
3994                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3995                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3996                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3997                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3998                 }
3999                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4000                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4001                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4002                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4003                 }
4004         }
4005         arch_spin_unlock(&tr->max_lock);
4006         local_irq_enable();
4007 
4008         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4009 
4010         mutex_unlock(&tracing_cpumask_update_lock);
4011         free_cpumask_var(tracing_cpumask_new);
4012 
4013         return count;
4014 
4015 err_unlock:
4016         free_cpumask_var(tracing_cpumask_new);
4017 
4018         return err;
4019 }
4020 
4021 static const struct file_operations tracing_cpumask_fops = {
4022         .open           = tracing_open_generic_tr,
4023         .read           = tracing_cpumask_read,
4024         .write          = tracing_cpumask_write,
4025         .release        = tracing_release_generic_tr,
4026         .llseek         = generic_file_llseek,
4027 };
4028 
4029 static int tracing_trace_options_show(struct seq_file *m, void *v)
4030 {
4031         struct tracer_opt *trace_opts;
4032         struct trace_array *tr = m->private;
4033         u32 tracer_flags;
4034         int i;
4035 
4036         mutex_lock(&trace_types_lock);
4037         tracer_flags = tr->current_trace->flags->val;
4038         trace_opts = tr->current_trace->flags->opts;
4039 
4040         for (i = 0; trace_options[i]; i++) {
4041                 if (tr->trace_flags & (1 << i))
4042                         seq_printf(m, "%s\n", trace_options[i]);
4043                 else
4044                         seq_printf(m, "no%s\n", trace_options[i]);
4045         }
4046 
4047         for (i = 0; trace_opts[i].name; i++) {
4048                 if (tracer_flags & trace_opts[i].bit)
4049                         seq_printf(m, "%s\n", trace_opts[i].name);
4050                 else
4051                         seq_printf(m, "no%s\n", trace_opts[i].name);
4052         }
4053         mutex_unlock(&trace_types_lock);
4054 
4055         return 0;
4056 }
4057 
4058 static int __set_tracer_option(struct trace_array *tr,
4059                                struct tracer_flags *tracer_flags,
4060                                struct tracer_opt *opts, int neg)
4061 {
4062         struct tracer *trace = tracer_flags->trace;
4063         int ret;
4064 
4065         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4066         if (ret)
4067                 return ret;
4068 
4069         if (neg)
4070                 tracer_flags->val &= ~opts->bit;
4071         else
4072                 tracer_flags->val |= opts->bit;
4073         return 0;
4074 }
4075 
4076 /* Try to assign a tracer specific option */
4077 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4078 {
4079         struct tracer *trace = tr->current_trace;
4080         struct tracer_flags *tracer_flags = trace->flags;
4081         struct tracer_opt *opts = NULL;
4082         int i;
4083 
4084         for (i = 0; tracer_flags->opts[i].name; i++) {
4085                 opts = &tracer_flags->opts[i];
4086 
4087                 if (strcmp(cmp, opts->name) == 0)
4088                         return __set_tracer_option(tr, trace->flags, opts, neg);
4089         }
4090 
4091         return -EINVAL;
4092 }
4093 
4094 /* Some tracers require overwrite to stay enabled */
4095 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4096 {
4097         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4098                 return -1;
4099 
4100         return 0;
4101 }
4102 
4103 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4104 {
4105         /* do nothing if flag is already set */
4106         if (!!(tr->trace_flags & mask) == !!enabled)
4107                 return 0;
4108 
4109         /* Give the tracer a chance to approve the change */
4110         if (tr->current_trace->flag_changed)
4111                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4112                         return -EINVAL;
4113 
4114         if (enabled)
4115                 tr->trace_flags |= mask;
4116         else
4117                 tr->trace_flags &= ~mask;
4118 
4119         if (mask == TRACE_ITER_RECORD_CMD)
4120                 trace_event_enable_cmd_record(enabled);
4121 
4122         if (mask == TRACE_ITER_EVENT_FORK)
4123                 trace_event_follow_fork(tr, enabled);
4124 
4125         if (mask == TRACE_ITER_OVERWRITE) {
4126                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4127 #ifdef CONFIG_TRACER_MAX_TRACE
4128                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4129 #endif
4130         }
4131 
4132         if (mask == TRACE_ITER_PRINTK) {
4133                 trace_printk_start_stop_comm(enabled);
4134                 trace_printk_control(enabled);
4135         }
4136 
4137         return 0;
4138 }
4139 
4140 static int trace_set_options(struct trace_array *tr, char *option)
4141 {
4142         char *cmp;
4143         int neg = 0;
4144         int ret = -ENODEV;
4145         int i;
4146         size_t orig_len = strlen(option);
4147 
4148         cmp = strstrip(option);
4149 
4150         if (strncmp(cmp, "no", 2) == 0) {
4151                 neg = 1;
4152                 cmp += 2;
4153         }
4154 
4155         mutex_lock(&trace_types_lock);
4156 
4157         for (i = 0; trace_options[i]; i++) {
4158                 if (strcmp(cmp, trace_options[i]) == 0) {
4159                         ret = set_tracer_flag(tr, 1 << i, !neg);
4160                         break;
4161                 }
4162         }
4163 
4164         /* If no option could be set, test the specific tracer options */
4165         if (!trace_options[i])
4166                 ret = set_tracer_option(tr, cmp, neg);
4167 
4168         mutex_unlock(&trace_types_lock);
4169 
4170         /*
4171          * If the first trailing whitespace is replaced with '\0' by strstrip,
4172          * turn it back into a space.
4173          */
4174         if (orig_len > strlen(option))
4175                 option[strlen(option)] = ' ';
4176 
4177         return ret;
4178 }
4179 
4180 static void __init apply_trace_boot_options(void)
4181 {
4182         char *buf = trace_boot_options_buf;
4183         char *option;
4184 
4185         while (true) {
4186                 option = strsep(&buf, ",");
4187 
4188                 if (!option)
4189                         break;
4190 
4191                 if (*option)
4192                         trace_set_options(&global_trace, option);
4193 
4194                 /* Put back the comma to allow this to be called again */
4195                 if (buf)
4196                         *(buf - 1) = ',';
4197         }
4198 }
4199 
4200 static ssize_t
4201 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4202                         size_t cnt, loff_t *ppos)
4203 {
4204         struct seq_file *m = filp->private_data;
4205         struct trace_array *tr = m->private;
4206         char buf[64];
4207         int ret;
4208 
4209         if (cnt >= sizeof(buf))
4210                 return -EINVAL;
4211 
4212         if (copy_from_user(buf, ubuf, cnt))
4213                 return -EFAULT;
4214 
4215         buf[cnt] = 0;
4216 
4217         ret = trace_set_options(tr, buf);
4218         if (ret < 0)
4219                 return ret;
4220 
4221         *ppos += cnt;
4222 
4223         return cnt;
4224 }
4225 
4226 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4227 {
4228         struct trace_array *tr = inode->i_private;
4229         int ret;
4230 
4231         if (tracing_disabled)
4232                 return -ENODEV;
4233 
4234         if (trace_array_get(tr) < 0)
4235                 return -ENODEV;
4236 
4237         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4238         if (ret < 0)
4239                 trace_array_put(tr);
4240 
4241         return ret;
4242 }
4243 
4244 static const struct file_operations tracing_iter_fops = {
4245         .open           = tracing_trace_options_open,
4246         .read           = seq_read,
4247         .llseek         = seq_lseek,
4248         .release        = tracing_single_release_tr,
4249         .write          = tracing_trace_options_write,
4250 };
4251 
4252 static const char readme_msg[] =
4253         "tracing mini-HOWTO:\n\n"
4254         "# echo 0 > tracing_on : quick way to disable tracing\n"
4255         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4256         " Important files:\n"
4257         "  trace\t\t\t- The static contents of the buffer\n"
4258         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4259         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4260         "  current_tracer\t- function and latency tracers\n"
4261         "  available_tracers\t- list of configured tracers for current_tracer\n"
4262         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4263         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4264         "  trace_clock\t\t-change the clock used to order events\n"
4265         "       local:   Per cpu clock but may not be synced across CPUs\n"
4266         "      global:   Synced across CPUs but slows tracing down.\n"
4267         "     counter:   Not a clock, but just an increment\n"
4268         "      uptime:   Jiffy counter from time of boot\n"
4269         "        perf:   Same clock that perf events use\n"
4270 #ifdef CONFIG_X86_64
4271         "     x86-tsc:   TSC cycle counter\n"
4272 #endif
4273         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4274         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4275         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4276         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4277         "\t\t\t  Remove sub-buffer with rmdir\n"
4278         "  trace_options\t\t- Set format or modify how tracing happens\n"
4279         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4280         "\t\t\t  option name\n"
4281         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4282 #ifdef CONFIG_DYNAMIC_FTRACE
4283         "\n  available_filter_functions - list of functions that can be filtered on\n"
4284         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4285         "\t\t\t  functions\n"
4286         "\t     accepts: func_full_name or glob-matching-pattern\n"
4287         "\t     modules: Can select a group via module\n"
4288         "\t      Format: :mod:<module-name>\n"
4289         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4290         "\t    triggers: a command to perform when function is hit\n"
4291         "\t      Format: <function>:<trigger>[:count]\n"
4292         "\t     trigger: traceon, traceoff\n"
4293         "\t\t      enable_event:<system>:<event>\n"
4294         "\t\t      disable_event:<system>:<event>\n"
4295 #ifdef CONFIG_STACKTRACE
4296         "\t\t      stacktrace\n"
4297 #endif
4298 #ifdef CONFIG_TRACER_SNAPSHOT
4299         "\t\t      snapshot\n"
4300 #endif
4301         "\t\t      dump\n"
4302         "\t\t      cpudump\n"
4303         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4304         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4305         "\t     The first one will disable tracing every time do_fault is hit\n"
4306         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4307         "\t       The first time do trap is hit and it disables tracing, the\n"
4308         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4309         "\t       the counter will not decrement. It only decrements when the\n"
4310         "\t       trigger did work\n"
4311         "\t     To remove trigger without count:\n"
4312         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4313         "\t     To remove trigger with a count:\n"
4314         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4315         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4316         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4317         "\t    modules: Can select a group via module command :mod:\n"
4318         "\t    Does not accept triggers\n"
4319 #endif /* CONFIG_DYNAMIC_FTRACE */
4320 #ifdef CONFIG_FUNCTION_TRACER
4321         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4322         "\t\t    (function)\n"
4323 #endif
4324 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4325         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4326         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4327         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4328 #endif
4329 #ifdef CONFIG_TRACER_SNAPSHOT
4330         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4331         "\t\t\t  snapshot buffer. Read the contents for more\n"
4332         "\t\t\t  information\n"
4333 #endif
4334 #ifdef CONFIG_STACK_TRACER
4335         "  stack_trace\t\t- Shows the max stack trace when active\n"
4336         "  stack_max_size\t- Shows current max stack size that was traced\n"
4337         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4338         "\t\t\t  new trace)\n"
4339 #ifdef CONFIG_DYNAMIC_FTRACE
4340         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4341         "\t\t\t  traces\n"
4342 #endif
4343 #endif /* CONFIG_STACK_TRACER */
4344 #ifdef CONFIG_KPROBE_EVENTS
4345         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4346         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4347 #endif
4348 #ifdef CONFIG_UPROBE_EVENTS
4349         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4350         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4351 #endif
4352 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4353         "\t  accepts: event-definitions (one definition per line)\n"
4354         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4355         "\t           -:[<group>/]<event>\n"
4356 #ifdef CONFIG_KPROBE_EVENTS
4357         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4358 #endif
4359 #ifdef CONFIG_UPROBE_EVENTS
4360         "\t    place: <path>:<offset>\n"
4361 #endif
4362         "\t     args: <name>=fetcharg[:type]\n"
4363         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4364         "\t           $stack<index>, $stack, $retval, $comm\n"
4365         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4366         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4367 #endif
4368         "  events/\t\t- Directory containing all trace event subsystems:\n"
4369         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4370         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4371         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4372         "\t\t\t  events\n"
4373         "      filter\t\t- If set, only events passing filter are traced\n"
4374         "  events/<system>/<event>/\t- Directory containing control files for\n"
4375         "\t\t\t  <event>:\n"
4376         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4377         "      filter\t\t- If set, only events passing filter are traced\n"
4378         "      trigger\t\t- If set, a command to perform when event is hit\n"
4379         "\t    Format: <trigger>[:count][if <filter>]\n"
4380         "\t   trigger: traceon, traceoff\n"
4381         "\t            enable_event:<system>:<event>\n"
4382         "\t            disable_event:<system>:<event>\n"
4383 #ifdef CONFIG_HIST_TRIGGERS
4384         "\t            enable_hist:<system>:<event>\n"
4385         "\t            disable_hist:<system>:<event>\n"
4386 #endif
4387 #ifdef CONFIG_STACKTRACE
4388         "\t\t    stacktrace\n"
4389 #endif
4390 #ifdef CONFIG_TRACER_SNAPSHOT
4391         "\t\t    snapshot\n"
4392 #endif
4393 #ifdef CONFIG_HIST_TRIGGERS
4394         "\t\t    hist (see below)\n"
4395 #endif
4396         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4397         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4398         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4399         "\t                  events/block/block_unplug/trigger\n"
4400         "\t   The first disables tracing every time block_unplug is hit.\n"
4401         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4402         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4403         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4404         "\t   Like function triggers, the counter is only decremented if it\n"
4405         "\t    enabled or disabled tracing.\n"
4406         "\t   To remove a trigger without a count:\n"
4407         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4408         "\t   To remove a trigger with a count:\n"
4409         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4410         "\t   Filters can be ignored when removing a trigger.\n"
4411 #ifdef CONFIG_HIST_TRIGGERS
4412         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4413         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4414         "\t            [:values=<field1[,field2,...]>]\n"
4415         "\t            [:sort=<field1[,field2,...]>]\n"
4416         "\t            [:size=#entries]\n"
4417         "\t            [:pause][:continue][:clear]\n"
4418         "\t            [:name=histname1]\n"
4419         "\t            [if <filter>]\n\n"
4420         "\t    When a matching event is hit, an entry is added to a hash\n"
4421         "\t    table using the key(s) and value(s) named, and the value of a\n"
4422         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4423         "\t    correspond to fields in the event's format description.  Keys\n"
4424         "\t    can be any field, or the special string 'stacktrace'.\n"
4425         "\t    Compound keys consisting of up to two fields can be specified\n"
4426         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4427         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4428         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4429         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4430         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4431         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4432         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4433         "\t    its histogram data will be shared with other triggers of the\n"
4434         "\t    same name, and trigger hits will update this common data.\n\n"
4435         "\t    Reading the 'hist' file for the event will dump the hash\n"
4436         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4437         "\t    triggers attached to an event, there will be a table for each\n"
4438         "\t    trigger in the output.  The table displayed for a named\n"
4439         "\t    trigger will be the same as any other instance having the\n"
4440         "\t    same name.  The default format used to display a given field\n"
4441         "\t    can be modified by appending any of the following modifiers\n"
4442         "\t    to the field name, as applicable:\n\n"
4443         "\t            .hex        display a number as a hex value\n"
4444         "\t            .sym        display an address as a symbol\n"
4445         "\t            .sym-offset display an address as a symbol and offset\n"
4446         "\t            .execname   display a common_pid as a program name\n"
4447         "\t            .syscall    display a syscall id as a syscall name\n\n"
4448         "\t            .log2       display log2 value rather than raw number\n\n"
4449         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4450         "\t    trigger or to start a hist trigger but not log any events\n"
4451         "\t    until told to do so.  'continue' can be used to start or\n"
4452         "\t    restart a paused hist trigger.\n\n"
4453         "\t    The 'clear' parameter will clear the contents of a running\n"
4454         "\t    hist trigger and leave its current paused/active state\n"
4455         "\t    unchanged.\n\n"
4456         "\t    The enable_hist and disable_hist triggers can be used to\n"
4457         "\t    have one event conditionally start and stop another event's\n"
4458         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4459         "\t    the enable_event and disable_event triggers.\n"
4460 #endif
4461 ;
4462 
4463 static ssize_t
4464 tracing_readme_read(struct file *filp, char __user *ubuf,
4465                        size_t cnt, loff_t *ppos)
4466 {
4467         return simple_read_from_buffer(ubuf, cnt, ppos,
4468                                         readme_msg, strlen(readme_msg));
4469 }
4470 
4471 static const struct file_operations tracing_readme_fops = {
4472         .open           = tracing_open_generic,
4473         .read           = tracing_readme_read,
4474         .llseek         = generic_file_llseek,
4475 };
4476 
4477 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4478 {
4479         unsigned int *ptr = v;
4480 
4481         if (*pos || m->count)
4482                 ptr++;
4483 
4484         (*pos)++;
4485 
4486         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4487              ptr++) {
4488                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4489                         continue;
4490 
4491                 return ptr;
4492         }
4493 
4494         return NULL;
4495 }
4496 
4497 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4498 {
4499         void *v;
4500         loff_t l = 0;
4501 
4502         preempt_disable();
4503         arch_spin_lock(&trace_cmdline_lock);
4504 
4505         v = &savedcmd->map_cmdline_to_pid[0];
4506         while (l <= *pos) {
4507                 v = saved_cmdlines_next(m, v, &l);
4508                 if (!v)
4509                         return NULL;
4510         }
4511 
4512         return v;
4513 }
4514 
4515 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4516 {
4517         arch_spin_unlock(&trace_cmdline_lock);
4518         preempt_enable();
4519 }
4520 
4521 static int saved_cmdlines_show(struct seq_file *m, void *v)
4522 {
4523         char buf[TASK_COMM_LEN];
4524         unsigned int *pid = v;
4525 
4526         __trace_find_cmdline(*pid, buf);
4527         seq_printf(m, "%d %s\n", *pid, buf);
4528         return 0;
4529 }
4530 
4531 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4532         .start          = saved_cmdlines_start,
4533         .next           = saved_cmdlines_next,
4534         .stop           = saved_cmdlines_stop,
4535         .show           = saved_cmdlines_show,
4536 };
4537 
4538 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4539 {
4540         if (tracing_disabled)
4541                 return -ENODEV;
4542 
4543         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4544 }
4545 
4546 static const struct file_operations tracing_saved_cmdlines_fops = {
4547         .open           = tracing_saved_cmdlines_open,
4548         .read           = seq_read,
4549         .llseek         = seq_lseek,
4550         .release        = seq_release,
4551 };
4552 
4553 static ssize_t
4554 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4555                                  size_t cnt, loff_t *ppos)
4556 {
4557         char buf[64];
4558         int r;
4559 
4560         arch_spin_lock(&trace_cmdline_lock);
4561         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4562         arch_spin_unlock(&trace_cmdline_lock);
4563 
4564         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4565 }
4566 
4567 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4568 {
4569         kfree(s->saved_cmdlines);
4570         kfree(s->map_cmdline_to_pid);
4571         kfree(s);
4572 }
4573 
4574 static int tracing_resize_saved_cmdlines(unsigned int val)
4575 {
4576         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4577 
4578         s = kmalloc(sizeof(*s), GFP_KERNEL);
4579         if (!s)
4580                 return -ENOMEM;
4581 
4582         if (allocate_cmdlines_buffer(val, s) < 0) {
4583                 kfree(s);
4584                 return -ENOMEM;
4585         }
4586 
4587         arch_spin_lock(&trace_cmdline_lock);
4588         savedcmd_temp = savedcmd;
4589         savedcmd = s;
4590         arch_spin_unlock(&trace_cmdline_lock);
4591         free_saved_cmdlines_buffer(savedcmd_temp);
4592 
4593         return 0;
4594 }
4595 
4596 static ssize_t
4597 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4598                                   size_t cnt, loff_t *ppos)
4599 {
4600         unsigned long val;
4601         int ret;
4602 
4603         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4604         if (ret)
4605                 return ret;
4606 
4607         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4608         if (!val || val > PID_MAX_DEFAULT)
4609                 return -EINVAL;
4610 
4611         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4612         if (ret < 0)
4613                 return ret;
4614 
4615         *ppos += cnt;
4616 
4617         return cnt;
4618 }
4619 
4620 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4621         .open           = tracing_open_generic,
4622         .read           = tracing_saved_cmdlines_size_read,
4623         .write          = tracing_saved_cmdlines_size_write,
4624 };
4625 
4626 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4627 static union trace_enum_map_item *
4628 update_enum_map(union trace_enum_map_item *ptr)
4629 {
4630         if (!ptr->map.enum_string) {
4631                 if (ptr->tail.next) {
4632                         ptr = ptr->tail.next;
4633                         /* Set ptr to the next real item (skip head) */
4634                         ptr++;
4635                 } else
4636                         return NULL;
4637         }
4638         return ptr;
4639 }
4640 
4641 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4642 {
4643         union trace_enum_map_item *ptr = v;
4644 
4645         /*
4646          * Paranoid! If ptr points to end, we don't want to increment past it.
4647          * This really should never happen.
4648          */
4649         ptr = update_enum_map(ptr);
4650         if (WARN_ON_ONCE(!ptr))
4651                 return NULL;
4652 
4653         ptr++;
4654 
4655         (*pos)++;
4656 
4657         ptr = update_enum_map(ptr);
4658 
4659         return ptr;
4660 }
4661 
4662 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4663 {
4664         union trace_enum_map_item *v;
4665         loff_t l = 0;
4666 
4667         mutex_lock(&trace_enum_mutex);
4668 
4669         v = trace_enum_maps;
4670         if (v)
4671                 v++;
4672 
4673         while (v && l < *pos) {
4674                 v = enum_map_next(m, v, &l);
4675         }
4676 
4677         return v;
4678 }
4679 
4680 static void enum_map_stop(struct seq_file *m, void *v)
4681 {
4682         mutex_unlock(&trace_enum_mutex);
4683 }
4684 
4685 static int enum_map_show(struct seq_file *m, void *v)
4686 {
4687         union trace_enum_map_item *ptr = v;
4688 
4689         seq_printf(m, "%s %ld (%s)\n",
4690                    ptr->map.enum_string, ptr->map.enum_value,
4691                    ptr->map.system);
4692 
4693         return 0;
4694 }
4695 
4696 static const struct seq_operations tracing_enum_map_seq_ops = {
4697         .start          = enum_map_start,
4698         .next           = enum_map_next,
4699         .stop           = enum_map_stop,
4700         .show           = enum_map_show,
4701 };
4702 
4703 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4704 {
4705         if (tracing_disabled)
4706                 return -ENODEV;
4707 
4708         return seq_open(filp, &tracing_enum_map_seq_ops);
4709 }
4710 
4711 static const struct file_operations tracing_enum_map_fops = {
4712         .open           = tracing_enum_map_open,
4713         .read           = seq_read,
4714         .llseek         = seq_lseek,
4715         .release        = seq_release,
4716 };
4717 
4718 static inline union trace_enum_map_item *
4719 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4720 {
4721         /* Return tail of array given the head */
4722         return ptr + ptr->head.length + 1;
4723 }
4724 
4725 static void
4726 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4727                            int len)
4728 {
4729         struct trace_enum_map **stop;
4730         struct trace_enum_map **map;
4731         union trace_enum_map_item *map_array;
4732         union trace_enum_map_item *ptr;
4733 
4734         stop = start + len;
4735 
4736         /*
4737          * The trace_enum_maps contains the map plus a head and tail item,
4738          * where the head holds the module and length of array, and the
4739          * tail holds a pointer to the next list.
4740          */
4741         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4742         if (!map_array) {
4743                 pr_warn("Unable to allocate trace enum mapping\n");
4744                 return;
4745         }
4746 
4747         mutex_lock(&trace_enum_mutex);
4748 
4749         if (!trace_enum_maps)
4750                 trace_enum_maps = map_array;
4751         else {
4752                 ptr = trace_enum_maps;
4753                 for (;;) {
4754                         ptr = trace_enum_jmp_to_tail(ptr);
4755                         if (!ptr->tail.next)
4756                                 break;
4757                         ptr = ptr->tail.next;
4758 
4759                 }
4760                 ptr->tail.next = map_array;
4761         }
4762         map_array->head.mod = mod;
4763         map_array->head.length = len;
4764         map_array++;
4765 
4766         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4767                 map_array->map = **map;
4768                 map_array++;
4769         }
4770         memset(map_array, 0, sizeof(*map_array));
4771 
4772         mutex_unlock(&trace_enum_mutex);
4773 }
4774 
4775 static void trace_create_enum_file(struct dentry *d_tracer)
4776 {
4777         trace_create_file("enum_map", 0444, d_tracer,
4778                           NULL, &tracing_enum_map_fops);
4779 }
4780 
4781 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4782 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4783 static inline void trace_insert_enum_map_file(struct module *mod,
4784                               struct trace_enum_map **start, int len) { }
4785 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4786 
4787 static void trace_insert_enum_map(struct module *mod,
4788                                   struct trace_enum_map **start, int len)
4789 {
4790         struct trace_enum_map **map;
4791 
4792         if (len <= 0)
4793                 return;
4794 
4795         map = start;
4796 
4797         trace_event_enum_update(map, len);
4798 
4799         trace_insert_enum_map_file(mod, start, len);
4800 }
4801 
4802 static ssize_t
4803 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4804                        size_t cnt, loff_t *ppos)
4805 {
4806         struct trace_array *tr = filp->private_data;
4807         char buf[MAX_TRACER_SIZE+2];
4808         int r;
4809 
4810         mutex_lock(&trace_types_lock);
4811         r = sprintf(buf, "%s\n", tr->current_trace->name);
4812         mutex_unlock(&trace_types_lock);
4813 
4814         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4815 }
4816 
4817 int tracer_init(struct tracer *t, struct trace_array *tr)
4818 {
4819         tracing_reset_online_cpus(&tr->trace_buffer);
4820         return t->init(tr);
4821 }
4822 
4823 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4824 {
4825         int cpu;
4826 
4827         for_each_tracing_cpu(cpu)
4828                 per_cpu_ptr(buf->data, cpu)->entries = val;
4829 }
4830 
4831 #ifdef CONFIG_TRACER_MAX_TRACE
4832 /* resize @tr's buffer to the size of @size_tr's entries */
4833 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4834                                         struct trace_buffer *size_buf, int cpu_id)
4835 {
4836         int cpu, ret = 0;
4837 
4838         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4839                 for_each_tracing_cpu(cpu) {
4840                         ret = ring_buffer_resize(trace_buf->buffer,
4841                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4842                         if (ret < 0)
4843                                 break;
4844                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4845                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4846                 }
4847         } else {
4848                 ret = ring_buffer_resize(trace_buf->buffer,
4849                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4850                 if (ret == 0)
4851                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4852                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4853         }
4854 
4855         return ret;
4856 }
4857 #endif /* CONFIG_TRACER_MAX_TRACE */
4858 
4859 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4860                                         unsigned long size, int cpu)
4861 {
4862         int ret;
4863 
4864         /*
4865          * If kernel or user changes the size of the ring buffer
4866          * we use the size that was given, and we can forget about
4867          * expanding it later.
4868          */
4869         ring_buffer_expanded = true;
4870 
4871         /* May be called before buffers are initialized */
4872         if (!tr->trace_buffer.buffer)
4873                 return 0;
4874 
4875         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4876         if (ret < 0)
4877                 return ret;
4878 
4879 #ifdef CONFIG_TRACER_MAX_TRACE
4880         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4881             !tr->current_trace->use_max_tr)
4882                 goto out;
4883 
4884         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4885         if (ret < 0) {
4886                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4887                                                      &tr->trace_buffer, cpu);
4888                 if (r < 0) {
4889                         /*
4890                          * AARGH! We are left with different
4891                          * size max buffer!!!!
4892                          * The max buffer is our "snapshot" buffer.
4893                          * When a tracer needs a snapshot (one of the
4894                          * latency tracers), it swaps the max buffer
4895                          * with the saved snap shot. We succeeded to
4896                          * update the size of the main buffer, but failed to
4897                          * update the size of the max buffer. But when we tried
4898                          * to reset the main buffer to the original size, we
4899                          * failed there too. This is very unlikely to
4900                          * happen, but if it does, warn and kill all
4901                          * tracing.
4902                          */
4903                         WARN_ON(1);
4904                         tracing_disabled = 1;
4905                 }
4906                 return ret;
4907         }
4908 
4909         if (cpu == RING_BUFFER_ALL_CPUS)
4910                 set_buffer_entries(&tr->max_buffer, size);
4911         else
4912                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4913 
4914  out:
4915 #endif /* CONFIG_TRACER_MAX_TRACE */
4916 
4917         if (cpu == RING_BUFFER_ALL_CPUS)
4918                 set_buffer_entries(&tr->trace_buffer, size);
4919         else
4920                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4921 
4922         return ret;
4923 }
4924 
4925 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4926                                           unsigned long size, int cpu_id)
4927 {
4928         int ret = size;
4929 
4930         mutex_lock(&trace_types_lock);
4931 
4932         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4933                 /* make sure, this cpu is enabled in the mask */
4934                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4935                         ret = -EINVAL;
4936                         goto out;
4937                 }
4938         }
4939 
4940         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4941         if (ret < 0)
4942                 ret = -ENOMEM;
4943 
4944 out:
4945         mutex_unlock(&trace_types_lock);
4946 
4947         return ret;
4948 }
4949 
4950 
4951 /**
4952  * tracing_update_buffers - used by tracing facility to expand ring buffers
4953  *
4954  * To save on memory when the tracing is never used on a system with it
4955  * configured in. The ring buffers are set to a minimum size. But once
4956  * a user starts to use the tracing facility, then they need to grow
4957  * to their default size.
4958  *
4959  * This function is to be called when a tracer is about to be used.
4960  */
4961 int tracing_update_buffers(void)
4962 {
4963         int ret = 0;
4964 
4965         mutex_lock(&trace_types_lock);
4966         if (!ring_buffer_expanded)
4967                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4968                                                 RING_BUFFER_ALL_CPUS);
4969         mutex_unlock(&trace_types_lock);
4970 
4971         return ret;
4972 }
4973 
4974 struct trace_option_dentry;
4975 
4976 static void
4977 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4978 
4979 /*
4980  * Used to clear out the tracer before deletion of an instance.
4981  * Must have trace_types_lock held.
4982  */
4983 static void tracing_set_nop(struct trace_array *tr)
4984 {
4985         if (tr->current_trace == &nop_trace)
4986                 return;
4987         
4988         tr->current_trace->enabled--;
4989 
4990         if (tr->current_trace->reset)
4991                 tr->current_trace->reset(tr);
4992 
4993         tr->current_trace = &nop_trace;
4994 }
4995 
4996 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4997 {
4998         /* Only enable if the directory has been created already. */
4999         if (!tr->dir)
5000                 return;
5001 
5002         create_trace_option_files(tr, t);
5003 }
5004 
5005 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5006 {
5007         struct tracer *t;
5008 #ifdef CONFIG_TRACER_MAX_TRACE
5009         bool had_max_tr;
5010 #endif
5011         int ret = 0;
5012 
5013         mutex_lock(&trace_types_lock);
5014 
5015         if (!ring_buffer_expanded) {
5016                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5017                                                 RING_BUFFER_ALL_CPUS);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022 
5023         for (t = trace_types; t; t = t->next) {
5024                 if (strcmp(t->name, buf) == 0)
5025                         break;
5026         }
5027         if (!t) {
5028                 ret = -EINVAL;
5029                 goto out;
5030         }
5031         if (t == tr->current_trace)
5032                 goto out;
5033 
5034         /* Some tracers are only allowed for the top level buffer */
5035         if (!trace_ok_for_array(t, tr)) {
5036                 ret = -EINVAL;
5037                 goto out;
5038         }
5039 
5040         /* If trace pipe files are being read, we can't change the tracer */
5041         if (tr->current_trace->ref) {
5042                 ret = -EBUSY;
5043                 goto out;
5044         }
5045 
5046         trace_branch_disable();
5047 
5048         tr->current_trace->enabled--;
5049 
5050         if (tr->current_trace->reset)
5051                 tr->current_trace->reset(tr);
5052 
5053         /* Current trace needs to be nop_trace before synchronize_sched */
5054         tr->current_trace = &nop_trace;
5055 
5056 #ifdef CONFIG_TRACER_MAX_TRACE
5057         had_max_tr = tr->allocated_snapshot;
5058 
5059         if (had_max_tr && !t->use_max_tr) {
5060                 /*
5061                  * We need to make sure that the update_max_tr sees that
5062                  * current_trace changed to nop_trace to keep it from
5063                  * swapping the buffers after we resize it.
5064                  * The update_max_tr is called from interrupts disabled
5065                  * so a synchronized_sched() is sufficient.
5066                  */
5067                 synchronize_sched();
5068                 free_snapshot(tr);
5069         }
5070 #endif
5071 
5072 #ifdef CONFIG_TRACER_MAX_TRACE
5073         if (t->use_max_tr && !had_max_tr) {
5074                 ret = alloc_snapshot(tr);
5075                 if (ret < 0)
5076                         goto out;
5077         }
5078 #endif
5079 
5080         if (t->init) {
5081                 ret = tracer_init(t, tr);
5082                 if (ret)
5083                         goto out;
5084         }
5085 
5086         tr->current_trace = t;
5087         tr->current_trace->enabled++;
5088         trace_branch_enable(tr);
5089  out:
5090         mutex_unlock(&trace_types_lock);
5091 
5092         return ret;
5093 }
5094 
5095 static ssize_t
5096 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5097                         size_t cnt, loff_t *ppos)
5098 {
5099         struct trace_array *tr = filp->private_data;
5100         char buf[MAX_TRACER_SIZE+1];
5101         int i;
5102         size_t ret;
5103         int err;
5104 
5105         ret = cnt;
5106 
5107         if (cnt > MAX_TRACER_SIZE)
5108                 cnt = MAX_TRACER_SIZE;
5109 
5110         if (copy_from_user(buf, ubuf, cnt))
5111                 return -EFAULT;
5112 
5113         buf[cnt] = 0;
5114 
5115         /* strip ending whitespace. */
5116         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5117                 buf[i] = 0;
5118 
5119         err = tracing_set_tracer(tr, buf);
5120         if (err)
5121                 return err;
5122 
5123         *ppos += ret;
5124 
5125         return ret;
5126 }
5127 
5128 static ssize_t
5129 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5130                    size_t cnt, loff_t *ppos)
5131 {
5132         char buf[64];
5133         int r;
5134 
5135         r = snprintf(buf, sizeof(buf), "%ld\n",
5136                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5137         if (r > sizeof(buf))
5138                 r = sizeof(buf);
5139         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5140 }
5141 
5142 static ssize_t
5143 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5144                     size_t cnt, loff_t *ppos)
5145 {
5146         unsigned long val;
5147         int ret;
5148 
5149         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5150         if (ret)
5151                 return ret;
5152 
5153         *ptr = val * 1000;
5154 
5155         return cnt;
5156 }
5157 
5158 static ssize_t
5159 tracing_thresh_read(struct file *filp, char __user *ubuf,
5160                     size_t cnt, loff_t *ppos)
5161 {
5162         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5163 }
5164 
5165 static ssize_t
5166 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5167                      size_t cnt, loff_t *ppos)
5168 {
5169         struct trace_array *tr = filp->private_data;
5170         int ret;
5171 
5172         mutex_lock(&trace_types_lock);
5173         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5174         if (ret < 0)
5175                 goto out;
5176 
5177         if (tr->current_trace->update_thresh) {
5178                 ret = tr->current_trace->update_thresh(tr);
5179                 if (ret < 0)
5180                         goto out;
5181         }
5182 
5183         ret = cnt;
5184 out:
5185         mutex_unlock(&trace_types_lock);
5186 
5187         return ret;
5188 }
5189 
5190 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5191 
5192 static ssize_t
5193 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5194                      size_t cnt, loff_t *ppos)
5195 {
5196         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5197 }
5198 
5199 static ssize_t
5200 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5201                       size_t cnt, loff_t *ppos)
5202 {
5203         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5204 }
5205 
5206 #endif
5207 
5208 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5209 {
5210         struct trace_array *tr = inode->i_private;
5211         struct trace_iterator *iter;
5212         int ret = 0;
5213 
5214         if (tracing_disabled)
5215                 return -ENODEV;
5216 
5217         if (trace_array_get(tr) < 0)
5218                 return -ENODEV;
5219 
5220         mutex_lock(&trace_types_lock);
5221 
5222         /* create a buffer to store the information to pass to userspace */
5223         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5224         if (!iter) {
5225                 ret = -ENOMEM;
5226                 __trace_array_put(tr);
5227                 goto out;
5228         }
5229 
5230         trace_seq_init(&iter->seq);
5231         iter->trace = tr->current_trace;
5232 
5233         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5234                 ret = -ENOMEM;
5235                 goto fail;
5236         }
5237 
5238         /* trace pipe does not show start of buffer */
5239         cpumask_setall(iter->started);
5240 
5241         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5242                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5243 
5244         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5245         if (trace_clocks[tr->clock_id].in_ns)
5246                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5247 
5248         iter->tr = tr;
5249         iter->trace_buffer = &tr->trace_buffer;
5250         iter->cpu_file = tracing_get_cpu(inode);
5251         mutex_init(&iter->mutex);
5252         filp->private_data = iter;
5253 
5254         if (iter->trace->pipe_open)
5255                 iter->trace->pipe_open(iter);
5256 
5257         nonseekable_open(inode, filp);
5258 
5259         tr->current_trace->ref++;
5260 out:
5261         mutex_unlock(&trace_types_lock);
5262         return ret;
5263 
5264 fail:
5265         kfree(iter->trace);
5266         kfree(iter);
5267         __trace_array_put(tr);
5268         mutex_unlock(&trace_types_lock);
5269         return ret;
5270 }
5271 
5272 static int tracing_release_pipe(struct inode *inode, struct file *file)
5273 {
5274         struct trace_iterator *iter = file->private_data;
5275         struct trace_array *tr = inode->i_private;
5276 
5277         mutex_lock(&trace_types_lock);
5278 
5279         tr->current_trace->ref--;
5280 
5281         if (iter->trace->pipe_close)
5282                 iter->trace->pipe_close(iter);
5283 
5284         mutex_unlock(&trace_types_lock);
5285 
5286         free_cpumask_var(iter->started);
5287         mutex_destroy(&iter->mutex);
5288         kfree(iter);
5289 
5290         trace_array_put(tr);
5291 
5292         return 0;
5293 }
5294 
5295 static unsigned int
5296 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5297 {
5298         struct trace_array *tr = iter->tr;
5299 
5300         /* Iterators are static, they should be filled or empty */
5301         if (trace_buffer_iter(iter, iter->cpu_file))
5302                 return POLLIN | POLLRDNORM;
5303 
5304         if (tr->trace_flags & TRACE_ITER_BLOCK)
5305                 /*
5306                  * Always select as readable when in blocking mode
5307                  */
5308                 return POLLIN | POLLRDNORM;
5309         else
5310                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5311                                              filp, poll_table);
5312 }
5313 
5314 static unsigned int
5315 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5316 {
5317         struct trace_iterator *iter = filp->private_data;
5318 
5319         return trace_poll(iter, filp, poll_table);
5320 }
5321 
5322 /* Must be called with iter->mutex held. */
5323 static int tracing_wait_pipe(struct file *filp)
5324 {
5325         struct trace_iterator *iter = filp->private_data;
5326         int ret;
5327 
5328         while (trace_empty(iter)) {
5329 
5330                 if ((filp->f_flags & O_NONBLOCK)) {
5331                         return -EAGAIN;
5332                 }
5333 
5334                 /*
5335                  * We block until we read something and tracing is disabled.
5336                  * We still block if tracing is disabled, but we have never
5337                  * read anything. This allows a user to cat this file, and
5338                  * then enable tracing. But after we have read something,
5339                  * we give an EOF when tracing is again disabled.
5340                  *
5341                  * iter->pos will be 0 if we haven't read anything.
5342                  */
5343                 if (!tracing_is_on() && iter->pos)
5344                         break;
5345 
5346                 mutex_unlock(&iter->mutex);
5347 
5348                 ret = wait_on_pipe(iter, false);
5349 
5350                 mutex_lock(&iter->mutex);
5351 
5352                 if (ret)
5353                         return ret;
5354         }
5355 
5356         return 1;
5357 }
5358 
5359 /*
5360  * Consumer reader.
5361  */
5362 static ssize_t
5363 tracing_read_pipe(struct file *filp, char __user *ubuf,
5364                   size_t cnt, loff_t *ppos)
5365 {
5366         struct trace_iterator *iter = filp->private_data;
5367         ssize_t sret;
5368 
5369         /*
5370          * Avoid more than one consumer on a single file descriptor
5371          * This is just a matter of traces coherency, the ring buffer itself
5372          * is protected.
5373          */
5374         mutex_lock(&iter->mutex);
5375 
5376         /* return any leftover data */
5377         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5378         if (sret != -EBUSY)
5379                 goto out;
5380 
5381         trace_seq_init(&iter->seq);
5382 
5383         if (iter->trace->read) {
5384                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5385                 if (sret)
5386                         goto out;
5387         }
5388 
5389 waitagain:
5390         sret = tracing_wait_pipe(filp);
5391         if (sret <= 0)
5392                 goto out;
5393 
5394         /* stop when tracing is finished */
5395         if (trace_empty(iter)) {
5396                 sret = 0;
5397                 goto out;
5398         }
5399 
5400         if (cnt >= PAGE_SIZE)
5401                 cnt = PAGE_SIZE - 1;
5402 
5403         /* reset all but tr, trace, and overruns */
5404         memset(&iter->seq, 0,
5405                sizeof(struct trace_iterator) -
5406                offsetof(struct trace_iterator, seq));
5407         cpumask_clear(iter->started);
5408         iter->pos = -1;
5409 
5410         trace_event_read_lock();
5411         trace_access_lock(iter->cpu_file);
5412         while (trace_find_next_entry_inc(iter) != NULL) {
5413                 enum print_line_t ret;
5414                 int save_len = iter->seq.seq.len;
5415 
5416                 ret = print_trace_line(iter);
5417                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5418                         /* don't print partial lines */
5419                         iter->seq.seq.len = save_len;
5420                         break;
5421                 }
5422                 if (ret != TRACE_TYPE_NO_CONSUME)
5423                         trace_consume(iter);
5424 
5425                 if (trace_seq_used(&iter->seq) >= cnt)
5426                         break;
5427 
5428                 /*
5429                  * Setting the full flag means we reached the trace_seq buffer
5430                  * size and we should leave by partial output condition above.
5431                  * One of the trace_seq_* functions is not used properly.
5432                  */
5433                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5434                           iter->ent->type);
5435         }
5436         trace_access_unlock(iter->cpu_file);
5437         trace_event_read_unlock();
5438 
5439         /* Now copy what we have to the user */
5440         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5441         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5442                 trace_seq_init(&iter->seq);
5443 
5444         /*
5445          * If there was nothing to send to user, in spite of consuming trace
5446          * entries, go back to wait for more entries.
5447          */
5448         if (sret == -EBUSY)
5449                 goto waitagain;
5450 
5451 out:
5452         mutex_unlock(&iter->mutex);
5453 
5454         return sret;
5455 }
5456 
5457 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5458                                      unsigned int idx)
5459 {
5460         __free_page(spd->pages[idx]);
5461 }
5462 
5463 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5464         .can_merge              = 0,
5465         .confirm                = generic_pipe_buf_confirm,
5466         .release                = generic_pipe_buf_release,
5467         .steal                  = generic_pipe_buf_steal,
5468         .get                    = generic_pipe_buf_get,
5469 };
5470 
5471 static size_t
5472 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5473 {
5474         size_t count;
5475         int save_len;
5476         int ret;
5477 
5478         /* Seq buffer is page-sized, exactly what we need. */
5479         for (;;) {
5480                 save_len = iter->seq.seq.len;
5481                 ret = print_trace_line(iter);
5482 
5483                 if (trace_seq_has_overflowed(&iter->seq)) {
5484                         iter->seq.seq.len = save_len;
5485                         break;
5486                 }
5487 
5488                 /*
5489                  * This should not be hit, because it should only
5490                  * be set if the iter->seq overflowed. But check it
5491                  * anyway to be safe.
5492                  */
5493                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5494                         iter->seq.seq.len = save_len;
5495                         break;
5496                 }
5497 
5498                 count = trace_seq_used(&iter->seq) - save_len;
5499                 if (rem < count) {
5500                         rem = 0;
5501                         iter->seq.seq.len = save_len;
5502                         break;
5503                 }
5504 
5505                 if (ret != TRACE_TYPE_NO_CONSUME)
5506                         trace_consume(iter);
5507                 rem -= count;
5508                 if (!trace_find_next_entry_inc(iter))   {
5509                         rem = 0;
5510                         iter->ent = NULL;
5511                         break;
5512                 }
5513         }
5514 
5515         return rem;
5516 }
5517 
5518 static ssize_t tracing_splice_read_pipe(struct file *filp,
5519                                         loff_t *ppos,
5520                                         struct pipe_inode_info *pipe,
5521                                         size_t len,
5522                                         unsigned int flags)
5523 {
5524         struct page *pages_def[PIPE_DEF_BUFFERS];
5525         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5526         struct trace_iterator *iter = filp->private_data;
5527         struct splice_pipe_desc spd = {
5528                 .pages          = pages_def,
5529                 .partial        = partial_def,
5530                 .nr_pages       = 0, /* This gets updated below. */
5531                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5532                 .flags          = flags,
5533                 .ops            = &tracing_pipe_buf_ops,
5534                 .spd_release    = tracing_spd_release_pipe,
5535         };
5536         ssize_t ret;
5537         size_t rem;
5538         unsigned int i;
5539 
5540         if (splice_grow_spd(pipe, &spd))
5541                 return -ENOMEM;
5542 
5543         mutex_lock(&iter->mutex);
5544 
5545         if (iter->trace->splice_read) {
5546                 ret = iter->trace->splice_read(iter, filp,
5547                                                ppos, pipe, len, flags);
5548                 if (ret)
5549                         goto out_err;
5550         }
5551 
5552         ret = tracing_wait_pipe(filp);
5553         if (ret <= 0)
5554                 goto out_err;
5555 
5556         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5557                 ret = -EFAULT;
5558                 goto out_err;
5559         }
5560 
5561         trace_event_read_lock();
5562         trace_access_lock(iter->cpu_file);
5563 
5564         /* Fill as many pages as possible. */
5565         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5566                 spd.pages[i] = alloc_page(GFP_KERNEL);
5567                 if (!spd.pages[i])
5568                         break;
5569 
5570                 rem = tracing_fill_pipe_page(rem, iter);
5571 
5572                 /* Copy the data into the page, so we can start over. */
5573                 ret = trace_seq_to_buffer(&iter->seq,
5574                                           page_address(spd.pages[i]),
5575                                           trace_seq_used(&iter->seq));
5576                 if (ret < 0) {
5577                         __free_page(spd.pages[i]);
5578                         break;
5579                 }
5580                 spd.partial[i].offset = 0;
5581                 spd.partial[i].len = trace_seq_used(&iter->seq);
5582 
5583                 trace_seq_init(&iter->seq);
5584         }
5585 
5586         trace_access_unlock(iter->cpu_file);
5587         trace_event_read_unlock();
5588         mutex_unlock(&iter->mutex);
5589 
5590         spd.nr_pages = i;
5591 
5592         if (i)
5593                 ret = splice_to_pipe(pipe, &spd);
5594         else
5595                 ret = 0;
5596 out:
5597         splice_shrink_spd(&spd);
5598         return ret;
5599 
5600 out_err:
5601         mutex_unlock(&iter->mutex);
5602         goto out;
5603 }
5604 
5605 static ssize_t
5606 tracing_entries_read(struct file *filp, char __user *ubuf,
5607                      size_t cnt, loff_t *ppos)
5608 {
5609         struct inode *inode = file_inode(filp);
5610         struct trace_array *tr = inode->i_private;
5611         int cpu = tracing_get_cpu(inode);
5612         char buf[64];
5613         int r = 0;
5614         ssize_t ret;
5615 
5616         mutex_lock(&trace_types_lock);
5617 
5618         if (cpu == RING_BUFFER_ALL_CPUS) {
5619                 int cpu, buf_size_same;
5620                 unsigned long size;
5621 
5622                 size = 0;
5623                 buf_size_same = 1;
5624                 /* check if all cpu sizes are same */
5625                 for_each_tracing_cpu(cpu) {
5626                         /* fill in the size from first enabled cpu */
5627                         if (size == 0)
5628                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5629                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5630                                 buf_size_same = 0;
5631                                 break;
5632                         }
5633                 }
5634 
5635                 if (buf_size_same) {
5636                         if (!ring_buffer_expanded)
5637                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5638                                             size >> 10,
5639                                             trace_buf_size >> 10);
5640                         else
5641                                 r = sprintf(buf, "%lu\n", size >> 10);
5642                 } else
5643                         r = sprintf(buf, "X\n");
5644         } else
5645                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5646 
5647         mutex_unlock(&trace_types_lock);
5648 
5649         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5650         return ret;
5651 }
5652 
5653 static ssize_t
5654 tracing_entries_write(struct file *filp, const char __user *ubuf,
5655                       size_t cnt, loff_t *ppos)
5656 {
5657         struct inode *inode = file_inode(filp);
5658         struct trace_array *tr = inode->i_private;
5659         unsigned long val;
5660         int ret;
5661 
5662         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5663         if (ret)
5664                 return ret;
5665 
5666         /* must have at least 1 entry */
5667         if (!val)
5668                 return -EINVAL;
5669 
5670         /* value is in KB */
5671         val <<= 10;
5672         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5673         if (ret < 0)
5674                 return ret;
5675 
5676         *ppos += cnt;
5677 
5678         return cnt;
5679 }
5680 
5681 static ssize_t
5682 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5683                                 size_t cnt, loff_t *ppos)
5684 {
5685         struct trace_array *tr = filp->private_data;
5686         char buf[64];
5687         int r, cpu;
5688         unsigned long size = 0, expanded_size = 0;
5689 
5690         mutex_lock(&trace_types_lock);
5691         for_each_tracing_cpu(cpu) {
5692                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5693                 if (!ring_buffer_expanded)
5694                         expanded_size += trace_buf_size >> 10;
5695         }
5696         if (ring_buffer_expanded)
5697                 r = sprintf(buf, "%lu\n", size);
5698         else
5699                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5700         mutex_unlock(&trace_types_lock);
5701 
5702         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5703 }
5704 
5705 static ssize_t
5706 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5707                           size_t cnt, loff_t *ppos)
5708 {
5709         /*
5710          * There is no need to read what the user has written, this function
5711          * is just to make sure that there is no error when "echo" is used
5712          */
5713 
5714         *ppos += cnt;
5715 
5716         return cnt;
5717 }
5718 
5719 static int
5720 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5721 {
5722         struct trace_array *tr = inode->i_private;
5723 
5724         /* disable tracing ? */
5725         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5726                 tracer_tracing_off(tr);
5727         /* resize the ring buffer to 0 */
5728         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5729 
5730         trace_array_put(tr);
5731 
5732         return 0;
5733 }
5734 
5735 static ssize_t
5736 tracing_mark_write(struct file *filp, const char __user *ubuf,
5737                                         size_t cnt, loff_t *fpos)
5738 {
5739         struct trace_array *tr = filp->private_data;
5740         struct ring_buffer_event *event;
5741         struct ring_buffer *buffer;
5742         struct print_entry *entry;
5743         unsigned long irq_flags;
5744         const char faulted[] = "<faulted>";
5745         ssize_t written;
5746         int size;
5747         int len;
5748 
5749 /* Used in tracing_mark_raw_write() as well */
5750 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5751 
5752         if (tracing_disabled)
5753                 return -EINVAL;
5754 
5755         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5756                 return -EINVAL;
5757 
5758         if (cnt > TRACE_BUF_SIZE)
5759                 cnt = TRACE_BUF_SIZE;
5760 
5761         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5762 
5763         local_save_flags(irq_flags);
5764         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5765 
5766         /* If less than "<faulted>", then make sure we can still add that */
5767         if (cnt < FAULTED_SIZE)
5768                 size += FAULTED_SIZE - cnt;
5769 
5770         buffer = tr->trace_buffer.buffer;
5771         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5772                                             irq_flags, preempt_count());
5773         if (unlikely(!event))
5774                 /* Ring buffer disabled, return as if not open for write */
5775                 return -EBADF;
5776 
5777         entry = ring_buffer_event_data(event);
5778         entry->ip = _THIS_IP_;
5779 
5780         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5781         if (len) {
5782                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5783                 cnt = FAULTED_SIZE;
5784                 written = -EFAULT;
5785         } else
5786                 written = cnt;
5787         len = cnt;
5788 
5789         if (entry->buf[cnt - 1] != '\n') {
5790                 entry->buf[cnt] = '\n';
5791                 entry->buf[cnt + 1] = '\0';
5792         } else
5793                 entry->buf[cnt] = '\0';
5794 
5795         __buffer_unlock_commit(buffer, event);
5796 
5797         if (written > 0)
5798                 *fpos += written;
5799 
5800         return written;
5801 }
5802 
5803 /* Limit it for now to 3K (including tag) */
5804 #define RAW_DATA_MAX_SIZE (1024*3)
5805 
5806 static ssize_t
5807 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5808                                         size_t cnt, loff_t *fpos)
5809 {
5810         struct trace_array *tr = filp->private_data;
5811         struct ring_buffer_event *event;
5812         struct ring_buffer *buffer;
5813         struct raw_data_entry *entry;
5814         const char faulted[] = "<faulted>";
5815         unsigned long irq_flags;
5816         ssize_t written;
5817         int size;
5818         int len;
5819 
5820 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5821 
5822         if (tracing_disabled)
5823                 return -EINVAL;
5824 
5825         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5826                 return -EINVAL;
5827 
5828         /* The marker must at least have a tag id */
5829         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5830                 return -EINVAL;
5831 
5832         if (cnt > TRACE_BUF_SIZE)
5833                 cnt = TRACE_BUF_SIZE;
5834 
5835         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5836 
5837         local_save_flags(irq_flags);
5838         size = sizeof(*entry) + cnt;
5839         if (cnt < FAULT_SIZE_ID)
5840                 size += FAULT_SIZE_ID - cnt;
5841 
5842         buffer = tr->trace_buffer.buffer;
5843         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5844                                             irq_flags, preempt_count());
5845         if (!event)
5846                 /* Ring buffer disabled, return as if not open for write */
5847                 return -EBADF;
5848 
5849         entry = ring_buffer_event_data(event);
5850 
5851         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5852         if (len) {
5853                 entry->id = -1;
5854                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5855                 written = -EFAULT;
5856         } else
5857                 written = cnt;
5858 
5859         __buffer_unlock_commit(buffer, event);
5860 
5861         if (written > 0)
5862                 *fpos += written;
5863 
5864         return written;
5865 }
5866 
5867 static int tracing_clock_show(struct seq_file *m, void *v)
5868 {
5869         struct trace_array *tr = m->private;
5870         int i;
5871 
5872         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5873                 seq_printf(m,
5874                         "%s%s%s%s", i ? " " : "",
5875                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5876                         i == tr->clock_id ? "]" : "");
5877         seq_putc(m, '\n');
5878 
5879         return 0;
5880 }
5881 
5882 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5883 {
5884         int i;
5885 
5886         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5887                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5888                         break;
5889         }
5890         if (i == ARRAY_SIZE(trace_clocks))
5891                 return -EINVAL;
5892 
5893         mutex_lock(&trace_types_lock);
5894 
5895         tr->clock_id = i;
5896 
5897         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5898 
5899         /*
5900          * New clock may not be consistent with the previous clock.
5901          * Reset the buffer so that it doesn't have incomparable timestamps.
5902          */
5903         tracing_reset_online_cpus(&tr->trace_buffer);
5904 
5905 #ifdef CONFIG_TRACER_MAX_TRACE
5906         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5907                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5908         tracing_reset_online_cpus(&tr->max_buffer);
5909 #endif
5910 
5911         mutex_unlock(&trace_types_lock);
5912 
5913         return 0;
5914 }
5915 
5916 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5917                                    size_t cnt, loff_t *fpos)
5918 {
5919         struct seq_file *m = filp->private_data;
5920         struct trace_array *tr = m->private;
5921         char buf[64];
5922         const char *clockstr;
5923         int ret;
5924 
5925         if (cnt >= sizeof(buf))
5926                 return -EINVAL;
5927 
5928         if (copy_from_user(buf, ubuf, cnt))
5929                 return -EFAULT;
5930 
5931         buf[cnt] = 0;
5932 
5933         clockstr = strstrip(buf);
5934 
5935         ret = tracing_set_clock(tr, clockstr);
5936         if (ret)
5937                 return ret;
5938 
5939         *fpos += cnt;
5940 
5941         return cnt;
5942 }
5943 
5944 static int tracing_clock_open(struct inode *inode, struct file *file)
5945 {
5946         struct trace_array *tr = inode->i_private;
5947         int ret;
5948 
5949         if (tracing_disabled)
5950                 return -ENODEV;
5951 
5952         if (trace_array_get(tr))
5953                 return -ENODEV;
5954 
5955         ret = single_open(file, tracing_clock_show, inode->i_private);
5956         if (ret < 0)
5957                 trace_array_put(tr);
5958 
5959         return ret;
5960 }
5961 
5962 struct ftrace_buffer_info {
5963         struct trace_iterator   iter;
5964         void                    *spare;
5965         unsigned int            read;
5966 };
5967 
5968 #ifdef CONFIG_TRACER_SNAPSHOT
5969 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5970 {
5971         struct trace_array *tr = inode->i_private;
5972         struct trace_iterator *iter;
5973         struct seq_file *m;
5974         int ret = 0;
5975 
5976         if (trace_array_get(tr) < 0)
5977                 return -ENODEV;
5978 
5979         if (file->f_mode & FMODE_READ) {
5980                 iter = __tracing_open(inode, file, true);
5981                 if (IS_ERR(iter))
5982                         ret = PTR_ERR(iter);
5983         } else {
5984                 /* Writes still need the seq_file to hold the private data */
5985                 ret = -ENOMEM;
5986                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5987                 if (!m)
5988                         goto out;
5989                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5990                 if (!iter) {
5991                         kfree(m);
5992                         goto out;
5993                 }
5994                 ret = 0;
5995 
5996                 iter->tr = tr;
5997                 iter->trace_buffer = &tr->max_buffer;
5998                 iter->cpu_file = tracing_get_cpu(inode);
5999                 m->private = iter;
6000                 file->private_data = m;
6001         }
6002 out:
6003         if (ret < 0)
6004                 trace_array_put(tr);
6005 
6006         return ret;
6007 }
6008 
6009 static ssize_t
6010 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6011                        loff_t *ppos)
6012 {
6013         struct seq_file *m = filp->private_data;
6014         struct trace_iterator *iter = m->private;
6015         struct trace_array *tr = iter->tr;
6016         unsigned long val;
6017         int ret;
6018 
6019         ret = tracing_update_buffers();
6020         if (ret < 0)
6021                 return ret;
6022 
6023         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6024         if (ret)
6025                 return ret;
6026 
6027         mutex_lock(&trace_types_lock);
6028 
6029         if (tr->current_trace->use_max_tr) {
6030                 ret = -EBUSY;
6031                 goto out;
6032         }
6033 
6034         switch (val) {
6035         case 0:
6036                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6037                         ret = -EINVAL;
6038                         break;
6039                 }
6040                 if (tr->allocated_snapshot)
6041                         free_snapshot(tr);
6042                 break;
6043         case 1:
6044 /* Only allow per-cpu swap if the ring buffer supports it */
6045 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6046                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6047                         ret = -EINVAL;
6048                         break;
6049                 }
6050 #endif
6051                 if (!tr->allocated_snapshot) {
6052