kernel/trace/trace.c (karo-tx-linux.git, at commit "tracing: Make sure RCU is watching before calling a stack trace")
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will be set to zero if the initialization
95  * of the tracer is successful. That is the only place that sets
96  * it back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
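/*
 * Editorial worked example (not part of the original file): ns2usecs()
 * rounds to the nearest microsecond by adding 500 ns before dividing,
 * so ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */
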
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task and @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid already is +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
470
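/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * how a tracefs pid file might wire the trace_pid_start/next/show helpers
 * above into its seq_file operations. The example_* names are hypothetical;
 * the real users (e.g. the set_ftrace_pid and set_event_pid files) also do
 * locking and per-instance pid_list lookup in their start/stop callbacks.
 */
#ifdef TRACE_PID_SEQ_EXAMPLE
static void *example_pid_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_sops = {
	.start	= example_pid_start,
	.next	= example_pid_next,
	.stop	= example_pid_stop,
	.show	= trace_pid_show,
};
#endif /* TRACE_PID_SEQ_EXAMPLE */
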
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate the array: the write is an all-or-nothing
492          * operation. A new array is always created when the user adds
493          * new pids. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
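/*
 * Editorial note (not part of the original file): trace_pid_write() is the
 * backend for the pid filter files in tracefs (e.g. set_event_pid and
 * set_ftrace_pid), where a write such as "echo 123 456 > set_event_pid"
 * builds a new pid_list. Because a fresh list is always built, a parse
 * error leaves the previously installed list untouched.
 */
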
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so that it can be used in fast paths
592  * such as the irqsoff tracer, but it may be inaccurate due to races.
593  * If you need to know the accurate state, use tracing_is_on(), which
594  * is a little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384.
613  * If a dump on oops happens, it is much nicer not to have
614  * to wait for all that output. In any case, this is configurable
615  * at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (which are returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be rewritten
640  *      by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
710
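/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a consuming reader of a single cpu buffer brackets its reads with the
 * primitives above. The example_* name is hypothetical; the real users are
 * the trace file read and splice paths later in this file.
 */
#ifdef TRACE_ACCESS_LOCK_EXAMPLE
static void example_consume_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
		; /* decode ring_buffer_event_data(event) here */
	trace_access_unlock(cpu);
}
#endif /* TRACE_ACCESS_LOCK_EXAMPLE */
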
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff) that just want to
767          * know if the ring buffer has been disabled, but can handle
768          * races where it gets disabled while we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing,
939  * basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer. Instead, we resize it, because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff) that just want to
1055          * know if the ring buffer has been disabled, but can handle
1056          * races where it gets disabled while we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show the real state of the ring buffer
1087  * @tr: the trace array to check
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the enums were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156
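/*
 * Editorial note (not part of the original file): with "#define C(a, b) b",
 * an entry in TRACE_FLAGS (defined in trace.h) such as
 * C(PRINT_PARENT, "print-parent") expands to just its string, so the array
 * above becomes { "print-parent", ..., NULL } in enum-bit order.
 */
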
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * If the parser did not finish with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
1661
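/*
 * Illustrative sketch (not part of the upstream file): a minimal tracer
 * plugin as it could be handed to register_tracer() above.  The
 * "nop_like" names are hypothetical; in-tree tracers such as the nop or
 * function tracers follow the same shape from their own core_initcall.
 */
static int nop_like_init(struct trace_array *tr)
{
	/* Nothing to set up for this sketch. */
	return 0;
}

static void nop_like_reset(struct trace_array *tr)
{
}

static struct tracer nop_like_tracer __read_mostly = {
	.name	= "nop_like",
	.init	= nop_like_init,
	.reset	= nop_like_reset,
};

static int __init nop_like_register(void)
{
	return register_tracer(&nop_like_tracer);
}
core_initcall(nop_like_register);
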
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717         unsigned *map_cmdline_to_pid;
1718         unsigned cmdline_num;
1719         int cmdline_idx;
1720         char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporarily disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738                                     struct saved_cmdlines_buffer *s)
1739 {
1740         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741                                         GFP_KERNEL);
1742         if (!s->map_cmdline_to_pid)
1743                 return -ENOMEM;
1744
1745         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746         if (!s->saved_cmdlines) {
1747                 kfree(s->map_cmdline_to_pid);
1748                 return -ENOMEM;
1749         }
1750
1751         s->cmdline_idx = 0;
1752         s->cmdline_num = val;
1753         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754                sizeof(s->map_pid_to_cmdline));
1755         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756                val * sizeof(*s->map_cmdline_to_pid));
1757
1758         return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763         int ret;
1764
1765         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766         if (!savedcmd)
1767                 return -ENOMEM;
1768
1769         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770         if (ret < 0) {
1771                 kfree(savedcmd);
1772                 savedcmd = NULL;
1773                 return -ENOMEM;
1774         }
1775
1776         return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781         return global_trace.stop_count;
1782 }
1783
1784 /**
1785  * tracing_start - quick start of the tracer
1786  *
1787  * If tracing is enabled but was stopped by tracing_stop,
1788  * this will start the tracer back up.
1789  */
1790 void tracing_start(void)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         if (tracing_disabled)
1796                 return;
1797
1798         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799         if (--global_trace.stop_count) {
1800                 if (global_trace.stop_count < 0) {
1801                         /* Someone screwed up their debugging */
1802                         WARN_ON_ONCE(1);
1803                         global_trace.stop_count = 0;
1804                 }
1805                 goto out;
1806         }
1807
1808         /* Prevent the buffers from switching */
1809         arch_spin_lock(&global_trace.max_lock);
1810
1811         buffer = global_trace.trace_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816         buffer = global_trace.max_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821         arch_spin_unlock(&global_trace.max_lock);
1822
1823  out:
1824         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829         struct ring_buffer *buffer;
1830         unsigned long flags;
1831
1832         if (tracing_disabled)
1833                 return;
1834
1835         /* If global, we need to also start the max tracer */
1836         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837                 return tracing_start();
1838
1839         raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841         if (--tr->stop_count) {
1842                 if (tr->stop_count < 0) {
1843                         /* Someone screwed up their debugging */
1844                         WARN_ON_ONCE(1);
1845                         tr->stop_count = 0;
1846                 }
1847                 goto out;
1848         }
1849
1850         buffer = tr->trace_buffer.buffer;
1851         if (buffer)
1852                 ring_buffer_record_enable(buffer);
1853
1854  out:
1855         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859  * tracing_stop - quick stop of the tracer
1860  *
1861  * Light weight way to stop tracing. Use in conjunction with
1862  * tracing_start.
1863  */
1864 void tracing_stop(void)
1865 {
1866         struct ring_buffer *buffer;
1867         unsigned long flags;
1868
1869         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870         if (global_trace.stop_count++)
1871                 goto out;
1872
1873         /* Prevent the buffers from switching */
1874         arch_spin_lock(&global_trace.max_lock);
1875
1876         buffer = global_trace.trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881         buffer = global_trace.max_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886         arch_spin_unlock(&global_trace.max_lock);
1887
1888  out:
1889         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
1891
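/*
 * Illustrative sketch (not part of the upstream file): tracing_stop() and
 * tracing_start() nest via stop_count, so a caller that wants the buffers
 * quiesced around a critical section simply pairs them.  The function
 * name below is hypothetical.
 */
static void example_quiesce_tracing(void)
{
	tracing_stop();
	/*
	 * ... inspect or dump the ring buffers while no new events are
	 * being recorded ...
	 */
	tracing_start();
}
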
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894         struct ring_buffer *buffer;
1895         unsigned long flags;
1896
1897         /* If global, we need to also stop the max tracer */
1898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899                 return tracing_stop();
1900
1901         raw_spin_lock_irqsave(&tr->start_lock, flags);
1902         if (tr->stop_count++)
1903                 goto out;
1904
1905         buffer = tr->trace_buffer.buffer;
1906         if (buffer)
1907                 ring_buffer_record_disable(buffer);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 void trace_stop_cmdline_recording(void);
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
1994
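/*
 * Illustrative sketch (not part of the upstream file): callers resolve a
 * pid to the last recorded comm by passing a TASK_COMM_LEN sized buffer,
 * much like the output code in trace_output.c does.  The function name
 * below is hypothetical.
 */
static void example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}
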
1995 void tracing_record_cmdline(struct task_struct *tsk)
1996 {
1997         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998                 return;
1999
2000         if (!__this_cpu_read(trace_cmdline_save))
2001                 return;
2002
2003         if (trace_save_cmdline(tsk))
2004                 __this_cpu_write(trace_cmdline_save, false);
2005 }
2006
2007 /*
2008  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2009  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2010  * simplifies those functions and keeps them in sync.
2011  */
2012 enum print_line_t trace_handle_return(struct trace_seq *s)
2013 {
2014         return trace_seq_has_overflowed(s) ?
2015                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2016 }
2017 EXPORT_SYMBOL_GPL(trace_handle_return);
2018
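/*
 * Illustrative sketch (not part of the upstream file): a typical event
 * output callback builds its line with trace_seq_*() calls and lets
 * trace_handle_return() translate any overflow into the right return
 * value, as the handlers in trace_output.c do.  The function name below
 * is hypothetical.
 */
static enum print_line_t
example_trace_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event on cpu %d\n", iter->cpu);

	return trace_handle_return(&iter->seq);
}
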
2019 void
2020 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021                              int pc)
2022 {
2023         struct task_struct *tsk = current;
2024
2025         entry->preempt_count            = pc & 0xff;
2026         entry->pid                      = (tsk) ? tsk->pid : 0;
2027         entry->flags =
2028 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2029                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2030 #else
2031                 TRACE_FLAG_IRQS_NOSUPPORT |
2032 #endif
2033                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2034                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2035                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2036                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2037                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2038 }
2039 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2040
2041 struct ring_buffer_event *
2042 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043                           int type,
2044                           unsigned long len,
2045                           unsigned long flags, int pc)
2046 {
2047         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 }
2049
2050 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2051 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2052 static int trace_buffered_event_ref;
2053
2054 /**
2055  * trace_buffered_event_enable - enable buffering events
2056  *
2057  * When events are being filtered, it is quicker to use a temporary
2058  * buffer to write the event data into if there's a likely chance
2059  * that it will not be committed. The discard of the ring buffer
2060  * is not as fast as committing, and is much slower than copying
2061  * a commit.
2062  *
2063  * When an event is to be filtered, allocate per cpu buffers to
2064  * write the event data into, and if the event is filtered and discarded
2065  * it is simply dropped, otherwise, the entire data is to be committed
2066  * in one shot.
2067  */
2068 void trace_buffered_event_enable(void)
2069 {
2070         struct ring_buffer_event *event;
2071         struct page *page;
2072         int cpu;
2073
2074         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2075
2076         if (trace_buffered_event_ref++)
2077                 return;
2078
2079         for_each_tracing_cpu(cpu) {
2080                 page = alloc_pages_node(cpu_to_node(cpu),
2081                                         GFP_KERNEL | __GFP_NORETRY, 0);
2082                 if (!page)
2083                         goto failed;
2084
2085                 event = page_address(page);
2086                 memset(event, 0, sizeof(*event));
2087
2088                 per_cpu(trace_buffered_event, cpu) = event;
2089
2090                 preempt_disable();
2091                 if (cpu == smp_processor_id() &&
2092                     this_cpu_read(trace_buffered_event) !=
2093                     per_cpu(trace_buffered_event, cpu))
2094                         WARN_ON_ONCE(1);
2095                 preempt_enable();
2096         }
2097
2098         return;
2099  failed:
2100         trace_buffered_event_disable();
2101 }
2102
2103 static void enable_trace_buffered_event(void *data)
2104 {
2105         /* Probably not needed, but do it anyway */
2106         smp_rmb();
2107         this_cpu_dec(trace_buffered_event_cnt);
2108 }
2109
2110 static void disable_trace_buffered_event(void *data)
2111 {
2112         this_cpu_inc(trace_buffered_event_cnt);
2113 }
2114
2115 /**
2116  * trace_buffered_event_disable - disable buffering events
2117  *
2118  * When a filter is removed, it is faster to not use the buffered
2119  * events, and to commit directly into the ring buffer. Free up
2120  * the temp buffers when there are no more users. This requires
2121  * special synchronization with current events.
2122  */
2123 void trace_buffered_event_disable(void)
2124 {
2125         int cpu;
2126
2127         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2128
2129         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130                 return;
2131
2132         if (--trace_buffered_event_ref)
2133                 return;
2134
2135         preempt_disable();
2136         /* For each CPU, set the buffer as used. */
2137         smp_call_function_many(tracing_buffer_mask,
2138                                disable_trace_buffered_event, NULL, 1);
2139         preempt_enable();
2140
2141         /* Wait for all current users to finish */
2142         synchronize_sched();
2143
2144         for_each_tracing_cpu(cpu) {
2145                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2146                 per_cpu(trace_buffered_event, cpu) = NULL;
2147         }
2148         /*
2149          * Make sure trace_buffered_event is NULL before clearing
2150          * trace_buffered_event_cnt.
2151          */
2152         smp_wmb();
2153
2154         preempt_disable();
2155         /* Do the work on each cpu */
2156         smp_call_function_many(tracing_buffer_mask,
2157                                enable_trace_buffered_event, NULL, 1);
2158         preempt_enable();
2159 }
2160
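/*
 * Illustrative sketch (not part of the upstream file): the reference
 * counted enable/disable pair is meant to bracket the lifetime of an
 * event filter while event_mutex is held, roughly as the filter code in
 * trace_events_filter.c uses it.  The function names are hypothetical.
 */
static void example_filter_attached(void)
{
	lockdep_assert_held(&event_mutex);
	trace_buffered_event_enable();
}

static void example_filter_detached(void)
{
	lockdep_assert_held(&event_mutex);
	trace_buffered_event_disable();
}
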
2161 static struct ring_buffer *temp_buffer;
2162
2163 struct ring_buffer_event *
2164 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2165                           struct trace_event_file *trace_file,
2166                           int type, unsigned long len,
2167                           unsigned long flags, int pc)
2168 {
2169         struct ring_buffer_event *entry;
2170         int val;
2171
2172         *current_rb = trace_file->tr->trace_buffer.buffer;
2173
2174         if ((trace_file->flags &
2175              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2176             (entry = this_cpu_read(trace_buffered_event))) {
2177                 /* Try to use the per cpu buffer first */
2178                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2179                 if (val == 1) {
2180                         trace_event_setup(entry, type, flags, pc);
2181                         entry->array[0] = len;
2182                         return entry;
2183                 }
2184                 this_cpu_dec(trace_buffered_event_cnt);
2185         }
2186
2187         entry = __trace_buffer_lock_reserve(*current_rb,
2188                                             type, len, flags, pc);
2189         /*
2190          * If tracing is off, but we have triggers enabled
2191          * we still need to look at the event data. Use the temp_buffer
2192          * to store the trace event for the trigger to use. It's recursion
2193          * safe and will not be recorded anywhere.
2194          */
2195         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2196                 *current_rb = temp_buffer;
2197                 entry = __trace_buffer_lock_reserve(*current_rb,
2198                                                     type, len, flags, pc);
2199         }
2200         return entry;
2201 }
2202 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2203
2204 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2205 static DEFINE_MUTEX(tracepoint_printk_mutex);
2206
2207 static void output_printk(struct trace_event_buffer *fbuffer)
2208 {
2209         struct trace_event_call *event_call;
2210         struct trace_event *event;
2211         unsigned long flags;
2212         struct trace_iterator *iter = tracepoint_print_iter;
2213
2214         /* We should never get here if iter is NULL */
2215         if (WARN_ON_ONCE(!iter))
2216                 return;
2217
2218         event_call = fbuffer->trace_file->event_call;
2219         if (!event_call || !event_call->event.funcs ||
2220             !event_call->event.funcs->trace)
2221                 return;
2222
2223         event = &fbuffer->trace_file->event_call->event;
2224
2225         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2226         trace_seq_init(&iter->seq);
2227         iter->ent = fbuffer->entry;
2228         event_call->event.funcs->trace(iter, 0, event);
2229         trace_seq_putc(&iter->seq, 0);
2230         printk("%s", iter->seq.buffer);
2231
2232         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 }
2234
2235 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2236                              void __user *buffer, size_t *lenp,
2237                              loff_t *ppos)
2238 {
2239         int save_tracepoint_printk;
2240         int ret;
2241
2242         mutex_lock(&tracepoint_printk_mutex);
2243         save_tracepoint_printk = tracepoint_printk;
2244
2245         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246
2247         /*
2248          * This will force exiting early, as tracepoint_printk
2249          * is always zero when tracepoint_print_iter is not allocated
2250          */
2251         if (!tracepoint_print_iter)
2252                 tracepoint_printk = 0;
2253
2254         if (save_tracepoint_printk == tracepoint_printk)
2255                 goto out;
2256
2257         if (tracepoint_printk)
2258                 static_key_enable(&tracepoint_printk_key.key);
2259         else
2260                 static_key_disable(&tracepoint_printk_key.key);
2261
2262  out:
2263         mutex_unlock(&tracepoint_printk_mutex);
2264
2265         return ret;
2266 }
2267
2268 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2269 {
2270         if (static_key_false(&tracepoint_printk_key.key))
2271                 output_printk(fbuffer);
2272
2273         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2274                                     fbuffer->event, fbuffer->entry,
2275                                     fbuffer->flags, fbuffer->pc);
2276 }
2277 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2278
2279 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2280                                      struct ring_buffer *buffer,
2281                                      struct ring_buffer_event *event,
2282                                      unsigned long flags, int pc,
2283                                      struct pt_regs *regs)
2284 {
2285         __buffer_unlock_commit(buffer, event);
2286
2287         /*
2288          * If regs is not set, then skip the following callers:
2289          *   trace_buffer_unlock_commit_regs
2290          *   event_trigger_unlock_commit
2291          *   trace_event_buffer_commit
2292          *   trace_event_raw_event_sched_switch
2293          * Note, we can still get here via blktrace, wakeup tracer
2294          * and mmiotrace, but that's ok if they lose a function or
2295          * two. They are not that meaningful.
2296          */
2297         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2298         ftrace_trace_userstack(buffer, flags, pc);
2299 }
2300
2301 /*
2302  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303  */
2304 void
2305 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2306                                    struct ring_buffer_event *event)
2307 {
2308         __buffer_unlock_commit(buffer, event);
2309 }
2310
2311 static void
2312 trace_process_export(struct trace_export *export,
2313                struct ring_buffer_event *event)
2314 {
2315         struct trace_entry *entry;
2316         unsigned int size = 0;
2317
2318         entry = ring_buffer_event_data(event);
2319         size = ring_buffer_event_length(event);
2320         export->write(entry, size);
2321 }
2322
2323 static DEFINE_MUTEX(ftrace_export_lock);
2324
2325 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2326
2327 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2328
2329 static inline void ftrace_exports_enable(void)
2330 {
2331         static_branch_enable(&ftrace_exports_enabled);
2332 }
2333
2334 static inline void ftrace_exports_disable(void)
2335 {
2336         static_branch_disable(&ftrace_exports_enabled);
2337 }
2338
2339 void ftrace_exports(struct ring_buffer_event *event)
2340 {
2341         struct trace_export *export;
2342
2343         preempt_disable_notrace();
2344
2345         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2346         while (export) {
2347                 trace_process_export(export, event);
2348                 export = rcu_dereference_raw_notrace(export->next);
2349         }
2350
2351         preempt_enable_notrace();
2352 }
2353
2354 static inline void
2355 add_trace_export(struct trace_export **list, struct trace_export *export)
2356 {
2357         rcu_assign_pointer(export->next, *list);
2358         /*
2359          * We are entering export into the list but another
2360          * CPU might be walking that list. We need to make sure
2361          * the export->next pointer is valid before another CPU sees
2362          * the export pointer included into the list.
2363          */
2364         rcu_assign_pointer(*list, export);
2365 }
2366
2367 static inline int
2368 rm_trace_export(struct trace_export **list, struct trace_export *export)
2369 {
2370         struct trace_export **p;
2371
2372         for (p = list; *p != NULL; p = &(*p)->next)
2373                 if (*p == export)
2374                         break;
2375
2376         if (*p != export)
2377                 return -1;
2378
2379         rcu_assign_pointer(*p, (*p)->next);
2380
2381         return 0;
2382 }
2383
2384 static inline void
2385 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 {
2387         if (*list == NULL)
2388                 ftrace_exports_enable();
2389
2390         add_trace_export(list, export);
2391 }
2392
2393 static inline int
2394 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2395 {
2396         int ret;
2397
2398         ret = rm_trace_export(list, export);
2399         if (*list == NULL)
2400                 ftrace_exports_disable();
2401
2402         return ret;
2403 }
2404
2405 int register_ftrace_export(struct trace_export *export)
2406 {
2407         if (WARN_ON_ONCE(!export->write))
2408                 return -1;
2409
2410         mutex_lock(&ftrace_export_lock);
2411
2412         add_ftrace_export(&ftrace_exports_list, export);
2413
2414         mutex_unlock(&ftrace_export_lock);
2415
2416         return 0;
2417 }
2418 EXPORT_SYMBOL_GPL(register_ftrace_export);
2419
2420 int unregister_ftrace_export(struct trace_export *export)
2421 {
2422         int ret;
2423
2424         mutex_lock(&ftrace_export_lock);
2425
2426         ret = rm_ftrace_export(&ftrace_exports_list, export);
2427
2428         mutex_unlock(&ftrace_export_lock);
2429
2430         return ret;
2431 }
2432 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2433
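/*
 * Illustrative sketch (not part of the upstream file): a minimal consumer
 * of the export hook.  The callback is assumed to match the two-argument
 * ->write() invoked by trace_process_export() above; the example_* names
 * are hypothetical (the stm source tracing driver is an in-tree user of
 * this interface).
 */
static void example_export_write(const void *entry, unsigned int size)
{
	/* Ship the raw trace entry to some out-of-band channel here. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}
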
2434 void
2435 trace_function(struct trace_array *tr,
2436                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437                int pc)
2438 {
2439         struct trace_event_call *call = &event_function;
2440         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2441         struct ring_buffer_event *event;
2442         struct ftrace_entry *entry;
2443
2444         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2445                                             flags, pc);
2446         if (!event)
2447                 return;
2448         entry   = ring_buffer_event_data(event);
2449         entry->ip                       = ip;
2450         entry->parent_ip                = parent_ip;
2451
2452         if (!call_filter_check_discard(call, entry, buffer, event)) {
2453                 if (static_branch_unlikely(&ftrace_exports_enabled))
2454                         ftrace_exports(event);
2455                 __buffer_unlock_commit(buffer, event);
2456         }
2457 }
2458
2459 #ifdef CONFIG_STACKTRACE
2460
2461 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2462 struct ftrace_stack {
2463         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2464 };
2465
2466 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2467 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2468
2469 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2470                                  unsigned long flags,
2471                                  int skip, int pc, struct pt_regs *regs)
2472 {
2473         struct trace_event_call *call = &event_kernel_stack;
2474         struct ring_buffer_event *event;
2475         struct stack_entry *entry;
2476         struct stack_trace trace;
2477         int use_stack;
2478         int size = FTRACE_STACK_ENTRIES;
2479
2480         trace.nr_entries        = 0;
2481         trace.skip              = skip;
2482
2483         /*
2484          * Add two, for this function and the call to save_stack_trace().
2485          * If regs is set, then these functions will not be in the way.
2486          */
2487         if (!regs)
2488                 trace.skip += 2;
2489
2490         /*
2491          * Since events can happen in NMIs there's no safe way to
2492          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2493          * or NMI comes in, it will just have to use the default
2494          * FTRACE_STACK_SIZE.
2495          */
2496         preempt_disable_notrace();
2497
2498         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2499         /*
2500          * We don't need any atomic variables, just a barrier.
2501          * If an interrupt comes in, we don't care, because it would
2502          * have exited and put the counter back to what we want.
2503          * We just need a barrier to keep gcc from moving things
2504          * around.
2505          */
2506         barrier();
2507         if (use_stack == 1) {
2508                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2509                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2510
2511                 if (regs)
2512                         save_stack_trace_regs(regs, &trace);
2513                 else
2514                         save_stack_trace(&trace);
2515
2516                 if (trace.nr_entries > size)
2517                         size = trace.nr_entries;
2518         } else
2519                 /* From now on, use_stack is a boolean */
2520                 use_stack = 0;
2521
2522         size *= sizeof(unsigned long);
2523
2524         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2525                                             sizeof(*entry) + size, flags, pc);
2526         if (!event)
2527                 goto out;
2528         entry = ring_buffer_event_data(event);
2529
2530         memset(&entry->caller, 0, size);
2531
2532         if (use_stack)
2533                 memcpy(&entry->caller, trace.entries,
2534                        trace.nr_entries * sizeof(unsigned long));
2535         else {
2536                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2537                 trace.entries           = entry->caller;
2538                 if (regs)
2539                         save_stack_trace_regs(regs, &trace);
2540                 else
2541                         save_stack_trace(&trace);
2542         }
2543
2544         entry->size = trace.nr_entries;
2545
2546         if (!call_filter_check_discard(call, entry, buffer, event))
2547                 __buffer_unlock_commit(buffer, event);
2548
2549  out:
2550         /* Again, don't let gcc optimize things here */
2551         barrier();
2552         __this_cpu_dec(ftrace_stack_reserve);
2553         preempt_enable_notrace();
2554
2555 }
2556
2557 static inline void ftrace_trace_stack(struct trace_array *tr,
2558                                       struct ring_buffer *buffer,
2559                                       unsigned long flags,
2560                                       int skip, int pc, struct pt_regs *regs)
2561 {
2562         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563                 return;
2564
2565         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 }
2567
2568 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569                    int pc)
2570 {
2571         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2572
2573         if (rcu_is_watching()) {
2574                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2575                 return;
2576         }
2577
2578         /*
2579          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2580          * but if the above rcu_is_watching() failed, then the NMI
2581          * triggered someplace critical, and rcu_irq_enter() should
2582          * not be called from NMI.
2583          */
2584         if (unlikely(in_nmi()))
2585                 return;
2586
2587         /*
2588          * It is possible that a function is being traced in a
2589          * location that RCU is not watching. A call to
2590          * rcu_irq_enter() will make sure that it is, but there's
2591          * a few internal rcu functions that could be traced
2592          * where that wont work either. In those cases, we just
2593          * do nothing.
2594          */
2595         if (unlikely(rcu_irq_enter_disabled()))
2596                 return;
2597
2598         rcu_irq_enter_irqson();
2599         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2600         rcu_irq_exit_irqson();
2601 }
2602
2603 /**
2604  * trace_dump_stack - record a stack back trace in the trace buffer
2605  * @skip: Number of functions to skip (helper handlers)
2606  */
2607 void trace_dump_stack(int skip)
2608 {
2609         unsigned long flags;
2610
2611         if (tracing_disabled || tracing_selftest_running)
2612                 return;
2613
2614         local_save_flags(flags);
2615
2616         /*
2617          * Skip 3 more; that seems to get us to the caller of
2618          * this function.
2619          */
2620         skip += 3;
2621         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2622                              flags, skip, preempt_count(), NULL);
2623 }
2624
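/*
 * Illustrative sketch (not part of the upstream file): trace_dump_stack()
 * can be dropped into a suspect code path to record a backtrace into the
 * ring buffer instead of the console; a skip of 0 leaves the caller at
 * the top of the recorded trace.  The function name is hypothetical.
 */
static void example_debug_path(void)
{
	/* Record how we got here without printing to the console. */
	trace_dump_stack(0);
}
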
2625 static DEFINE_PER_CPU(int, user_stack_count);
2626
2627 void
2628 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2629 {
2630         struct trace_event_call *call = &event_user_stack;
2631         struct ring_buffer_event *event;
2632         struct userstack_entry *entry;
2633         struct stack_trace trace;
2634
2635         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2636                 return;
2637
2638         /*
2639          * NMIs can not handle page faults, even with fixups.
2640          * Saving the user stack can (and often does) fault.
2641          */
2642         if (unlikely(in_nmi()))
2643                 return;
2644
2645         /*
2646          * Prevent recursion, since user stack tracing may
2647          * trigger other kernel events.
2648          */
2649         preempt_disable();
2650         if (__this_cpu_read(user_stack_count))
2651                 goto out;
2652
2653         __this_cpu_inc(user_stack_count);
2654
2655         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2656                                             sizeof(*entry), flags, pc);
2657         if (!event)
2658                 goto out_drop_count;
2659         entry   = ring_buffer_event_data(event);
2660
2661         entry->tgid             = current->tgid;
2662         memset(&entry->caller, 0, sizeof(entry->caller));
2663
2664         trace.nr_entries        = 0;
2665         trace.max_entries       = FTRACE_STACK_ENTRIES;
2666         trace.skip              = 0;
2667         trace.entries           = entry->caller;
2668
2669         save_stack_trace_user(&trace);
2670         if (!call_filter_check_discard(call, entry, buffer, event))
2671                 __buffer_unlock_commit(buffer, event);
2672
2673  out_drop_count:
2674         __this_cpu_dec(user_stack_count);
2675  out:
2676         preempt_enable();
2677 }
2678
2679 #ifdef UNUSED
2680 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2681 {
2682         ftrace_trace_userstack(tr, flags, preempt_count());
2683 }
2684 #endif /* UNUSED */
2685
2686 #endif /* CONFIG_STACKTRACE */
2687
2688 /* created for use with alloc_percpu */
2689 struct trace_buffer_struct {
2690         int nesting;
2691         char buffer[4][TRACE_BUF_SIZE];
2692 };
2693
2694 static struct trace_buffer_struct *trace_percpu_buffer;
2695
2696 /*
2697  * This allows for lockless recording.  If we're nested too deeply, then
2698  * this returns NULL.
2699  */
2700 static char *get_trace_buf(void)
2701 {
2702         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2703
2704         if (!buffer || buffer->nesting >= 4)
2705                 return NULL;
2706
2707         return &buffer->buffer[buffer->nesting++][0];
2708 }
2709
2710 static void put_trace_buf(void)
2711 {
2712         this_cpu_dec(trace_percpu_buffer->nesting);
2713 }
2714
2715 static int alloc_percpu_trace_buffer(void)
2716 {
2717         struct trace_buffer_struct *buffers;
2718
2719         buffers = alloc_percpu(struct trace_buffer_struct);
2720         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2721                 return -ENOMEM;
2722
2723         trace_percpu_buffer = buffers;
2724         return 0;
2725 }
2726
2727 static int buffers_allocated;
2728
2729 void trace_printk_init_buffers(void)
2730 {
2731         if (buffers_allocated)
2732                 return;
2733
2734         if (alloc_percpu_trace_buffer())
2735                 return;
2736
2737         /* trace_printk() is for debug use only. Don't use it in production. */
2738
2739         pr_warn("\n");
2740         pr_warn("**********************************************************\n");
2741         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2742         pr_warn("**                                                      **\n");
2743         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2744         pr_warn("**                                                      **\n");
2745         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2746         pr_warn("** unsafe for production use.                           **\n");
2747         pr_warn("**                                                      **\n");
2748         pr_warn("** If you see this message and you are not debugging    **\n");
2749         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2750         pr_warn("**                                                      **\n");
2751         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2752         pr_warn("**********************************************************\n");
2753
2754         /* Expand the buffers to set size */
2755         tracing_update_buffers();
2756
2757         buffers_allocated = 1;
2758
2759         /*
2760          * trace_printk_init_buffers() can be called by modules.
2761          * If that happens, then we need to start cmdline recording
2762          * directly here. If the global_trace.buffer is already
2763          * allocated here, then this was called by module code.
2764          */
2765         if (global_trace.trace_buffer.buffer)
2766                 tracing_start_cmdline_record();
2767 }
2768
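/*
 * Illustrative sketch (not part of the upstream file): trace_printk()
 * writes into the ring buffer (which is what triggers the banner and the
 * buffer expansion above) rather than to the console.  The function and
 * parameter names are hypothetical.
 */
static void example_debug_event(int value)
{
	trace_printk("value is now %d\n", value);
}
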
2769 void trace_printk_start_comm(void)
2770 {
2771         /* Start tracing comms if trace printk is set */
2772         if (!buffers_allocated)
2773                 return;
2774         tracing_start_cmdline_record();
2775 }
2776
2777 static void trace_printk_start_stop_comm(int enabled)
2778 {
2779         if (!buffers_allocated)
2780                 return;
2781
2782         if (enabled)
2783                 tracing_start_cmdline_record();
2784         else
2785                 tracing_stop_cmdline_record();
2786 }
2787
2788 /**
2789  * trace_vbprintk - write binary msg to tracing buffer
2790  *
2791  */
2792 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2793 {
2794         struct trace_event_call *call = &event_bprint;
2795         struct ring_buffer_event *event;
2796         struct ring_buffer *buffer;
2797         struct trace_array *tr = &global_trace;
2798         struct bprint_entry *entry;
2799         unsigned long flags;
2800         char *tbuffer;
2801         int len = 0, size, pc;
2802
2803         if (unlikely(tracing_selftest_running || tracing_disabled))
2804                 return 0;
2805
2806         /* Don't pollute graph traces with trace_vprintk internals */
2807         pause_graph_tracing();
2808
2809         pc = preempt_count();
2810         preempt_disable_notrace();
2811
2812         tbuffer = get_trace_buf();
2813         if (!tbuffer) {
2814                 len = 0;
2815                 goto out_nobuffer;
2816         }
2817
2818         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2819
2820         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2821                 goto out;
2822
2823         local_save_flags(flags);
2824         size = sizeof(*entry) + sizeof(u32) * len;
2825         buffer = tr->trace_buffer.buffer;
2826         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2827                                             flags, pc);
2828         if (!event)
2829                 goto out;
2830         entry = ring_buffer_event_data(event);
2831         entry->ip                       = ip;
2832         entry->fmt                      = fmt;
2833
2834         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2835         if (!call_filter_check_discard(call, entry, buffer, event)) {
2836                 __buffer_unlock_commit(buffer, event);
2837                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2838         }
2839
2840 out:
2841         put_trace_buf();
2842
2843 out_nobuffer:
2844         preempt_enable_notrace();
2845         unpause_graph_tracing();
2846
2847         return len;
2848 }
2849 EXPORT_SYMBOL_GPL(trace_vbprintk);
2850
2851 static int
2852 __trace_array_vprintk(struct ring_buffer *buffer,
2853                       unsigned long ip, const char *fmt, va_list args)
2854 {
2855         struct trace_event_call *call = &event_print;
2856         struct ring_buffer_event *event;
2857         int len = 0, size, pc;
2858         struct print_entry *entry;
2859         unsigned long flags;
2860         char *tbuffer;
2861
2862         if (tracing_disabled || tracing_selftest_running)
2863                 return 0;
2864
2865         /* Don't pollute graph traces with trace_vprintk internals */
2866         pause_graph_tracing();
2867
2868         pc = preempt_count();
2869         preempt_disable_notrace();
2870
2871
2872         tbuffer = get_trace_buf();
2873         if (!tbuffer) {
2874                 len = 0;
2875                 goto out_nobuffer;
2876         }
2877
2878         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2879
2880         local_save_flags(flags);
2881         size = sizeof(*entry) + len + 1;
2882         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2883                                             flags, pc);
2884         if (!event)
2885                 goto out;
2886         entry = ring_buffer_event_data(event);
2887         entry->ip = ip;
2888
2889         memcpy(&entry->buf, tbuffer, len + 1);
2890         if (!call_filter_check_discard(call, entry, buffer, event)) {
2891                 __buffer_unlock_commit(buffer, event);
2892                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2893         }
2894
2895 out:
2896         put_trace_buf();
2897
2898 out_nobuffer:
2899         preempt_enable_notrace();
2900         unpause_graph_tracing();
2901
2902         return len;
2903 }
2904
2905 int trace_array_vprintk(struct trace_array *tr,
2906                         unsigned long ip, const char *fmt, va_list args)
2907 {
2908         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2909 }
2910
2911 int trace_array_printk(struct trace_array *tr,
2912                        unsigned long ip, const char *fmt, ...)
2913 {
2914         int ret;
2915         va_list ap;
2916
2917         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2918                 return 0;
2919
2920         va_start(ap, fmt);
2921         ret = trace_array_vprintk(tr, ip, fmt, ap);
2922         va_end(ap);
2923         return ret;
2924 }
2925
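/*
 * Illustrative sketch (not part of the upstream file): trace_array_printk()
 * targets a specific trace instance rather than the global buffer, with
 * _THIS_IP_ recording the caller's address.  The function and parameter
 * names are hypothetical.
 */
static void example_instance_printk(struct trace_array *tr, int err)
{
	trace_array_printk(tr, _THIS_IP_, "example failed: %d\n", err);
}
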
2926 int trace_array_printk_buf(struct ring_buffer *buffer,
2927                            unsigned long ip, const char *fmt, ...)
2928 {
2929         int ret;
2930         va_list ap;
2931
2932         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2933                 return 0;
2934
2935         va_start(ap, fmt);
2936         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2937         va_end(ap);
2938         return ret;
2939 }
2940
2941 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2942 {
2943         return trace_array_vprintk(&global_trace, ip, fmt, args);
2944 }
2945 EXPORT_SYMBOL_GPL(trace_vprintk);
2946
2947 static void trace_iterator_increment(struct trace_iterator *iter)
2948 {
2949         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2950
2951         iter->idx++;
2952         if (buf_iter)
2953                 ring_buffer_read(buf_iter, NULL);
2954 }
2955
2956 static struct trace_entry *
2957 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2958                 unsigned long *lost_events)
2959 {
2960         struct ring_buffer_event *event;
2961         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2962
2963         if (buf_iter)
2964                 event = ring_buffer_iter_peek(buf_iter, ts);
2965         else
2966                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2967                                          lost_events);
2968
2969         if (event) {
2970                 iter->ent_size = ring_buffer_event_length(event);
2971                 return ring_buffer_event_data(event);
2972         }
2973         iter->ent_size = 0;
2974         return NULL;
2975 }
2976
2977 static struct trace_entry *
2978 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2979                   unsigned long *missing_events, u64 *ent_ts)
2980 {
2981         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2982         struct trace_entry *ent, *next = NULL;
2983         unsigned long lost_events = 0, next_lost = 0;
2984         int cpu_file = iter->cpu_file;
2985         u64 next_ts = 0, ts;
2986         int next_cpu = -1;
2987         int next_size = 0;
2988         int cpu;
2989
2990         /*
2991          * If we are in a per_cpu trace file, don't bother iterating over
2992          * all CPUs; just peek at the requested one directly.
2993          */
2994         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2995                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2996                         return NULL;
2997                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2998                 if (ent_cpu)
2999                         *ent_cpu = cpu_file;
3000
3001                 return ent;
3002         }
3003
3004         for_each_tracing_cpu(cpu) {
3005
3006                 if (ring_buffer_empty_cpu(buffer, cpu))
3007                         continue;
3008
3009                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3010
3011                 /*
3012                  * Pick the entry with the smallest timestamp:
3013                  */
3014                 if (ent && (!next || ts < next_ts)) {
3015                         next = ent;
3016                         next_cpu = cpu;
3017                         next_ts = ts;
3018                         next_lost = lost_events;
3019                         next_size = iter->ent_size;
3020                 }
3021         }
3022
3023         iter->ent_size = next_size;
3024
3025         if (ent_cpu)
3026                 *ent_cpu = next_cpu;
3027
3028         if (ent_ts)
3029                 *ent_ts = next_ts;
3030
3031         if (missing_events)
3032                 *missing_events = next_lost;
3033
3034         return next;
3035 }
3036
3037 /* Find the next real entry, without updating the iterator itself */
3038 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3039                                           int *ent_cpu, u64 *ent_ts)
3040 {
3041         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3042 }
3043
3044 /* Find the next real entry, and increment the iterator to the next entry */
3045 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3046 {
3047         iter->ent = __find_next_entry(iter, &iter->cpu,
3048                                       &iter->lost_events, &iter->ts);
3049
3050         if (iter->ent)
3051                 trace_iterator_increment(iter);
3052
3053         return iter->ent ? iter : NULL;
3054 }
3055
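/*
 * Illustrative sketch (not part of the upstream file): a consumer walking
 * the merged, timestamp-ordered stream produced by the helpers above.  It
 * assumes an iterator that has already been set up, as the read paths
 * later in this file do; the function name is hypothetical.
 */
static unsigned long example_count_entries(struct trace_iterator *iter)
{
	unsigned long count = 0;

	while (trace_find_next_entry_inc(iter)) {
		/*
		 * iter->ent, iter->cpu and iter->ts now describe the
		 * oldest remaining entry across all per-cpu buffers.
		 */
		count++;
	}

	return count;
}
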
3056 static void trace_consume(struct trace_iterator *iter)
3057 {
3058         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3059                             &iter->lost_events);
3060 }
3061
3062 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3063 {
3064         struct trace_iterator *iter = m->private;
3065         int i = (int)*pos;
3066         void *ent;
3067
3068         WARN_ON_ONCE(iter->leftover);
3069
3070         (*pos)++;
3071
3072         /* can't go backwards */
3073         if (iter->idx > i)
3074                 return NULL;
3075
3076         if (iter->idx < 0)
3077                 ent = trace_find_next_entry_inc(iter);
3078         else
3079                 ent = iter;
3080
3081         while (ent && iter->idx < i)
3082                 ent = trace_find_next_entry_inc(iter);
3083
3084         iter->pos = *pos;
3085
3086         return ent;
3087 }
3088
3089 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3090 {
3091         struct ring_buffer_event *event;
3092         struct ring_buffer_iter *buf_iter;
3093         unsigned long entries = 0;
3094         u64 ts;
3095
3096         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3097
3098         buf_iter = trace_buffer_iter(iter, cpu);
3099         if (!buf_iter)
3100                 return;
3101
3102         ring_buffer_iter_reset(buf_iter);
3103
3104         /*
3105          * With the max latency tracers, we could have the case that a
3106          * reset never took place on a cpu. This is evidenced by the
3107          * timestamp being before the start of the buffer.
3108          */
3109         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3110                 if (ts >= iter->trace_buffer->time_start)
3111                         break;
3112                 entries++;
3113                 ring_buffer_read(buf_iter, NULL);
3114         }
3115
3116         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3117 }
3118
3119 /*
3120  * The current tracer is copied to avoid taking a global lock
3121  * all around.
3122  */
3123 static void *s_start(struct seq_file *m, loff_t *pos)
3124 {
3125         struct trace_iterator *iter = m->private;
3126         struct trace_array *tr = iter->tr;
3127         int cpu_file = iter->cpu_file;
3128         void *p = NULL;
3129         loff_t l = 0;
3130         int cpu;
3131
3132         /*
3133          * copy the tracer to avoid using a global lock all around.
3134          * iter->trace is a copy of current_trace, the pointer to the
3135          * name may be used instead of a strcmp(), as iter->trace->name
3136          * will point to the same string as current_trace->name.
3137          */
3138         mutex_lock(&trace_types_lock);
3139         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3140                 *iter->trace = *tr->current_trace;
3141         mutex_unlock(&trace_types_lock);
3142
3143 #ifdef CONFIG_TRACER_MAX_TRACE
3144         if (iter->snapshot && iter->trace->use_max_tr)
3145                 return ERR_PTR(-EBUSY);
3146 #endif
3147
3148         if (!iter->snapshot)
3149                 atomic_inc(&trace_record_cmdline_disabled);
3150
3151         if (*pos != iter->pos) {
3152                 iter->ent = NULL;
3153                 iter->cpu = 0;
3154                 iter->idx = -1;
3155
3156                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3157                         for_each_tracing_cpu(cpu)
3158                                 tracing_iter_reset(iter, cpu);
3159                 } else
3160                         tracing_iter_reset(iter, cpu_file);
3161
3162                 iter->leftover = 0;
3163                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3164                         ;
3165
3166         } else {
3167                 /*
3168                  * If we overflowed the seq_file before, then we want
3169                  * to just reuse the trace_seq buffer again.
3170                  */
3171                 if (iter->leftover)
3172                         p = iter;
3173                 else {
3174                         l = *pos - 1;
3175                         p = s_next(m, p, &l);
3176                 }
3177         }
3178
3179         trace_event_read_lock();
3180         trace_access_lock(cpu_file);
3181         return p;
3182 }
3183
3184 static void s_stop(struct seq_file *m, void *p)
3185 {
3186         struct trace_iterator *iter = m->private;
3187
3188 #ifdef CONFIG_TRACER_MAX_TRACE
3189         if (iter->snapshot && iter->trace->use_max_tr)
3190                 return;
3191 #endif
3192
3193         if (!iter->snapshot)
3194                 atomic_dec(&trace_record_cmdline_disabled);
3195
3196         trace_access_unlock(iter->cpu_file);
3197         trace_event_read_unlock();
3198 }
3199
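/*
 * Illustrative sketch (not part of the upstream file): s_start(), s_next()
 * and s_stop() are seq_file callbacks; further down the real file collects
 * them, together with a show callback, into a struct seq_operations.  The
 * example_seq_ops name below is used only to illustrate that wiring.
 */
static int s_show(struct seq_file *m, void *v);

static const struct seq_operations example_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
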
3200 static void
3201 get_total_entries(struct trace_buffer *buf,
3202                   unsigned long *total, unsigned long *entries)
3203 {
3204         unsigned long count;
3205         int cpu;
3206
3207         *total = 0;
3208         *entries = 0;
3209
3210         for_each_tracing_cpu(cpu) {
3211                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3212                 /*
3213                  * If this buffer has skipped entries, then we hold all
3214                  * entries for the trace and we need to ignore the
3215                  * ones before the time stamp.
3216                  */
3217                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3218                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3219                         /* total is the same as the entries */
3220                         *total += count;
3221                 } else
3222                         *total += count +
3223                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3224                 *entries += count;
3225         }
3226 }
3227
3228 static void print_lat_help_header(struct seq_file *m)
3229 {
3230         seq_puts(m, "#                  _------=> CPU#            \n"
3231                     "#                 / _-----=> irqs-off        \n"
3232                     "#                | / _----=> need-resched    \n"
3233                     "#                || / _---=> hardirq/softirq \n"
3234                     "#                ||| / _--=> preempt-depth   \n"
3235                     "#                |||| /     delay            \n"
3236                     "#  cmd     pid   ||||| time  |   caller      \n"
3237                     "#     \\   /      |||||  \\    |   /         \n");
3238 }
3239
3240 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3241 {
3242         unsigned long total;
3243         unsigned long entries;
3244
3245         get_total_entries(buf, &total, &entries);
3246         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3247                    entries, total, num_online_cpus());
3248         seq_puts(m, "#\n");
3249 }
3250
3251 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3252 {
3253         print_event_info(buf, m);
3254         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3255                     "#              | |       |          |         |\n");
3256 }
3257
3258 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3259 {
3260         print_event_info(buf, m);
3261         seq_puts(m, "#                              _-----=> irqs-off\n"
3262                     "#                             / _----=> need-resched\n"
3263                     "#                            | / _---=> hardirq/softirq\n"
3264                     "#                            || / _--=> preempt-depth\n"
3265                     "#                            ||| /     delay\n"
3266                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3267                     "#              | |       |   ||||       |         |\n");
3268 }
3269
3270 void
3271 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3272 {
3273         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3274         struct trace_buffer *buf = iter->trace_buffer;
3275         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3276         struct tracer *type = iter->trace;
3277         unsigned long entries;
3278         unsigned long total;
3279         const char *name = type->name;
3282
3283         get_total_entries(buf, &total, &entries);
3284
3285         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3286                    name, UTS_RELEASE);
3287         seq_puts(m, "# -----------------------------------"
3288                  "---------------------------------\n");
3289         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3290                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3291                    nsecs_to_usecs(data->saved_latency),
3292                    entries,
3293                    total,
3294                    buf->cpu,
3295 #if defined(CONFIG_PREEMPT_NONE)
3296                    "server",
3297 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3298                    "desktop",
3299 #elif defined(CONFIG_PREEMPT)
3300                    "preempt",
3301 #else
3302                    "unknown",
3303 #endif
3304                    /* These are reserved for later use */
3305                    0, 0, 0, 0);
3306 #ifdef CONFIG_SMP
3307         seq_printf(m, " #P:%d)\n", num_online_cpus());
3308 #else
3309         seq_puts(m, ")\n");
3310 #endif
3311         seq_puts(m, "#    -----------------\n");
3312         seq_printf(m, "#    | task: %.16s-%d "
3313                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3314                    data->comm, data->pid,
3315                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3316                    data->policy, data->rt_priority);
3317         seq_puts(m, "#    -----------------\n");
3318
3319         if (data->critical_start) {
3320                 seq_puts(m, "#  => started at: ");
3321                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3322                 trace_print_seq(m, &iter->seq);
3323                 seq_puts(m, "\n#  => ended at:   ");
3324                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3325                 trace_print_seq(m, &iter->seq);
3326                 seq_puts(m, "\n#\n");
3327         }
3328
3329         seq_puts(m, "#\n");
3330 }
3331
3332 static void test_cpu_buff_start(struct trace_iterator *iter)
3333 {
3334         struct trace_seq *s = &iter->seq;
3335         struct trace_array *tr = iter->tr;
3336
3337         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3338                 return;
3339
3340         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3341                 return;
3342
3343         if (cpumask_available(iter->started) &&
3344             cpumask_test_cpu(iter->cpu, iter->started))
3345                 return;
3346
3347         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3348                 return;
3349
3350         if (cpumask_available(iter->started))
3351                 cpumask_set_cpu(iter->cpu, iter->started);
3352
3353         /* Don't print started cpu buffer for the first entry of the trace */
3354         if (iter->idx > 1)
3355                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3356                                 iter->cpu);
3357 }
3358
3359 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3360 {
3361         struct trace_array *tr = iter->tr;
3362         struct trace_seq *s = &iter->seq;
3363         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3364         struct trace_entry *entry;
3365         struct trace_event *event;
3366
3367         entry = iter->ent;
3368
3369         test_cpu_buff_start(iter);
3370
3371         event = ftrace_find_event(entry->type);
3372
3373         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3374                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3375                         trace_print_lat_context(iter);
3376                 else
3377                         trace_print_context(iter);
3378         }
3379
3380         if (trace_seq_has_overflowed(s))
3381                 return TRACE_TYPE_PARTIAL_LINE;
3382
3383         if (event)
3384                 return event->funcs->trace(iter, sym_flags, event);
3385
3386         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3387
3388         return trace_handle_return(s);
3389 }
3390
3391 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3392 {
3393         struct trace_array *tr = iter->tr;
3394         struct trace_seq *s = &iter->seq;
3395         struct trace_entry *entry;
3396         struct trace_event *event;
3397
3398         entry = iter->ent;
3399
3400         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3401                 trace_seq_printf(s, "%d %d %llu ",
3402                                  entry->pid, iter->cpu, iter->ts);
3403
3404         if (trace_seq_has_overflowed(s))
3405                 return TRACE_TYPE_PARTIAL_LINE;
3406
3407         event = ftrace_find_event(entry->type);
3408         if (event)
3409                 return event->funcs->raw(iter, 0, event);
3410
3411         trace_seq_printf(s, "%d ?\n", entry->type);
3412
3413         return trace_handle_return(s);
3414 }
3415
3416 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3417 {
3418         struct trace_array *tr = iter->tr;
3419         struct trace_seq *s = &iter->seq;
3420         unsigned char newline = '\n';
3421         struct trace_entry *entry;
3422         struct trace_event *event;
3423
3424         entry = iter->ent;
3425
3426         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3427                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3428                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3429                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3430                 if (trace_seq_has_overflowed(s))
3431                         return TRACE_TYPE_PARTIAL_LINE;
3432         }
3433
3434         event = ftrace_find_event(entry->type);
3435         if (event) {
3436                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3437                 if (ret != TRACE_TYPE_HANDLED)
3438                         return ret;
3439         }
3440
3441         SEQ_PUT_FIELD(s, newline);
3442
3443         return trace_handle_return(s);
3444 }
3445
3446 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3447 {
3448         struct trace_array *tr = iter->tr;
3449         struct trace_seq *s = &iter->seq;
3450         struct trace_entry *entry;
3451         struct trace_event *event;
3452
3453         entry = iter->ent;
3454
3455         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3456                 SEQ_PUT_FIELD(s, entry->pid);
3457                 SEQ_PUT_FIELD(s, iter->cpu);
3458                 SEQ_PUT_FIELD(s, iter->ts);
3459                 if (trace_seq_has_overflowed(s))
3460                         return TRACE_TYPE_PARTIAL_LINE;
3461         }
3462
3463         event = ftrace_find_event(entry->type);
3464         return event ? event->funcs->binary(iter, 0, event) :
3465                 TRACE_TYPE_HANDLED;
3466 }
3467
3468 int trace_empty(struct trace_iterator *iter)
3469 {
3470         struct ring_buffer_iter *buf_iter;
3471         int cpu;
3472
3473         /* If we are looking at one CPU buffer, only check that one */
3474         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3475                 cpu = iter->cpu_file;
3476                 buf_iter = trace_buffer_iter(iter, cpu);
3477                 if (buf_iter) {
3478                         if (!ring_buffer_iter_empty(buf_iter))
3479                                 return 0;
3480                 } else {
3481                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3482                                 return 0;
3483                 }
3484                 return 1;
3485         }
3486
3487         for_each_tracing_cpu(cpu) {
3488                 buf_iter = trace_buffer_iter(iter, cpu);
3489                 if (buf_iter) {
3490                         if (!ring_buffer_iter_empty(buf_iter))
3491                                 return 0;
3492                 } else {
3493                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3494                                 return 0;
3495                 }
3496         }
3497
3498         return 1;
3499 }
3500
3501 /*  Called with trace_event_read_lock() held. */
3502 enum print_line_t print_trace_line(struct trace_iterator *iter)
3503 {
3504         struct trace_array *tr = iter->tr;
3505         unsigned long trace_flags = tr->trace_flags;
3506         enum print_line_t ret;
3507
3508         if (iter->lost_events) {
3509                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3510                                  iter->cpu, iter->lost_events);
3511                 if (trace_seq_has_overflowed(&iter->seq))
3512                         return TRACE_TYPE_PARTIAL_LINE;
3513         }
3514
3515         if (iter->trace && iter->trace->print_line) {
3516                 ret = iter->trace->print_line(iter);
3517                 if (ret != TRACE_TYPE_UNHANDLED)
3518                         return ret;
3519         }
3520
3521         if (iter->ent->type == TRACE_BPUTS &&
3522                         trace_flags & TRACE_ITER_PRINTK &&
3523                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3524                 return trace_print_bputs_msg_only(iter);
3525
3526         if (iter->ent->type == TRACE_BPRINT &&
3527                         trace_flags & TRACE_ITER_PRINTK &&
3528                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3529                 return trace_print_bprintk_msg_only(iter);
3530
3531         if (iter->ent->type == TRACE_PRINT &&
3532                         trace_flags & TRACE_ITER_PRINTK &&
3533                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3534                 return trace_print_printk_msg_only(iter);
3535
3536         if (trace_flags & TRACE_ITER_BIN)
3537                 return print_bin_fmt(iter);
3538
3539         if (trace_flags & TRACE_ITER_HEX)
3540                 return print_hex_fmt(iter);
3541
3542         if (trace_flags & TRACE_ITER_RAW)
3543                 return print_raw_fmt(iter);
3544
3545         return print_trace_fmt(iter);
3546 }
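/*
 * Hedged note on the format precedence implemented above: lost-event
 * annotations and a tracer's own print_line() hook are tried first, then the
 * printk-msg-only cases, then the "bin", "hex" and "raw" trace_options in
 * that order.  For example, with both "hex" and "raw" set in trace_options,
 * the hex output wins because TRACE_ITER_HEX is tested before TRACE_ITER_RAW.
 */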
3547
3548 void trace_latency_header(struct seq_file *m)
3549 {
3550         struct trace_iterator *iter = m->private;
3551         struct trace_array *tr = iter->tr;
3552
3553         /* print nothing if the buffers are empty */
3554         if (trace_empty(iter))
3555                 return;
3556
3557         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3558                 print_trace_header(m, iter);
3559
3560         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3561                 print_lat_help_header(m);
3562 }
3563
3564 void trace_default_header(struct seq_file *m)
3565 {
3566         struct trace_iterator *iter = m->private;
3567         struct trace_array *tr = iter->tr;
3568         unsigned long trace_flags = tr->trace_flags;
3569
3570         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3571                 return;
3572
3573         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3574                 /* print nothing if the buffers are empty */
3575                 if (trace_empty(iter))
3576                         return;
3577                 print_trace_header(m, iter);
3578                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3579                         print_lat_help_header(m);
3580         } else {
3581                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3582                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3583                                 print_func_help_header_irq(iter->trace_buffer, m);
3584                         else
3585                                 print_func_help_header(iter->trace_buffer, m);
3586                 }
3587         }
3588 }
3589
3590 static void test_ftrace_alive(struct seq_file *m)
3591 {
3592         if (!ftrace_is_dead())
3593                 return;
3594         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3595                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3596 }
3597
3598 #ifdef CONFIG_TRACER_MAX_TRACE
3599 static void show_snapshot_main_help(struct seq_file *m)
3600 {
3601         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3602                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3603                     "#                      Takes a snapshot of the main buffer.\n"
3604                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3605                     "#                      (Doesn't have to be '2'; works with any number\n"
3606                     "#                       that is not a '0' or '1')\n");
3607 }
3608
3609 static void show_snapshot_percpu_help(struct seq_file *m)
3610 {
3611         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3612 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3613         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3614                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3615 #else
3616         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3617                     "#                     Must use main snapshot file to allocate.\n");
3618 #endif
3619         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3620                     "#                      (Doesn't have to be '2'; works with any number\n"
3621                     "#                       that is not a '0' or '1')\n");
3622 }
3623
3624 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3625 {
3626         if (iter->tr->allocated_snapshot)
3627                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3628         else
3629                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3630
3631         seq_puts(m, "# Snapshot commands:\n");
3632         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3633                 show_snapshot_main_help(m);
3634         else
3635                 show_snapshot_percpu_help(m);
3636 }
3637 #else
3638 /* Should never be called */
3639 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3640 #endif
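/*
 * Hedged usage sketch for the snapshot help text above; the tracefs mount
 * point is an assumption (commonly /sys/kernel/tracing or
 * /sys/kernel/debug/tracing):
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot   # allocate and take a snapshot
 *	# cat /sys/kernel/tracing/snapshot        # read the frozen copy
 *	# echo 2 > /sys/kernel/tracing/snapshot   # clear it, keep the buffer
 *	# echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */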
3641
3642 static int s_show(struct seq_file *m, void *v)
3643 {
3644         struct trace_iterator *iter = v;
3645         int ret;
3646
3647         if (iter->ent == NULL) {
3648                 if (iter->tr) {
3649                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3650                         seq_puts(m, "#\n");
3651                         test_ftrace_alive(m);
3652                 }
3653                 if (iter->snapshot && trace_empty(iter))
3654                         print_snapshot_help(m, iter);
3655                 else if (iter->trace && iter->trace->print_header)
3656                         iter->trace->print_header(m);
3657                 else
3658                         trace_default_header(m);
3659
3660         } else if (iter->leftover) {
3661                 /*
3662                  * If we filled the seq_file buffer earlier, we
3663                  * want to just show it now.
3664                  */
3665                 ret = trace_print_seq(m, &iter->seq);
3666
3667                 /* ret should this time be zero, but you never know */
3668                 iter->leftover = ret;
3669
3670         } else {
3671                 print_trace_line(iter);
3672                 ret = trace_print_seq(m, &iter->seq);
3673                 /*
3674                  * If we overflow the seq_file buffer, then it will
3675                  * ask us for this data again at start up.
3676                  * Use that instead.
3677                  *  ret is 0 if seq_file write succeeded.
3678                  *        -1 otherwise.
3679                  */
3680                 iter->leftover = ret;
3681         }
3682
3683         return 0;
3684 }
3685
3686 /*
3687  * Should be used after trace_array_get(), trace_types_lock
3688  * ensures that i_cdev was already initialized.
3689  */
3690 static inline int tracing_get_cpu(struct inode *inode)
3691 {
3692         if (inode->i_cdev) /* See trace_create_cpu_file() */
3693                 return (long)inode->i_cdev - 1;
3694         return RING_BUFFER_ALL_CPUS;
3695 }
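/*
 * Hedged sketch of the encoding tracing_get_cpu() undoes.  The creation side
 * (trace_create_cpu_file(), referenced in the comment above but not shown in
 * this section) is presumed to store the CPU number biased by one, so that a
 * zero/NULL i_cdev can still mean "all CPUs", roughly:
 *
 *	d_inode(dentry)->i_cdev = (void *)(long)(cpu + 1);
 */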
3696
3697 static const struct seq_operations tracer_seq_ops = {
3698         .start          = s_start,
3699         .next           = s_next,
3700         .stop           = s_stop,
3701         .show           = s_show,
3702 };
3703
3704 static struct trace_iterator *
3705 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3706 {
3707         struct trace_array *tr = inode->i_private;
3708         struct trace_iterator *iter;
3709         int cpu;
3710
3711         if (tracing_disabled)
3712                 return ERR_PTR(-ENODEV);
3713
3714         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3715         if (!iter)
3716                 return ERR_PTR(-ENOMEM);
3717
3718         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3719                                     GFP_KERNEL);
3720         if (!iter->buffer_iter)
3721                 goto release;
3722
3723         /*
3724          * We make a copy of the current tracer to avoid concurrent
3725          * changes to it while we are reading.
3726          */
3727         mutex_lock(&trace_types_lock);
3728         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3729         if (!iter->trace)
3730                 goto fail;
3731
3732         *iter->trace = *tr->current_trace;
3733
3734         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3735                 goto fail;
3736
3737         iter->tr = tr;
3738
3739 #ifdef CONFIG_TRACER_MAX_TRACE
3740         /* Currently only the top directory has a snapshot */
3741         if (tr->current_trace->print_max || snapshot)
3742                 iter->trace_buffer = &tr->max_buffer;
3743         else
3744 #endif
3745                 iter->trace_buffer = &tr->trace_buffer;
3746         iter->snapshot = snapshot;
3747         iter->pos = -1;
3748         iter->cpu_file = tracing_get_cpu(inode);
3749         mutex_init(&iter->mutex);
3750
3751         /* Notify the tracer early, before we stop tracing. */
3752         if (iter->trace && iter->trace->open)
3753                 iter->trace->open(iter);
3754
3755         /* Annotate start of buffers if we had overruns */
3756         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3757                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3758
3759         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3760         if (trace_clocks[tr->clock_id].in_ns)
3761                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3762
3763         /* stop the trace while dumping if we are not opening "snapshot" */
3764         if (!iter->snapshot)
3765                 tracing_stop_tr(tr);
3766
3767         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3768                 for_each_tracing_cpu(cpu) {
3769                         iter->buffer_iter[cpu] =
3770                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3771                 }
3772                 ring_buffer_read_prepare_sync();
3773                 for_each_tracing_cpu(cpu) {
3774                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3775                         tracing_iter_reset(iter, cpu);
3776                 }
3777         } else {
3778                 cpu = iter->cpu_file;
3779                 iter->buffer_iter[cpu] =
3780                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3781                 ring_buffer_read_prepare_sync();
3782                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3783                 tracing_iter_reset(iter, cpu);
3784         }
3785
3786         mutex_unlock(&trace_types_lock);
3787
3788         return iter;
3789
3790  fail:
3791         mutex_unlock(&trace_types_lock);
3792         kfree(iter->trace);
3793         kfree(iter->buffer_iter);
3794 release:
3795         seq_release_private(inode, file);
3796         return ERR_PTR(-ENOMEM);
3797 }
3798
3799 int tracing_open_generic(struct inode *inode, struct file *filp)
3800 {
3801         if (tracing_disabled)
3802                 return -ENODEV;
3803
3804         filp->private_data = inode->i_private;
3805         return 0;
3806 }
3807
3808 bool tracing_is_disabled(void)
3809 {
3810         return tracing_disabled;
3811 }
3812
3813 /*
3814  * Open and update trace_array ref count.
3815  * Must have the current trace_array passed to it.
3816  */
3817 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3818 {
3819         struct trace_array *tr = inode->i_private;
3820
3821         if (tracing_disabled)
3822                 return -ENODEV;
3823
3824         if (trace_array_get(tr) < 0)
3825                 return -ENODEV;
3826
3827         filp->private_data = inode->i_private;
3828
3829         return 0;
3830 }
3831
3832 static int tracing_release(struct inode *inode, struct file *file)
3833 {
3834         struct trace_array *tr = inode->i_private;
3835         struct seq_file *m = file->private_data;
3836         struct trace_iterator *iter;
3837         int cpu;
3838
3839         if (!(file->f_mode & FMODE_READ)) {
3840                 trace_array_put(tr);
3841                 return 0;
3842         }
3843
3844         /* Writes do not use seq_file */
3845         iter = m->private;
3846         mutex_lock(&trace_types_lock);
3847
3848         for_each_tracing_cpu(cpu) {
3849                 if (iter->buffer_iter[cpu])
3850                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3851         }
3852
3853         if (iter->trace && iter->trace->close)
3854                 iter->trace->close(iter);
3855
3856         if (!iter->snapshot)
3857                 /* reenable tracing if it was previously enabled */
3858                 tracing_start_tr(tr);
3859
3860         __trace_array_put(tr);
3861
3862         mutex_unlock(&trace_types_lock);
3863
3864         mutex_destroy(&iter->mutex);
3865         free_cpumask_var(iter->started);
3866         kfree(iter->trace);
3867         kfree(iter->buffer_iter);
3868         seq_release_private(inode, file);
3869
3870         return 0;
3871 }
3872
3873 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3874 {
3875         struct trace_array *tr = inode->i_private;
3876
3877         trace_array_put(tr);
3878         return 0;
3879 }
3880
3881 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3882 {
3883         struct trace_array *tr = inode->i_private;
3884
3885         trace_array_put(tr);
3886
3887         return single_release(inode, file);
3888 }
3889
3890 static int tracing_open(struct inode *inode, struct file *file)
3891 {
3892         struct trace_array *tr = inode->i_private;
3893         struct trace_iterator *iter;
3894         int ret = 0;
3895
3896         if (trace_array_get(tr) < 0)
3897                 return -ENODEV;
3898
3899         /* If this file was open for write, then erase contents */
3900         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3901                 int cpu = tracing_get_cpu(inode);
3902
3903                 if (cpu == RING_BUFFER_ALL_CPUS)
3904                         tracing_reset_online_cpus(&tr->trace_buffer);
3905                 else
3906                         tracing_reset(&tr->trace_buffer, cpu);
3907         }
3908
3909         if (file->f_mode & FMODE_READ) {
3910                 iter = __tracing_open(inode, file, false);
3911                 if (IS_ERR(iter))
3912                         ret = PTR_ERR(iter);
3913                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3914                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3915         }
3916
3917         if (ret < 0)
3918                 trace_array_put(tr);
3919
3920         return ret;
3921 }
3922
3923 /*
3924  * Some tracers are not suitable for instance buffers.
3925  * A tracer is always available for the global array (toplevel)
3926  * or if it explicitly states that it is.
3927  */
3928 static bool
3929 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3930 {
3931         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3932 }
3933
3934 /* Find the next tracer that this trace array may use */
3935 static struct tracer *
3936 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3937 {
3938         while (t && !trace_ok_for_array(t, tr))
3939                 t = t->next;
3940
3941         return t;
3942 }
3943
3944 static void *
3945 t_next(struct seq_file *m, void *v, loff_t *pos)
3946 {
3947         struct trace_array *tr = m->private;
3948         struct tracer *t = v;
3949
3950         (*pos)++;
3951
3952         if (t)
3953                 t = get_tracer_for_array(tr, t->next);
3954
3955         return t;
3956 }
3957
3958 static void *t_start(struct seq_file *m, loff_t *pos)
3959 {
3960         struct trace_array *tr = m->private;
3961         struct tracer *t;
3962         loff_t l = 0;
3963
3964         mutex_lock(&trace_types_lock);
3965
3966         t = get_tracer_for_array(tr, trace_types);
3967         for (; t && l < *pos; t = t_next(m, t, &l))
3968                 ;
3969
3970         return t;
3971 }
3972
3973 static void t_stop(struct seq_file *m, void *p)
3974 {
3975         mutex_unlock(&trace_types_lock);
3976 }
3977
3978 static int t_show(struct seq_file *m, void *v)
3979 {
3980         struct tracer *t = v;
3981
3982         if (!t)
3983                 return 0;
3984
3985         seq_puts(m, t->name);
3986         if (t->next)
3987                 seq_putc(m, ' ');
3988         else
3989                 seq_putc(m, '\n');
3990
3991         return 0;
3992 }
3993
3994 static const struct seq_operations show_traces_seq_ops = {
3995         .start          = t_start,
3996         .next           = t_next,
3997         .stop           = t_stop,
3998         .show           = t_show,
3999 };
4000
4001 static int show_traces_open(struct inode *inode, struct file *file)
4002 {
4003         struct trace_array *tr = inode->i_private;
4004         struct seq_file *m;
4005         int ret;
4006
4007         if (tracing_disabled)
4008                 return -ENODEV;
4009
4010         ret = seq_open(file, &show_traces_seq_ops);
4011         if (ret)
4012                 return ret;
4013
4014         m = file->private_data;
4015         m->private = tr;
4016
4017         return 0;
4018 }
4019
4020 static ssize_t
4021 tracing_write_stub(struct file *filp, const char __user *ubuf,
4022                    size_t count, loff_t *ppos)
4023 {
4024         return count;
4025 }
4026
4027 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4028 {
4029         int ret;
4030
4031         if (file->f_mode & FMODE_READ)
4032                 ret = seq_lseek(file, offset, whence);
4033         else
4034                 file->f_pos = ret = 0;
4035
4036         return ret;
4037 }
4038
4039 static const struct file_operations tracing_fops = {
4040         .open           = tracing_open,
4041         .read           = seq_read,
4042         .write          = tracing_write_stub,
4043         .llseek         = tracing_lseek,
4044         .release        = tracing_release,
4045 };
4046
4047 static const struct file_operations show_traces_fops = {
4048         .open           = show_traces_open,
4049         .read           = seq_read,
4050         .release        = seq_release,
4051         .llseek         = seq_lseek,
4052 };
4053
4054 /*
4055  * The tracer itself will not take this lock, but we still want
4056  * to provide a consistent cpumask to user-space:
4057  */
4058 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4059
4060 /*
4061  * Temporary storage for the character representation of the
4062  * CPU bitmask (and one more byte for the newline):
4063  */
4064 static char mask_str[NR_CPUS + 1];
4065
4066 static ssize_t
4067 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4068                      size_t count, loff_t *ppos)
4069 {
4070         struct trace_array *tr = file_inode(filp)->i_private;
4071         int len;
4072
4073         mutex_lock(&tracing_cpumask_update_lock);
4074
4075         len = snprintf(mask_str, count, "%*pb\n",
4076                        cpumask_pr_args(tr->tracing_cpumask));
4077         if (len >= count) {
4078                 count = -EINVAL;
4079                 goto out_err;
4080         }
4081         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4082
4083 out_err:
4084         mutex_unlock(&tracing_cpumask_update_lock);
4085
4086         return count;
4087 }
4088
4089 static ssize_t
4090 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4091                       size_t count, loff_t *ppos)
4092 {
4093         struct trace_array *tr = file_inode(filp)->i_private;
4094         cpumask_var_t tracing_cpumask_new;
4095         int err, cpu;
4096
4097         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4098                 return -ENOMEM;
4099
4100         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4101         if (err)
4102                 goto err_unlock;
4103
4104         mutex_lock(&tracing_cpumask_update_lock);
4105
4106         local_irq_disable();
4107         arch_spin_lock(&tr->max_lock);
4108         for_each_tracing_cpu(cpu) {
4109                 /*
4110                  * Increase/decrease the disabled counter if we are
4111                  * about to flip a bit in the cpumask:
4112                  */
4113                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4114                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4115                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4116                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4117                 }
4118                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4119                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4120                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4121                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4122                 }
4123         }
4124         arch_spin_unlock(&tr->max_lock);
4125         local_irq_enable();
4126
4127         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4128
4129         mutex_unlock(&tracing_cpumask_update_lock);
4130         free_cpumask_var(tracing_cpumask_new);
4131
4132         return count;
4133
4134 err_unlock:
4135         free_cpumask_var(tracing_cpumask_new);
4136
4137         return err;
4138 }
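/*
 * Hedged usage sketch for the tracing_cpumask handlers above.  The file
 * speaks the standard hex cpumask format; the path assumes a tracefs mount
 * at /sys/kernel/tracing:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	3
 */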
4139
4140 static const struct file_operations tracing_cpumask_fops = {
4141         .open           = tracing_open_generic_tr,
4142         .read           = tracing_cpumask_read,
4143         .write          = tracing_cpumask_write,
4144         .release        = tracing_release_generic_tr,
4145         .llseek         = generic_file_llseek,
4146 };
4147
4148 static int tracing_trace_options_show(struct seq_file *m, void *v)
4149 {
4150         struct tracer_opt *trace_opts;
4151         struct trace_array *tr = m->private;
4152         u32 tracer_flags;
4153         int i;
4154
4155         mutex_lock(&trace_types_lock);
4156         tracer_flags = tr->current_trace->flags->val;
4157         trace_opts = tr->current_trace->flags->opts;
4158
4159         for (i = 0; trace_options[i]; i++) {
4160                 if (tr->trace_flags & (1 << i))
4161                         seq_printf(m, "%s\n", trace_options[i]);
4162                 else
4163                         seq_printf(m, "no%s\n", trace_options[i]);
4164         }
4165
4166         for (i = 0; trace_opts[i].name; i++) {
4167                 if (tracer_flags & trace_opts[i].bit)
4168                         seq_printf(m, "%s\n", trace_opts[i].name);
4169                 else
4170                         seq_printf(m, "no%s\n", trace_opts[i].name);
4171         }
4172         mutex_unlock(&trace_types_lock);
4173
4174         return 0;
4175 }
4176
4177 static int __set_tracer_option(struct trace_array *tr,
4178                                struct tracer_flags *tracer_flags,
4179                                struct tracer_opt *opts, int neg)
4180 {
4181         struct tracer *trace = tracer_flags->trace;
4182         int ret;
4183
4184         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4185         if (ret)
4186                 return ret;
4187
4188         if (neg)
4189                 tracer_flags->val &= ~opts->bit;
4190         else
4191                 tracer_flags->val |= opts->bit;
4192         return 0;
4193 }
4194
4195 /* Try to assign a tracer specific option */
4196 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4197 {
4198         struct tracer *trace = tr->current_trace;
4199         struct tracer_flags *tracer_flags = trace->flags;
4200         struct tracer_opt *opts = NULL;
4201         int i;
4202
4203         for (i = 0; tracer_flags->opts[i].name; i++) {
4204                 opts = &tracer_flags->opts[i];
4205
4206                 if (strcmp(cmp, opts->name) == 0)
4207                         return __set_tracer_option(tr, trace->flags, opts, neg);
4208         }
4209
4210         return -EINVAL;
4211 }
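/*
 * Hedged example for the tracer-specific path above: with the function_graph
 * tracer active, writing "nofuncgraph-irqs" to trace_options matches no
 * entry in the global trace_options[] table, so trace_set_options() (below)
 * falls back to set_tracer_option(), which finds the tracer's own
 * "funcgraph-irqs" opt and clears its bit via __set_tracer_option().
 */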
4212
4213 /* Some tracers require overwrite to stay enabled */
4214 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4215 {
4216         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4217                 return -1;
4218
4219         return 0;
4220 }
4221
4222 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4223 {
4224         /* do nothing if flag is already set */
4225         if (!!(tr->trace_flags & mask) == !!enabled)
4226                 return 0;
4227
4228         /* Give the tracer a chance to approve the change */
4229         if (tr->current_trace->flag_changed)
4230                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4231                         return -EINVAL;
4232
4233         if (enabled)
4234                 tr->trace_flags |= mask;
4235         else
4236                 tr->trace_flags &= ~mask;
4237
4238         if (mask == TRACE_ITER_RECORD_CMD)
4239                 trace_event_enable_cmd_record(enabled);
4240
4241         if (mask == TRACE_ITER_EVENT_FORK)
4242                 trace_event_follow_fork(tr, enabled);
4243
4244         if (mask == TRACE_ITER_FUNC_FORK)
4245                 ftrace_pid_follow_fork(tr, enabled);
4246
4247         if (mask == TRACE_ITER_OVERWRITE) {
4248                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4249 #ifdef CONFIG_TRACER_MAX_TRACE
4250                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4251 #endif
4252         }
4253
4254         if (mask == TRACE_ITER_PRINTK) {
4255                 trace_printk_start_stop_comm(enabled);
4256                 trace_printk_control(enabled);
4257         }
4258
4259         return 0;
4260 }
4261
4262 static int trace_set_options(struct trace_array *tr, char *option)
4263 {
4264         char *cmp;
4265         int neg = 0;
4266         int ret = -ENODEV;
4267         int i;
4268         size_t orig_len = strlen(option);
4269
4270         cmp = strstrip(option);
4271
4272         if (strncmp(cmp, "no", 2) == 0) {
4273                 neg = 1;
4274                 cmp += 2;
4275         }
4276
4277         mutex_lock(&trace_types_lock);
4278
4279         for (i = 0; trace_options[i]; i++) {
4280                 if (strcmp(cmp, trace_options[i]) == 0) {
4281                         ret = set_tracer_flag(tr, 1 << i, !neg);
4282                         break;
4283                 }
4284         }
4285
4286         /* If no option could be set, test the specific tracer options */
4287         if (!trace_options[i])
4288                 ret = set_tracer_option(tr, cmp, neg);
4289
4290         mutex_unlock(&trace_types_lock);
4291
4292         /*
4293          * If the first trailing whitespace is replaced with '\0' by strstrip,
4294          * turn it back into a space.
4295          */
4296         if (orig_len > strlen(option))
4297                 option[strlen(option)] = ' ';
4298
4299         return ret;
4300 }
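/*
 * Hedged walk-through of the parsing above: writing "nooverwrite" to
 * trace_options arrives here as option = "nooverwrite"; strstrip() trims any
 * trailing newline, the leading "no" sets neg = 1 and advances cmp to
 * "overwrite", which matches trace_options[] and clears TRACE_ITER_OVERWRITE
 * through set_tracer_flag().
 */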
4301
4302 static void __init apply_trace_boot_options(void)
4303 {
4304         char *buf = trace_boot_options_buf;
4305         char *option;
4306
4307         while (true) {
4308                 option = strsep(&buf, ",");
4309
4310                 if (!option)
4311                         break;
4312
4313                 if (*option)
4314                         trace_set_options(&global_trace, option);
4315
4316                 /* Put back the comma to allow this to be called again */
4317                 if (buf)
4318                         *(buf - 1) = ',';
4319         }
4320 }
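/*
 * Hedged example for the boot-time path above: booting with a command-line
 * option such as
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * (captured elsewhere in this file into trace_boot_options_buf) results in
 * trace_set_options() being applied once per comma-separated token.
 */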
4321
4322 static ssize_t
4323 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4324                         size_t cnt, loff_t *ppos)
4325 {
4326         struct seq_file *m = filp->private_data;
4327         struct trace_array *tr = m->private;
4328         char buf[64];
4329         int ret;
4330
4331         if (cnt >= sizeof(buf))
4332                 return -EINVAL;
4333
4334         if (copy_from_user(buf, ubuf, cnt))
4335                 return -EFAULT;
4336
4337         buf[cnt] = 0;
4338
4339         ret = trace_set_options(tr, buf);
4340         if (ret < 0)
4341                 return ret;
4342
4343         *ppos += cnt;
4344
4345         return cnt;
4346 }
4347
4348 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4349 {
4350         struct trace_array *tr = inode->i_private;
4351         int ret;
4352
4353         if (tracing_disabled)
4354                 return -ENODEV;
4355
4356         if (trace_array_get(tr) < 0)
4357                 return -ENODEV;
4358
4359         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4360         if (ret < 0)
4361                 trace_array_put(tr);
4362
4363         return ret;
4364 }
4365
4366 static const struct file_operations tracing_iter_fops = {
4367         .open           = tracing_trace_options_open,
4368         .read           = seq_read,
4369         .llseek         = seq_lseek,
4370         .release        = tracing_single_release_tr,
4371         .write          = tracing_trace_options_write,
4372 };
4373
4374 static const char readme_msg[] =
4375         "tracing mini-HOWTO:\n\n"
4376         "# echo 0 > tracing_on : quick way to disable tracing\n"
4377         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4378         " Important files:\n"
4379         "  trace\t\t\t- The static contents of the buffer\n"
4380         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4381         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4382         "  current_tracer\t- function and latency tracers\n"
4383         "  available_tracers\t- list of configured tracers for current_tracer\n"
4384         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4385         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4386         "  trace_clock\t\t- change the clock used to order events\n"
4387         "       local:   Per cpu clock but may not be synced across CPUs\n"
4388         "      global:   Synced across CPUs but slows tracing down.\n"
4389         "     counter:   Not a clock, but just an increment\n"
4390         "      uptime:   Jiffy counter from time of boot\n"
4391         "        perf:   Same clock that perf events use\n"
4392 #ifdef CONFIG_X86_64
4393         "     x86-tsc:   TSC cycle counter\n"
4394 #endif
4395         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4396         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4397         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4398         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4399         "\t\t\t  Remove sub-buffer with rmdir\n"
4400         "  trace_options\t\t- Set format or modify how tracing happens\n"
4401         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4402         "\t\t\t  option name\n"
4403         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4404 #ifdef CONFIG_DYNAMIC_FTRACE
4405         "\n  available_filter_functions - list of functions that can be filtered on\n"
4406         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4407         "\t\t\t  functions\n"
4408         "\t     accepts: func_full_name or glob-matching-pattern\n"
4409         "\t     modules: Can select a group via module\n"
4410         "\t      Format: :mod:<module-name>\n"
4411         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4412         "\t    triggers: a command to perform when function is hit\n"
4413         "\t      Format: <function>:<trigger>[:count]\n"
4414         "\t     trigger: traceon, traceoff\n"
4415         "\t\t      enable_event:<system>:<event>\n"
4416         "\t\t      disable_event:<system>:<event>\n"
4417 #ifdef CONFIG_STACKTRACE
4418         "\t\t      stacktrace\n"
4419 #endif
4420 #ifdef CONFIG_TRACER_SNAPSHOT
4421         "\t\t      snapshot\n"
4422 #endif
4423         "\t\t      dump\n"
4424         "\t\t      cpudump\n"
4425         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4426         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4427         "\t     The first one will disable tracing every time do_fault is hit\n"
4428         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4429         "\t       The first time do_trap is hit and it disables tracing, the\n"
4430         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4431         "\t       the counter will not decrement. It only decrements when the\n"
4432         "\t       trigger did work\n"
4433         "\t     To remove trigger without count:\n"
4434         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4435         "\t     To remove trigger with a count:\n"
4436         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4437         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4438         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4439         "\t    modules: Can select a group via module command :mod:\n"
4440         "\t    Does not accept triggers\n"
4441 #endif /* CONFIG_DYNAMIC_FTRACE */
4442 #ifdef CONFIG_FUNCTION_TRACER
4443         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4444         "\t\t    (function)\n"
4445 #endif
4446 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4447         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4448         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4449         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4450 #endif
4451 #ifdef CONFIG_TRACER_SNAPSHOT
4452         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4453         "\t\t\t  snapshot buffer. Read the contents for more\n"
4454         "\t\t\t  information\n"
4455 #endif
4456 #ifdef CONFIG_STACK_TRACER
4457         "  stack_trace\t\t- Shows the max stack trace when active\n"
4458         "  stack_max_size\t- Shows current max stack size that was traced\n"
4459         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4460         "\t\t\t  new trace)\n"
4461 #ifdef CONFIG_DYNAMIC_FTRACE
4462         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4463         "\t\t\t  traces\n"
4464 #endif
4465 #endif /* CONFIG_STACK_TRACER */
4466 #ifdef CONFIG_KPROBE_EVENTS
4467         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4468         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4469 #endif
4470 #ifdef CONFIG_UPROBE_EVENTS
4471         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4472         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4473 #endif
4474 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4475         "\t  accepts: event-definitions (one definition per line)\n"
4476         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4477         "\t           -:[<group>/]<event>\n"
4478 #ifdef CONFIG_KPROBE_EVENTS
4479         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4480         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4481 #endif
4482 #ifdef CONFIG_UPROBE_EVENTS
4483         "\t    place: <path>:<offset>\n"
4484 #endif
4485         "\t     args: <name>=fetcharg[:type]\n"
4486         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4487         "\t           $stack<index>, $stack, $retval, $comm\n"
4488         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4489         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4490 #endif
4491         "  events/\t\t- Directory containing all trace event subsystems:\n"
4492         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4493         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4494         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4495         "\t\t\t  events\n"
4496         "      filter\t\t- If set, only events passing filter are traced\n"
4497         "  events/<system>/<event>/\t- Directory containing control files for\n"
4498         "\t\t\t  <event>:\n"
4499         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4500         "      filter\t\t- If set, only events passing filter are traced\n"
4501         "      trigger\t\t- If set, a command to perform when event is hit\n"
4502         "\t    Format: <trigger>[:count][if <filter>]\n"
4503         "\t   trigger: traceon, traceoff\n"
4504         "\t            enable_event:<system>:<event>\n"
4505         "\t            disable_event:<system>:<event>\n"
4506 #ifdef CONFIG_HIST_TRIGGERS
4507         "\t            enable_hist:<system>:<event>\n"
4508         "\t            disable_hist:<system>:<event>\n"
4509 #endif
4510 #ifdef CONFIG_STACKTRACE
4511         "\t\t    stacktrace\n"
4512 #endif
4513 #ifdef CONFIG_TRACER_SNAPSHOT
4514         "\t\t    snapshot\n"
4515 #endif
4516 #ifdef CONFIG_HIST_TRIGGERS
4517         "\t\t    hist (see below)\n"
4518 #endif
4519         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4520         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4521         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4522         "\t                  events/block/block_unplug/trigger\n"
4523         "\t   The first disables tracing every time block_unplug is hit.\n"
4524         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4525         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4526         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4527         "\t   Like function triggers, the counter is only decremented if it\n"
4528         "\t    enabled or disabled tracing.\n"
4529         "\t   To remove a trigger without a count:\n"
4530         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4531         "\t   To remove a trigger with a count:\n"
4532         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4533         "\t   Filters can be ignored when removing a trigger.\n"
4534 #ifdef CONFIG_HIST_TRIGGERS
4535         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4536         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4537         "\t            [:values=<field1[,field2,...]>]\n"
4538         "\t            [:sort=<field1[,field2,...]>]\n"
4539         "\t            [:size=#entries]\n"
4540         "\t            [:pause][:continue][:clear]\n"
4541         "\t            [:name=histname1]\n"
4542         "\t            [if <filter>]\n\n"
4543         "\t    When a matching event is hit, an entry is added to a hash\n"
4544         "\t    table using the key(s) and value(s) named, and the value of a\n"
4545         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4546         "\t    correspond to fields in the event's format description.  Keys\n"
4547         "\t    can be any field, or the special string 'stacktrace'.\n"
4548         "\t    Compound keys consisting of up to two fields can be specified\n"
4549         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4550         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4551         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4552         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4553         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4554         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4555         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4556         "\t    its histogram data will be shared with other triggers of the\n"
4557         "\t    same name, and trigger hits will update this common data.\n\n"
4558         "\t    Reading the 'hist' file for the event will dump the hash\n"
4559         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4560         "\t    triggers attached to an event, there will be a table for each\n"
4561         "\t    trigger in the output.  The table displayed for a named\n"
4562         "\t    trigger will be the same as any other instance having the\n"
4563         "\t    same name.  The default format used to display a given field\n"
4564         "\t    can be modified by appending any of the following modifiers\n"
4565         "\t    to the field name, as applicable:\n\n"
4566         "\t            .hex        display a number as a hex value\n"
4567         "\t            .sym        display an address as a symbol\n"
4568         "\t            .sym-offset display an address as a symbol and offset\n"
4569         "\t            .execname   display a common_pid as a program name\n"
4570         "\t            .syscall    display a syscall id as a syscall name\n"
4571         "\t            .log2       display log2 value rather than raw number\n\n"
4572         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4573         "\t    trigger or to start a hist trigger but not log any events\n"
4574         "\t    until told to do so.  'continue' can be used to start or\n"
4575         "\t    restart a paused hist trigger.\n\n"
4576         "\t    The 'clear' parameter will clear the contents of a running\n"
4577         "\t    hist trigger and leave its current paused/active state\n"
4578         "\t    unchanged.\n\n"
4579         "\t    The enable_hist and disable_hist triggers can be used to\n"
4580         "\t    have one event conditionally start and stop another event's\n"
4581         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4582         "\t    the enable_event and disable_event triggers.\n"
4583 #endif
4584 ;
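/*
 * Hedged, self-contained userspace sketch (illustration only, never built as
 * part of trace.c) exercising a few of the files documented in readme_msg
 * above.  The tracefs mount point and the choice of the "function" tracer
 * are assumptions about the running system.
 */
#if 0	/* example userspace program, kept out of the kernel build */
#include <stdio.h>

#define TRACEFS "/sys/kernel/tracing"

/* write a small string value into one of the tracefs control files */
static int write_str(const char *file, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), TRACEFS "/%s", file);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* pick a tracer, enable tracing, drop a marker, then stop tracing */
	write_str("current_tracer", "function");
	write_str("tracing_on", "1");
	write_str("trace_marker", "hello from userspace\n");
	write_str("tracing_on", "0");
	/* the recorded events can now be read back from TRACEFS "/trace" */
	return 0;
}
#endif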
4585
4586 static ssize_t
4587 tracing_readme_read(struct file *filp, char __user *ubuf,
4588                        size_t cnt, loff_t *ppos)
4589 {
4590         return simple_read_from_buffer(ubuf, cnt, ppos,
4591                                         readme_msg, strlen(readme_msg));
4592 }
4593
4594 static const struct file_operations tracing_readme_fops = {
4595         .open           = tracing_open_generic,
4596         .read           = tracing_readme_read,
4597         .llseek         = generic_file_llseek,
4598 };
4599
4600 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4601 {
4602         unsigned int *ptr = v;
4603
4604         if (*pos || m->count)
4605                 ptr++;
4606
4607         (*pos)++;
4608
4609         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4610              ptr++) {
4611                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4612                         continue;
4613
4614                 return ptr;
4615         }
4616
4617         return NULL;
4618 }
4619
4620 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4621 {
4622         void *v;
4623         loff_t l = 0;
4624
4625         preempt_disable();
4626         arch_spin_lock(&trace_cmdline_lock);
4627
4628         v = &savedcmd->map_cmdline_to_pid[0];
4629         while (l <= *pos) {
4630                 v = saved_cmdlines_next(m, v, &l);
4631                 if (!v)
4632                         return NULL;
4633         }
4634
4635         return v;
4636 }
4637
4638 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4639 {
4640         arch_spin_unlock(&trace_cmdline_lock);
4641         preempt_enable();
4642 }
4643
4644 static int saved_cmdlines_show(struct seq_file *m, void *v)
4645 {
4646         char buf[TASK_COMM_LEN];
4647         unsigned int *pid = v;
4648
4649         __trace_find_cmdline(*pid, buf);
4650         seq_printf(m, "%d %s\n", *pid, buf);
4651         return 0;
4652 }
4653
4654 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4655         .start          = saved_cmdlines_start,
4656         .next           = saved_cmdlines_next,
4657         .stop           = saved_cmdlines_stop,
4658         .show           = saved_cmdlines_show,
4659 };
4660
4661 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4662 {
4663         if (tracing_disabled)
4664                 return -ENODEV;
4665
4666         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4667 }
4668
4669 static const struct file_operations tracing_saved_cmdlines_fops = {
4670         .open           = tracing_saved_cmdlines_open,
4671         .read           = seq_read,
4672         .llseek         = seq_lseek,
4673         .release        = seq_release,
4674 };
4675
4676 static ssize_t
4677 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4678                                  size_t cnt, loff_t *ppos)
4679 {
4680         char buf[64];
4681         int r;
4682
4683         arch_spin_lock(&trace_cmdline_lock);
4684         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4685         arch_spin_unlock(&trace_cmdline_lock);
4686
4687         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4688 }
4689
4690 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4691 {
4692         kfree(s->saved_cmdlines);
4693         kfree(s->map_cmdline_to_pid);
4694         kfree(s);
4695 }
4696
4697 static int tracing_resize_saved_cmdlines(unsigned int val)
4698 {
4699         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4700
4701         s = kmalloc(sizeof(*s), GFP_KERNEL);
4702         if (!s)
4703                 return -ENOMEM;
4704
4705         if (allocate_cmdlines_buffer(val, s) < 0) {
4706                 kfree(s);
4707                 return -ENOMEM;
4708         }
4709
4710         arch_spin_lock(&trace_cmdline_lock);
4711         savedcmd_temp = savedcmd;
4712         savedcmd = s;
4713         arch_spin_unlock(&trace_cmdline_lock);
4714         free_saved_cmdlines_buffer(savedcmd_temp);
4715
4716         return 0;
4717 }
4718
4719 static ssize_t
4720 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4721                                   size_t cnt, loff_t *ppos)
4722 {
4723         unsigned long val;
4724         int ret;
4725
4726         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4727         if (ret)
4728                 return ret;
4729
4730         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4731         if (!val || val > PID_MAX_DEFAULT)
4732                 return -EINVAL;
4733
4734         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4735         if (ret < 0)
4736                 return ret;
4737
4738         *ppos += cnt;
4739
4740         return cnt;
4741 }
4742
4743 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4744         .open           = tracing_open_generic,
4745         .read           = tracing_saved_cmdlines_size_read,
4746         .write          = tracing_saved_cmdlines_size_write,
4747 };
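/*
 * Usage sketch (assumptions: these two sets of file_operations back the
 * usual "saved_cmdlines" and "saved_cmdlines_size" tracefs files, and
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/saved_cmdlines
 *		prints "<pid> <comm>" pairs, in the format produced by
 *		saved_cmdlines_show() above.
 *
 *	echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *		resizes the pid->comm cache (1 .. PID_MAX_DEFAULT entries)
 *		via tracing_resize_saved_cmdlines() above.
 */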
4748
4749 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4750 static union trace_enum_map_item *
4751 update_enum_map(union trace_enum_map_item *ptr)
4752 {
4753         if (!ptr->map.enum_string) {
4754                 if (ptr->tail.next) {
4755                         ptr = ptr->tail.next;
4756                         /* Set ptr to the next real item (skip head) */
4757                         ptr++;
4758                 } else
4759                         return NULL;
4760         }
4761         return ptr;
4762 }
4763
4764 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4765 {
4766         union trace_enum_map_item *ptr = v;
4767
4768         /*
4769          * Paranoid! If ptr points to end, we don't want to increment past it.
4770          * This really should never happen.
4771          */
4772         ptr = update_enum_map(ptr);
4773         if (WARN_ON_ONCE(!ptr))
4774                 return NULL;
4775
4776         ptr++;
4777
4778         (*pos)++;
4779
4780         ptr = update_enum_map(ptr);
4781
4782         return ptr;
4783 }
4784
4785 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4786 {
4787         union trace_enum_map_item *v;
4788         loff_t l = 0;
4789
4790         mutex_lock(&trace_enum_mutex);
4791
4792         v = trace_enum_maps;
4793         if (v)
4794                 v++;
4795
4796         while (v && l < *pos) {
4797                 v = enum_map_next(m, v, &l);
4798         }
4799
4800         return v;
4801 }
4802
4803 static void enum_map_stop(struct seq_file *m, void *v)
4804 {
4805         mutex_unlock(&trace_enum_mutex);
4806 }
4807
4808 static int enum_map_show(struct seq_file *m, void *v)
4809 {
4810         union trace_enum_map_item *ptr = v;
4811
4812         seq_printf(m, "%s %ld (%s)\n",
4813                    ptr->map.enum_string, ptr->map.enum_value,
4814                    ptr->map.system);
4815
4816         return 0;
4817 }
4818
4819 static const struct seq_operations tracing_enum_map_seq_ops = {
4820         .start          = enum_map_start,
4821         .next           = enum_map_next,
4822         .stop           = enum_map_stop,
4823         .show           = enum_map_show,
4824 };
4825
4826 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4827 {
4828         if (tracing_disabled)
4829                 return -ENODEV;
4830
4831         return seq_open(filp, &tracing_enum_map_seq_ops);
4832 }
4833
4834 static const struct file_operations tracing_enum_map_fops = {
4835         .open           = tracing_enum_map_open,
4836         .read           = seq_read,
4837         .llseek         = seq_lseek,
4838         .release        = seq_release,
4839 };
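/*
 * Usage sketch: trace_create_enum_file() below exposes these operations
 * as the "enum_map" file in the tracefs root.  Assuming tracefs is
 * mounted at /sys/kernel/tracing, "cat /sys/kernel/tracing/enum_map"
 * prints one "NAME VALUE (system)" line per map entry, in the format
 * produced by enum_map_show() above.
 */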
4840
4841 static inline union trace_enum_map_item *
4842 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4843 {
4844         /* Return tail of array given the head */
4845         return ptr + ptr->head.length + 1;
4846 }
4847
4848 static void
4849 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4850                            int len)
4851 {
4852         struct trace_enum_map **stop;
4853         struct trace_enum_map **map;
4854         union trace_enum_map_item *map_array;
4855         union trace_enum_map_item *ptr;
4856
4857         stop = start + len;
4858
4859         /*
4860          * Each block on the trace_enum_maps list contains the maps plus a
4861          * head and a tail item: the head holds the module and the length of
4862          * the array, and the tail holds a pointer to the next block.
4863          */
4864         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4865         if (!map_array) {
4866                 pr_warn("Unable to allocate trace enum mapping\n");
4867                 return;
4868         }
4869
4870         mutex_lock(&trace_enum_mutex);
4871
4872         if (!trace_enum_maps)
4873                 trace_enum_maps = map_array;
4874         else {
4875                 ptr = trace_enum_maps;
4876                 for (;;) {
4877                         ptr = trace_enum_jmp_to_tail(ptr);
4878                         if (!ptr->tail.next)
4879                                 break;
4880                         ptr = ptr->tail.next;
4881
4882                 }
4883                 ptr->tail.next = map_array;
4884         }
4885         map_array->head.mod = mod;
4886         map_array->head.length = len;
4887         map_array++;
4888
4889         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4890                 map_array->map = **map;
4891                 map_array++;
4892         }
4893         memset(map_array, 0, sizeof(*map_array));
4894
4895         mutex_unlock(&trace_enum_mutex);
4896 }
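/*
 * Layout of one map_array block built above (len + 2 items):
 *
 *	[0]		head: .mod and .length = len
 *	[1 .. len]	map:  one trace_enum_map copied per entry
 *	[len + 1]	tail: zeroed; .next links to the next block, if any
 *
 * trace_enum_jmp_to_tail() relies on this layout to hop from a block's
 * head straight to its tail.
 */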
4897
4898 static void trace_create_enum_file(struct dentry *d_tracer)
4899 {
4900         trace_create_file("enum_map", 0444, d_tracer,
4901                           NULL, &tracing_enum_map_fops);
4902 }
4903
4904 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4905 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4906 static inline void trace_insert_enum_map_file(struct module *mod,
4907                               struct trace_enum_map **start, int len) { }
4908 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4909
4910 static void trace_insert_enum_map(struct module *mod,
4911                                   struct trace_enum_map **start, int len)
4912 {
4913         struct trace_enum_map **map;
4914
4915         if (len <= 0)
4916                 return;
4917
4918         map = start;
4919
4920         trace_event_enum_update(map, len);
4921
4922         trace_insert_enum_map_file(mod, start, len);
4923 }
4924
4925 static ssize_t
4926 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4927                        size_t cnt, loff_t *ppos)
4928 {
4929         struct trace_array *tr = filp->private_data;
4930         char buf[MAX_TRACER_SIZE+2];
4931         int r;
4932
4933         mutex_lock(&trace_types_lock);
4934         r = sprintf(buf, "%s\n", tr->current_trace->name);
4935         mutex_unlock(&trace_types_lock);
4936
4937         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4938 }
4939
4940 int tracer_init(struct tracer *t, struct trace_array *tr)
4941 {
4942         tracing_reset_online_cpus(&tr->trace_buffer);
4943         return t->init(tr);
4944 }
4945
4946 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4947 {
4948         int cpu;
4949
4950         for_each_tracing_cpu(cpu)
4951                 per_cpu_ptr(buf->data, cpu)->entries = val;
4952 }
4953
4954 #ifdef CONFIG_TRACER_MAX_TRACE
4955 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4956 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4957                                         struct trace_buffer *size_buf, int cpu_id)
4958 {
4959         int cpu, ret = 0;
4960
4961         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4962                 for_each_tracing_cpu(cpu) {
4963                         ret = ring_buffer_resize(trace_buf->buffer,
4964                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4965                         if (ret < 0)
4966                                 break;
4967                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4968                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4969                 }
4970         } else {
4971                 ret = ring_buffer_resize(trace_buf->buffer,
4972                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4973                 if (ret == 0)
4974                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4975                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4976         }
4977
4978         return ret;
4979 }
4980 #endif /* CONFIG_TRACER_MAX_TRACE */
4981
4982 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4983                                         unsigned long size, int cpu)
4984 {
4985         int ret;
4986
4987         /*
4988          * If kernel or user changes the size of the ring buffer
4989          * we use the size that was given, and we can forget about
4990          * expanding it later.
4991          */
4992         ring_buffer_expanded = true;
4993
4994         /* May be called before buffers are initialized */
4995         if (!tr->trace_buffer.buffer)
4996                 return 0;
4997
4998         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4999         if (ret < 0)
5000                 return ret;
5001
5002 #ifdef CONFIG_TRACER_MAX_TRACE
5003         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5004             !tr->current_trace->use_max_tr)
5005                 goto out;
5006
5007         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5008         if (ret < 0) {
5009                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5010                                                      &tr->trace_buffer, cpu);
5011                 if (r < 0) {
5012                         /*
5013                          * AARGH! We are left with a max buffer of a
5014                          * different size!
5015                          * The max buffer is our "snapshot" buffer.
5016                          * When a tracer needs a snapshot (one of the
5017                          * latency tracers), it swaps the max buffer
5018                          * with the saved snapshot. We succeeded in
5019                          * updating the size of the main buffer, but
5020                          * failed to update the size of the max buffer.
5021                          * Then, when we tried to reset the main buffer
5022                          * to its original size, we failed there too.
5023                          * This is very unlikely to happen, but if it
5024                          * does, warn and kill all tracing.
5025                          */
5026                         WARN_ON(1);
5027                         tracing_disabled = 1;
5028                 }
5029                 return ret;
5030         }
5031
5032         if (cpu == RING_BUFFER_ALL_CPUS)
5033                 set_buffer_entries(&tr->max_buffer, size);
5034         else
5035                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5036
5037  out:
5038 #endif /* CONFIG_TRACER_MAX_TRACE */
5039
5040         if (cpu == RING_BUFFER_ALL_CPUS)
5041                 set_buffer_entries(&tr->trace_buffer, size);
5042         else
5043                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5044
5045         return ret;
5046 }
5047
5048 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5049                                           unsigned long size, int cpu_id)
5050 {
5051         int ret = size;
5052
5053         mutex_lock(&trace_types_lock);
5054
5055         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5056                 /* make sure this cpu is enabled in tracing_buffer_mask */
5057                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5058                         ret = -EINVAL;
5059                         goto out;
5060                 }
5061         }
5062
5063         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5064         if (ret < 0)
5065                 ret = -ENOMEM;
5066
5067 out:
5068         mutex_unlock(&trace_types_lock);
5069
5070         return ret;
5071 }
5072
5073
5074 /**
5075  * tracing_update_buffers - used by tracing facility to expand ring buffers
5076  *
5077  * To save memory on systems where tracing is configured in but never
5078  * used, the ring buffers start out at a minimum size. Once a user
5079  * starts to use the tracing facility, they need to grow to their
5080  * default size.
5081  *
5082  * This function is to be called when a tracer is about to be used.
5083  */
5084 int tracing_update_buffers(void)
5085 {
5086         int ret = 0;
5087
5088         mutex_lock(&trace_types_lock);
5089         if (!ring_buffer_expanded)
5090                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5091                                                 RING_BUFFER_ALL_CPUS);
5092         mutex_unlock(&trace_types_lock);
5093
5094         return ret;
5095 }
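/*
 * Typical call pattern (sketch; the real callers live outside this
 * file, for example on the event-enable path):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	(the ring buffers are now at their full size, so it is safe to
 *	 start generating trace data)
 */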
5096
5097 struct trace_option_dentry;
5098
5099 static void
5100 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5101
5102 /*
5103  * Used to clear out the tracer before deletion of an instance.
5104  * Must have trace_types_lock held.
5105  */
5106 static void tracing_set_nop(struct trace_array *tr)
5107 {
5108         if (tr->current_trace == &nop_trace)
5109                 return;
5110
5111         tr->current_trace->enabled--;
5112
5113         if (tr->current_trace->reset)
5114                 tr->current_trace->reset(tr);
5115
5116         tr->current_trace = &nop_trace;
5117 }
5118
5119 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5120 {
5121         /* Only enable if the directory has been created already. */
5122         if (!tr->dir)
5123                 return;
5124
5125         create_trace_option_files(tr, t);
5126 }
5127
5128 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5129 {
5130         struct tracer *t;
5131 #ifdef CONFIG_TRACER_MAX_TRACE
5132         bool had_max_tr;
5133 #endif
5134         int ret = 0;
5135
5136         mutex_lock(&trace_types_lock);
5137
5138         if (!ring_buffer_expanded) {
5139                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5140                                                 RING_BUFFER_ALL_CPUS);
5141                 if (ret < 0)
5142                         goto out;
5143                 ret = 0;
5144         }
5145
5146         for (t = trace_types; t; t = t->next) {
5147                 if (strcmp(t->name, buf) == 0)
5148                         break;
5149         }
5150         if (!t) {
5151                 ret = -EINVAL;
5152                 goto out;
5153         }
5154         if (t == tr->current_trace)
5155                 goto out;
5156
5157         /* Some tracers are only allowed for the top level buffer */
5158         if (!trace_ok_for_array(t, tr)) {
5159                 ret = -EINVAL;
5160                 goto out;
5161         }
5162
5163         /* If trace pipe files are being read, we can't change the tracer */
5164         if (tr->current_trace->ref) {
5165                 ret = -EBUSY;
5166                 goto out;
5167         }
5168
5169         trace_branch_disable();
5170
5171         tr->current_trace->enabled--;
5172
5173         if (tr->current_trace->reset)
5174                 tr->current_trace->reset(tr);
5175
5176         /* Current trace needs to be nop_trace before synchronize_sched */
5177         tr->current_trace = &nop_trace;
5178
5179 #ifdef CONFIG_TRACER_MAX_TRACE
5180         had_max_tr = tr->allocated_snapshot;
5181
5182         if (had_max_tr && !t->use_max_tr) {
5183                 /*
5184                  * We need to make sure that the update_max_tr sees that
5185                  * current_trace changed to nop_trace to keep it from
5186                  * swapping the buffers after we resize it.
5187                  * update_max_tr() is called with interrupts disabled,
5188                  * so a synchronize_sched() is sufficient.
5189                  */
5190                 synchronize_sched();
5191                 free_snapshot(tr);
5192         }
5193 #endif
5194
5195 #ifdef CONFIG_TRACER_MAX_TRACE
5196         if (t->use_max_tr && !had_max_tr) {
5197                 ret = alloc_snapshot(tr);
5198                 if (ret < 0)
5199                         goto out;
5200         }
5201 #endif
5202
5203         if (t->init) {
5204                 ret = tracer_init(t, tr);
5205                 if (ret)
5206                         goto out;
5207         }
5208
5209         tr->current_trace = t;
5210         tr->current_trace->enabled++;
5211         trace_branch_enable(tr);
5212  out:
5213         mutex_unlock(&trace_types_lock);
5214
5215         return ret;
5216 }
5217
5218 static ssize_t
5219 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5220                         size_t cnt, loff_t *ppos)
5221 {
5222         struct trace_array *tr = filp->private_data;
5223         char buf[MAX_TRACER_SIZE+1];
5224         int i;
5225         size_t ret;
5226         int err;
5227
5228         ret = cnt;
5229
5230         if (cnt > MAX_TRACER_SIZE)
5231                 cnt = MAX_TRACER_SIZE;
5232
5233         if (copy_from_user(buf, ubuf, cnt))
5234                 return -EFAULT;
5235
5236         buf[cnt] = 0;
5237
5238         /* strip trailing whitespace */
5239         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5240                 buf[i] = 0;
5241
5242         err = tracing_set_tracer(tr, buf);
5243         if (err)
5244                 return err;
5245
5246         *ppos += ret;
5247
5248         return ret;
5249 }
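/*
 * Usage sketch: set_tracer_fops below is what normally backs the
 * "current_tracer" tracefs file, so (assuming tracefs is mounted at
 * /sys/kernel/tracing, and using any name listed in available_tracers):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *
 * ends up in tracing_set_tracer() above, which walks trace_types and
 * rejects unknown tracer names with -EINVAL.
 */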
5250
5251 static ssize_t
5252 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5253                    size_t cnt, loff_t *ppos)
5254 {
5255         char buf[64];
5256         int r;
5257
5258         r = snprintf(buf, sizeof(buf), "%ld\n",
5259                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5260         if (r > sizeof(buf))
5261                 r = sizeof(buf);
5262         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5263 }
5264
5265 static ssize_t
5266 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5267                     size_t cnt, loff_t *ppos)
5268 {
5269         unsigned long val;
5270         int ret;
5271
5272         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5273         if (ret)
5274                 return ret;
5275
5276         *ptr = val * 1000;
5277
5278         return cnt;
5279 }
5280
5281 static ssize_t
5282 tracing_thresh_read(struct file *filp, char __user *ubuf,
5283                     size_t cnt, loff_t *ppos)
5284 {
5285         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5286 }
5287
5288 static ssize_t
5289 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5290                      size_t cnt, loff_t *ppos)
5291 {
5292         struct trace_array *tr = filp->private_data;
5293         int ret;
5294
5295         mutex_lock(&trace_types_lock);
5296         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5297         if (ret < 0)
5298                 goto out;
5299
5300         if (tr->current_trace->update_thresh) {
5301                 ret = tr->current_trace->update_thresh(tr);
5302                 if (ret < 0)
5303                         goto out;
5304         }
5305
5306         ret = cnt;
5307 out:
5308         mutex_unlock(&trace_types_lock);
5309
5310         return ret;
5311 }
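/*
 * Note: tracing_nsecs_write() above multiplies the user's value by 1000,
 * so the files that use these helpers ("tracing_thresh" and, below,
 * "tracing_max_latency") take microseconds from user space even though
 * the values are stored in nanoseconds.  For example,
 * "echo 100 > tracing_thresh" sets a 100 usec threshold (the file names
 * assume the usual tracefs layout).
 */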
5312
5313 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5314
5315 static ssize_t
5316 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5317                      size_t cnt, loff_t *ppos)
5318 {
5319         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5320 }
5321
5322 static ssize_t
5323 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5324                       size_t cnt, loff_t *ppos)
5325 {
5326         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5327 }
5328
5329 #endif
5330
5331 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5332 {
5333         struct trace_array *tr = inode->i_private;
5334         struct trace_iterator *iter;
5335         int ret = 0;
5336
5337         if (tracing_disabled)
5338                 return -ENODEV;
5339
5340         if (trace_array_get(tr) < 0)
5341                 return -ENODEV;
5342
5343         mutex_lock(&trace_types_lock);
5344
5345         /* create a buffer to store the information to pass to userspace */
5346         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5347         if (!iter) {
5348                 ret = -ENOMEM;
5349                 __trace_array_put(tr);
5350                 goto out;
5351         }
5352
5353         trace_seq_init(&iter->seq);
5354         iter->trace = tr->current_trace;
5355
5356         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5357                 ret = -ENOMEM;
5358                 goto fail;
5359         }
5360
5361         /* trace pipe does not show start of buffer */
5362         cpumask_setall(iter->started);
5363
5364         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5365                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5366
5367         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5368         if (trace_clocks[tr->clock_id].in_ns)
5369                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5370
5371         iter->tr = tr;
5372         iter->trace_buffer = &tr->trace_buffer;
5373         iter->cpu_file = tracing_get_cpu(inode);
5374         mutex_init(&iter->mutex);
5375         filp->private_data = iter;
5376
5377         if (iter->trace->pipe_open)
5378                 iter->trace->pipe_open(iter);
5379
5380         nonseekable_open(inode, filp);
5381
5382         tr->current_trace->ref++;
5383 out:
5384         mutex_unlock(&trace_types_lock);
5385         return ret;
5386
5387 fail:
5388         /* iter->trace only points at tr->current_trace; do not free it */
5389         kfree(iter);
5390         __trace_array_put(tr);
5391         mutex_unlock(&trace_types_lock);
5392         return ret;
5393 }
5394
5395 static int tracing_release_pipe(struct inode *inode, struct file *file)
5396 {
5397         struct trace_iterator *iter = file->private_data;
5398         struct trace_array *tr = inode->i_private;
5399
5400         mutex_lock(&trace_types_lock);
5401
5402         tr->current_trace->ref--;
5403
5404         if (iter->trace->pipe_close)
5405                 iter->trace->pipe_close(iter);
5406
5407         mutex_unlock(&trace_types_lock);
5408
5409         free_cpumask_var(iter->started);
5410         mutex_destroy(&iter->mutex);
5411         kfree(iter);
5412
5413         trace_array_put(tr);
5414
5415         return 0;
5416 }
5417
5418 static unsigned int
5419 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5420 {
5421         struct trace_array *tr = iter->tr;
5422
5423         /* Iterators are static, they should be filled or empty */
5424         if (trace_buffer_iter(iter, iter->cpu_file))
5425                 return POLLIN | POLLRDNORM;
5426
5427         if (tr->trace_flags & TRACE_ITER_BLOCK)
5428                 /*
5429                  * Always select as readable when in blocking mode
5430                  */
5431                 return POLLIN | POLLRDNORM;
5432         else
5433                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5434                                              filp, poll_table);
5435 }
5436
5437 static unsigned int
5438 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5439 {
5440         struct trace_iterator *iter = filp->private_data;
5441
5442         return trace_poll(iter, filp, poll_table);
5443 }
5444
5445 /* Must be called with iter->mutex held. */
5446 static int tracing_wait_pipe(struct file *filp)
5447 {
5448         struct trace_iterator *iter = filp->private_data;
5449         int ret;
5450
5451         while (trace_empty(iter)) {
5452
5453                 if ((filp->f_flags & O_NONBLOCK)) {
5454                         return -EAGAIN;
5455                 }
5456
5457                 /*
5458                  * We keep blocking while the trace is empty, unless we have
5459                  * already read something and tracing has been disabled. We
5460                  * still block while tracing is disabled if we have never
5461                  * read anything: this lets a user cat this file and then
5462                  * enable tracing. But once we have read something, we give an EOF when tracing is disabled again.
5463                  *
5464                  * iter->pos will be 0 if we haven't read anything.
5465                  */
5466                 if (!tracing_is_on() && iter->pos)
5467                         break;
5468
5469                 mutex_unlock(&iter->mutex);
5470
5471                 ret = wait_on_pipe(iter, false);
5472
5473                 mutex_lock(&iter->mutex);
5474
5475                 if (ret)
5476                         return ret;
5477         }
5478
5479         return 1;
5480 }
5481
5482 /*
5483  * Consumer reader.
5484  */
5485 static ssize_t
5486 tracing_read_pipe(struct file *filp, char __user *ubuf,
5487                   size_t cnt, loff_t *ppos)
5488 {
5489         struct trace_iterator *iter = filp->private_data;
5490         ssize_t sret;
5491
5492         /*
5493          * Avoid more than one consumer on a single file descriptor.
5494          * This is just a matter of trace coherency; the ring buffer
5495          * itself is protected.
5496          */
5497         mutex_lock(&iter->mutex);
5498
5499         /* return any leftover data */
5500         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5501         if (sret != -EBUSY)
5502                 goto out;
5503
5504         trace_seq_init(&iter->seq);
5505
5506         if (iter->trace->read) {
5507                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5508                 if (sret)
5509                         goto out;
5510         }
5511
5512 waitagain:
5513         sret = tracing_wait_pipe(filp);
5514         if (sret <= 0)
5515                 goto out;
5516
5517         /* stop when tracing is finished */
5518         if (trace_empty(iter)) {
5519                 sret = 0;
5520                 goto out;
5521         }
5522
5523         if (cnt >= PAGE_SIZE)
5524                 cnt = PAGE_SIZE - 1;
5525
5526         /* reset all but tr, trace, and overruns */
5527         memset(&iter->seq, 0,
5528                sizeof(struct trace_iterator) -
5529                offsetof(struct trace_iterator, seq));
5530         cpumask_clear(iter->started);
5531         iter->pos = -1;
5532
5533         trace_event_read_lock();
5534         trace_access_lock(iter->cpu_file);
5535         while (trace_find_next_entry_inc(iter) != NULL) {
5536                 enum print_line_t ret;
5537                 int save_len = iter->seq.seq.len;
5538
5539                 ret = print_trace_line(iter);
5540                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5541                         /* don't print partial lines */
5542                         iter->seq.seq.len = save_len;
5543                         break;
5544                 }
5545                 if (ret != TRACE_TYPE_NO_CONSUME)
5546                         trace_consume(iter);
5547
5548                 if (trace_seq_used(&iter->seq) >= cnt)
5549                         break;
5550
5551                 /*
5552                  * The full flag being set means we hit the trace_seq buffer
5553                  * size limit, so we should have left via the partial-line
5554                  * check above. One of the trace_seq_* functions is not being used properly.
5555                  */
5556                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5557                           iter->ent->type);
5558         }
5559         trace_access_unlock(iter->cpu_file);
5560         trace_event_read_unlock();
5561
5562         /* Now copy what we have to the user */
5563         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5564         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5565                 trace_seq_init(&iter->seq);
5566
5567         /*
5568          * If there was nothing to send to the user, in spite of consuming trace
5569          * entries, go back to wait for more entries.
5570          */
5571         if (sret == -EBUSY)
5572                 goto waitagain;
5573
5574 out:
5575         mutex_unlock(&iter->mutex);
5576
5577         return sret;
5578 }
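/*
 * Reader semantics sketch: this is the consuming read used by the
 * "trace_pipe" file (see tracing_pipe_fops below).  Entries are removed
 * from the ring buffer as they are formatted (trace_consume() above),
 * and a blocking reader sleeps in tracing_wait_pipe() until new entries
 * arrive, so a plain "cat trace_pipe" streams events as they are
 * generated (assuming tracefs is mounted).
 */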
5579
5580 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5581                                      unsigned int idx)
5582 {
5583         __free_page(spd->pages[idx]);
5584 }
5585
5586 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5587         .can_merge              = 0,
5588         .confirm                = generic_pipe_buf_confirm,
5589         .release                = generic_pipe_buf_release,
5590         .steal                  = generic_pipe_buf_steal,
5591         .get                    = generic_pipe_buf_get,
5592 };
5593
5594 static size_t
5595 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5596 {
5597         size_t count;
5598         int save_len;
5599         int ret;
5600
5601         /* Seq buffer is page-sized, exactly what we need. */
5602         for (;;) {
5603                 save_len = iter->seq.seq.len;
5604                 ret = print_trace_line(iter);
5605
5606                 if (trace_seq_has_overflowed(&iter->seq)) {
5607                         iter->seq.seq.len = save_len;
5608                         break;
5609                 }
5610
5611                 /*
5612                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5613                  * should only be returned if iter->seq overflowed, which is
5614                  * caught above. But check it anyway to be safe.
5615                  */
5616                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5617                         iter->seq.seq.len = save_len;
5618                         break;
5619                 }
5620
5621                 count = trace_seq_used(&iter->seq) - save_len;
5622                 if (rem < count) {
5623                         rem = 0;
5624                         iter->seq.seq.len = save_len;
5625                         break;
5626                 }
5627
5628                 if (ret != TRACE_TYPE_NO_CONSUME)
5629                         trace_consume(iter);
5630                 rem -= count;
5631                 if (!trace_find_next_entry_inc(iter))   {
5632                         rem = 0;
5633                         iter->ent = NULL;
5634                         break;
5635                 }
5636         }
5637
5638         return rem;
5639 }
5640
5641 static ssize_t tracing_splice_read_pipe(struct file *filp,
5642                                         loff_t *ppos,
5643                                         struct pipe_inode_info *pipe,
5644                                         size_t len,
5645                                         unsigned int flags)
5646 {
5647         struct page *pages_def[PIPE_DEF_BUFFERS];
5648         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5649         struct trace_iterator *iter = filp->private_data;
5650         struct splice_pipe_desc spd = {
5651                 .pages          = pages_def,
5652                 .partial        = partial_def,
5653                 .nr_pages       = 0, /* This gets updated below. */
5654                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5655                 .ops            = &tracing_pipe_buf_ops,
5656                 .spd_release    = tracing_spd_release_pipe,
5657         };
5658         ssize_t ret;
5659         size_t rem;
5660         unsigned int i;
5661
5662         if (splice_grow_spd(pipe, &spd))
5663                 return -ENOMEM;
5664
5665         mutex_lock(&iter->mutex);
5666
5667         if (iter->trace->splice_read) {
5668                 ret = iter->trace->splice_read(iter, filp,
5669                                                ppos, pipe, len, flags);
5670                 if (ret)
5671                         goto out_err;
5672         }
5673
5674         ret = tracing_wait_pipe(filp);
5675         if (ret <= 0)
5676                 goto out_err;
5677
5678         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5679                 ret = -EFAULT;
5680                 goto out_err;
5681         }
5682
5683         trace_event_read_lock();
5684         trace_access_lock(iter->cpu_file);
5685
5686         /* Fill as many pages as possible. */
5687         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5688                 spd.pages[i] = alloc_page(GFP_KERNEL);
5689                 if (!spd.pages[i])
5690                         break;
5691
5692                 rem = tracing_fill_pipe_page(rem, iter);
5693
5694                 /* Copy the data into the page, so we can start over. */
5695                 ret = trace_seq_to_buffer(&iter->seq,
5696                                           page_address(spd.pages[i]),
5697                                           trace_seq_used(&iter->seq));
5698                 if (ret < 0) {
5699                         __free_page(spd.pages[i]);
5700                         break;
5701                 }
5702                 spd.partial[i].offset = 0;
5703                 spd.partial[i].len = trace_seq_used(&iter->seq);
5704
5705                 trace_seq_init(&iter->seq);
5706         }
5707
5708         trace_access_unlock(iter->cpu_file);
5709         trace_event_read_unlock();
5710         mutex_unlock(&iter->mutex);
5711
5712         spd.nr_pages = i;
5713
5714         if (i)
5715                 ret = splice_to_pipe(pipe, &spd);
5716         else
5717                 ret = 0;
5718 out:
5719         splice_shrink_spd(&spd);
5720         return ret;
5721
5722 out_err:
5723         mutex_unlock(&iter->mutex);
5724         goto out;
5725 }
5726
5727 static ssize_t
5728 tracing_entries_read(struct file *filp, char __user *ubuf,
5729                      size_t cnt, loff_t *ppos)
5730 {
5731         struct inode *inode = file_inode(filp);
5732         struct trace_array *tr = inode->i_private;
5733         int cpu = tracing_get_cpu(inode);
5734         char buf[64];
5735         int r = 0;
5736         ssize_t ret;
5737
5738         mutex_lock(&trace_types_lock);
5739
5740         if (cpu == RING_BUFFER_ALL_CPUS) {
5741                 int cpu, buf_size_same;
5742                 unsigned long size;
5743
5744                 size = 0;
5745                 buf_size_same = 1;
5746                 /* check if all cpu sizes are the same */
5747                 for_each_tracing_cpu(cpu) {
5748                         /* fill in the size from the first enabled cpu */
5749                         if (size == 0)
5750                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5751                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5752                                 buf_size_same = 0;
5753                                 break;
5754                         }
5755                 }
5756
5757                 if (buf_size_same) {
5758                         if (!ring_buffer_expanded)
5759                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5760                                             size >> 10,
5761                                             trace_buf_size >> 10);
5762                         else
5763                                 r = sprintf(buf, "%lu\n", size >> 10);
5764                 } else
5765                         r = sprintf(buf, "X\n");
5766         } else
5767                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5768
5769         mutex_unlock(&trace_types_lock);
5770
5771         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5772         return ret;
5773 }
5774
5775 static ssize_t
5776 tracing_entries_write(struct file *filp, const char __user *ubuf,
5777                       size_t cnt, loff_t *ppos)
5778 {
5779         struct inode *inode = file_inode(filp);
5780         struct trace_array *tr = inode->i_private;
5781         unsigned long val;
5782         int ret;
5783
5784         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5785         if (ret)
5786                 return ret;
5787
5788         /* must have at least 1 entry */
5789         if (!val)
5790                 return -EINVAL;
5791
5792         /* value is in KB */
5793         val <<= 10;
5794         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5795         if (ret < 0)
5796                 return ret;
5797
5798         *ppos += cnt;
5799
5800         return cnt;
5801 }
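/*
 * Usage sketch: these handlers normally back "buffer_size_kb" (and the
 * per_cpu/cpuN/buffer_size_kb files); the value is kilobytes per CPU.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-CPU ring buffer to 4 MB via
 * tracing_resize_ring_buffer() above.
 */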
5802
5803 static ssize_t
5804 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5805                                 size_t cnt, loff_t *ppos)
5806 {
5807         struct trace_array *tr = filp->private_data;
5808         char buf[64];
5809         int r, cpu;
5810         unsigned long size = 0, expanded_size = 0;
5811
5812         mutex_lock(&trace_types_lock);
5813         for_each_tracing_cpu(cpu) {
5814                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5815                 if (!ring_buffer_expanded)
5816                         expanded_size += trace_buf_size >> 10;
5817         }
5818         if (ring_buffer_expanded)
5819                 r = sprintf(buf, "%lu\n", size);
5820         else
5821                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5822         mutex_unlock(&trace_types_lock);
5823
5824         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5825 }
5826
5827 static ssize_t
5828 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5829                           size_t cnt, loff_t *ppos)
5830 {
5831         /*
5832          * There is no need to read what the user has written; this function
5833          * exists only so that using "echo" on the file does not return an error.
5834          */
5835
5836         *ppos += cnt;
5837
5838         return cnt;
5839 }
5840
5841 static int
5842 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5843 {
5844         struct trace_array *tr = inode->i_private;
5845
5846         /* disable tracing? */
5847         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5848                 tracer_tracing_off(tr);
5849         /* resize the ring buffer to 0 */
5850         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5851
5852         trace_array_put(tr);
5853
5854         return 0;
5855 }
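/*
 * Semantics sketch: these two handlers normally back the write-only
 * "free_buffer" file.  The write itself is a no-op; the work happens on
 * release, which shrinks the ring buffers to zero (after stopping
 * tracing if the stop-on-free option is set), so closing the file
 * descriptor is what actually frees the memory.
 */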
5856
5857 static ssize_t
5858 tracing_mark_write(struct file *filp, const char __user *ubuf,
5859                                         size_t cnt, loff_t *fpos)
5860 {
5861         struct trace_array *tr = filp->private_data;
5862         struct ring_buffer_event *event;
5863         struct ring_buffer *buffer;
5864         struct print_entry *entry;
5865         unsigned long irq_flags;
5866         const char faulted[] = "<faulted>";
5867         ssize_t written;
5868         int size;
5869         int len;
5870
5871 /* Used in tracing_mark_raw_write() as well */
5872 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5873
5874         if (tracing_disabled)
5875                 return -EINVAL;
5876
5877         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5878                 return -EINVAL;
5879
5880         if (cnt > TRACE_BUF_SIZE)
5881                 cnt = TRACE_BUF_SIZE;
5882
5883         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5884
5885         local_save_flags(irq_flags);
5886         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5887
5888         /* If the write is shorter than "<faulted>", make sure we can still add that string */
5889         if (cnt < FAULTED_SIZE)
5890                 size += FAULTED_SIZE - cnt;
5891
5892         buffer = tr->trace_buffer.buffer;
5893         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5894                                             irq_flags, preempt_count());
5895         if (unlikely(!event))
5896                 /* Ring buffer disabled, return as if not open for write */
5897                 return -EBADF;
5898
5899         entry = ring_buffer_event_data(event);
5900         entry->ip = _THIS_IP_;
5901
5902         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5903         if (len) {
5904                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5905                 cnt = FAULTED_SIZE;
5906                 written = -EFAULT;
5907         } else
5908                 written = cnt;
5909         len = cnt;
5910
5911         if (entry->buf[cnt - 1] != '\n') {
5912                 entry->buf[cnt] = '\n';
5913                 entry->buf[cnt + 1] = '\0';
5914         } else
5915                 entry->buf[cnt] = '\0';
5916
5917         __buffer_unlock_commit(buffer, event);
5918
5919         if (written > 0)
5920                 *fpos += written;
5921
5922         return written;
5923 }
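/*
 * User-space usage sketch (illustration only, not part of this file):
 * writing to the "trace_marker" file injects a TRACE_PRINT entry, as
 * handled by tracing_mark_write() above.  The path assumes tracefs is
 * mounted at /sys/kernel/tracing.
 */
#if 0	/* user-space example, never compiled here */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void write_trace_marker(const char *msg)
{
        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

        if (fd < 0)
                return;
        /* One write() becomes one marker entry (truncated to TRACE_BUF_SIZE). */
        write(fd, msg, strlen(msg));
        close(fd);
}
#endif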
5924
5925 /* Limit it for now to 3K (including tag) */
5926 #define RAW_DATA_MAX_SIZE (1024*3)
5927
5928 static ssize_t
5929 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5930                                         size_t cnt, loff_t *fpos)
5931 {
5932         struct trace_array *tr = filp->private_data;
5933         struct ring_buffer_event *event;
5934         struct ring_buffer *buffer;
5935         struct raw_data_entry *entry;
5936         const char faulted[] = "<faulted>";
5937         unsigned long irq_flags;
5938         ssize_t written;
5939         int size;
5940         int len;
5941
5942 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5943
5944         if (tracing_disabled)
5945                 return -EINVAL;
5946
5947         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5948                 return -EINVAL;
5949
5950         /* The marker must at least have a tag id */
5951         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5952                 return -EINVAL;
5953
5954         if (cnt > TRACE_BUF_SIZE)
5955                 cnt = TRACE_BUF_SIZE;
5956
5957         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5958
5959         local_save_flags(irq_flags);
5960         size = sizeof(*entry) + cnt;
5961         if (cnt < FAULT_SIZE_ID)
5962                 size += FAULT_SIZE_ID - cnt;
5963
5964         buffer = tr->trace_buffer.buffer;
5965         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5966                                             irq_flags, preempt_count());
5967         if (!event)
5968                 /* Ring buffer disabled, return as if not open for write */
5969                 return -EBADF;
5970
5971         entry = ring_buffer_event_data(event);
5972
5973         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5974         if (len) {
5975                 entry->id = -1;
5976                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5977                 written = -EFAULT;
5978         } else
5979                 written = cnt;
5980
5981         __buffer_unlock_commit(buffer, event);
5982
5983         if (written > 0)
5984                 *fpos += written;
5985
5986         return written;
5987 }
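/*
 * User-space usage sketch (illustration only): "trace_marker_raw"
 * expects a leading 4-byte tag id followed by arbitrary binary data,
 * matching the raw_data_entry layout filled in above.  The id value 42
 * and the payload below are just examples.
 */
#if 0	/* user-space example, never compiled here */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void write_trace_marker_raw(void)
{
        struct {
                unsigned int id;        /* tag id, lands in entry->id */
                char payload[8];        /* raw bytes that follow the id */
        } rec = { .id = 42 };
        int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

        if (fd < 0)
                return;
        memcpy(rec.payload, "rawdata", sizeof("rawdata"));
        write(fd, &rec, sizeof(rec));
        close(fd);
}
#endif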
5988
5989 static int tracing_clock_show(struct seq_file *m, void *v)
5990 {
5991         struct trace_array *tr = m->private;
5992         int i;
5993
5994         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5995                 seq_printf(m,
5996                         "%s%s%s%s", i ? " " : "",
5997                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5998                         i == tr->clock_id ? "]" : "");
5999         seq_putc(m, '\n');
6000
6001         return 0;
6002 }
6003
6004 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6005 {
6006         int i;
6007
6008         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6009                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6010                         break;
6011         }
6012         if (i == ARRAY_SIZE(trace_clocks))
6013                 return -EINVAL;
6014
6015         mutex_lock(&trace_types_lock);
6016
6017         tr->clock_id = i;
6018
6019         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6020
6021         /*
6022          * The new clock may not be consistent with the previous clock.
6023          * Reset the buffer so that it doesn't have incomparable timestamps.
6024          */
6025         tracing_reset_online_cpus(&tr->trace_buffer);
6026
6027 #ifdef CONFIG_TRACER_MAX_TRACE
6028         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6029                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6030         tracing_reset_online_cpus(&tr->max_buffer);
6031 #endif
6032
6033         mutex_unlock(&trace_types_lock);
6034
6035         return 0;
6036 }
6037
6038 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6039                                    size_t cnt, loff_t *fpos)
6040 {
6041         struct seq_file *m = filp->private_data;
6042         struct trace_array *tr = m->private;
6043         char buf[64];
6044         const char *clockstr;
6045         int ret;
6046
6047         if (cnt >= sizeof(buf))
6048                 return -EINVAL;
6049
6050         if (copy_from_user(buf, ubuf, cnt))
6051                 return -EFAULT;
6052
6053         buf[cnt] = 0;
6054
6055         clockstr = strstrip(buf);
6056
6057         ret = tracing_set_clock(tr, clockstr);
6058         if (ret)
6059                 return ret;
6060
6061         *fpos += cnt;
6062
6063         return cnt;
6064 }
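/*
 * Usage sketch for the "trace_clock" file backed by these handlers:
 * reading it lists the available clocks with the current one in
 * brackets (see tracing_clock_show() above), and writing a name, e.g.
 *
 *	echo mono > trace_clock
 *
 * switches the clock and resets the buffers, since timestamps taken
 * with different clocks are not comparable.  "mono" is only an example;
 * the valid names come from the trace_clocks[] table.
 */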
6065
6066 static int tracing_clock_open(struct inode *inode, struct file *file)
6067 {
6068         struct trace_array *tr = inode->i_private;
6069         int ret;
6070
6071         if (tracing_disabled)
6072                 return -ENODEV;
6073
6074         if (trace_array_get(tr))
6075                 return -ENODEV;
6076
6077         ret = single_open(file, tracing_clock_show, inode->i_private);
6078         if (ret < 0)
6079                 trace_array_put(tr);
6080
6081         return ret;
6082 }
6083
6084 struct ftrace_buffer_info {
6085         struct trace_iterator   iter;
6086         void                    *spare;
6087         unsigned int            spare_cpu;
6088         unsigned int            read;
6089 };
6090
6091 #ifdef CONFIG_TRACER_SNAPSHOT
6092 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6093 {
6094         struct trace_array *tr = inode->i_private;
6095         struct trace_iterator *iter;
6096         struct seq_file *m;
6097         int ret = 0;
6098
6099         if (trace_array_get(tr) < 0)
6100                 return -ENODEV;
6101
6102         if (file->f_mode & FMODE_READ) {
6103                 iter = __tracing_open(inode, file, true);
6104                 if (IS_ERR(iter))
6105                         ret = PTR_ERR(iter);
6106         } else {
6107                 /* Writes still need the seq_file to hold the private data */
6108                 ret = -ENOMEM;
6109                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6110                 if (!m)
6111                         goto out;
6112                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6113                 if (!iter) {
6114                         kfree(m);
6115                         goto out;
6116                 }
6117                 ret = 0;
6118
6119                 iter->tr = tr;
6120                 iter->trace_buffer = &tr->max_buffer;
6121                 iter->cpu_file = tracing_get_cpu(inode);
6122                 m->private = iter;
6123                 file->private_data = m;
6124         }
6125 out:
6126         if (ret < 0)
6127                 trace_array_put(tr);
6128
6129         return ret;
6130 }
6131
6132 static ssize_t
6133 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6134                        loff_t *ppos)
6135 {
6136         struct seq_file *m = filp->private_data;
6137         struct trace_iterator *iter = m->private;
6138         struct trace_array *tr = iter->tr;
6139         unsigned long val;
6140         int ret;
6141
6142         ret = tracing_update_buffers();
6143         if (ret < 0)
6144                 return ret;
6145
6146         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6147         if (ret)
6148                 return ret;
6149
6150         mutex_lock(&trace_types_lock);
6151
6152         if (tr->current_trace->use_max_tr) {
6153                 ret = -EBUSY;
6154                 goto out;
6155         }
6156
6157         switch (val) {
6158         case 0:
6159                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6160                         ret = -EINVAL;
6161                         break;
6162                 }
6163                 if (tr->allocated_snapshot)
6164                         free_snapshot(tr);
6165                 break;
6166         case 1:
6167 /* Only allow per-cpu swap if the ring buffer supports it */
6168 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6169                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6170                         ret = -EINVAL;
6171                         break;
6172                 }
6173 #endif
6174                 if (!tr->allocated_snapshot) {
6175                         ret = alloc_snapshot(tr);
6176                         if (ret < 0)
6177                                 break;
6178                 }
6179                 local_irq_disable();
6180                 /* Now, we're going to swap */
6181                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6182                         update_max_tr(tr, current, smp_processor_id());
6183                 else
6184                         update_max_tr_single(tr, current, iter->cpu_file);
6185                 local_irq_enable();
6186                 break;
6187         default:
6188                 if (tr->allocated_snapshot) {
6189                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6190                                 tracing_reset_online_cpus(&tr->max_buffer);
6191                         else
6192                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6193                 }
6194                 break;
6195         }
6196
6197         if (ret >= 0) {
6198                 *ppos += cnt;
6199                 ret = cnt;
6200         }
6201 out:
6202         mutex_unlock(&trace_types_lock);
6203         return ret;
6204 }
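/*
 * Summary of the values handled above when writing the "snapshot" file:
 *
 *	0      - free the snapshot buffer (only valid on the top-level,
 *		 all-CPUs file)
 *	1      - allocate the snapshot buffer if needed and swap it with
 *		 the live buffer (per-CPU swap only with
 *		 CONFIG_RING_BUFFER_ALLOW_SWAP)
 *	other  - clear the snapshot buffer contents
 *
 * Reading the file afterwards shows the frozen snapshot (max_buffer)
 * rather than the live trace.
 */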
6205
6206 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6207 {
6208         struct seq_file *m = file->private_data;
6209         int ret;
6210
6211         ret = tracing_release(inode, file);
6212
6213         if (file->f_mode & FMODE_READ)
6214                 return ret;
6215
6216         /* If write only, the seq_file is just a stub */
6217         if (m)
6218                 kfree(m->private);
6219         kfree(m);
6220
6221         return 0;
6222 }
6223
6224 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6225 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6226                                     size_t count, loff_t *ppos);
6227 static int tracing_buffers_release(struct inode *inode, struct file *file);
6228 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6229                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6230
6231 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6232 {
6233         struct ftrace_buffer_info *info;
6234         int ret;
6235
6236         ret = tracing_buffers_open(inode, filp);
6237         if (ret < 0)
6238                 return ret;
6239
6240         info = filp->private_data;
6241
6242         if (info->iter.trace->use_max_tr) {
6243                 tracing_buffers_release(inode, filp);
6244                 return -EBUSY;
6245         }
6246
6247         info->iter.snapshot = true;
6248         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6249
6250         return ret;
6251 }
6252
6253 #endif /* CONFIG_TRACER_SNAPSHOT */
6254
6255
6256 static const struct file_operations tracing_thresh_fops = {
6257         .open           = tracing_open_generic,
6258         .read           = tracing_thresh_read,
6259         .write          = tracing_thresh_write,
6260         .llseek         = generic_file_llseek,
6261 };
6262
6263 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6264 static const struct file_operations tracing_max_lat_fops = {
6265         .open           = tracing_open_generic,
6266         .read           = tracing_max_lat_read,
6267         .write          = tracing_max_lat_write,
6268         .llseek         = generic_file_llseek,
6269 };
6270 #endif
6271
6272 static const struct file_operations set_tracer_fops = {
6273         .open           = tracing_open_generic,
6274         .read           = tracing_set_trace_read,
6275         .write          = tracing_set_trace_write,
6276         .llseek         = generic_file_llseek,
6277 };
6278
6279 static const struct file_operations tracing_pipe_fops = {
6280         .open           = tracing_open_pipe,
6281         .poll           = tracing_poll_pipe,
6282         .read           = tracing_read_pipe,
6283         .splice_read    = tracing_splice_read_pipe,
6284         .release        = tracing_release_pipe,
6285         .llseek         = no_llseek,
6286 };
6287
6288 static const struct file_operations tracing_entries_fops = {
6289         .open           = tracing_open_generic_tr,
6290         .read           = tracing_entries_read,
6291         .write          = tracing_entries_write,
6292         .llseek         = generic_file_llseek,
6293         .release        = tracing_release_generic_tr,
6294 };
6295
6296 static const struct file_operations tracing_total_entries_fops = {
6297         .open           = tracing_open_generic_tr,
6298         .read           = tracing_total_entries_read,
6299         .llseek         = generic_file_llseek,
6300         .release        = tracing_release_generic_tr,
6301 };
6302
6303 static const struct file_operations tracing_free_buffer_fops = {
6304         .open           = tracing_open_generic_tr,
6305         .write          = tracing_free_buffer_write,
6306         .release        = tracing_free_buffer_release,
6307 };
6308
6309 static const struct file_operations tracing_mark_fops = {
6310         .open           = tracing_open_generic_tr,
6311         .write          = tracing_mark_write,
6312         .llseek         = generic_file_llseek,
6313         .release        = tracing_release_generic_tr,
6314 };
6315
6316 static const struct file_operations tracing_mark_raw_fops = {
6317         .open           = tracing_open_generic_tr,
6318         .write          = tracing_mark_raw_write,
6319         .llseek         = generic_file_llseek,
6320         .release        = tracing_release_generic_tr,
6321 };
6322
6323 static const struct file_operations trace_clock_fops = {
6324         .open           = tracing_clock_open,
6325         .read           = seq_read,
6326         .llseek         = seq_lseek,
6327         .release        = tracing_single_release_tr,
6328         .write          = tracing_clock_write,
6329 };
6330
6331 #ifdef CONFIG_TRACER_SNAPSHOT
6332 static const struct file_operations snapshot_fops = {
6333         .open           = tracing_snapshot_open,
6334         .read           = seq_read,
6335         .write          = tracing_snapshot_write,
6336         .llseek         = tracing_lseek,
6337         .release        = tracing_snapshot_release,
6338 };
6339
6340 static const struct file_operations snapshot_raw_fops = {
6341         .open           = snapshot_raw_open,
6342         .read           = tracing_buffers_read,
6343         .release        = tracing_buffers_release,
6344         .splice_read    = tracing_buffers_splice_read,
6345         .llseek         = no_llseek,
6346 };
6347
6348 #endif /* CONFIG_TRACER_SNAPSHOT */
6349
6350 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6351 {
6352         struct trace_array *tr = inode->i_private;
6353         struct ftrace_buffer_info *info;
6354         int ret;
6355
6356         if (tracing_disabled)
6357                 return -ENODEV;
6358
6359         if (trace_array_get(tr) < 0)
6360                 return -ENODEV;
6361
6362         info = kzalloc(sizeof(*info), GFP_KERNEL);
6363         if (!info) {
6364                 trace_array_put(tr);
6365                 return -ENOMEM;
6366         }
6367
6368         mutex_lock(&trace_types_lock);
6369
6370         info->iter.tr           = tr;
6371         info->iter.cpu_file     = tracing_get_cpu(inode);
6372         info->iter.trace        = tr->current_trace;
6373         info->iter.trace_buffer = &tr->trace_buffer;
6374         info->spare             = NULL;
6375         /* Force reading ring buffer for first read */
6376         info->read              = (unsigned int)-1;
6377
6378         filp->private_data = info;
6379
6380         tr->current_trace->ref++;
6381
6382         mutex_unlock(&trace_types_lock);
6383
6384         ret = nonseekable_open(inode, filp);
6385         if (ret < 0)
6386                 trace_array_put(tr);
6387
6388         return ret;
6389 }
6390
6391 static unsigned int
6392 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6393 {
6394         struct ftrace_buffer_info *info = filp->private_data;
6395         struct trace_iterator *iter = &info->iter;
6396
6397         return trace_poll(iter, filp, poll_table);
6398 }
6399
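/*
 * Read path for "trace_pipe_raw": data is consumed from the ring buffer one
 * page at a time into a spare page (allocated on first use and reused), then
 * copied to user space.  info->read tracks how much of the current page has
 * already been handed out, so short reads pick up where the last one left
 * off.  With nothing to read, the call blocks in wait_on_pipe() unless the
 * file was opened O_NONBLOCK, in which case it returns -EAGAIN.
 */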
6400 static ssize_t
6401 tracing_buffers_read(struct file *filp, char __user *ubuf,
6402                      size_t count, loff_t *ppos)
6403 {
6404         struct ftrace_buffer_info *info = filp->private_data;
6405         struct trace_iterator *iter = &info->iter;
6406         ssize_t ret;
6407         ssize_t size;
6408
6409         if (!count)
6410                 return 0;
6411
6412 #ifdef CONFIG_TRACER_MAX_TRACE
6413         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6414                 return -EBUSY;
6415 #endif
6416
6417         if (!info->spare) {
6418                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6419                                                           iter->cpu_file);
6420                 info->spare_cpu = iter->cpu_file;
6421         }
6422         if (!info->spare)
6423                 return -ENOMEM;
6424
6425         /* Do we have previous read data to read? */
6426         if (info->read < PAGE_SIZE)
6427                 goto read;
6428
6429  again:
6430         trace_access_lock(iter->cpu_file);
6431         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6432                                     &info->spare,
6433                                     count,
6434                                     iter->cpu_file, 0);
6435         trace_access_unlock(iter->cpu_file);
6436
6437         if (ret < 0) {
6438                 if (trace_empty(iter)) {
6439                         if ((filp->f_flags & O_NONBLOCK))
6440                                 return -EAGAIN;
6441
6442                         ret = wait_on_pipe(iter, false);
6443                         if (ret)
6444                                 return ret;
6445
6446                         goto again;
6447                 }
6448                 return 0;
6449         }
6450
6451         info->read = 0;
6452  read:
6453         size = PAGE_SIZE - info->read;
6454         if (size > count)
6455                 size = count;
6456
6457         ret = copy_to_user(ubuf, info->spare + info->read, size);
6458         if (ret == size)
6459                 return -EFAULT;
6460
6461         size -= ret;
6462
6463         *ppos += size;
6464         info->read += size;
6465
6466         return size;
6467 }
6468
6469 static int tracing_buffers_release(struct inode *inode, struct file *file)
6470 {
6471         struct ftrace_buffer_info *info = file->private_data;
6472         struct trace_iterator *iter = &info->iter;
6473
6474         mutex_lock(&trace_types_lock);
6475
6476         iter->tr->current_trace->ref--;
6477
6478         __trace_array_put(iter->tr);
6479
6480         if (info->spare)
6481                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6482                                            info->spare_cpu, info->spare);
6483         kfree(info);
6484
6485         mutex_unlock(&trace_types_lock);
6486
6487         return 0;
6488 }
6489
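/*
 * The splice path below hands ring-buffer pages to a pipe without copying.
 * Each page is wrapped in a buffer_ref whose 'ref' count tracks how many
 * pipe buffers still point at it; when the count drops to zero the page is
 * returned to the ring buffer with ring_buffer_free_read_page().
 */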
6490 struct buffer_ref {
6491         struct ring_buffer      *buffer;
6492         void                    *page;
6493         int                     cpu;
6494         int                     ref;
6495 };
6496
6497 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6498                                     struct pipe_buffer *buf)
6499 {
6500         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6501
6502         if (--ref->ref)
6503                 return;
6504
6505         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6506         kfree(ref);
6507         buf->private = 0;
6508 }
6509
6510 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6511                                 struct pipe_buffer *buf)
6512 {
6513         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6514
6515         ref->ref++;
6516 }
6517
6518 /* Pipe buffer operations for a buffer. */
6519 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6520         .can_merge              = 0,
6521         .confirm                = generic_pipe_buf_confirm,
6522         .release                = buffer_pipe_buf_release,
6523         .steal                  = generic_pipe_buf_steal,
6524         .get                    = buffer_pipe_buf_get,
6525 };
6526
6527 /*
6528  * Callback from splice_to_pipe(); used to release any remaining pages
6529  * at the end of the spd if we errored out while filling the pipe.
6530  */
6531 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6532 {
6533         struct buffer_ref *ref =
6534                 (struct buffer_ref *)spd->partial[i].private;
6535
6536         if (--ref->ref)
6537                 return;
6538
6539         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6540         kfree(ref);
6541         spd->partial[i].private = 0;
6542 }
6543
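/*
 * Splice implementation for "trace_pipe_raw".  *ppos must be page aligned
 * and len is rounded down to a page multiple (a request smaller than one
 * page is rejected).  Full pages are read from the ring buffer, any unused
 * tail is zeroed before the page is exposed to user space, and each page is
 * wrapped in a buffer_ref and handed to splice_to_pipe().
 */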
6544 static ssize_t
6545 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6546                             struct pipe_inode_info *pipe, size_t len,
6547                             unsigned int flags)
6548 {
6549         struct ftrace_buffer_info *info = file->private_data;
6550         struct trace_iterator *iter = &info->iter;
6551         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6552         struct page *pages_def[PIPE_DEF_BUFFERS];
6553         struct splice_pipe_desc spd = {
6554                 .pages          = pages_def,
6555                 .partial        = partial_def,
6556                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6557                 .ops            = &buffer_pipe_buf_ops,
6558                 .spd_release    = buffer_spd_release,
6559         };
6560         struct buffer_ref *ref;
6561         int entries, size, i;
6562         ssize_t ret = 0;
6563
6564 #ifdef CONFIG_TRACER_MAX_TRACE
6565         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6566                 return -EBUSY;
6567 #endif
6568
6569         if (*ppos & (PAGE_SIZE - 1))
6570                 return -EINVAL;
6571
6572         if (len & (PAGE_SIZE - 1)) {
6573                 if (len < PAGE_SIZE)
6574                         return -EINVAL;
6575                 len &= PAGE_MASK;
6576         }
6577
6578         if (splice_grow_spd(pipe, &spd))
6579                 return -ENOMEM;
6580
6581  again:
6582         trace_access_lock(iter->cpu_file);
6583         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6584
6585         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6586                 struct page *page;
6587                 int r;
6588
6589                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6590                 if (!ref) {
6591                         ret = -ENOMEM;
6592                         break;
6593                 }
6594
6595                 ref->ref = 1;
6596                 ref->buffer = iter->trace_buffer->buffer;
6597                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6598                 if (!ref->page) {
6599                         ret = -ENOMEM;
6600                         kfree(ref);
6601                         break;
6602                 }
6603                 ref->cpu = iter->cpu_file;
6604
6605                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6606                                           len, iter->cpu_file, 1);
6607                 if (r < 0) {
6608                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6609                                                    ref->page);
6610                         kfree(ref);
6611                         break;
6612                 }
6613
6614                 /*
6615                  * zero out any leftover data; this is going to
6616                  * user land.
6617                  */
6618                 size = ring_buffer_page_len(ref->page);
6619                 if (size < PAGE_SIZE)
6620                         memset(ref->page + size, 0, PAGE_SIZE - size);
6621
6622                 page = virt_to_page(ref->page);
6623
6624                 spd.pages[i] = page;
6625                 spd.partial[i].len = PAGE_SIZE;
6626                 spd.partial[i].offset = 0;
6627                 spd.partial[i].private = (unsigned long)ref;
6628                 spd.nr_pages++;
6629                 *ppos += PAGE_SIZE;
6630
6631                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6632         }
6633
6634         trace_access_unlock(iter->cpu_file);
6635         spd.nr_pages = i;
6636
6637         /* did we read anything? */
6638         if (!spd.nr_pages) {
6639                 if (ret)
6640                         goto out;
6641
6642                 ret = -EAGAIN;
6643                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6644                         goto out;
6645
6646                 ret = wait_on_pipe(iter, true);
6647                 if (ret)
6648                         goto out;
6649
6650                 goto again;
6651         }
6652
6653         ret = splice_to_pipe(pipe, &spd);
6654 out:
6655         splice_shrink_spd(&spd);
6656
6657         return ret;
6658 }
6659
6660 static const struct file_operations tracing_buffers_fops = {
6661         .open           = tracing_buffers_open,
6662         .read           = tracing_buffers_read,
6663         .poll           = tracing_buffers_poll,
6664         .release        = tracing_buffers_release,
6665         .splice_read    = tracing_buffers_splice_read,
6666         .llseek         = no_llseek,
6667 };
6668
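/*
 * tracing_buffers_read() also serves... see above; tracing_stats_read() below
 * backs the per-cpu "stats" file.  Output is one "name: value" line per
 * counter; with a nanosecond clock the timestamps are printed as
 * seconds.microseconds.  Illustrative output (the values are made up):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 45824
 *   oldest event ts: 12345.678901
 *   now ts: 12349.000123
 *   dropped events: 0
 *   read events: 256
 */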
6669 static ssize_t
6670 tracing_stats_read(struct file *filp, char __user *ubuf,
6671                    size_t count, loff_t *ppos)
6672 {
6673         struct inode *inode = file_inode(filp);
6674         struct trace_array *tr = inode->i_private;
6675         struct trace_buffer *trace_buf = &tr->trace_buffer;
6676         int cpu = tracing_get_cpu(inode);
6677         struct trace_seq *s;
6678         unsigned long cnt;
6679         unsigned long long t;
6680         unsigned long usec_rem;
6681
6682         s = kmalloc(sizeof(*s), GFP_KERNEL);
6683         if (!s)
6684                 return -ENOMEM;
6685
6686         trace_seq_init(s);
6687
6688         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6689         trace_seq_printf(s, "entries: %ld\n", cnt);
6690
6691         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6692         trace_seq_printf(s, "overrun: %ld\n", cnt);
6693
6694         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6695         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6696
6697         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6698         trace_seq_printf(s, "bytes: %ld\n", cnt);
6699
6700         if (trace_clocks[tr->clock_id].in_ns) {
6701                 /* local or global for trace_clock */
6702                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6703                 usec_rem = do_div(t, USEC_PER_SEC);
6704                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6705                                                                 t, usec_rem);
6706
6707                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6708                 usec_rem = do_div(t, USEC_PER_SEC);
6709                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6710         } else {
6711                 /* counter or tsc mode for trace_clock */
6712                 trace_seq_printf(s, "oldest event ts: %llu\n",
6713                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6714
6715                 trace_seq_printf(s, "now ts: %llu\n",
6716                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6717         }
6718
6719         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6720         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6721
6722         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6723         trace_seq_printf(s, "read events: %ld\n", cnt);
6724
6725         count = simple_read_from_buffer(ubuf, count, ppos,
6726                                         s->buffer, trace_seq_used(s));
6727
6728         kfree(s);
6729
6730         return count;
6731 }
6732
6733 static const struct file_operations tracing_stats_fops = {
6734         .open           = tracing_open_generic_tr,
6735         .read           = tracing_stats_read,
6736         .llseek         = generic_file_llseek,
6737         .release        = tracing_release_generic_tr,
6738 };
6739
6740 #ifdef CONFIG_DYNAMIC_FTRACE
6741
6742 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6743 {
6744         return 0;
6745 }
6746
6747 static ssize_t
6748 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6749                   size_t cnt, loff_t *ppos)
6750 {
6751         static char ftrace_dyn_info_buffer[1024];
6752         static DEFINE_MUTEX(dyn_info_mutex);
6753         unsigned long *p = filp->private_data;
6754         char *buf = ftrace_dyn_info_buffer;
6755         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6756         int r;
6757
6758         mutex_lock(&dyn_info_mutex);
6759         r = sprintf(buf, "%ld ", *p);
6760
6761         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6762         buf[r++] = '\n';
6763
6764         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6765
6766         mutex_unlock(&dyn_info_mutex);
6767
6768         return r;
6769 }
6770
6771 static const struct file_operations tracing_dyn_info_fops = {
6772         .open           = tracing_open_generic,
6773         .read           = tracing_read_dyn_info,
6774         .llseek         = generic_file_llseek,
6775 };
6776 #endif /* CONFIG_DYNAMIC_FTRACE */
6777
6778 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6779 static void
6780 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6781                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6782                 void *data)
6783 {
6784         tracing_snapshot_instance(tr);
6785 }
6786
6787 static void
6788 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6789                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6790                       void *data)
6791 {
6792         struct ftrace_func_mapper *mapper = data;
6793         long *count = NULL;
6794
6795         if (mapper)
6796                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6797
6798         if (count) {
6799
6800                 if (*count <= 0)
6801                         return;
6802
6803                 (*count)--;
6804         }
6805
6806         tracing_snapshot_instance(tr);
6807 }
6808
6809 static int
6810 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6811                       struct ftrace_probe_ops *ops, void *data)
6812 {
6813         struct ftrace_func_mapper *mapper = data;
6814         long *count = NULL;
6815
6816         seq_printf(m, "%ps:", (void *)ip);
6817
6818         seq_puts(m, "snapshot");
6819
6820         if (mapper)
6821                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6822
6823         if (count)
6824                 seq_printf(m, ":count=%ld\n", *count);
6825         else
6826                 seq_puts(m, ":unlimited\n");
6827
6828         return 0;
6829 }
6830
6831 static int
6832 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6833                      unsigned long ip, void *init_data, void **data)
6834 {
6835         struct ftrace_func_mapper *mapper = *data;
6836
6837         if (!mapper) {
6838                 mapper = allocate_ftrace_func_mapper();
6839                 if (!mapper)
6840                         return -ENOMEM;
6841                 *data = mapper;
6842         }
6843
6844         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6845 }
6846
6847 static void
6848 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6849                      unsigned long ip, void *data)
6850 {
6851         struct ftrace_func_mapper *mapper = data;
6852
6853         if (!ip) {
6854                 if (!mapper)
6855                         return;
6856                 free_ftrace_func_mapper(mapper, NULL);
6857                 return;
6858         }
6859
6860         ftrace_func_mapper_remove_ip(mapper, ip);
6861 }
6862
6863 static struct ftrace_probe_ops snapshot_probe_ops = {
6864         .func                   = ftrace_snapshot,
6865         .print                  = ftrace_snapshot_print,
6866 };
6867
6868 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6869         .func                   = ftrace_count_snapshot,
6870         .print                  = ftrace_snapshot_print,
6871         .init                   = ftrace_snapshot_init,
6872         .free                   = ftrace_snapshot_free,
6873 };
6874
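/*
 * Handler for the "snapshot" command written into set_ftrace_filter.
 * Illustrative usage (the function name is only an example):
 *
 *   echo 'do_page_fault:snapshot'   > set_ftrace_filter  # snapshot on every hit
 *   echo 'do_page_fault:snapshot:3' > set_ftrace_filter  # only the first 3 hits
 *   echo '!do_page_fault:snapshot'  > set_ftrace_filter  # remove the probe
 *
 * A trailing count selects snapshot_count_probe_ops, which counts down a
 * per-ip counter in the ftrace_func_mapper and stops taking snapshots once
 * it reaches zero.
 */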
6875 static int
6876 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6877                                char *glob, char *cmd, char *param, int enable)
6878 {
6879         struct ftrace_probe_ops *ops;
6880         void *count = (void *)-1;
6881         char *number;
6882         int ret;
6883
6884         /* hash funcs only work with set_ftrace_filter */
6885         if (!enable)
6886                 return -EINVAL;
6887
6888         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6889
6890         if (glob[0] == '!')
6891                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6892
6893         if (!param)
6894                 goto out_reg;
6895
6896         number = strsep(&param, ":");
6897
6898         if (!strlen(number))
6899                 goto out_reg;
6900
6901         /*
6902          * We use the callback data field (which is a pointer)
6903          * as our counter.
6904          */
6905         ret = kstrtoul(number, 0, (unsigned long *)&count);
6906         if (ret)
6907                 return ret;
6908
6909  out_reg:
6910         ret = alloc_snapshot(tr);
6911         if (ret < 0)
6912                 goto out;
6913
6914         ret = register_ftrace_function_probe(glob, tr, ops, count);
6915
6916  out:
6917         return ret < 0 ? ret : 0;
6918 }
6919
6920 static struct ftrace_func_command ftrace_snapshot_cmd = {
6921         .name                   = "snapshot",
6922         .func                   = ftrace_trace_snapshot_callback,
6923 };
6924
6925 static __init int register_snapshot_cmd(void)
6926 {
6927         return register_ftrace_command(&ftrace_snapshot_cmd);
6928 }
6929 #else
6930 static inline __init int register_snapshot_cmd(void) { return 0; }
6931 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6932
6933 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6934 {
6935         if (WARN_ON(!tr->dir))
6936                 return ERR_PTR(-ENODEV);
6937
6938         /* Top directory uses NULL as the parent */
6939         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6940                 return NULL;
6941
6942         /* All sub buffers have a descriptor */
6943         return tr->dir;
6944 }
6945
6946 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6947 {
6948         struct dentry *d_tracer;
6949
6950         if (tr->percpu_dir)
6951                 return tr->percpu_dir;
6952
6953         d_tracer = tracing_get_dentry(tr);
6954         if (IS_ERR(d_tracer))
6955                 return NULL;
6956
6957         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6958
6959         WARN_ONCE(!tr->percpu_dir,
6960                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6961
6962         return tr->percpu_dir;
6963 }
6964
6965 static struct dentry *
6966 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6967                       void *data, long cpu, const struct file_operations *fops)
6968 {
6969         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6970
6971         if (ret) /* See tracing_get_cpu() */
6972                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6973         return ret;
6974 }
6975
6976 static void
6977 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6978 {
6979         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6980         struct dentry *d_cpu;
6981         char cpu_dir[30]; /* 30 characters should be more than enough */
6982
6983         if (!d_percpu)
6984                 return;
6985
6986         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6987         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6988         if (!d_cpu) {
6989                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6990                 return;
6991         }
6992
6993         /* per cpu trace_pipe */
6994         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6995                                 tr, cpu, &tracing_pipe_fops);
6996
6997         /* per cpu trace */
6998         trace_create_cpu_file("trace", 0644, d_cpu,
6999                                 tr, cpu, &tracing_fops);
7000
7001         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7002                                 tr, cpu, &tracing_buffers_fops);
7003
7004         trace_create_cpu_file("stats", 0444, d_cpu,
7005                                 tr, cpu, &tracing_stats_fops);
7006
7007         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7008                                 tr, cpu, &tracing_entries_fops);
7009
7010 #ifdef CONFIG_TRACER_SNAPSHOT
7011         trace_create_cpu_file("snapshot", 0644, d_cpu,
7012                                 tr, cpu, &snapshot_fops);
7013
7014         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7015                                 tr, cpu, &snapshot_raw_fops);
7016 #endif
7017 }
7018
7019 #ifdef CONFIG_FTRACE_SELFTEST
7020 /* Let selftest have access to static functions in this file */
7021 #include "trace_selftest.c"
7022 #endif
7023
7024 static ssize_t
7025 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7026                         loff_t *ppos)
7027 {
7028         struct trace_option_dentry *topt = filp->private_data;
7029         char *buf;
7030
7031         if (topt->flags->val & topt->opt->bit)
7032                 buf = "1\n";
7033         else
7034                 buf = "0\n";
7035
7036         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7037 }
7038
7039 static ssize_t
7040 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7041                          loff_t *ppos)
7042 {
7043         struct trace_option_dentry *topt = filp->private_data;
7044         unsigned long val;
7045         int ret;
7046
7047         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7048         if (ret)
7049                 return ret;
7050
7051         if (val != 0 && val != 1)
7052                 return -EINVAL;
7053
7054         if (!!(topt->flags->val & topt->opt->bit) != val) {
7055                 mutex_lock(&trace_types_lock);
7056                 ret = __set_tracer_option(topt->tr, topt->flags,
7057                                           topt->opt, !val);
7058                 mutex_unlock(&trace_types_lock);
7059                 if (ret)
7060                         return ret;
7061         }
7062
7063         *ppos += cnt;
7064
7065         return cnt;
7066 }
7067
7068
7069 static const struct file_operations trace_options_fops = {
7070         .open = tracing_open_generic,
7071         .read = trace_options_read,
7072         .write = trace_options_write,
7073         .llseek = generic_file_llseek,
7074 };
7075
7076 /*
7077  * In order to pass in both the trace_array descriptor as well as the index
7078  * to the flag that the trace option file represents, the trace_array
7079  * has a character array of trace_flags_index[], which holds the index
7080  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7081  * The address of this character array is passed to the flag option file
7082  * read/write callbacks.
7083  *
7084  * In order to extract both the index and the trace_array descriptor,
7085  * get_tr_index() uses the following algorithm.
7086  *
7087  *   idx = *ptr;
7088  *
7089  * As the pointer itself contains the address of the index (remember
7090  * index[1] == 1).
7091  *
7092  * Then, to get the trace_array descriptor, we first subtract that index
7093  * from the ptr, which gives us the start of the index array itself.
7094  *
7095  *   ptr - idx == &index[0]
7096  *
7097  * Then a simple container_of() from that pointer gets us to the
7098  * trace_array descriptor.
7099  */
7100 static void get_tr_index(void *data, struct trace_array **ptr,
7101                          unsigned int *pindex)
7102 {
7103         *pindex = *(unsigned char *)data;
7104
7105         *ptr = container_of(data - *pindex, struct trace_array,
7106                             trace_flags_index);
7107 }
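
/*
 * Concrete example of the scheme described above: if data points at
 * tr->trace_flags_index[3], then *(unsigned char *)data == 3 (because
 * init_trace_flags_index() sets index[i] = i), data - 3 is
 * &tr->trace_flags_index[0], and container_of() recovers tr.
 */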
7108
7109 static ssize_t
7110 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7111                         loff_t *ppos)
7112 {
7113         void *tr_index = filp->private_data;
7114         struct trace_array *tr;
7115         unsigned int index;
7116         char *buf;
7117
7118         get_tr_index(tr_index, &tr, &index);
7119
7120         if (tr->trace_flags & (1 << index))
7121                 buf = "1\n";
7122         else
7123                 buf = "0\n";
7124
7125         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7126 }
7127
7128 static ssize_t
7129 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7130                          loff_t *ppos)
7131 {
7132         void *tr_index = filp->private_data;
7133         struct trace_array *tr;
7134         unsigned int index;
7135         unsigned long val;
7136         int ret;
7137
7138         get_tr_index(tr_index, &tr, &index);
7139
7140         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7141         if (ret)
7142                 return ret;
7143
7144         if (val != 0 && val != 1)
7145                 return -EINVAL;
7146
7147         mutex_lock(&trace_types_lock);
7148         ret = set_tracer_flag(tr, 1 << index, val);
7149         mutex_unlock(&trace_types_lock);
7150
7151         if (ret < 0)
7152                 return ret;
7153
7154         *ppos += cnt;
7155
7156         return cnt;
7157 }
7158
7159 static const struct file_operations trace_options_core_fops = {
7160         .open = tracing_open_generic,
7161         .read = trace_options_core_read,
7162         .write = trace_options_core_write,
7163         .llseek = generic_file_llseek,
7164 };
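
/*
 * Each trace flag bit gets a file under the per-array "options" directory,
 * backed by the fops above (instances skip the flags reserved for the top
 * level array).  Reading returns "0\n" or "1\n" and only 0 or 1 is accepted
 * on write, e.g. (the flag name is just a placeholder):
 *
 *   echo 1 > options/<flag-name>
 */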
7165
7166 struct dentry *trace_create_file(const char *name,
7167                                  umode_t mode,
7168                                  struct dentry *parent,
7169                                  void *data,
7170                                  const struct file_operations *fops)
7171 {
7172         struct dentry *ret;
7173
7174         ret = tracefs_create_file(name, mode, parent, data, fops);
7175         if (!ret)
7176                 pr_warn("Could not create tracefs '%s' entry\n", name);
7177
7178         return ret;
7179 }
7180
7181
7182 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7183 {
7184         struct dentry *d_tracer;
7185
7186         if (tr->options)
7187                 return tr->options;
7188
7189         d_tracer = tracing_get_dentry(tr);
7190         if (IS_ERR(d_tracer))
7191                 return NULL;
7192
7193         tr->options = tracefs_create_dir("options", d_tracer);
7194         if (!tr->options) {
7195                 pr_warn("Could not create tracefs directory 'options'\n");
7196                 return NULL;
7197         }
7198
7199         return tr->options;
7200 }
7201
7202 static void
7203 create_trace_option_file(struct trace_array *tr,
7204                          struct trace_option_dentry *topt,
7205                          struct tracer_flags *flags,
7206                          struct tracer_opt *opt)
7207 {
7208         struct dentry *t_options;
7209
7210         t_options = trace_options_init_dentry(tr);
7211         if (!t_options)
7212                 return;
7213
7214         topt->flags = flags;
7215         topt->opt = opt;
7216         topt->tr = tr;
7217
7218         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7219                                     &trace_options_fops);
7220
7221 }
7222
7223 static void
7224 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7225 {
7226         struct trace_option_dentry *topts;
7227         struct trace_options *tr_topts;
7228         struct tracer_flags *flags;
7229         struct tracer_opt *opts;
7230         int cnt;
7231         int i;
7232
7233         if (!tracer)
7234                 return;
7235
7236         flags = tracer->flags;
7237
7238         if (!flags || !flags->opts)
7239                 return;
7240
7241         /*
7242          * If this is an instance, only create flags for tracers
7243          * the instance may have.
7244          */
7245         if (!trace_ok_for_array(tracer, tr))
7246                 return;
7247
7248         for (i = 0; i < tr->nr_topts; i++) {
7249                 /* Make sure there are no duplicate flags. */
7250                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7251                         return;
7252         }
7253
7254         opts = flags->opts;
7255
7256         for (cnt = 0; opts[cnt].name; cnt++)
7257                 ;
7258
7259         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7260         if (!topts)
7261                 return;
7262
7263         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7264                             GFP_KERNEL);
7265         if (!tr_topts) {
7266                 kfree(topts);
7267                 return;
7268         }
7269
7270         tr->topts = tr_topts;
7271         tr->topts[tr->nr_topts].tracer = tracer;
7272         tr->topts[tr->nr_topts].topts = topts;
7273         tr->nr_topts++;
7274
7275         for (cnt = 0; opts[cnt].name; cnt++) {
7276                 create_trace_option_file(tr, &topts[cnt], flags,
7277                                          &opts[cnt]);
7278                 WARN_ONCE(topts[cnt].entry == NULL,
7279                           "Failed to create trace option: %s",
7280                           opts[cnt].name);
7281         }
7282 }
7283
7284 static struct dentry *
7285 create_trace_option_core_file(struct trace_array *tr,
7286                               const char *option, long index)
7287 {
7288         struct dentry *t_options;
7289
7290         t_options = trace_options_init_dentry(tr);
7291         if (!t_options)
7292                 return NULL;
7293
7294         return trace_create_file(option, 0644, t_options,
7295                                  (void *)&tr->trace_flags_index[index],
7296                                  &trace_options_core_fops);
7297 }
7298
7299 static void create_trace_options_dir(struct trace_array *tr)
7300 {
7301         struct dentry *t_options;
7302         bool top_level = tr == &global_trace;
7303         int i;
7304
7305         t_options = trace_options_init_dentry(tr);
7306         if (!t_options)
7307                 return;
7308
7309         for (i = 0; trace_options[i]; i++) {
7310                 if (top_level ||
7311                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7312                         create_trace_option_core_file(tr, trace_options[i], i);
7313         }
7314 }
7315
7316 static ssize_t
7317 rb_simple_read(struct file *filp, char __user *ubuf,
7318                size_t cnt, loff_t *ppos)
7319 {
7320         struct trace_array *tr = filp->private_data;
7321         char buf[64];
7322         int r;
7323
7324         r = tracer_tracing_is_on(tr);
7325         r = sprintf(buf, "%d\n", r);
7326
7327         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7328 }
7329
7330 static ssize_t
7331 rb_simple_write(struct file *filp, const char __user *ubuf,
7332                 size_t cnt, loff_t *ppos)
7333 {
7334         struct trace_array *tr = filp->private_data;
7335         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7336         unsigned long val;
7337         int ret;
7338
7339         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7340         if (ret)
7341                 return ret;
7342
7343         if (buffer) {
7344                 mutex_lock(&trace_types_lock);
7345                 if (val) {
7346                         tracer_tracing_on(tr);
7347                         if (tr->current_trace->start)
7348                                 tr->current_trace->start(tr);
7349                 } else {
7350                         tracer_tracing_off(tr);
7351                         if (tr->current_trace->stop)
7352                                 tr->current_trace->stop(tr);
7353                 }
7354                 mutex_unlock(&trace_types_lock);
7355         }
7356
7357         (*ppos)++;
7358
7359         return cnt;
7360 }
7361
7362 static const struct file_operations rb_simple_fops = {
7363         .open           = tracing_open_generic_tr,
7364         .read           = rb_simple_read,
7365         .write          = rb_simple_write,
7366         .release        = tracing_release_generic_tr,
7367         .llseek         = default_llseek,
7368 };
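
/*
 * rb_simple_fops backs the "tracing_on" file created in init_tracer_tracefs()
 * below.  Writing 0 stops recording into the ring buffer (and calls the
 * tracer's ->stop), writing a non-zero value turns it back on; reading
 * reports the current state.  For example:
 *
 *   echo 0 > tracing_on    # stop recording without changing the tracer
 */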
7369
7370 struct dentry *trace_instance_dir;
7371
7372 static void
7373 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7374
7375 static int
7376 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7377 {
7378         enum ring_buffer_flags rb_flags;
7379
7380         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7381
7382         buf->tr = tr;
7383
7384         buf->buffer = ring_buffer_alloc(size, rb_flags);
7385         if (!buf->buffer)
7386                 return -ENOMEM;
7387
7388         buf->data = alloc_percpu(struct trace_array_cpu);
7389         if (!buf->data) {
7390                 ring_buffer_free(buf->buffer);
7391                 return -ENOMEM;
7392         }
7393
7394         /* Allocate the first page for all buffers */
7395         set_buffer_entries(&tr->trace_buffer,
7396                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7397
7398         return 0;
7399 }
7400
7401 static int allocate_trace_buffers(struct trace_array *tr, int size)
7402 {
7403         int ret;
7404
7405         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7406         if (ret)
7407                 return ret;
7408
7409 #ifdef CONFIG_TRACER_MAX_TRACE
7410         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7411                                     allocate_snapshot ? size : 1);
7412         if (WARN_ON(ret)) {
7413                 ring_buffer_free(tr->trace_buffer.buffer);
7414                 free_percpu(tr->trace_buffer.data);
7415                 return -ENOMEM;
7416         }
7417         tr->allocated_snapshot = allocate_snapshot;
7418
7419         /*
7420          * Only the top level trace array gets its snapshot allocated
7421          * from the kernel command line.
7422          */
7423         allocate_snapshot = false;
7424 #endif
7425         return 0;
7426 }
7427
7428 static void free_trace_buffer(struct trace_buffer *buf)
7429 {
7430         if (buf->buffer) {
7431                 ring_buffer_free(buf->buffer);
7432                 buf->buffer = NULL;
7433                 free_percpu(buf->data);
7434                 buf->data = NULL;
7435         }
7436 }
7437
7438 static void free_trace_buffers(struct trace_array *tr)
7439 {
7440         if (!tr)
7441                 return;
7442
7443         free_trace_buffer(&tr->trace_buffer);
7444
7445 #ifdef CONFIG_TRACER_MAX_TRACE
7446         free_trace_buffer(&tr->max_buffer);
7447 #endif
7448 }
7449
7450 static void init_trace_flags_index(struct trace_array *tr)
7451 {
7452         int i;
7453
7454         /* Used by the trace options files */
7455         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7456                 tr->trace_flags_index[i] = i;
7457 }
7458
7459 static void __update_tracer_options(struct trace_array *tr)
7460 {
7461         struct tracer *t;
7462
7463         for (t = trace_types; t; t = t->next)
7464                 add_tracer_options(tr, t);
7465 }
7466
7467 static void update_tracer_options(struct trace_array *tr)
7468 {
7469         mutex_lock(&trace_types_lock);
7470         __update_tracer_options(tr);
7471         mutex_unlock(&trace_types_lock);
7472 }
7473
7474 static int instance_mkdir(const char *name)
7475 {
7476         struct trace_array *tr;
7477         int ret;
7478
7479         mutex_lock(&trace_types_lock);
7480
7481         ret = -EEXIST;
7482         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7483                 if (tr->name && strcmp(tr->name, name) == 0)
7484                         goto out_unlock;
7485         }
7486
7487         ret = -ENOMEM;
7488         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7489         if (!tr)
7490                 goto out_unlock;
7491
7492         tr->name = kstrdup(name, GFP_KERNEL);
7493         if (!tr->name)
7494                 goto out_free_tr;
7495
7496         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7497                 goto out_free_tr;
7498
7499         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7500
7501         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7502
7503         raw_spin_lock_init(&tr->start_lock);
7504
7505         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7506
7507         tr->current_trace = &nop_trace;
7508
7509         INIT_LIST_HEAD(&tr->systems);
7510         INIT_LIST_HEAD(&tr->events);
7511
7512         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7513                 goto out_free_tr;
7514
7515         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7516         if (!tr->dir)
7517                 goto out_free_tr;
7518
7519         ret = event_trace_add_tracer(tr->dir, tr);
7520         if (ret) {
7521                 tracefs_remove_recursive(tr->dir);
7522                 goto out_free_tr;
7523         }
7524
7525         ftrace_init_trace_array(tr);
7526
7527         init_tracer_tracefs(tr, tr->dir);
7528         init_trace_flags_index(tr);
7529         __update_tracer_options(tr);
7530
7531         list_add(&tr->list, &ftrace_trace_arrays);
7532
7533         mutex_unlock(&trace_types_lock);
7534
7535         return 0;
7536
7537  out_free_tr:
7538         free_trace_buffers(tr);
7539         free_cpumask_var(tr->tracing_cpumask);
7540         kfree(tr->name);
7541         kfree(tr);
7542
7543  out_unlock:
7544         mutex_unlock(&trace_types_lock);
7545
7546         return ret;
7547
7548 }
7549
7550 static int instance_rmdir(const char *name)
7551 {
7552         struct trace_array *tr;
7553         int found = 0;
7554         int ret;
7555         int i;
7556
7557         mutex_lock(&trace_types_lock);
7558
7559         ret = -ENODEV;
7560         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7561                 if (tr->name && strcmp(tr->name, name) == 0) {
7562                         found = 1;
7563                         break;
7564                 }
7565         }
7566         if (!found)
7567                 goto out_unlock;
7568
7569         ret = -EBUSY;
7570         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7571                 goto out_unlock;
7572
7573         list_del(&tr->list);
7574
7575         /* Disable all the flags that were enabled coming in */
7576         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7577                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7578                         set_tracer_flag(tr, 1 << i, 0);
7579         }
7580
7581         tracing_set_nop(tr);
7582         clear_ftrace_function_probes(tr);
7583         event_trace_del_tracer(tr);
7584         ftrace_clear_pids(tr);
7585         ftrace_destroy_function_files(tr);
7586         tracefs_remove_recursive(tr->dir);
7587         free_trace_buffers(tr);
7588
7589         for (i = 0; i < tr->nr_topts; i++) {
7590                 kfree(tr->topts[i].topts);
7591         }
7592         kfree(tr->topts);
7593
7594         kfree(tr->name);
7595         kfree(tr);
7596
7597         ret = 0;
7598
7599  out_unlock:
7600         mutex_unlock(&trace_types_lock);
7601
7602         return ret;
7603 }
7604
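/*
 * instance_mkdir()/instance_rmdir() implement mkdir and rmdir inside the
 * "instances" directory.  Creating an instance allocates a new trace_array
 * with its own ring buffers, event files and options; removing one fails
 * with -EBUSY while the array (or its current tracer) still has references,
 * for example from an open trace_pipe_raw file.  Illustrative use, relative
 * to the tracefs mount point:
 *
 *   mkdir instances/foo
 *   rmdir instances/foo
 */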
7605 static __init void create_trace_instances(struct dentry *d_tracer)
7606 {
7607         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7608                                                          instance_mkdir,
7609                                                          instance_rmdir);
7610         if (WARN_ON(!trace_instance_dir))
7611                 return;
7612 }
7613
7614 static void
7615 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7616 {
7617         int cpu;
7618
7619         trace_create_file("available_tracers", 0444, d_tracer,
7620                         tr, &show_traces_fops);
7621
7622         trace_create_file("current_tracer", 0644, d_tracer,
7623                         tr, &set_tracer_fops);
7624
7625         trace_create_file("tracing_cpumask", 0644, d_tracer,
7626                           tr, &tracing_cpumask_fops);
7627
7628         trace_create_file("trace_options", 0644, d_tracer,
7629                           tr, &tracing_iter_fops);
7630
7631         trace_create_file("trace", 0644, d_tracer,
7632                           tr, &tracing_fops);
7633
7634         trace_create_file("trace_pipe", 0444, d_tracer,
7635                           tr, &tracing_pipe_fops);
7636
7637         trace_create_file("buffer_size_kb", 0644, d_tracer,
7638                           tr, &tracing_entries_fops);
7639
7640         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7641                           tr, &tracing_total_entries_fops);
7642
7643         trace_create_file("free_buffer", 0200, d_tracer,
7644                           tr, &tracing_free_buffer_fops);
7645
7646         trace_create_file("trace_marker", 0220, d_tracer,
7647                           tr, &tracing_mark_fops);
7648
7649         trace_create_file("trace_marker_raw", 0220, d_tracer,
7650                           tr, &tracing_mark_raw_fops);
7651
7652         trace_create_file("trace_clock", 0644, d_tracer, tr,
7653                           &trace_clock_fops);
7654
7655         trace_create_file("tracing_on", 0644, d_tracer,
7656                           tr, &rb_simple_fops);
7657
7658         create_trace_options_dir(tr);
7659
7660 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7661         trace_create_file("tracing_max_latency", 0644, d_tracer,
7662                         &tr->max_latency, &tracing_max_lat_fops);
7663 #endif
7664
7665         if (ftrace_create_function_files(tr, d_tracer))
7666                 WARN(1, "Could not allocate function filter files");
7667
7668 #ifdef CONFIG_TRACER_SNAPSHOT
7669         trace_create_file("snapshot", 0644, d_tracer,
7670                           tr, &snapshot_fops);
7671 #endif
7672
7673         for_each_tracing_cpu(cpu)
7674                 tracing_init_tracefs_percpu(tr, cpu);
7675
7676         ftrace_init_tracefs(tr, d_tracer);
7677 }
7678
7679 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7680 {
7681         struct vfsmount *mnt;
7682         struct file_system_type *type;
7683
7684         /*
7685          * To maintain backward compatibility for tools that mount
7686          * debugfs to get to the tracing facility, tracefs is automatically
7687          * mounted to the debugfs/tracing directory.
7688          */
7689         type = get_fs_type("tracefs");
7690         if (!type)
7691                 return NULL;
7692         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7693         put_filesystem(type);
7694         if (IS_ERR(mnt))
7695                 return NULL;
7696         mntget(mnt);
7697
7698         return mnt;
7699 }
7700
7701 /**
7702  * tracing_init_dentry - initialize top level trace array
7703  *
7704  * This is called when creating files or directories in the tracing
7705  * directory. It is called via fs_initcall() by any of the boot up code
7706  * and expects to return the dentry of the top level tracing directory.
7707  */
7708 struct dentry *tracing_init_dentry(void)
7709 {
7710         struct trace_array *tr = &global_trace;
7711
7712         /* The top level trace array uses NULL as parent */
7713         if (tr->dir)
7714                 return NULL;
7715
7716         if (WARN_ON(!tracefs_initialized()) ||
7717                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7718                  WARN_ON(!debugfs_initialized())))
7719                 return ERR_PTR(-ENODEV);
7720
7721         /*
7722          * As there may still be users that expect the tracing
7723          * files to exist in debugfs/tracing, we must automount
7724          * the tracefs file system there, so older tools still
7725          * work with the newer kernel.
7726          */
7727         tr->dir = debugfs_create_automount("tracing", NULL,
7728                                            trace_automount, NULL);
7729         if (!tr->dir) {
7730                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7731                 return ERR_PTR(-ENOMEM);
7732         }
7733
7734         return NULL;
7735 }
7736
7737 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7738 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7739
7740 static void __init trace_enum_init(void)
7741 {
7742         int len;
7743
7744         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7745         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7746 }
7747
7748 #ifdef CONFIG_MODULES
7749 static void trace_module_add_enums(struct module *mod)
7750 {
7751         if (!mod->num_trace_enums)
7752                 return;
7753
7754         /*
7755          * Modules with bad taint do not have events created; do
7756          * not bother with enums either.
7757          */
7758         if (trace_module_has_bad_taint(mod))
7759                 return;
7760
7761         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7762 }
7763
7764 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7765 static void trace_module_remove_enums(struct module *mod)
7766 {
7767         union trace_enum_map_item *map;
7768         union trace_enum_map_item **last = &trace_enum_maps;
7769
7770         if (!mod->num_trace_enums)
7771                 return;
7772
7773         mutex_lock(&trace_enum_mutex);
7774
7775         map = trace_enum_maps;
7776
7777         while (map) {
7778                 if (map->head.mod == mod)
7779                         break;
7780                 map = trace_enum_jmp_to_tail(map);
7781                 last = &map->tail.next;
7782                 map = map->tail.next;
7783         }
7784         if (!map)
7785                 goto out;
7786
7787         *last = trace_enum_jmp_to_tail(map)->tail.next;
7788         kfree(map);
7789  out:
7790         mutex_unlock(&trace_enum_mutex);
7791 }
7792 #else
7793 static inline void trace_module_remove_enums(struct module *mod) { }
7794 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7795
7796 static int trace_module_notify(struct notifier_block *self,
7797                                unsigned long val, void *data)
7798 {
7799         struct module *mod = data;
7800
7801         switch (val) {
7802         case MODULE_STATE_COMING:
7803                 trace_module_add_enums(mod);
7804                 break;
7805         case MODULE_STATE_GOING:
7806                 trace_module_remove_enums(mod);
7807                 break;
7808         }
7809
7810         return 0;
7811 }
7812
7813 static struct notifier_block trace_module_nb = {
7814         .notifier_call = trace_module_notify,
7815         .priority = 0,
7816 };
7817 #endif /* CONFIG_MODULES */
7818
7819 static __init int tracer_init_tracefs(void)
7820 {
7821         struct dentry *d_tracer;
7822
7823         trace_access_lock_init();
7824
7825         d_tracer = tracing_init_dentry();
7826         if (IS_ERR(d_tracer))
7827                 return 0;
7828
7829         init_tracer_tracefs(&global_trace, d_tracer);
7830         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7831
7832         trace_create_file("tracing_thresh", 0644, d_tracer,
7833                         &global_trace, &tracing_thresh_fops);
7834
7835         trace_create_file("README", 0444, d_tracer,
7836                         NULL, &tracing_readme_fops);
7837
7838         trace_create_file("saved_cmdlines", 0444, d_tracer,
7839                         NULL, &tracing_saved_cmdlines_fops);
7840
7841         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7842                           NULL, &tracing_saved_cmdlines_size_fops);
7843
7844         trace_enum_init();
7845
7846         trace_create_enum_file(d_tracer);
7847
7848 #ifdef CONFIG_MODULES
7849         register_module_notifier(&trace_module_nb);
7850 #endif
7851
7852 #ifdef CONFIG_DYNAMIC_FTRACE
7853         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7854                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7855 #endif
7856
7857         create_trace_instances(d_tracer);
7858
7859         update_tracer_options(&global_trace);
7860
7861         return 0;
7862 }
7863
7864 static int trace_panic_handler(struct notifier_block *this,
7865                                unsigned long event, void *unused)
7866 {
7867         if (ftrace_dump_on_oops)
7868                 ftrace_dump(ftrace_dump_on_oops);
7869         return NOTIFY_OK;
7870 }
7871
7872 static struct notifier_block trace_panic_notifier = {
7873         .notifier_call  = trace_panic_handler,
7874         .next           = NULL,
7875         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7876 };
7877
7878 static int trace_die_handler(struct notifier_block *self,
7879                              unsigned long val,
7880                              void *data)
7881 {
7882         switch (val) {
7883         case DIE_OOPS:
7884                 if (ftrace_dump_on_oops)
7885                         ftrace_dump(ftrace_dump_on_oops);
7886                 break;
7887         default:
7888                 break;
7889         }
7890         return NOTIFY_OK;
7891 }
7892
7893 static struct notifier_block trace_die_notifier = {
7894         .notifier_call = trace_die_handler,
7895         .priority = 200
7896 };
7897
7898 /*
7899  * printk is set to a max of 1024; we really don't need it that big.
7900  * Nothing should be printing 1000 characters anyway.
7901  */
7902 #define TRACE_MAX_PRINT         1000
7903
7904 /*
7905  * Define here KERN_TRACE so that we have one place to modify
7906  * it if we decide to change what log level the ftrace dump
7907  * should be at.
7908  */
7909 #define KERN_TRACE              KERN_EMERG
7910
7911 void
7912 trace_printk_seq(struct trace_seq *s)
7913 {
7914         /* Probably should print a warning here. */
7915         if (s->seq.len >= TRACE_MAX_PRINT)
7916                 s->seq.len = TRACE_MAX_PRINT;
7917
7918         /*
7919          * More paranoid code. Although the buffer size is set to
7920          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7921          * an extra layer of protection.
7922          */
7923         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7924                 s->seq.len = s->seq.size - 1;
7925
7926         /* should be zero terminated, but we are paranoid. */
7927         s->buffer[s->seq.len] = 0;
7928
7929         printk(KERN_TRACE "%s", s->buffer);
7930
7931         trace_seq_init(s);
7932 }
7933
7934 void trace_init_global_iter(struct trace_iterator *iter)
7935 {
7936         iter->tr = &global_trace;
7937         iter->trace = iter->tr->current_trace;
7938         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7939         iter->trace_buffer = &global_trace.trace_buffer;
7940
7941         if (iter->trace && iter->trace->open)
7942                 iter->trace->open(iter);
7943
7944         /* Annotate start of buffers if we had overruns */
7945         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7946                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7947
7948         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7949         if (trace_clocks[iter->tr->clock_id].in_ns)
7950                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7951 }
7952
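/*
 * ftrace_dump() writes the ring buffer contents to the console at
 * KERN_EMERG.  It is called from the panic and die notifiers above when
 * ftrace_dump_on_oops is set.  DUMP_ALL iterates over every CPU's buffer,
 * DUMP_ORIG only the CPU that triggered the dump; an unknown mode falls
 * back to dumping all CPUs.  Only one dumper may run at a time.
 */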
7953 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7954 {
7955         /* use static because iter can be a bit big for the stack */
7956         static struct trace_iterator iter;
7957         static atomic_t dump_running;
7958         struct trace_array *tr = &global_trace;
7959         unsigned int old_userobj;
7960         unsigned long flags;
7961         int cnt = 0, cpu;
7962
7963         /* Only allow one dump user at a time. */
7964         if (atomic_inc_return(&dump_running) != 1) {
7965                 atomic_dec(&dump_running);
7966                 return;
7967         }
7968
7969         /*
7970          * Always turn off tracing when we dump.
7971          * We don't need to show trace output of what happens
7972          * between multiple crashes.
7973          *
7974          * If the user does a sysrq-z, then they can re-enable
7975          * tracing with echo 1 > tracing_on.
7976          */
7977         tracing_off();
7978
7979         local_irq_save(flags);
7980
7981         /* Simulate the iterator */
7982         trace_init_global_iter(&iter);
7983
7984         for_each_tracing_cpu(cpu) {
7985                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7986         }
7987
7988         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7989
7990         /* don't look at user memory in panic mode */
7991         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7992
7993         switch (oops_dump_mode) {
7994         case DUMP_ALL:
7995                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7996                 break;
7997         case DUMP_ORIG:
7998                 iter.cpu_file = raw_smp_processor_id();
7999                 break;
8000         case DUMP_NONE:
8001                 goto out_enable;
8002         default:
8003                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8004                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8005         }
8006
8007         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8008
8009         /* Did function tracer already get disabled? */
8010         if (ftrace_is_dead()) {
8011                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8012                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8013         }
8014
8015         /*
8016          * We need to stop all tracing on all CPUs to read the
8017          * next buffer. This is a bit expensive, but it is
8018          * not done often. We read everything we can,
8019          * and then release the locks again.
8020          */
8021
8022         while (!trace_empty(&iter)) {
8023
8024                 if (!cnt)
8025                         printk(KERN_TRACE "---------------------------------\n");
8026
8027                 cnt++;
8028
8029                 /* Reset everything from iter.seq onward; fields before seq (tr, trace, trace_buffer, ...) are preserved */
8030                 memset(&iter.seq, 0,
8031                        sizeof(struct trace_iterator) -
8032                        offsetof(struct trace_iterator, seq));
8033                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8034                 iter.pos = -1;
8035
8036                 if (trace_find_next_entry_inc(&iter) != NULL) {
8037                         int ret;
8038
8039                         ret = print_trace_line(&iter);
8040                         if (ret != TRACE_TYPE_NO_CONSUME)
8041                                 trace_consume(&iter);
8042                 }
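                     /*
                      * Printing to a slow console can take a while; keep
                      * the lockup watchdogs from deciding this CPU is stuck.
                      */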
8043                 touch_nmi_watchdog();
8044
8045                 trace_printk_seq(&iter.seq);
8046         }
8047
8048         if (!cnt)
8049                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8050         else
8051                 printk(KERN_TRACE "---------------------------------\n");
8052
8053  out_enable:
8054         tr->trace_flags |= old_userobj;
8055
8056         for_each_tracing_cpu(cpu) {
8057                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8058         }
8059         atomic_dec(&dump_running);
8060         local_irq_restore(flags);
8061 }
8062 EXPORT_SYMBOL_GPL(ftrace_dump);
8063
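/*
 * Allocate and set up the global trace array at boot: the cpumasks, the
 * trace_printk() and saved-cmdlines buffers, the per-CPU ring buffers and
 * their CPU-hotplug callback, and finally register the nop tracer and the
 * panic/die notifiers. Called from early_trace_init().
 */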
8064 __init static int tracer_alloc_buffers(void)
8065 {
8066         int ring_buf_size;
8067         int ret = -ENOMEM;
8068
8069         /*
8070          * Make sure we don't accidentally add more trace options
8071          * than we have bits for.
8072          */
8073         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8074
8075         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8076                 goto out;
8077
8078         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8079                 goto out_free_buffer_mask;
8080
8081         /* Only allocate trace_printk buffers if a trace_printk exists */
8082         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8083                 /* Must be called before global_trace.buffer is allocated */
8084                 trace_printk_init_buffers();
8085
8086         /* To save memory, keep the ring buffer size to its minimum */
8087         if (ring_buffer_expanded)
8088                 ring_buf_size = trace_buf_size;
8089         else
8090                 ring_buf_size = 1;
8091
8092         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8093         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8094
8095         raw_spin_lock_init(&global_trace.start_lock);
8096
8097         /*
8098          * The prepare callbacks allocate some memory for the ring buffer. We
8099          * don't free the buffer if the CPU goes down. If we were to free
8100          * the buffer, then the user would lose any trace that was in the
8101          * buffer. The memory will be removed once the "instance" is removed.
8102          */
8103         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8104                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8105                                       NULL);
8106         if (ret < 0)
8107                 goto out_free_cpumask;
8108         /* Used for event triggers */
8109         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8110         if (!temp_buffer)
8111                 goto out_rm_hp_state;
8112
8113         if (trace_create_savedcmd() < 0)
8114                 goto out_free_temp_buffer;
8115
8116         /* TODO: make the number of buffers hot pluggable with CPUs */
8117         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8118                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8119                 WARN_ON(1);
8120                 goto out_free_savedcmd;
8121         }
8122
8123         if (global_trace.buffer_disabled)
8124                 tracing_off();
8125
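             /*
              * Honor a trace clock requested on the kernel command line
              * (trace_boot_clock is set during early command-line parsing)
              * now that the buffers exist.
              */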
8126         if (trace_boot_clock) {
8127                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8128                 if (ret < 0)
8129                         pr_warn("Trace clock %s not defined, going back to default\n",
8130                                 trace_boot_clock);
8131         }
8132
8133         /*
8134          * register_tracer() might reference current_trace, so it
8135          * needs to be set before we register anything. This is
8136          * just a bootstrap of current_trace anyway.
8137          */
8138         global_trace.current_trace = &nop_trace;
8139
8140         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8141
8142         ftrace_init_global_array_ops(&global_trace);
8143
8144         init_trace_flags_index(&global_trace);
8145
8146         register_tracer(&nop_trace);
8147
8148         /* Function tracing may start here (via kernel command line) */
8149         init_function_trace();
8150
8151         /* All seems OK, enable tracing */
8152         tracing_disabled = 0;
8153
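             /*
              * Hook into the panic and die notifier chains so that the
              * trace buffers can be dumped out when the kernel crashes.
              */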
8154         atomic_notifier_chain_register(&panic_notifier_list,
8155                                        &trace_panic_notifier);
8156
8157         register_die_notifier(&trace_die_notifier);
8158
8159         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8160
8161         INIT_LIST_HEAD(&global_trace.systems);
8162         INIT_LIST_HEAD(&global_trace.events);
8163         list_add(&global_trace.list, &ftrace_trace_arrays);
8164
8165         apply_trace_boot_options();
8166
8167         register_snapshot_cmd();
8168
8169         return 0;
8170
8171 out_free_savedcmd:
8172         free_saved_cmdlines_buffer(savedcmd);
8173 out_free_temp_buffer:
8174         ring_buffer_free(temp_buffer);
8175 out_rm_hp_state:
8176         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8177 out_free_cpumask:
8178         free_cpumask_var(global_trace.tracing_cpumask);
8179 out_free_buffer_mask:
8180         free_cpumask_var(tracing_buffer_mask);
8181 out:
8182         return ret;
8183 }
8184
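/*
 * Tracing comes up in two stages during boot: early_trace_init() allocates
 * the buffers and lets command-line requested function tracing start as
 * early as possible, and trace_init() later brings up the trace event
 * infrastructure.
 */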
8185 void __init early_trace_init(void)
8186 {
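             /*
              * If piping tracepoints to printk was requested on the kernel
              * command line (typically the tp_printk parameter), allocate
              * the iterator used to format those events before any
              * tracepoint can fire.
              */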
8187         if (tracepoint_printk) {
8188                 tracepoint_print_iter =
8189                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8190                 if (WARN_ON(!tracepoint_print_iter))
8191                         tracepoint_printk = 0;
8192                 else
8193                         static_key_enable(&tracepoint_printk_key.key);
8194         }
8195         tracer_alloc_buffers();
8196 }
8197
8198 void __init trace_init(void)
8199 {
8200         trace_event_init();
8201 }
8202
8203 __init static int clear_boot_tracer(void)
8204 {
8205         /*
8206          * The default bootup tracer string points into an init section
8207          * that is freed after boot. This function runs as a late
8208          * initcall: if the boot tracer was never registered, clear the
8209          * pointer so that a later registration cannot access memory
8210          * that is about to be freed.
8211          */
8212         if (!default_bootup_tracer)
8213                 return 0;
8214
8215         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8216                default_bootup_tracer);
8217         default_bootup_tracer = NULL;
8218
8219         return 0;
8220 }
8221
8222 fs_initcall(tracer_init_tracefs);
8223 late_initcall(clear_boot_tracer);