1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * from "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155
156 static union trace_eval_map_item *trace_eval_maps;
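
/*
 * Illustrative sketch (hypothetical walk_saved_eval_maps helper and fn
 * callback): walking every saved map following the layout described
 * above. The head item carries the count, the map entries follow it,
 * and the tail item links to the head of the next saved array (or
 * NULL). Real callers would hold trace_eval_mutex while walking.
 */
static void walk_saved_eval_maps(void (*fn)(struct trace_eval_map *map))
{
        union trace_eval_map_item *ptr = trace_eval_maps;

        while (ptr) {
                unsigned long i, len = ptr->head.length;

                /* the map entries sit between the head item and the tail item */
                for (i = 1; i <= len; i++)
                        fn(&ptr[i].map);

                /* the tail item follows the last entry */
                ptr = ptr[len + 1].tail.next;
        }
}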
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
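
/*
 * Illustrative sketch (hypothetical example_* callers): the expected
 * call sites are fork and exit hooks. On fork the child is added only
 * if the parent is already listed; on exit the task is cleared by
 * passing a NULL @self.
 */
static void example_pid_list_on_fork(struct trace_pid_list *pid_list,
                                     struct task_struct *parent,
                                     struct task_struct *child)
{
        trace_filter_add_remove_task(pid_list, parent, child);
}

static void example_pid_list_on_exit(struct trace_pid_list *pid_list,
                                     struct task_struct *task)
{
        trace_filter_add_remove_task(pid_list, NULL, task);
}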
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
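
/*
 * Illustrative sketch (hypothetical example_* names): how the helpers
 * above would typically be wired into seq_operations. Real users also
 * take the appropriate locks in their start/stop callbacks and look up
 * the pid list from their own private data.
 */
static void *example_pids_seq_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;   /* assumed setup */

        return trace_pid_start(pid_list, pos);
}

static void *example_pids_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;   /* assumed setup */

        return trace_pid_next(pid_list, v, pos);
}

static void example_pids_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
        .start  = example_pids_seq_start,
        .next   = example_pids_seq_next,
        .stop   = example_pids_seq_stop,
        .show   = trace_pid_show,
};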
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always create a new array. The write is an all-or-nothing
492          * operation: a new array is created whenever the user adds
493          * pids, and if the operation fails, the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled", which is used in fast paths such as
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much appreciated not
614  * to have to wait for all that output. In any case, this is
615  * configurable at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be rewritten
640  *      by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
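
/*
 * Illustrative sketch (hypothetical example_consume_cpu helper): the
 * bracket pattern the primitives above are meant for when consuming
 * events from one cpu buffer. Passing RING_BUFFER_ALL_CPUS to
 * trace_access_lock() instead excludes every per-cpu reader at once.
 */
static void example_consume_cpu(struct trace_array *tr, int cpu)
{
        struct ring_buffer_event *event;

        trace_access_lock(cpu);
        while ((event = ring_buffer_consume(tr->trace_buffer.buffer,
                                            cpu, NULL, NULL)))
                ;       /* hand each event to a consumer here */
        trace_access_unlock(cpu);
}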
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
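
/*
 * Illustrative note: callers normally use the trace_puts() wrapper from
 * linux/kernel.h rather than these helpers directly; it picks
 * __trace_bputs() for strings known constant at build time and falls
 * back to __trace_puts() otherwise. A minimal (hypothetical) use:
 */
static void example_mark_point(void)
{
        trace_puts("reached the interesting point\n");
}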
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * trace_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, this will stop tracing,
939  * basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to trace_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
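
/*
 * Illustrative sketch (hypothetical example_* callers): allocate the
 * snapshot buffer once from a context that may sleep, then take
 * snapshots from wherever the interesting condition is detected, even
 * in atomic context.
 */
static int example_snapshot_setup(void)
{
        return tracing_alloc_snapshot();        /* may sleep */
}

static void example_snapshot_on_condition(void)
{
        /* safe once the buffer has been allocated */
        tracing_snapshot();
}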
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
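
/*
 * Illustrative sketch (hypothetical example_capture_window helper):
 * bracketing a region of interest so recording happens only while the
 * code of interest runs. tracing_off() stops new events from being
 * written but, as noted above, does not remove the tracers' own
 * overhead.
 */
static void example_capture_window(void)
{
        tracing_on();
        /* ... code whose events should be recorded ... */
        tracing_off();
}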
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * If the parser did not finish with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since running them would break this tracer. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
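
#ifdef UNUSED
/*
 * Illustrative sketch only (not part of the original file): the minimum a
 * built-in tracer needs in order to call register_tracer(). The "example"
 * name and both callbacks are hypothetical.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name           = "example",
        .init           = example_tracer_init,
        .reset          = example_tracer_reset,
};

static __init int register_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(register_example_tracer);
#endif /* UNUSED */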
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 static int *tgid_map;
1713
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719         unsigned *map_cmdline_to_pid;
1720         unsigned cmdline_num;
1721         int cmdline_idx;
1722         char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725
1726 /* temporary disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
1738
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740                                     struct saved_cmdlines_buffer *s)
1741 {
1742         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743                                         GFP_KERNEL);
1744         if (!s->map_cmdline_to_pid)
1745                 return -ENOMEM;
1746
1747         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748         if (!s->saved_cmdlines) {
1749                 kfree(s->map_cmdline_to_pid);
1750                 return -ENOMEM;
1751         }
1752
1753         s->cmdline_idx = 0;
1754         s->cmdline_num = val;
1755         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756                sizeof(s->map_pid_to_cmdline));
1757         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758                val * sizeof(*s->map_cmdline_to_pid));
1759
1760         return 0;
1761 }
1762
1763 static int trace_create_savedcmd(void)
1764 {
1765         int ret;
1766
1767         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768         if (!savedcmd)
1769                 return -ENOMEM;
1770
1771         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772         if (ret < 0) {
1773                 kfree(savedcmd);
1774                 savedcmd = NULL;
1775                 return -ENOMEM;
1776         }
1777
1778         return 0;
1779 }
1780
1781 int is_tracing_stopped(void)
1782 {
1783         return global_trace.stop_count;
1784 }
1785
1786 /**
1787  * tracing_start - quick start of the tracer
1788  *
1789  * If tracing is enabled but was stopped by tracing_stop,
1790  * this will start the tracer back up.
1791  */
1792 void tracing_start(void)
1793 {
1794         struct ring_buffer *buffer;
1795         unsigned long flags;
1796
1797         if (tracing_disabled)
1798                 return;
1799
1800         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801         if (--global_trace.stop_count) {
1802                 if (global_trace.stop_count < 0) {
1803                         /* Someone screwed up their debugging */
1804                         WARN_ON_ONCE(1);
1805                         global_trace.stop_count = 0;
1806                 }
1807                 goto out;
1808         }
1809
1810         /* Prevent the buffers from switching */
1811         arch_spin_lock(&global_trace.max_lock);
1812
1813         buffer = global_trace.trace_buffer.buffer;
1814         if (buffer)
1815                 ring_buffer_record_enable(buffer);
1816
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818         buffer = global_trace.max_buffer.buffer;
1819         if (buffer)
1820                 ring_buffer_record_enable(buffer);
1821 #endif
1822
1823         arch_spin_unlock(&global_trace.max_lock);
1824
1825  out:
1826         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831         struct ring_buffer *buffer;
1832         unsigned long flags;
1833
1834         if (tracing_disabled)
1835                 return;
1836
1837         /* If global, we need to also start the max tracer */
1838         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839                 return tracing_start();
1840
1841         raw_spin_lock_irqsave(&tr->start_lock, flags);
1842
1843         if (--tr->stop_count) {
1844                 if (tr->stop_count < 0) {
1845                         /* Someone screwed up their debugging */
1846                         WARN_ON_ONCE(1);
1847                         tr->stop_count = 0;
1848                 }
1849                 goto out;
1850         }
1851
1852         buffer = tr->trace_buffer.buffer;
1853         if (buffer)
1854                 ring_buffer_record_enable(buffer);
1855
1856  out:
1857         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859
1860 /**
1861  * tracing_stop - quick stop of the tracer
1862  *
1863  * Light weight way to stop tracing. Use in conjunction with
1864  * tracing_start.
1865  */
1866 void tracing_stop(void)
1867 {
1868         struct ring_buffer *buffer;
1869         unsigned long flags;
1870
1871         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872         if (global_trace.stop_count++)
1873                 goto out;
1874
1875         /* Prevent the buffers from switching */
1876         arch_spin_lock(&global_trace.max_lock);
1877
1878         buffer = global_trace.trace_buffer.buffer;
1879         if (buffer)
1880                 ring_buffer_record_disable(buffer);
1881
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883         buffer = global_trace.max_buffer.buffer;
1884         if (buffer)
1885                 ring_buffer_record_disable(buffer);
1886 #endif
1887
1888         arch_spin_unlock(&global_trace.max_lock);
1889
1890  out:
1891         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
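
#ifdef UNUSED
/*
 * Illustrative sketch only: tracing_stop()/tracing_start() are meant to be
 * used as a lightweight pair, e.g. to freeze the buffers while something
 * inspects them. The function below is hypothetical.
 */
static void example_freeze_and_inspect(void)
{
        tracing_stop();         /* recording off, buffer contents preserved */
        /* ... read or dump the ring buffer contents here ... */
        tracing_start();        /* resume recording */
}
#endif /* UNUSED */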
1893
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896         struct ring_buffer *buffer;
1897         unsigned long flags;
1898
1899         /* If global, we need to also stop the max tracer */
1900         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901                 return tracing_stop();
1902
1903         raw_spin_lock_irqsave(&tr->start_lock, flags);
1904         if (tr->stop_count++)
1905                 goto out;
1906
1907         buffer = tr->trace_buffer.buffer;
1908         if (buffer)
1909                 ring_buffer_record_disable(buffer);
1910
1911  out:
1912         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
1994
1995 int trace_find_tgid(int pid)
1996 {
1997         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
1998                 return 0;
1999
2000         return tgid_map[pid];
2001 }
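
#ifdef UNUSED
/*
 * Illustrative sketch only: resolving a recorded pid back to its saved comm
 * and tgid at output time, using the helpers above. The function name is
 * hypothetical.
 */
static void example_resolve_task(int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        pr_info("pid %d comm %s tgid %d\n", pid, comm, trace_find_tgid(pid));
}
#endif /* UNUSED */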
2002
2003 static int trace_save_tgid(struct task_struct *tsk)
2004 {
2005         if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
2006                 return 0;
2007
2008         tgid_map[tsk->pid] = tsk->tgid;
2009         return 1;
2010 }
2011
2012 static bool tracing_record_taskinfo_skip(int flags)
2013 {
2014         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2015                 return true;
2016         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2017                 return true;
2018         if (!__this_cpu_read(trace_taskinfo_save))
2019                 return true;
2020         return false;
2021 }
2022
2023 /**
2024  * tracing_record_taskinfo - record the task info of a task
2025  *
2026  * @task  - task to record
2027  * @flags - TRACE_RECORD_CMDLINE for recording comm
2028  *        - TRACE_RECORD_TGID for recording tgid
2029  */
2030 void tracing_record_taskinfo(struct task_struct *task, int flags)
2031 {
2032         if (tracing_record_taskinfo_skip(flags))
2033                 return;
2034         if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
2035                 return;
2036         if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
2037                 return;
2038
2039         __this_cpu_write(trace_taskinfo_save, false);
2040 }
2041
2042 /**
2043  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2044  *
2045  * @prev - previous task during sched_switch
2046  * @next - next task during sched_switch
2047  * @flags - TRACE_RECORD_CMDLINE for recording comm
2048  *          TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2051                                           struct task_struct *next, int flags)
2052 {
2053         if (tracing_record_taskinfo_skip(flags))
2054                 return;
2055
2056         if ((flags & TRACE_RECORD_CMDLINE) &&
2057             (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
2058                 return;
2059
2060         if ((flags & TRACE_RECORD_TGID) &&
2061             (!trace_save_tgid(prev) || !trace_save_tgid(next)))
2062                 return;
2063
2064         __this_cpu_write(trace_taskinfo_save, false);
2065 }
2066
2067 /* Helpers to record a specific task information */
2068 void tracing_record_cmdline(struct task_struct *task)
2069 {
2070         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2071 }
2072
2073 void tracing_record_tgid(struct task_struct *task)
2074 {
2075         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2076 }
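
#ifdef UNUSED
/*
 * Illustrative sketch only: how a sched_switch-style probe would record the
 * comm and tgid of both tasks in one call. The probe itself is hypothetical.
 */
static void example_probe_sched_switch(struct task_struct *prev,
                                       struct task_struct *next)
{
        tracing_record_taskinfo_sched_switch(prev, next,
                                             TRACE_RECORD_CMDLINE |
                                             TRACE_RECORD_TGID);
}
#endif /* UNUSED */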
2077
2078 /*
2079  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2080  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2081  * simplifies those functions and keeps them in sync.
2082  */
2083 enum print_line_t trace_handle_return(struct trace_seq *s)
2084 {
2085         return trace_seq_has_overflowed(s) ?
2086                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2087 }
2088 EXPORT_SYMBOL_GPL(trace_handle_return);
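
#ifdef UNUSED
/*
 * Illustrative sketch only: the pattern trace_handle_return() is meant for.
 * An output callback writes into iter->seq and lets the helper map a
 * trace_seq overflow onto TRACE_TYPE_PARTIAL_LINE. The callback is
 * hypothetical.
 */
static enum print_line_t example_output_line(struct trace_iterator *iter)
{
        trace_seq_printf(&iter->seq, "example event on cpu %d\n", iter->cpu);

        return trace_handle_return(&iter->seq);
}
#endif /* UNUSED */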
2089
2090 void
2091 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2092                              int pc)
2093 {
2094         struct task_struct *tsk = current;
2095
2096         entry->preempt_count            = pc & 0xff;
2097         entry->pid                      = (tsk) ? tsk->pid : 0;
2098         entry->flags =
2099 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2100                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2101 #else
2102                 TRACE_FLAG_IRQS_NOSUPPORT |
2103 #endif
2104                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2105                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2106                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2107                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2108                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2109 }
2110 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
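
#ifdef UNUSED
/*
 * Illustrative sketch only: filling the common header of an entry the way
 * the reservation paths do, then the type-specific payload. The ftrace_entry
 * layout (a struct trace_entry header named 'ent', plus ip/parent_ip) is the
 * one trace_function() further down also fills in.
 */
static void example_fill_fn_entry(struct ftrace_entry *entry,
                                  unsigned long flags, int pc)
{
        tracing_generic_entry_update(&entry->ent, flags, pc);
        entry->ip        = _THIS_IP_;
        entry->parent_ip = _RET_IP_;
}
#endif /* UNUSED */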
2111
2112 struct ring_buffer_event *
2113 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2114                           int type,
2115                           unsigned long len,
2116                           unsigned long flags, int pc)
2117 {
2118         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2119 }
2120
2121 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2122 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2123 static int trace_buffered_event_ref;
2124
2125 /**
2126  * trace_buffered_event_enable - enable buffering events
2127  *
2128  * When events are being filtered, it is quicker to use a temporary
2129  * buffer to write the event data into if there's a likely chance
2130  * that it will not be committed. The discard of the ring buffer
2131  * is not as fast as committing, and is much slower than copying
2132  * a commit.
2133  *
2134  * When an event is to be filtered, allocate per cpu buffers to
2135          * write the event data into; if the event is filtered and discarded,
2136          * it is simply dropped, otherwise the entire data is committed
2137          * in one shot.
2138  */
2139 void trace_buffered_event_enable(void)
2140 {
2141         struct ring_buffer_event *event;
2142         struct page *page;
2143         int cpu;
2144
2145         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2146
2147         if (trace_buffered_event_ref++)
2148                 return;
2149
2150         for_each_tracing_cpu(cpu) {
2151                 page = alloc_pages_node(cpu_to_node(cpu),
2152                                         GFP_KERNEL | __GFP_NORETRY, 0);
2153                 if (!page)
2154                         goto failed;
2155
2156                 event = page_address(page);
2157                 memset(event, 0, sizeof(*event));
2158
2159                 per_cpu(trace_buffered_event, cpu) = event;
2160
2161                 preempt_disable();
2162                 if (cpu == smp_processor_id() &&
2163                     this_cpu_read(trace_buffered_event) !=
2164                     per_cpu(trace_buffered_event, cpu))
2165                         WARN_ON_ONCE(1);
2166                 preempt_enable();
2167         }
2168
2169         return;
2170  failed:
2171         trace_buffered_event_disable();
2172 }
2173
2174 static void enable_trace_buffered_event(void *data)
2175 {
2176         /* Probably not needed, but do it anyway */
2177         smp_rmb();
2178         this_cpu_dec(trace_buffered_event_cnt);
2179 }
2180
2181 static void disable_trace_buffered_event(void *data)
2182 {
2183         this_cpu_inc(trace_buffered_event_cnt);
2184 }
2185
2186 /**
2187  * trace_buffered_event_disable - disable buffering events
2188  *
2189  * When a filter is removed, it is faster to not use the buffered
2190  * events, and to commit directly into the ring buffer. Free up
2191  * the temp buffers when there are no more users. This requires
2192  * special synchronization with current events.
2193  */
2194 void trace_buffered_event_disable(void)
2195 {
2196         int cpu;
2197
2198         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2199
2200         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2201                 return;
2202
2203         if (--trace_buffered_event_ref)
2204                 return;
2205
2206         preempt_disable();
2207         /* For each CPU, set the buffer as used. */
2208         smp_call_function_many(tracing_buffer_mask,
2209                                disable_trace_buffered_event, NULL, 1);
2210         preempt_enable();
2211
2212         /* Wait for all current users to finish */
2213         synchronize_sched();
2214
2215         for_each_tracing_cpu(cpu) {
2216                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2217                 per_cpu(trace_buffered_event, cpu) = NULL;
2218         }
2219         /*
2220          * Make sure trace_buffered_event is NULL before clearing
2221          * trace_buffered_event_cnt.
2222          */
2223         smp_wmb();
2224
2225         preempt_disable();
2226         /* Do the work on each cpu */
2227         smp_call_function_many(tracing_buffer_mask,
2228                                enable_trace_buffered_event, NULL, 1);
2229         preempt_enable();
2230 }
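
#ifdef UNUSED
/*
 * Illustrative sketch only: callers pair the enable/disable above under
 * event_mutex, which both functions assert with WARN_ON_ONCE(). The wrapper
 * below is hypothetical.
 */
static void example_set_filter_buffering(bool on)
{
        mutex_lock(&event_mutex);
        if (on)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}
#endif /* UNUSED */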
2231
2232 static struct ring_buffer *temp_buffer;
2233
2234 struct ring_buffer_event *
2235 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2236                           struct trace_event_file *trace_file,
2237                           int type, unsigned long len,
2238                           unsigned long flags, int pc)
2239 {
2240         struct ring_buffer_event *entry;
2241         int val;
2242
2243         *current_rb = trace_file->tr->trace_buffer.buffer;
2244
2245         if ((trace_file->flags &
2246              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2247             (entry = this_cpu_read(trace_buffered_event))) {
2248                 /* Try to use the per cpu buffer first */
2249                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2250                 if (val == 1) {
2251                         trace_event_setup(entry, type, flags, pc);
2252                         entry->array[0] = len;
2253                         return entry;
2254                 }
2255                 this_cpu_dec(trace_buffered_event_cnt);
2256         }
2257
2258         entry = __trace_buffer_lock_reserve(*current_rb,
2259                                             type, len, flags, pc);
2260         /*
2261          * If tracing is off, but we have triggers enabled
2262          * we still need to look at the event data. Use the temp_buffer
2263          * to store the trace event for the trigger to use. It's recursion
2264          * safe and will not be recorded anywhere.
2265          */
2266         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2267                 *current_rb = temp_buffer;
2268                 entry = __trace_buffer_lock_reserve(*current_rb,
2269                                                     type, len, flags, pc);
2270         }
2271         return entry;
2272 }
2273 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2274
2275 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2276 static DEFINE_MUTEX(tracepoint_printk_mutex);
2277
2278 static void output_printk(struct trace_event_buffer *fbuffer)
2279 {
2280         struct trace_event_call *event_call;
2281         struct trace_event *event;
2282         unsigned long flags;
2283         struct trace_iterator *iter = tracepoint_print_iter;
2284
2285         /* We should never get here if iter is NULL */
2286         if (WARN_ON_ONCE(!iter))
2287                 return;
2288
2289         event_call = fbuffer->trace_file->event_call;
2290         if (!event_call || !event_call->event.funcs ||
2291             !event_call->event.funcs->trace)
2292                 return;
2293
2294         event = &fbuffer->trace_file->event_call->event;
2295
2296         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2297         trace_seq_init(&iter->seq);
2298         iter->ent = fbuffer->entry;
2299         event_call->event.funcs->trace(iter, 0, event);
2300         trace_seq_putc(&iter->seq, 0);
2301         printk("%s", iter->seq.buffer);
2302
2303         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2304 }
2305
2306 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2307                              void __user *buffer, size_t *lenp,
2308                              loff_t *ppos)
2309 {
2310         int save_tracepoint_printk;
2311         int ret;
2312
2313         mutex_lock(&tracepoint_printk_mutex);
2314         save_tracepoint_printk = tracepoint_printk;
2315
2316         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2317
2318         /*
2319          * This will force exiting early, as tracepoint_printk
2320          * is always zero when tracepoint_print_iter is not allocated.
2321          */
2322         if (!tracepoint_print_iter)
2323                 tracepoint_printk = 0;
2324
2325         if (save_tracepoint_printk == tracepoint_printk)
2326                 goto out;
2327
2328         if (tracepoint_printk)
2329                 static_key_enable(&tracepoint_printk_key.key);
2330         else
2331                 static_key_disable(&tracepoint_printk_key.key);
2332
2333  out:
2334         mutex_unlock(&tracepoint_printk_mutex);
2335
2336         return ret;
2337 }
2338
2339 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2340 {
2341         if (static_key_false(&tracepoint_printk_key.key))
2342                 output_printk(fbuffer);
2343
2344         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2345                                     fbuffer->event, fbuffer->entry,
2346                                     fbuffer->flags, fbuffer->pc);
2347 }
2348 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2349
2350 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2351                                      struct ring_buffer *buffer,
2352                                      struct ring_buffer_event *event,
2353                                      unsigned long flags, int pc,
2354                                      struct pt_regs *regs)
2355 {
2356         __buffer_unlock_commit(buffer, event);
2357
2358         /*
2359          * If regs is not set, then skip the following callers:
2360          *   trace_buffer_unlock_commit_regs
2361          *   event_trigger_unlock_commit
2362          *   trace_event_buffer_commit
2363          *   trace_event_raw_event_sched_switch
2364          * Note, we can still get here via blktrace, wakeup tracer
2365          * and mmiotrace, but that's ok if they lose a function or
2366          * two. They are not that meaningful.
2367          */
2368         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2369         ftrace_trace_userstack(buffer, flags, pc);
2370 }
2371
2372 /*
2373  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2374  */
2375 void
2376 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2377                                    struct ring_buffer_event *event)
2378 {
2379         __buffer_unlock_commit(buffer, event);
2380 }
2381
2382 static void
2383 trace_process_export(struct trace_export *export,
2384                struct ring_buffer_event *event)
2385 {
2386         struct trace_entry *entry;
2387         unsigned int size = 0;
2388
2389         entry = ring_buffer_event_data(event);
2390         size = ring_buffer_event_length(event);
2391         export->write(entry, size);
2392 }
2393
2394 static DEFINE_MUTEX(ftrace_export_lock);
2395
2396 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2397
2398 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2399
2400 static inline void ftrace_exports_enable(void)
2401 {
2402         static_branch_enable(&ftrace_exports_enabled);
2403 }
2404
2405 static inline void ftrace_exports_disable(void)
2406 {
2407         static_branch_disable(&ftrace_exports_enabled);
2408 }
2409
2410 void ftrace_exports(struct ring_buffer_event *event)
2411 {
2412         struct trace_export *export;
2413
2414         preempt_disable_notrace();
2415
2416         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2417         while (export) {
2418                 trace_process_export(export, event);
2419                 export = rcu_dereference_raw_notrace(export->next);
2420         }
2421
2422         preempt_enable_notrace();
2423 }
2424
2425 static inline void
2426 add_trace_export(struct trace_export **list, struct trace_export *export)
2427 {
2428         rcu_assign_pointer(export->next, *list);
2429         /*
2430          * We are entering export into the list but another
2431          * CPU might be walking that list. We need to make sure
2432          * the export->next pointer is valid before another CPU sees
2433          * the export pointer included into the list.
2434          */
2435         rcu_assign_pointer(*list, export);
2436 }
2437
2438 static inline int
2439 rm_trace_export(struct trace_export **list, struct trace_export *export)
2440 {
2441         struct trace_export **p;
2442
2443         for (p = list; *p != NULL; p = &(*p)->next)
2444                 if (*p == export)
2445                         break;
2446
2447         if (*p != export)
2448                 return -1;
2449
2450         rcu_assign_pointer(*p, (*p)->next);
2451
2452         return 0;
2453 }
2454
2455 static inline void
2456 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2457 {
2458         if (*list == NULL)
2459                 ftrace_exports_enable();
2460
2461         add_trace_export(list, export);
2462 }
2463
2464 static inline int
2465 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2466 {
2467         int ret;
2468
2469         ret = rm_trace_export(list, export);
2470         if (*list == NULL)
2471                 ftrace_exports_disable();
2472
2473         return ret;
2474 }
2475
2476 int register_ftrace_export(struct trace_export *export)
2477 {
2478         if (WARN_ON_ONCE(!export->write))
2479                 return -1;
2480
2481         mutex_lock(&ftrace_export_lock);
2482
2483         add_ftrace_export(&ftrace_exports_list, export);
2484
2485         mutex_unlock(&ftrace_export_lock);
2486
2487         return 0;
2488 }
2489 EXPORT_SYMBOL_GPL(register_ftrace_export);
2490
2491 int unregister_ftrace_export(struct trace_export *export)
2492 {
2493         int ret;
2494
2495         mutex_lock(&ftrace_export_lock);
2496
2497         ret = rm_ftrace_export(&ftrace_exports_list, export);
2498
2499         mutex_unlock(&ftrace_export_lock);
2500
2501         return ret;
2502 }
2503 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
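
#ifdef UNUSED
/*
 * Illustrative sketch only: a minimal trace_export that just counts the
 * bytes handed to it. The ->write() prototype mirrors the way
 * trace_process_export() above invokes it; all names below are hypothetical.
 */
static unsigned long example_export_bytes;

static void example_export_write(const void *entry, unsigned int size)
{
        example_export_bytes += size;
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

static int example_export_start(void)
{
        return register_ftrace_export(&example_export);
}

static void example_export_stop(void)
{
        unregister_ftrace_export(&example_export);
}
#endif /* UNUSED */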
2504
2505 void
2506 trace_function(struct trace_array *tr,
2507                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2508                int pc)
2509 {
2510         struct trace_event_call *call = &event_function;
2511         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2512         struct ring_buffer_event *event;
2513         struct ftrace_entry *entry;
2514
2515         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2516                                             flags, pc);
2517         if (!event)
2518                 return;
2519         entry   = ring_buffer_event_data(event);
2520         entry->ip                       = ip;
2521         entry->parent_ip                = parent_ip;
2522
2523         if (!call_filter_check_discard(call, entry, buffer, event)) {
2524                 if (static_branch_unlikely(&ftrace_exports_enabled))
2525                         ftrace_exports(event);
2526                 __buffer_unlock_commit(buffer, event);
2527         }
2528 }
2529
2530 #ifdef CONFIG_STACKTRACE
2531
2532 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2533 struct ftrace_stack {
2534         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2535 };
2536
2537 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2538 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2539
2540 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2541                                  unsigned long flags,
2542                                  int skip, int pc, struct pt_regs *regs)
2543 {
2544         struct trace_event_call *call = &event_kernel_stack;
2545         struct ring_buffer_event *event;
2546         struct stack_entry *entry;
2547         struct stack_trace trace;
2548         int use_stack;
2549         int size = FTRACE_STACK_ENTRIES;
2550
2551         trace.nr_entries        = 0;
2552         trace.skip              = skip;
2553
2554          * Add two, for this function and the call to save_stack_trace().
2555          * Add two, for this function and the call to save_stack_trace()
2556          * If regs is set, then these functions will not be in the way.
2557          */
2558         if (!regs)
2559                 trace.skip += 2;
2560
2561         /*
2562          * Since events can happen in NMIs there's no safe way to
2563          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2564          * or NMI comes in, it will just have to use the default
2565          * FTRACE_STACK_ENTRIES.
2566          */
2567         preempt_disable_notrace();
2568
2569         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2570         /*
2571          * We don't need any atomic variables, just a barrier.
2572          * If an interrupt comes in, we don't care, because it would
2573          * have exited and put the counter back to what we want.
2574          * We just need a barrier to keep gcc from moving things
2575          * around.
2576          */
2577         barrier();
2578         if (use_stack == 1) {
2579                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2580                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2581
2582                 if (regs)
2583                         save_stack_trace_regs(regs, &trace);
2584                 else
2585                         save_stack_trace(&trace);
2586
2587                 if (trace.nr_entries > size)
2588                         size = trace.nr_entries;
2589         } else
2590                 /* From now on, use_stack is a boolean */
2591                 use_stack = 0;
2592
2593         size *= sizeof(unsigned long);
2594
2595         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2596                                             sizeof(*entry) + size, flags, pc);
2597         if (!event)
2598                 goto out;
2599         entry = ring_buffer_event_data(event);
2600
2601         memset(&entry->caller, 0, size);
2602
2603         if (use_stack)
2604                 memcpy(&entry->caller, trace.entries,
2605                        trace.nr_entries * sizeof(unsigned long));
2606         else {
2607                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2608                 trace.entries           = entry->caller;
2609                 if (regs)
2610                         save_stack_trace_regs(regs, &trace);
2611                 else
2612                         save_stack_trace(&trace);
2613         }
2614
2615         entry->size = trace.nr_entries;
2616
2617         if (!call_filter_check_discard(call, entry, buffer, event))
2618                 __buffer_unlock_commit(buffer, event);
2619
2620  out:
2621         /* Again, don't let gcc optimize things here */
2622         barrier();
2623         __this_cpu_dec(ftrace_stack_reserve);
2624         preempt_enable_notrace();
2625
2626 }
2627
2628 static inline void ftrace_trace_stack(struct trace_array *tr,
2629                                       struct ring_buffer *buffer,
2630                                       unsigned long flags,
2631                                       int skip, int pc, struct pt_regs *regs)
2632 {
2633         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2634                 return;
2635
2636         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2637 }
2638
2639 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2640                    int pc)
2641 {
2642         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2643
2644         if (rcu_is_watching()) {
2645                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2646                 return;
2647         }
2648
2649         /*
2650          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2651          * but if the above rcu_is_watching() failed, then the NMI
2652          * triggered someplace critical, and rcu_irq_enter() should
2653          * not be called from NMI.
2654          */
2655         if (unlikely(in_nmi()))
2656                 return;
2657
2658         /*
2659          * It is possible that a function is being traced in a
2660          * location that RCU is not watching. A call to
2661          * rcu_irq_enter() will make sure that it is, but there's
2662          * a few internal rcu functions that could be traced
2663          * where that wont work either. In those cases, we just
2664          * do nothing.
2665          */
2666         if (unlikely(rcu_irq_enter_disabled()))
2667                 return;
2668
2669         rcu_irq_enter_irqson();
2670         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2671         rcu_irq_exit_irqson();
2672 }
2673
2674 /**
2675  * trace_dump_stack - record a stack back trace in the trace buffer
2676  * @skip: Number of functions to skip (helper handlers)
2677  */
2678 void trace_dump_stack(int skip)
2679 {
2680         unsigned long flags;
2681
2682         if (tracing_disabled || tracing_selftest_running)
2683                 return;
2684
2685         local_save_flags(flags);
2686
2687         /*
2688          * Skip 3 more; that seems to get us to the caller of
2689          * this function.
2690          */
2691         skip += 3;
2692         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2693                              flags, skip, preempt_count(), NULL);
2694 }
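
#ifdef UNUSED
/*
 * Illustrative sketch only: dropping a backtrace of the current context into
 * the trace buffer from an arbitrary (hypothetical) spot, without skipping
 * any extra callers.
 */
static void example_checkpoint(void)
{
        trace_dump_stack(0);
}
#endif /* UNUSED */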
2695
2696 static DEFINE_PER_CPU(int, user_stack_count);
2697
2698 void
2699 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2700 {
2701         struct trace_event_call *call = &event_user_stack;
2702         struct ring_buffer_event *event;
2703         struct userstack_entry *entry;
2704         struct stack_trace trace;
2705
2706         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2707                 return;
2708
2709         /*
2710          * NMIs cannot handle page faults, even with fixups.
2711          * Saving the user stack can (and often does) fault.
2712          */
2713         if (unlikely(in_nmi()))
2714                 return;
2715
2716         /*
2717          * prevent recursion, since the user stack tracing may
2718          * trigger other kernel events.
2719          */
2720         preempt_disable();
2721         if (__this_cpu_read(user_stack_count))
2722                 goto out;
2723
2724         __this_cpu_inc(user_stack_count);
2725
2726         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2727                                             sizeof(*entry), flags, pc);
2728         if (!event)
2729                 goto out_drop_count;
2730         entry   = ring_buffer_event_data(event);
2731
2732         entry->tgid             = current->tgid;
2733         memset(&entry->caller, 0, sizeof(entry->caller));
2734
2735         trace.nr_entries        = 0;
2736         trace.max_entries       = FTRACE_STACK_ENTRIES;
2737         trace.skip              = 0;
2738         trace.entries           = entry->caller;
2739
2740         save_stack_trace_user(&trace);
2741         if (!call_filter_check_discard(call, entry, buffer, event))
2742                 __buffer_unlock_commit(buffer, event);
2743
2744  out_drop_count:
2745         __this_cpu_dec(user_stack_count);
2746  out:
2747         preempt_enable();
2748 }
2749
2750 #ifdef UNUSED
2751 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2752 {
2753         ftrace_trace_userstack(tr, flags, preempt_count());
2754 }
2755 #endif /* UNUSED */
2756
2757 #endif /* CONFIG_STACKTRACE */
2758
2759 /* created for use with alloc_percpu */
2760 struct trace_buffer_struct {
2761         int nesting;
2762         char buffer[4][TRACE_BUF_SIZE];
2763 };
2764
2765 static struct trace_buffer_struct *trace_percpu_buffer;
2766
2767 /*
2768  * This allows for lockless recording.  If we're nested too deeply, then
2769  * this returns NULL.
2770  */
2771 static char *get_trace_buf(void)
2772 {
2773         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2774
2775         if (!buffer || buffer->nesting >= 4)
2776                 return NULL;
2777
2778         return &buffer->buffer[buffer->nesting++][0];
2779 }
2780
2781 static void put_trace_buf(void)
2782 {
2783         this_cpu_dec(trace_percpu_buffer->nesting);
2784 }
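
#ifdef UNUSED
/*
 * Illustrative sketch only: the expected pairing for the helpers above.
 * get_trace_buf() relies on preemption being disabled (it uses this_cpu_ptr),
 * and a failed get must not be followed by put_trace_buf(). The function is
 * hypothetical.
 */
static int example_format_into_trace_buf(const char *msg)
{
        char *tbuffer;
        int len = 0;

        preempt_disable_notrace();
        tbuffer = get_trace_buf();
        if (tbuffer) {
                len = snprintf(tbuffer, TRACE_BUF_SIZE, "%s", msg);
                put_trace_buf();
        }
        preempt_enable_notrace();

        return len;
}
#endif /* UNUSED */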
2785
2786 static int alloc_percpu_trace_buffer(void)
2787 {
2788         struct trace_buffer_struct *buffers;
2789
2790         buffers = alloc_percpu(struct trace_buffer_struct);
2791         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2792                 return -ENOMEM;
2793
2794         trace_percpu_buffer = buffers;
2795         return 0;
2796 }
2797
2798 static int buffers_allocated;
2799
2800 void trace_printk_init_buffers(void)
2801 {
2802         if (buffers_allocated)
2803                 return;
2804
2805         if (alloc_percpu_trace_buffer())
2806                 return;
2807
2808         /* trace_printk() is for debug use only. Don't use it in production. */
2809
2810         pr_warn("\n");
2811         pr_warn("**********************************************************\n");
2812         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2813         pr_warn("**                                                      **\n");
2814         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2815         pr_warn("**                                                      **\n");
2816         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2817         pr_warn("** unsafe for production use.                           **\n");
2818         pr_warn("**                                                      **\n");
2819         pr_warn("** If you see this message and you are not debugging    **\n");
2820         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2821         pr_warn("**                                                      **\n");
2822         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2823         pr_warn("**********************************************************\n");
2824
2825         /* Expand the buffers to set size */
2826         tracing_update_buffers();
2827
2828         buffers_allocated = 1;
2829
2830         /*
2831          * trace_printk_init_buffers() can be called by modules.
2832          * If that happens, then we need to start cmdline recording
2833          * directly here. If the global_trace.buffer is already
2834          * allocated here, then this was called by module code.
2835          */
2836         if (global_trace.trace_buffer.buffer)
2837                 tracing_start_cmdline_record();
2838 }
2839
2840 void trace_printk_start_comm(void)
2841 {
2842         /* Start tracing comms if trace printk is set */
2843         if (!buffers_allocated)
2844                 return;
2845         tracing_start_cmdline_record();
2846 }
2847
2848 static void trace_printk_start_stop_comm(int enabled)
2849 {
2850         if (!buffers_allocated)
2851                 return;
2852
2853         if (enabled)
2854                 tracing_start_cmdline_record();
2855         else
2856                 tracing_stop_cmdline_record();
2857 }
2858
2859 /**
2860  * trace_vbprintk - write binary msg to tracing buffer
2861  *
2862  */
2863 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2864 {
2865         struct trace_event_call *call = &event_bprint;
2866         struct ring_buffer_event *event;
2867         struct ring_buffer *buffer;
2868         struct trace_array *tr = &global_trace;
2869         struct bprint_entry *entry;
2870         unsigned long flags;
2871         char *tbuffer;
2872         int len = 0, size, pc;
2873
2874         if (unlikely(tracing_selftest_running || tracing_disabled))
2875                 return 0;
2876
2877         /* Don't pollute graph traces with trace_vprintk internals */
2878         pause_graph_tracing();
2879
2880         pc = preempt_count();
2881         preempt_disable_notrace();
2882
2883         tbuffer = get_trace_buf();
2884         if (!tbuffer) {
2885                 len = 0;
2886                 goto out_nobuffer;
2887         }
2888
2889         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2890
2891         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2892                 goto out;
2893
2894         local_save_flags(flags);
2895         size = sizeof(*entry) + sizeof(u32) * len;
2896         buffer = tr->trace_buffer.buffer;
2897         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2898                                             flags, pc);
2899         if (!event)
2900                 goto out;
2901         entry = ring_buffer_event_data(event);
2902         entry->ip                       = ip;
2903         entry->fmt                      = fmt;
2904
2905         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2906         if (!call_filter_check_discard(call, entry, buffer, event)) {
2907                 __buffer_unlock_commit(buffer, event);
2908                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2909         }
2910
2911 out:
2912         put_trace_buf();
2913
2914 out_nobuffer:
2915         preempt_enable_notrace();
2916         unpause_graph_tracing();
2917
2918         return len;
2919 }
2920 EXPORT_SYMBOL_GPL(trace_vbprintk);
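
#ifdef UNUSED
/*
 * Illustrative sketch only: a varargs front end for trace_vbprintk(), in the
 * style of the trace_printk() machinery. Note that trace_vbprintk() stores
 * the fmt pointer in the entry, so fmt must outlive the trace data. The
 * wrapper name is hypothetical.
 */
static int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}
#endif /* UNUSED */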
2921
2922 static int
2923 __trace_array_vprintk(struct ring_buffer *buffer,
2924                       unsigned long ip, const char *fmt, va_list args)
2925 {
2926         struct trace_event_call *call = &event_print;
2927         struct ring_buffer_event *event;
2928         int len = 0, size, pc;
2929         struct print_entry *entry;
2930         unsigned long flags;
2931         char *tbuffer;
2932
2933         if (tracing_disabled || tracing_selftest_running)
2934                 return 0;
2935
2936         /* Don't pollute graph traces with trace_vprintk internals */
2937         pause_graph_tracing();
2938
2939         pc = preempt_count();
2940         preempt_disable_notrace();
2941
2942
2943         tbuffer = get_trace_buf();
2944         if (!tbuffer) {
2945                 len = 0;
2946                 goto out_nobuffer;
2947         }
2948
2949         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2950
2951         local_save_flags(flags);
2952         size = sizeof(*entry) + len + 1;
2953         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2954                                             flags, pc);
2955         if (!event)
2956                 goto out;
2957         entry = ring_buffer_event_data(event);
2958         entry->ip = ip;
2959
2960         memcpy(&entry->buf, tbuffer, len + 1);
2961         if (!call_filter_check_discard(call, entry, buffer, event)) {
2962                 __buffer_unlock_commit(buffer, event);
2963                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2964         }
2965
2966 out:
2967         put_trace_buf();
2968
2969 out_nobuffer:
2970         preempt_enable_notrace();
2971         unpause_graph_tracing();
2972
2973         return len;
2974 }
2975
2976 int trace_array_vprintk(struct trace_array *tr,
2977                         unsigned long ip, const char *fmt, va_list args)
2978 {
2979         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2980 }
2981
2982 int trace_array_printk(struct trace_array *tr,
2983                        unsigned long ip, const char *fmt, ...)
2984 {
2985         int ret;
2986         va_list ap;
2987
2988         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2989                 return 0;
2990
2991         va_start(ap, fmt);
2992         ret = trace_array_vprintk(tr, ip, fmt, ap);
2993         va_end(ap);
2994         return ret;
2995 }
2996
2997 int trace_array_printk_buf(struct ring_buffer *buffer,
2998                            unsigned long ip, const char *fmt, ...)
2999 {
3000         int ret;
3001         va_list ap;
3002
3003         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3004                 return 0;
3005
3006         va_start(ap, fmt);
3007         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3008         va_end(ap);
3009         return ret;
3010 }
3011
3012 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3013 {
3014         return trace_array_vprintk(&global_trace, ip, fmt, args);
3015 }
3016 EXPORT_SYMBOL_GPL(trace_vprintk);
3017
3018 static void trace_iterator_increment(struct trace_iterator *iter)
3019 {
3020         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3021
3022         iter->idx++;
3023         if (buf_iter)
3024                 ring_buffer_read(buf_iter, NULL);
3025 }
3026
3027 static struct trace_entry *
3028 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3029                 unsigned long *lost_events)
3030 {
3031         struct ring_buffer_event *event;
3032         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3033
3034         if (buf_iter)
3035                 event = ring_buffer_iter_peek(buf_iter, ts);
3036         else
3037                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3038                                          lost_events);
3039
3040         if (event) {
3041                 iter->ent_size = ring_buffer_event_length(event);
3042                 return ring_buffer_event_data(event);
3043         }
3044         iter->ent_size = 0;
3045         return NULL;
3046 }
3047
3048 static struct trace_entry *
3049 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3050                   unsigned long *missing_events, u64 *ent_ts)
3051 {
3052         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3053         struct trace_entry *ent, *next = NULL;
3054         unsigned long lost_events = 0, next_lost = 0;
3055         int cpu_file = iter->cpu_file;
3056         u64 next_ts = 0, ts;
3057         int next_cpu = -1;
3058         int next_size = 0;
3059         int cpu;
3060
3061         /*
3062          * If we are in a per_cpu trace file, don't bother iterating over
3063          * all CPUs; peek at that CPU directly.
3064          */
3065         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3066                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3067                         return NULL;
3068                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3069                 if (ent_cpu)
3070                         *ent_cpu = cpu_file;
3071
3072                 return ent;
3073         }
3074
3075         for_each_tracing_cpu(cpu) {
3076
3077                 if (ring_buffer_empty_cpu(buffer, cpu))
3078                         continue;
3079
3080                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3081
3082                 /*
3083                  * Pick the entry with the smallest timestamp:
3084                  */
3085                 if (ent && (!next || ts < next_ts)) {
3086                         next = ent;
3087                         next_cpu = cpu;
3088                         next_ts = ts;
3089                         next_lost = lost_events;
3090                         next_size = iter->ent_size;
3091                 }
3092         }
3093
3094         iter->ent_size = next_size;
3095
3096         if (ent_cpu)
3097                 *ent_cpu = next_cpu;
3098
3099         if (ent_ts)
3100                 *ent_ts = next_ts;
3101
3102         if (missing_events)
3103                 *missing_events = next_lost;
3104
3105         return next;
3106 }
3107
3108 /* Find the next real entry, without updating the iterator itself */
3109 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3110                                           int *ent_cpu, u64 *ent_ts)
3111 {
3112         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3113 }
3114
3115 /* Find the next real entry, and increment the iterator to the next entry */
3116 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3117 {
3118         iter->ent = __find_next_entry(iter, &iter->cpu,
3119                                       &iter->lost_events, &iter->ts);
3120
3121         if (iter->ent)
3122                 trace_iterator_increment(iter);
3123
3124         return iter->ent ? iter : NULL;
3125 }
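
#ifdef UNUSED
/*
 * Illustrative sketch only: stepping through every entry the iterator can
 * see, the way a read path does, and counting them. "iter" is assumed to be
 * a fully initialized trace_iterator; the function is hypothetical.
 */
static unsigned long example_count_remaining_entries(struct trace_iterator *iter)
{
        unsigned long count = 0;

        while (trace_find_next_entry_inc(iter))
                count++;

        return count;
}
#endif /* UNUSED */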
3126
3127 static void trace_consume(struct trace_iterator *iter)
3128 {
3129         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3130                             &iter->lost_events);
3131 }
3132
3133 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3134 {
3135         struct trace_iterator *iter = m->private;
3136         int i = (int)*pos;
3137         void *ent;
3138
3139         WARN_ON_ONCE(iter->leftover);
3140
3141         (*pos)++;
3142
3143         /* can't go backwards */
3144         if (iter->idx > i)
3145                 return NULL;
3146
3147         if (iter->idx < 0)
3148                 ent = trace_find_next_entry_inc(iter);
3149         else
3150                 ent = iter;
3151
3152         while (ent && iter->idx < i)
3153                 ent = trace_find_next_entry_inc(iter);
3154
3155         iter->pos = *pos;
3156
3157         return ent;
3158 }
3159
3160 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3161 {
3162         struct ring_buffer_event *event;
3163         struct ring_buffer_iter *buf_iter;
3164         unsigned long entries = 0;
3165         u64 ts;
3166
3167         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3168
3169         buf_iter = trace_buffer_iter(iter, cpu);
3170         if (!buf_iter)
3171                 return;
3172
3173         ring_buffer_iter_reset(buf_iter);
3174
3175         /*
3176          * We could have the case with the max latency tracers
3177          * that a reset never took place on a cpu. This is evidenced
3178          * by the timestamp being before the start of the buffer.
3179          */
3180         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3181                 if (ts >= iter->trace_buffer->time_start)
3182                         break;
3183                 entries++;
3184                 ring_buffer_read(buf_iter, NULL);
3185         }
3186
3187         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3188 }
3189
3190 /*
3191  * The current tracer is copied to avoid taking a global lock
3192  * all around.
3193  */
3194 static void *s_start(struct seq_file *m, loff_t *pos)
3195 {
3196         struct trace_iterator *iter = m->private;
3197         struct trace_array *tr = iter->tr;
3198         int cpu_file = iter->cpu_file;
3199         void *p = NULL;
3200         loff_t l = 0;
3201         int cpu;
3202
3203         /*
3204          * copy the tracer to avoid using a global lock all around.
3205          * iter->trace is a copy of current_trace, the pointer to the
3206          * name may be used instead of a strcmp(), as iter->trace->name
3207          * will point to the same string as current_trace->name.
3208          */
3209         mutex_lock(&trace_types_lock);
3210         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3211                 *iter->trace = *tr->current_trace;
3212         mutex_unlock(&trace_types_lock);
3213
3214 #ifdef CONFIG_TRACER_MAX_TRACE
3215         if (iter->snapshot && iter->trace->use_max_tr)
3216                 return ERR_PTR(-EBUSY);
3217 #endif
3218
3219         if (!iter->snapshot)
3220                 atomic_inc(&trace_record_taskinfo_disabled);
3221
3222         if (*pos != iter->pos) {
3223                 iter->ent = NULL;
3224                 iter->cpu = 0;
3225                 iter->idx = -1;
3226
3227                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3228                         for_each_tracing_cpu(cpu)
3229                                 tracing_iter_reset(iter, cpu);
3230                 } else
3231                         tracing_iter_reset(iter, cpu_file);
3232
3233                 iter->leftover = 0;
3234                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3235                         ;
3236
3237         } else {
3238                 /*
3239                  * If we overflowed the seq_file before, then we want
3240                  * to just reuse the trace_seq buffer again.
3241                  */
3242                 if (iter->leftover)
3243                         p = iter;
3244                 else {
3245                         l = *pos - 1;
3246                         p = s_next(m, p, &l);
3247                 }
3248         }
3249
3250         trace_event_read_lock();
3251         trace_access_lock(cpu_file);
3252         return p;
3253 }
3254
3255 static void s_stop(struct seq_file *m, void *p)
3256 {
3257         struct trace_iterator *iter = m->private;
3258
3259 #ifdef CONFIG_TRACER_MAX_TRACE
3260         if (iter->snapshot && iter->trace->use_max_tr)
3261                 return;
3262 #endif
3263
3264         if (!iter->snapshot)
3265                 atomic_dec(&trace_record_taskinfo_disabled);
3266
3267         trace_access_unlock(iter->cpu_file);
3268         trace_event_read_unlock();
3269 }
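
#ifdef UNUSED
/*
 * Illustrative sketch only: how start/next/stop callbacks such as the ones
 * above plug into a seq_operations table. The show callback below is a
 * hypothetical stand-in, not the one this file actually wires up.
 */
static int example_show(struct seq_file *m, void *v)
{
        seq_puts(m, "example line\n");
        return 0;
}

static const struct seq_operations example_seq_ops = {
        .start          = s_start,
        .next           = s_next,
        .stop           = s_stop,
        .show           = example_show,
};
#endif /* UNUSED */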
3270
3271 static void
3272 get_total_entries(struct trace_buffer *buf,
3273                   unsigned long *total, unsigned long *entries)
3274 {
3275         unsigned long count;
3276         int cpu;
3277
3278         *total = 0;
3279         *entries = 0;
3280
3281         for_each_tracing_cpu(cpu) {
3282                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3283                 /*
3284                  * If this buffer has skipped entries, then we hold all
3285                  * entries for the trace and we need to ignore the
3286                  * ones before the time stamp.
3287                  */
3288                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3289                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3290                         /* total is the same as the entries */
3291                         *total += count;
3292                 } else
3293                         *total += count +
3294                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3295                 *entries += count;
3296         }
3297 }
3298
3299 static void print_lat_help_header(struct seq_file *m)
3300 {
3301         seq_puts(m, "#                  _------=> CPU#            \n"
3302                     "#                 / _-----=> irqs-off        \n"
3303                     "#                | / _----=> need-resched    \n"
3304                     "#                || / _---=> hardirq/softirq \n"
3305                     "#                ||| / _--=> preempt-depth   \n"
3306                     "#                |||| /     delay            \n"
3307                     "#  cmd     pid   ||||| time  |   caller      \n"
3308                     "#     \\   /      |||||  \\    |   /         \n");
3309 }
3310
3311 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3312 {
3313         unsigned long total;
3314         unsigned long entries;
3315
3316         get_total_entries(buf, &total, &entries);
3317         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3318                    entries, total, num_online_cpus());
3319         seq_puts(m, "#\n");
3320 }
3321
3322 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3323                                    unsigned int flags)
3324 {
3325         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3326
3327         print_event_info(buf, m);
3328
3329         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3330         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3331 }
3332
3333 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3334                                        unsigned int flags)
3335 {
3336         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3337
3338         seq_printf(m, "#                          %s  _-----=> irqs-off\n",         tgid ? "          " : "");
3339         seq_printf(m, "#                          %s / _----=> need-resched\n",     tgid ? "          " : "");
3340         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",  tgid ? "          " : "");
3341         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",    tgid ? "          " : "");
3342         seq_printf(m, "#                          %s||| /     delay\n",             tgid ? "          " : "");
3343         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3344         seq_printf(m, "#              | |       | %s||||       |         |\n",      tgid ? "     |    " : "");
3345 }
3346
3347 void
3348 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3349 {
3350         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3351         struct trace_buffer *buf = iter->trace_buffer;
3352         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3353         struct tracer *type = iter->trace;
3354         unsigned long entries;
3355         unsigned long total;
3356         const char *name = "preemption";
3357
3358         name = type->name;
3359
3360         get_total_entries(buf, &total, &entries);
3361
3362         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3363                    name, UTS_RELEASE);
3364         seq_puts(m, "# -----------------------------------"
3365                  "---------------------------------\n");
3366         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3367                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3368                    nsecs_to_usecs(data->saved_latency),
3369                    entries,
3370                    total,
3371                    buf->cpu,
3372 #if defined(CONFIG_PREEMPT_NONE)
3373                    "server",
3374 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3375                    "desktop",
3376 #elif defined(CONFIG_PREEMPT)
3377                    "preempt",
3378 #else
3379                    "unknown",
3380 #endif
3381                    /* These are reserved for later use */
3382                    0, 0, 0, 0);
3383 #ifdef CONFIG_SMP
3384         seq_printf(m, " #P:%d)\n", num_online_cpus());
3385 #else
3386         seq_puts(m, ")\n");
3387 #endif
3388         seq_puts(m, "#    -----------------\n");
3389         seq_printf(m, "#    | task: %.16s-%d "
3390                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3391                    data->comm, data->pid,
3392                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3393                    data->policy, data->rt_priority);
3394         seq_puts(m, "#    -----------------\n");
3395
3396         if (data->critical_start) {
3397                 seq_puts(m, "#  => started at: ");
3398                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3399                 trace_print_seq(m, &iter->seq);
3400                 seq_puts(m, "\n#  => ended at:   ");
3401                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3402                 trace_print_seq(m, &iter->seq);
3403                 seq_puts(m, "\n#\n");
3404         }
3405
3406         seq_puts(m, "#\n");
3407 }
3408
3409 static void test_cpu_buff_start(struct trace_iterator *iter)
3410 {
3411         struct trace_seq *s = &iter->seq;
3412         struct trace_array *tr = iter->tr;
3413
3414         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3415                 return;
3416
3417         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3418                 return;
3419
3420         if (cpumask_available(iter->started) &&
3421             cpumask_test_cpu(iter->cpu, iter->started))
3422                 return;
3423
3424         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3425                 return;
3426
3427         if (cpumask_available(iter->started))
3428                 cpumask_set_cpu(iter->cpu, iter->started);
3429
3430         /* Don't print started cpu buffer for the first entry of the trace */
3431         if (iter->idx > 1)
3432                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3433                                 iter->cpu);
3434 }
3435
3436 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3437 {
3438         struct trace_array *tr = iter->tr;
3439         struct trace_seq *s = &iter->seq;
3440         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3441         struct trace_entry *entry;
3442         struct trace_event *event;
3443
3444         entry = iter->ent;
3445
3446         test_cpu_buff_start(iter);
3447
3448         event = ftrace_find_event(entry->type);
3449
3450         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3451                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3452                         trace_print_lat_context(iter);
3453                 else
3454                         trace_print_context(iter);
3455         }
3456
3457         if (trace_seq_has_overflowed(s))
3458                 return TRACE_TYPE_PARTIAL_LINE;
3459
3460         if (event)
3461                 return event->funcs->trace(iter, sym_flags, event);
3462
3463         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3464
3465         return trace_handle_return(s);
3466 }
3467
3468 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3469 {
3470         struct trace_array *tr = iter->tr;
3471         struct trace_seq *s = &iter->seq;
3472         struct trace_entry *entry;
3473         struct trace_event *event;
3474
3475         entry = iter->ent;
3476
3477         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3478                 trace_seq_printf(s, "%d %d %llu ",
3479                                  entry->pid, iter->cpu, iter->ts);
3480
3481         if (trace_seq_has_overflowed(s))
3482                 return TRACE_TYPE_PARTIAL_LINE;
3483
3484         event = ftrace_find_event(entry->type);
3485         if (event)
3486                 return event->funcs->raw(iter, 0, event);
3487
3488         trace_seq_printf(s, "%d ?\n", entry->type);
3489
3490         return trace_handle_return(s);
3491 }
3492
3493 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3494 {
3495         struct trace_array *tr = iter->tr;
3496         struct trace_seq *s = &iter->seq;
3497         unsigned char newline = '\n';
3498         struct trace_entry *entry;
3499         struct trace_event *event;
3500
3501         entry = iter->ent;
3502
3503         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3504                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3505                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3506                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3507                 if (trace_seq_has_overflowed(s))
3508                         return TRACE_TYPE_PARTIAL_LINE;
3509         }
3510
3511         event = ftrace_find_event(entry->type);
3512         if (event) {
3513                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3514                 if (ret != TRACE_TYPE_HANDLED)
3515                         return ret;
3516         }
3517
3518         SEQ_PUT_FIELD(s, newline);
3519
3520         return trace_handle_return(s);
3521 }
3522
3523 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3524 {
3525         struct trace_array *tr = iter->tr;
3526         struct trace_seq *s = &iter->seq;
3527         struct trace_entry *entry;
3528         struct trace_event *event;
3529
3530         entry = iter->ent;
3531
3532         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3533                 SEQ_PUT_FIELD(s, entry->pid);
3534                 SEQ_PUT_FIELD(s, iter->cpu);
3535                 SEQ_PUT_FIELD(s, iter->ts);
3536                 if (trace_seq_has_overflowed(s))
3537                         return TRACE_TYPE_PARTIAL_LINE;
3538         }
3539
3540         event = ftrace_find_event(entry->type);
3541         return event ? event->funcs->binary(iter, 0, event) :
3542                 TRACE_TYPE_HANDLED;
3543 }
3544
3545 int trace_empty(struct trace_iterator *iter)
3546 {
3547         struct ring_buffer_iter *buf_iter;
3548         int cpu;
3549
3550         /* If we are looking at one CPU buffer, only check that one */
3551         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3552                 cpu = iter->cpu_file;
3553                 buf_iter = trace_buffer_iter(iter, cpu);
3554                 if (buf_iter) {
3555                         if (!ring_buffer_iter_empty(buf_iter))
3556                                 return 0;
3557                 } else {
3558                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3559                                 return 0;
3560                 }
3561                 return 1;
3562         }
3563
3564         for_each_tracing_cpu(cpu) {
3565                 buf_iter = trace_buffer_iter(iter, cpu);
3566                 if (buf_iter) {
3567                         if (!ring_buffer_iter_empty(buf_iter))
3568                                 return 0;
3569                 } else {
3570                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3571                                 return 0;
3572                 }
3573         }
3574
3575         return 1;
3576 }
3577
3578 /*  Called with trace_event_read_lock() held. */
3579 enum print_line_t print_trace_line(struct trace_iterator *iter)
3580 {
3581         struct trace_array *tr = iter->tr;
3582         unsigned long trace_flags = tr->trace_flags;
3583         enum print_line_t ret;
3584
3585         if (iter->lost_events) {
3586                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3587                                  iter->cpu, iter->lost_events);
3588                 if (trace_seq_has_overflowed(&iter->seq))
3589                         return TRACE_TYPE_PARTIAL_LINE;
3590         }
3591
3592         if (iter->trace && iter->trace->print_line) {
3593                 ret = iter->trace->print_line(iter);
3594                 if (ret != TRACE_TYPE_UNHANDLED)
3595                         return ret;
3596         }
3597
3598         if (iter->ent->type == TRACE_BPUTS &&
3599                         trace_flags & TRACE_ITER_PRINTK &&
3600                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3601                 return trace_print_bputs_msg_only(iter);
3602
3603         if (iter->ent->type == TRACE_BPRINT &&
3604                         trace_flags & TRACE_ITER_PRINTK &&
3605                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3606                 return trace_print_bprintk_msg_only(iter);
3607
3608         if (iter->ent->type == TRACE_PRINT &&
3609                         trace_flags & TRACE_ITER_PRINTK &&
3610                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3611                 return trace_print_printk_msg_only(iter);
3612
3613         if (trace_flags & TRACE_ITER_BIN)
3614                 return print_bin_fmt(iter);
3615
3616         if (trace_flags & TRACE_ITER_HEX)
3617                 return print_hex_fmt(iter);
3618
3619         if (trace_flags & TRACE_ITER_RAW)
3620                 return print_raw_fmt(iter);
3621
3622         return print_trace_fmt(iter);
3623 }
3624
3625 void trace_latency_header(struct seq_file *m)
3626 {
3627         struct trace_iterator *iter = m->private;
3628         struct trace_array *tr = iter->tr;
3629
3630         /* print nothing if the buffers are empty */
3631         if (trace_empty(iter))
3632                 return;
3633
3634         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3635                 print_trace_header(m, iter);
3636
3637         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3638                 print_lat_help_header(m);
3639 }
3640
3641 void trace_default_header(struct seq_file *m)
3642 {
3643         struct trace_iterator *iter = m->private;
3644         struct trace_array *tr = iter->tr;
3645         unsigned long trace_flags = tr->trace_flags;
3646
3647         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3648                 return;
3649
3650         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3651                 /* print nothing if the buffers are empty */
3652                 if (trace_empty(iter))
3653                         return;
3654                 print_trace_header(m, iter);
3655                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3656                         print_lat_help_header(m);
3657         } else {
3658                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3659                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3660                                 print_func_help_header_irq(iter->trace_buffer,
3661                                                            m, trace_flags);
3662                         else
3663                                 print_func_help_header(iter->trace_buffer, m,
3664                                                        trace_flags);
3665                 }
3666         }
3667 }
3668
3669 static void test_ftrace_alive(struct seq_file *m)
3670 {
3671         if (!ftrace_is_dead())
3672                 return;
3673         seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3674                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3675 }
3676
3677 #ifdef CONFIG_TRACER_MAX_TRACE
3678 static void show_snapshot_main_help(struct seq_file *m)
3679 {
3680         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3681                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3682                     "#                      Takes a snapshot of the main buffer.\n"
3683                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3684                     "#                      (Doesn't have to be '2'; works with any number that\n"
3685                     "#                       is not a '0' or '1')\n");
3686 }
3687
3688 static void show_snapshot_percpu_help(struct seq_file *m)
3689 {
3690         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3691 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3692         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3693                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3694 #else
3695         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3696                     "#                     Must use main snapshot file to allocate.\n");
3697 #endif
3698         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3699                     "#                      (Doesn't have to be '2'; works with any number that\n"
3700                     "#                       is not a '0' or '1')\n");
3701 }
3702
3703 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3704 {
3705         if (iter->tr->allocated_snapshot)
3706                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3707         else
3708                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3709
3710         seq_puts(m, "# Snapshot commands:\n");
3711         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3712                 show_snapshot_main_help(m);
3713         else
3714                 show_snapshot_percpu_help(m);
3715 }
3716 #else
3717 /* Should never be called */
3718 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3719 #endif
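
/*
 * A hedged usage sketch of the snapshot commands documented above (main
 * snapshot file only, on kernels built with snapshot support): write '1'
 * to allocate the snapshot buffer and capture the main buffer, read the
 * capture back, then write '0' to free it.  The tracefs mount point used
 * below is an assumption; it is commonly /sys/kernel/tracing or
 * /sys/kernel/debug/tracing.
 */
#if 0   /* illustrative user-space sketch only -- not part of this file */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
        if (fd < 0)
                return 1;
        write(fd, "1", 1);      /* allocate (if needed) and take a snapshot */
        close(fd);

        fd = open("/sys/kernel/tracing/snapshot", O_RDONLY);
        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                write(STDOUT_FILENO, buf, n);
        close(fd);

        fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
        if (fd < 0)
                return 1;
        write(fd, "0", 1);      /* clear and free the snapshot buffer */
        close(fd);
        return 0;
}
#endif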
3720
3721 static int s_show(struct seq_file *m, void *v)
3722 {
3723         struct trace_iterator *iter = v;
3724         int ret;
3725
3726         if (iter->ent == NULL) {
3727                 if (iter->tr) {
3728                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3729                         seq_puts(m, "#\n");
3730                         test_ftrace_alive(m);
3731                 }
3732                 if (iter->snapshot && trace_empty(iter))
3733                         print_snapshot_help(m, iter);
3734                 else if (iter->trace && iter->trace->print_header)
3735                         iter->trace->print_header(m);
3736                 else
3737                         trace_default_header(m);
3738
3739         } else if (iter->leftover) {
3740                 /*
3741                  * If we filled the seq_file buffer earlier, we
3742                  * want to just show it now.
3743                  */
3744                 ret = trace_print_seq(m, &iter->seq);
3745
3746                 /* ret should this time be zero, but you never know */
3747                 iter->leftover = ret;
3748
3749         } else {
3750                 print_trace_line(iter);
3751                 ret = trace_print_seq(m, &iter->seq);
3752                 /*
3753                  * If we overflow the seq_file buffer, then it will
3754                  * ask us for this data again on the next s_start().
3755                  * Use that instead.
3756                  *  ret is 0 if seq_file write succeeded.
3757                  *        -1 otherwise.
3758                  */
3759                 iter->leftover = ret;
3760         }
3761
3762         return 0;
3763 }
3764
3765 /*
3766  * Should be used after trace_array_get(); trace_types_lock
3767  * ensures that i_cdev was already initialized.
3768  */
3769 static inline int tracing_get_cpu(struct inode *inode)
3770 {
3771         if (inode->i_cdev) /* See trace_create_cpu_file() */
3772                 return (long)inode->i_cdev - 1;
3773         return RING_BUFFER_ALL_CPUS;
3774 }
3775
3776 static const struct seq_operations tracer_seq_ops = {
3777         .start          = s_start,
3778         .next           = s_next,
3779         .stop           = s_stop,
3780         .show           = s_show,
3781 };
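
/*
 * A minimal, hedged sketch of how the seq_operations above are exercised:
 * every read(2) on the "trace" file re-enters s_start()/s_show()/s_next()/
 * s_stop() through the seq_file core until the iterator is exhausted (the
 * leftover handling in s_show() covers output that did not fit in one
 * pass).  The tracefs path below is an assumption about where tracefs is
 * mounted.
 */
#if 0   /* illustrative user-space sketch only -- not part of this file */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/trace", O_RDONLY);

        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                write(STDOUT_FILENO, buf, n);
        close(fd);
        return 0;
}
#endif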
3782
3783 static struct trace_iterator *
3784 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3785 {
3786         struct trace_array *tr = inode->i_private;
3787         struct trace_iterator *iter;
3788         int cpu;
3789
3790         if (tracing_disabled)
3791                 return ERR_PTR(-ENODEV);
3792
3793         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3794         if (!iter)
3795                 return ERR_PTR(-ENOMEM);
3796
3797         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3798                                     GFP_KERNEL);
3799         if (!iter->buffer_iter)
3800                 goto release;
3801
3802         /*
3803          * We make a copy of the current tracer to avoid concurrent
3804          * changes to it while we are reading.
3805          */
3806         mutex_lock(&trace_types_lock);
3807         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3808         if (!iter->trace)
3809                 goto fail;
3810
3811         *iter->trace = *tr->current_trace;
3812
3813         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3814                 goto fail;
3815
3816         iter->tr = tr;
3817
3818 #ifdef CONFIG_TRACER_MAX_TRACE
3819         /* Currently only the top directory has a snapshot */
3820         if (tr->current_trace->print_max || snapshot)
3821                 iter->trace_buffer = &tr->max_buffer;
3822         else
3823 #endif
3824                 iter->trace_buffer = &tr->trace_buffer;
3825         iter->snapshot = snapshot;
3826         iter->pos = -1;
3827         iter->cpu_file = tracing_get_cpu(inode);
3828         mutex_init(&iter->mutex);
3829
3830         /* Notify the tracer early; before we stop tracing. */
3831         if (iter->trace && iter->trace->open)
3832                 iter->trace->open(iter);
3833
3834         /* Annotate start of buffers if we had overruns */
3835         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3836                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3837
3838         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3839         if (trace_clocks[tr->clock_id].in_ns)
3840                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3841
3842         /* stop the trace while dumping if we are not opening "snapshot" */
3843         if (!iter->snapshot)
3844                 tracing_stop_tr(tr);
3845
3846         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3847                 for_each_tracing_cpu(cpu) {
3848                         iter->buffer_iter[cpu] =
3849                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3850                 }
3851                 ring_buffer_read_prepare_sync();
3852                 for_each_tracing_cpu(cpu) {
3853                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3854                         tracing_iter_reset(iter, cpu);
3855                 }
3856         } else {
3857                 cpu = iter->cpu_file;
3858                 iter->buffer_iter[cpu] =
3859                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3860                 ring_buffer_read_prepare_sync();
3861                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3862                 tracing_iter_reset(iter, cpu);
3863         }
3864
3865         mutex_unlock(&trace_types_lock);
3866
3867         return iter;
3868
3869  fail:
3870         mutex_unlock(&trace_types_lock);
3871         kfree(iter->trace);
3872         kfree(iter->buffer_iter);
3873 release:
3874         seq_release_private(inode, file);
3875         return ERR_PTR(-ENOMEM);
3876 }
3877
3878 int tracing_open_generic(struct inode *inode, struct file *filp)
3879 {
3880         if (tracing_disabled)
3881                 return -ENODEV;
3882
3883         filp->private_data = inode->i_private;
3884         return 0;
3885 }
3886
3887 bool tracing_is_disabled(void)
3888 {
3889         return tracing_disabled ? true : false;
3890 }
3891
3892 /*
3893  * Open and update trace_array ref count.
3894  * Must have the current trace_array passed to it.
3895  */
3896 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3897 {
3898         struct trace_array *tr = inode->i_private;
3899
3900         if (tracing_disabled)
3901                 return -ENODEV;
3902
3903         if (trace_array_get(tr) < 0)
3904                 return -ENODEV;
3905
3906         filp->private_data = inode->i_private;
3907
3908         return 0;
3909 }
3910
3911 static int tracing_release(struct inode *inode, struct file *file)
3912 {
3913         struct trace_array *tr = inode->i_private;
3914         struct seq_file *m = file->private_data;
3915         struct trace_iterator *iter;
3916         int cpu;
3917
3918         if (!(file->f_mode & FMODE_READ)) {
3919                 trace_array_put(tr);
3920                 return 0;
3921         }
3922
3923         /* Writes do not use seq_file */
3924         iter = m->private;
3925         mutex_lock(&trace_types_lock);
3926
3927         for_each_tracing_cpu(cpu) {
3928                 if (iter->buffer_iter[cpu])
3929                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3930         }
3931
3932         if (iter->trace && iter->trace->close)
3933                 iter->trace->close(iter);
3934
3935         if (!iter->snapshot)
3936                 /* reenable tracing if it was previously enabled */
3937                 tracing_start_tr(tr);
3938
3939         __trace_array_put(tr);
3940
3941         mutex_unlock(&trace_types_lock);
3942
3943         mutex_destroy(&iter->mutex);
3944         free_cpumask_var(iter->started);
3945         kfree(iter->trace);
3946         kfree(iter->buffer_iter);
3947         seq_release_private(inode, file);
3948
3949         return 0;
3950 }
3951
3952 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3953 {
3954         struct trace_array *tr = inode->i_private;
3955
3956         trace_array_put(tr);
3957         return 0;
3958 }
3959
3960 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3961 {
3962         struct trace_array *tr = inode->i_private;
3963
3964         trace_array_put(tr);
3965
3966         return single_release(inode, file);
3967 }
3968
3969 static int tracing_open(struct inode *inode, struct file *file)
3970 {
3971         struct trace_array *tr = inode->i_private;
3972         struct trace_iterator *iter;
3973         int ret = 0;
3974
3975         if (trace_array_get(tr) < 0)
3976                 return -ENODEV;
3977
3978         /* If this file was open for write, then erase contents */
3979         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3980                 int cpu = tracing_get_cpu(inode);
3981
3982                 if (cpu == RING_BUFFER_ALL_CPUS)
3983                         tracing_reset_online_cpus(&tr->trace_buffer);
3984                 else
3985                         tracing_reset(&tr->trace_buffer, cpu);
3986         }
3987
3988         if (file->f_mode & FMODE_READ) {
3989                 iter = __tracing_open(inode, file, false);
3990                 if (IS_ERR(iter))
3991                         ret = PTR_ERR(iter);
3992                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3993                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3994         }
3995
3996         if (ret < 0)
3997                 trace_array_put(tr);
3998
3999         return ret;
4000 }
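
/*
 * A hedged sketch of the O_TRUNC path handled in tracing_open() above:
 * opening "trace" for writing with O_TRUNC resets the ring buffer, which
 * is exactly what "echo > trace" does from a shell.  The tracefs path is
 * an assumption about the mount point.
 */
#if 0   /* illustrative user-space sketch only -- not part of this file */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/tracing/trace", O_WRONLY | O_TRUNC);

        if (fd < 0)
                return 1;
        close(fd);      /* the open itself performed the reset */
        return 0;
}
#endif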
4001
4002 /*
4003  * Some tracers are not suitable for instance buffers.
4004  * A tracer is always available for the global array (toplevel)
4005  * or if it explicitly states that it is.
4006  */
4007 static bool
4008 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4009 {
4010         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4011 }
4012
4013 /* Find the next tracer that this trace array may use */
4014 static struct tracer *
4015 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4016 {
4017         while (t && !trace_ok_for_array(t, tr))
4018                 t = t->next;
4019
4020         return t;
4021 }
4022
4023 static void *
4024 t_next(struct seq_file *m, void *v, loff_t *pos)
4025 {
4026         struct trace_array *tr = m->private;
4027         struct tracer *t = v;
4028
4029         (*pos)++;
4030
4031         if (t)
4032                 t = get_tracer_for_array(tr, t->next);
4033
4034         return t;
4035 }
4036
4037 static void *t_start(struct seq_file *m, loff_t *pos)
4038 {
4039         struct trace_array *tr = m->private;
4040         struct tracer *t;
4041         loff_t l = 0;
4042
4043         mutex_lock(&trace_types_lock);
4044
4045         t = get_tracer_for_array(tr, trace_types);
4046         for (; t && l < *pos; t = t_next(m, t, &l))
4047                 ;
4048
4049         return t;
4050 }
4051
4052 static void t_stop(struct seq_file *m, void *p)
4053 {
4054         mutex_unlock(&trace_types_lock);
4055 }
4056
4057 static int t_show(struct seq_file *m, void *v)
4058 {
4059         struct tracer *t = v;
4060
4061         if (!t)
4062                 return 0;
4063
4064         seq_puts(m, t->name);
4065         if (t->next)
4066                 seq_putc(m, ' ');
4067         else
4068                 seq_putc(m, '\n');
4069
4070         return 0;
4071 }
4072
4073 static const struct seq_operations show_traces_seq_ops = {
4074         .start          = t_start,
4075         .next           = t_next,
4076         .stop           = t_stop,
4077         .show           = t_show,
4078 };
4079
4080 static int show_traces_open(struct inode *inode, struct file *file)
4081 {
4082         struct trace_array *tr = inode->i_private;
4083         struct seq_file *m;
4084         int ret;
4085
4086         if (tracing_disabled)
4087                 return -ENODEV;
4088
4089         ret = seq_open(file, &show_traces_seq_ops);
4090         if (ret)
4091                 return ret;
4092
4093         m = file->private_data;
4094         m->private = tr;
4095
4096         return 0;
4097 }
4098
4099 static ssize_t
4100 tracing_write_stub(struct file *filp, const char __user *ubuf,
4101                    size_t count, loff_t *ppos)
4102 {
4103         return count;
4104 }
4105
4106 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4107 {
4108         int ret;
4109
4110         if (file->f_mode & FMODE_READ)
4111                 ret = seq_lseek(file, offset, whence);
4112         else
4113                 file->f_pos = ret = 0;
4114
4115         return ret;
4116 }
4117
4118 static const struct file_operations tracing_fops = {
4119         .open           = tracing_open,
4120         .read           = seq_read,
4121         .write          = tracing_write_stub,
4122         .llseek         = tracing_lseek,
4123         .release        = tracing_release,
4124 };
4125
4126 static const struct file_operations show_traces_fops = {
4127         .open           = show_traces_open,
4128         .read           = seq_read,
4129         .release        = seq_release,
4130         .llseek         = seq_lseek,
4131 };
4132
4133 /*
4134  * The tracer itself will not take this lock, but we still want
4135  * to provide a consistent cpumask to user-space:
4136  */
4137 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4138
4139 /*
4140  * Temporary storage for the character representation of the
4141  * CPU bitmask (and one more byte for the newline):
4142  */
4143 static char mask_str[NR_CPUS + 1];
4144
4145 static ssize_t
4146 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4147                      size_t count, loff_t *ppos)
4148 {
4149         struct trace_array *tr = file_inode(filp)->i_private;
4150         int len;
4151
4152         mutex_lock(&tracing_cpumask_update_lock);
4153
4154         len = snprintf(mask_str, count, "%*pb\n",
4155                        cpumask_pr_args(tr->tracing_cpumask));
4156         if (len >= count) {
4157                 count = -EINVAL;
4158                 goto out_err;
4159         }
4160         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4161
4162 out_err:
4163         mutex_unlock(&tracing_cpumask_update_lock);
4164
4165         return count;
4166 }
4167
4168 static ssize_t
4169 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4170                       size_t count, loff_t *ppos)
4171 {
4172         struct trace_array *tr = file_inode(filp)->i_private;
4173         cpumask_var_t tracing_cpumask_new;
4174         int err, cpu;
4175
4176         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4177                 return -ENOMEM;
4178
4179         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4180         if (err)
4181                 goto err_unlock;
4182
4183         mutex_lock(&tracing_cpumask_update_lock);
4184
4185         local_irq_disable();
4186         arch_spin_lock(&tr->max_lock);
4187         for_each_tracing_cpu(cpu) {
4188                 /*
4189                  * Increase/decrease the disabled counter if we are
4190                  * about to flip a bit in the cpumask:
4191                  */
4192                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4193                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4194                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4195                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4196                 }
4197                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4198                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4199                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4200                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4201                 }
4202         }
4203         arch_spin_unlock(&tr->max_lock);
4204         local_irq_enable();
4205
4206         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4207
4208         mutex_unlock(&tracing_cpumask_update_lock);
4209         free_cpumask_var(tracing_cpumask_new);
4210
4211         return count;
4212
4213 err_unlock:
4214         free_cpumask_var(tracing_cpumask_new);
4215
4216         return err;
4217 }
4218
4219 static const struct file_operations tracing_cpumask_fops = {
4220         .open           = tracing_open_generic_tr,
4221         .read           = tracing_cpumask_read,
4222         .write          = tracing_cpumask_write,
4223         .release        = tracing_release_generic_tr,
4224         .llseek         = generic_file_llseek,
4225 };
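
/*
 * A hedged sketch of the tracing_cpumask interface implemented above: the
 * file takes a hex CPU mask in cpumask_parse_user() format, so writing "3"
 * limits tracing to CPUs 0 and 1.  The tracefs path is an assumption about
 * the mount point.
 */
#if 0   /* illustrative user-space sketch only -- not part of this file */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *mask = "3\n";       /* hex mask: CPUs 0 and 1 */
        int fd = open("/sys/kernel/tracing/tracing_cpumask", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, mask, strlen(mask)) < 0) {
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}
#endif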
4226
4227 static int tracing_trace_options_show(struct seq_file *m, void *v)
4228 {
4229         struct tracer_opt *trace_opts;
4230         struct trace_array *tr = m->private;
4231         u32 tracer_flags;
4232         int i;
4233
4234         mutex_lock(&trace_types_lock);
4235         tracer_flags = tr->current_trace->flags->val;
4236         trace_opts = tr->current_trace->flags->opts;
4237
4238         for (i = 0; trace_options[i]; i++) {
4239                 if (tr->trace_flags & (1 << i))
4240                         seq_printf(m, "%s\n", trace_options[i]);
4241                 else
4242                         seq_printf(m, "no%s\n", trace_options[i]);
4243         }
4244
4245         for (i = 0; trace_opts[i].name; i++) {
4246                 if (tracer_flags & trace_opts[i].bit)
4247                         seq_printf(m, "%s\n", trace_opts[i].name);
4248                 else
4249                         seq_printf(m, "no%s\n", trace_opts[i].name);
4250         }
4251         mutex_unlock(&trace_types_lock);
4252
4253         return 0;
4254 }
4255
4256 static int __set_tracer_option(struct trace_array *tr,
4257                                struct tracer_flags *tracer_flags,
4258                                struct tracer_opt *opts, int neg)
4259 {
4260         struct tracer *trace = tracer_flags->trace;
4261         int ret;
4262
4263         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4264         if (ret)
4265                 return ret;
4266
4267         if (neg)
4268                 tracer_flags->val &= ~opts->bit;
4269         else
4270                 tracer_flags->val |= opts->bit;
4271         return 0;
4272 }
4273
4274 /* Try to assign a tracer specific option */
4275 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4276 {
4277         struct tracer *trace = tr->current_trace;
4278         struct tracer_flags *tracer_flags = trace->flags;
4279         struct tracer_opt *opts = NULL;
4280         int i;
4281
4282         for (i = 0; tracer_flags->opts[i].name; i++) {
4283                 opts = &tracer_flags->opts[i];
4284
4285                 if (strcmp(cmp, opts->name) == 0)
4286                         return __set_tracer_option(tr, trace->flags, opts, neg);
4287         }
4288
4289         return -EINVAL;
4290 }
4291
4292 /* Some tracers require overwrite to stay enabled */
4293 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4294 {
4295         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4296                 return -1;
4297
4298         return 0;
4299 }
4300
4301 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4302 {
4303         /* do nothing if flag is already set */
4304         if (!!(tr->trace_flags & mask) == !!enabled)
4305                 return 0;
4306
4307         /* Give the tracer a chance to approve the change */
4308         if (tr->current_trace->flag_changed)
4309                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4310                         return -EINVAL;
4311
4312         if (enabled)
4313                 tr->trace_flags |= mask;
4314         else
4315                 tr->trace_flags &= ~mask;
4316
4317         if (mask == TRACE_ITER_RECORD_CMD)
4318                 trace_event_enable_cmd_record(enabled);
4319
4320         if (mask == TRACE_ITER_RECORD_TGID) {
4321                 if (!tgid_map)
4322                         tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4323                                            GFP_KERNEL);
4324                 if (!tgid_map) {
4325                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4326                         return -ENOMEM;
4327                 }
4328
4329                 trace_event_enable_tgid_record(enabled);
4330         }
4331
4332         if (mask == TRACE_ITER_EVENT_FORK)
4333                 trace_event_follow_fork(tr, enabled);
4334
4335         if (mask == TRACE_ITER_FUNC_FORK)
4336                 ftrace_pid_follow_fork(tr, enabled);
4337
4338         if (mask == TRACE_ITER_OVERWRITE) {
4339                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4340 #ifdef CONFIG_TRACER_MAX_TRACE
4341                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4342 #endif
4343         }
4344
4345         if (mask == TRACE_ITER_PRINTK) {
4346                 trace_printk_start_stop_comm(enabled);
4347                 trace_printk_control(enabled);
4348         }
4349
4350         return 0;
4351 }
4352
4353 static int trace_set_options(struct trace_array *tr, char *option)
4354 {
4355         char *cmp;
4356         int neg = 0;
4357         int ret = -ENODEV;
4358         int i;
4359         size_t orig_len = strlen(option);
4360
4361         cmp = strstrip(option);
4362
4363         if (strncmp(cmp, "no", 2) == 0) {
4364                 neg = 1;
4365                 cmp += 2;
4366         }
4367
4368         mutex_lock(&trace_types_lock);
4369
4370         for (i = 0; trace_options[i]; i++) {
4371                 if (strcmp(cmp, trace_options[i]) == 0) {
4372                         ret = set_tracer_flag(tr, 1 << i, !neg);
4373                         break;
4374                 }
4375         }
4376
4377         /* If no option could be set, test the specific tracer options */
4378         if (!trace_options[i])
4379                 ret = set_tracer_option(tr, cmp, neg);
4380
4381         mutex_unlock(&trace_types_lock);
4382
4383         /*
4384          * If the first trailing whitespace is replaced with '\0' by strstrip,
4385          * turn it back into a space.
4386          */
4387         if (orig_len > strlen(option))
4388                 option[strlen(option)] = ' ';
4389
4390         return ret;
4391 }
4392
4393 static void __init apply_trace_boot_options(void)
4394 {
4395         char *buf = trace_boot_options_buf;
4396         char *option;
4397
4398         while (true) {
4399                 option = strsep(&buf, ",");
4400
4401                 if (!option)
4402                         break;
4403
4404                 if (*option)
4405                         trace_set_options(&global_trace, option);
4406
4407                 /* Put back the comma to allow this to be called again */
4408                 if (buf)
4409                         *(buf - 1) = ',';
4410         }
4411 }
4412
4413 static ssize_t
4414 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4415                         size_t cnt, loff_t *ppos)
4416 {
4417         struct seq_file *m = filp->private_data;
4418         struct trace_array *tr = m->private;
4419         char buf[64];
4420         int ret;
4421
4422         if (cnt >= sizeof(buf))
4423                 return -EINVAL;
4424
4425         if (copy_from_user(buf, ubuf, cnt))
4426                 return -EFAULT;
4427
4428         buf[cnt] = 0;
4429
4430         ret = trace_set_options(tr, buf);
4431         if (ret < 0)
4432                 return ret;
4433
4434         *ppos += cnt;
4435
4436         return cnt;
4437 }
4438
4439 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4440 {
4441         struct trace_array *tr = inode->i_private;
4442         int ret;
4443
4444         if (tracing_disabled)
4445                 return -ENODEV;
4446
4447         if (trace_array_get(tr) < 0)
4448                 return -ENODEV;
4449
4450         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4451         if (ret < 0)
4452                 trace_array_put(tr);
4453
4454         return ret;
4455 }
4456
4457 static const struct file_operations tracing_iter_fops = {
4458         .open           = tracing_trace_options_open,
4459         .read           = seq_read,
4460         .llseek         = seq_lseek,
4461         .release        = tracing_single_release_tr,
4462         .write          = tracing_trace_options_write,
4463 };
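
/*
 * A hedged sketch of trace_set_options() as reached through the
 * trace_options file above: writing a bare option name sets the flag and a
 * "no" prefix clears it.  "overwrite" is used here because
 * TRACE_ITER_OVERWRITE is handled in set_tracer_flag() above; the tracefs
 * path is an assumption about the mount point.
 */
#if 0   /* illustrative user-space sketch only -- not part of this file */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_option(const char *opt)
{
        int fd = open("/sys/kernel/tracing/trace_options", O_WRONLY);
        ssize_t ret;

        if (fd < 0)
                return -1;
        ret = write(fd, opt, strlen(opt));
        close(fd);
        return ret < 0 ? -1 : 0;
}

int main(void)
{
        /* Switch the ring buffer to "stop when full", then back. */
        if (write_option("nooverwrite\n"))
                return 1;
        return write_option("overwrite\n") ? 1 : 0;
}
#endif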
4464
4465 static const char readme_msg[] =
4466         "tracing mini-HOWTO:\n\n"
4467         "# echo 0 > tracing_on : quick way to disable tracing\n"
4468         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4469         " Important files:\n"
4470         "  trace\t\t\t- The static contents of the buffer\n"
4471         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4472         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4473         "  current_tracer\t- function and latency tracers\n"
4474         "  available_tracers\t- list of configured tracers for current_tracer\n"
4475         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4476         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4477         "  trace_clock\t\t- change the clock used to order events\n"
4478         "       local:   Per cpu clock but may not be synced across CPUs\n"
4479         "      global:   Synced across CPUs but slows tracing down.\n"
4480         "     counter:   Not a clock, but just an increment\n"
4481         "      uptime:   Jiffy counter from time of boot\n"
4482         "        perf:   Same clock that perf events use\n"
4483 #ifdef CONFIG_X86_64
4484         "     x86-tsc:   TSC cycle counter\n"
4485 #endif
4486         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4487         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4488         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4489         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4490         "\t\t\t  Remove sub-buffer with rmdir\n"
4491         "  trace_options\t\t- Set format or modify how tracing happens\n"
4492         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4493         "\t\t\t  option name\n"
4494         "  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs are saved\n"
4495 #ifdef CONFIG_DYNAMIC_FTRACE
4496         "\n  available_filter_functions - list of functions that can be filtered on\n"
4497         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4498         "\t\t\t  functions\n"
4499         "\t     accepts: func_full_name or glob-matching-pattern\n"
4500         "\t     modules: Can select a group via module\n"
4501         "\t      Format: :mod:<module-name>\n"
4502         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4503         "\t    triggers: a command to perform when function is hit\n"
4504         "\t      Format: <function>:<trigger>[:count]\n"
4505         "\t     trigger: traceon, traceoff\n"
4506         "\t\t      enable_event:<system>:<event>\n"
4507         "\t\t      disable_event:<system>:<event>\n"
4508 #ifdef CONFIG_STACKTRACE
4509         "\t\t      stacktrace\n"
4510 #endif
4511 #ifdef CONFIG_TRACER_SNAPSHOT
4512         "\t\t      snapshot\n"
4513 #endif
4514         "\t\t      dump\n"
4515         "\t\t      cpudump\n"
4516         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4517         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4518         "\t     The first one will disable tracing every time do_fault is hit\n"
4519         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4520         "\t       The first time do_trap is hit and it disables tracing, the\n"
4521         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4522         "\t       the counter will not decrement. It only decrements when the\n"
4523         "\t       trigger did work\n"
4524         "\t     To remove trigger without count:\n"
4525         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4526         "\t     To remove trigger with a count:\n"
4527         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4528         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4529         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4530         "\t    modules: Can select a group via module command :mod:\n"
4531         "\t    Does not accept triggers\n"
4532 #endif /* CONFIG_DYNAMIC_FTRACE */
4533 #ifdef CONFIG_FUNCTION_TRACER
4534         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4535         "\t\t    (function)\n"
4536 #endif
4537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4538         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4539         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4540         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4541 #endif
4542 #ifdef CONFIG_TRACER_SNAPSHOT
4543         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4544         "\t\t\t  snapshot buffer. Read the contents for more\n"
4545         "\t\t\t  information\n"
4546 #endif
4547 #ifdef CONFIG_STACK_TRACER
4548         "  stack_trace\t\t- Shows the max stack trace when active\n"
4549         "  stack_max_size\t- Shows current max stack size that was traced\n"
4550         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4551         "\t\t\t  new trace)\n"
4552 #ifdef CONFIG_DYNAMIC_FTRACE
4553         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4554         "\t\t\t  traces\n"
4555 #endif
4556 #endif /* CONFIG_STACK_TRACER */
4557 #ifdef CONFIG_KPROBE_EVENTS
4558         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4559         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4560 #endif
4561 #ifdef CONFIG_UPROBE_EVENTS
4562         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4563         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4564 #endif
4565 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4566         "\t  accepts: event-definitions (one definition per line)\n"
4567         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4568         "\t           -:[<group>/]<event>\n"
4569 #ifdef CONFIG_KPROBE_EVENTS
4570         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4571         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4572 #endif
4573 #ifdef CONFIG_UPROBE_EVENTS
4574         "\t    place: <path>:<offset>\n"
4575 #endif
4576         "\t     args: <name>=fetcharg[:type]\n"
4577         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4578         "\t           $stack<index>, $stack, $retval, $comm\n"
4579         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4580         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4581 #endif
4582         "  events/\t\t- Directory containing all trace event subsystems:\n"
4583         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4584         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4585         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4586         "\t\t\t  events\n"
4587         "      filter\t\t- If set, only events passing filter are traced\n"
4588         "  events/<system>/<event>/\t- Directory containing control files for\n"
4589         "\t\t\t  <event>:\n"
4590         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4591         "      filter\t\t- If set, only events passing filter are traced\n"
4592         "      trigger\t\t- If set, a command to perform when event is hit\n"
4593         "\t    Format: <trigger>[:count][if <filter>]\n"
4594         "\t   trigger: traceon, traceoff\n"
4595         "\t            enable_event:<system>:<event>\n"
4596         "\t            disable_event:<system>:<event>\n"
4597 #ifdef CONFIG_HIST_TRIGGERS
4598         "\t            enable_hist:<system>:<event>\n"
4599         "\t            disable_hist:<system>:<event>\n"
4600 #endif
4601 #ifdef CONFIG_STACKTRACE
4602         "\t\t    stacktrace\n"
4603 #endif
4604 #ifdef CONFIG_TRACER_SNAPSHOT
4605         "\t\t    snapshot\n"
4606 #endif
4607 #ifdef CONFIG_HIST_TRIGGERS
4608         "\t\t    hist (see below)\n"
4609 #endif
4610         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4611         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4612         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4613         "\t                  events/block/block_unplug/trigger\n"
4614         "\t   The first disables tracing every time block_unplug is hit.\n"
4615         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4616         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4617         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4618         "\t   Like function triggers, the counter is only decremented if it\n"
4619         "\t    enabled or disabled tracing.\n"
4620         "\t   To remove a trigger without a count:\n"
4621         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4622         "\t   To remove a trigger with a count:\n"
4623         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4624         "\t   Filters can be ignored when removing a trigger.\n"
4625 #ifdef CONFIG_HIST_TRIGGERS
4626         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4627         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4628         "\t            [:values=<field1[,field2,...]>]\n"
4629         "\t            [:sort=<field1[,field2,...]>]\n"
4630         "\t            [:size=#entries]\n"
4631         "\t            [:pause][:continue][:clear]\n"
4632         "\t            [:name=histname1]\n"
4633         "\t            [if <filter>]\n\n"
4634         "\t    When a matching event is hit, an entry is added to a hash\n"
4635         "\t    table using the key(s) and value(s) named, and the value of a\n"
4636         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4637         "\t    correspond to fields in the event's format description.  Keys\n"
4638         "\t    can be any field, or the special string 'stacktrace'.\n"
4639         "\t    Compound keys consisting of up to two fields can be specified\n"
4640         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4641         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4642         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4643         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4644         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4645         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4646         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4647         "\t    its histogram data will be shared with other triggers of the\n"
4648         "\t    same name, and trigger hits will update this common data.\n\n"
4649         "\t    Reading the 'hist' file for the event will dump the hash\n"
4650         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4651         "\t    triggers attached to an event, there will be a table for each\n"
4652         "\t    trigger in the output.  The table displayed for a named\n"
4653         "\t    trigger will be the same as any other instance having the\n"
4654         "\t    same name.  The default format used to display a given field\n"
4655         "\t    can be modified by appending any of the following modifiers\n"
4656         "\t    to the field name, as applicable:\n\n"
4657         "\t            .hex        display a number as a hex value\n"
4658         "\t            .sym        display an address as a symbol\n"
4659         "\t            .sym-offset display an address as a symbol and offset\n"
4660         "\t            .execname   display a common_pid as a program name\n"
4661         "\t            .syscall    display a syscall id as a syscall name\n"
4662         "\t            .log2       display log2 value rather than raw number\n\n"
4663         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4664         "\t    trigger or to start a hist trigger but not log any events\n"
4665         "\t    until told to do so.  'continue' can be used to start or\n"
4666         "\t    restart a paused hist trigger.\n\n"
4667         "\t    The 'clear' parameter will clear the contents of a running\n"
4668         "\t    hist trigger and leave its current paused/active state\n"
4669         "\t    unchanged.\n\n"
4670         "\t    The enable_hist and disable_hist triggers can be used to\n"
4671         "\t    have one event conditionally start and stop another event's\n"
4672         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4673         "\t    the enable_event and disable_event triggers.\n"
4674 #endif
4675 ;
4676
4677 static ssize_t
4678 tracing_readme_read(struct file *filp, char __user *ubuf,
4679                        size_t cnt, loff_t *ppos)
4680 {
4681         return simple_read_from_buffer(ubuf, cnt, ppos,
4682                                         readme_msg, strlen(readme_msg));
4683 }
4684
4685 static const struct file_operations tracing_readme_fops = {
4686         .open           = tracing_open_generic,
4687         .read           = tracing_readme_read,
4688         .llseek         = generic_file_llseek,
4689 };
4690
4691 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4692 {
4693         unsigned int *ptr = v;
4694
4695         if (*pos || m->count)
4696                 ptr++;
4697
4698         (*pos)++;
4699
4700         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4701              ptr++) {
4702                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4703                         continue;
4704
4705                 return ptr;
4706         }
4707
4708         return NULL;
4709 }
4710
4711 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4712 {
4713         void *v;
4714         loff_t l = 0;
4715
4716         preempt_disable();
4717         arch_spin_lock(&trace_cmdline_lock);
4718
4719         v = &savedcmd->map_cmdline_to_pid[0];
4720         while (l <= *pos) {
4721                 v = saved_cmdlines_next(m, v, &l);
4722                 if (!v)
4723                         return NULL;
4724         }
4725
4726         return v;
4727 }
4728
4729 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4730 {
4731         arch_spin_unlock(&trace_cmdline_lock);
4732         preempt_enable();
4733 }
4734
4735 static int saved_cmdlines_show(struct seq_file *m, void *v)
4736 {
4737         char buf[TASK_COMM_LEN];
4738         unsigned int *pid = v;
4739
4740         __trace_find_cmdline(*pid, buf);
4741         seq_printf(m, "%d %s\n", *pid, buf);
4742         return 0;
4743 }
4744
4745 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4746         .start          = saved_cmdlines_start,
4747         .next           = saved_cmdlines_next,
4748         .stop           = saved_cmdlines_stop,
4749         .show           = saved_cmdlines_show,
4750 };
4751
4752 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4753 {
4754         if (tracing_disabled)
4755                 return -ENODEV;
4756
4757         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4758 }
4759
4760 static const struct file_operations tracing_saved_cmdlines_fops = {
4761         .open           = tracing_saved_cmdlines_open,
4762         .read           = seq_read,
4763         .llseek         = seq_lseek,
4764         .release        = seq_release,
4765 };
4766
4767 static ssize_t
4768 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4769                                  size_t cnt, loff_t *ppos)
4770 {
4771         char buf[64];
4772         int r;
4773
4774         arch_spin_lock(&trace_cmdline_lock);
4775         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4776         arch_spin_unlock(&trace_cmdline_lock);
4777
4778         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4779 }
4780
4781 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4782 {
4783         kfree(s->saved_cmdlines);
4784         kfree(s->map_cmdline_to_pid);
4785         kfree(s);
4786 }
4787
4788 static int tracing_resize_saved_cmdlines(unsigned int val)
4789 {
4790         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4791
4792         s = kmalloc(sizeof(*s), GFP_KERNEL);
4793         if (!s)
4794                 return -ENOMEM;
4795
4796         if (allocate_cmdlines_buffer(val, s) < 0) {
4797                 kfree(s);
4798                 return -ENOMEM;
4799         }
4800
4801         arch_spin_lock(&trace_cmdline_lock);
4802         savedcmd_temp = savedcmd;
4803         savedcmd = s;
4804         arch_spin_unlock(&trace_cmdline_lock);
4805         free_saved_cmdlines_buffer(savedcmd_temp);
4806
4807         return 0;
4808 }
4809
4810 static ssize_t
4811 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4812                                   size_t cnt, loff_t *ppos)
4813 {
4814         unsigned long val;
4815         int ret;
4816
4817         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4818         if (ret)
4819                 return ret;
4820
4821         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4822         if (!val || val > PID_MAX_DEFAULT)
4823                 return -EINVAL;
4824
4825         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4826         if (ret < 0)
4827                 return ret;
4828
4829         *ppos += cnt;
4830
4831         return cnt;
4832 }
4833
4834 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4835         .open           = tracing_open_generic,
4836         .read           = tracing_saved_cmdlines_size_read,
4837         .write          = tracing_saved_cmdlines_size_write,
4838 };
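/*
 * Note (not in the original file): together these files expose the cmdline
 * cache.  Reading saved_cmdlines prints one "<pid> <comm>" pair per cached
 * entry, and writing a count between 1 and PID_MAX_DEFAULT to
 * saved_cmdlines_size reallocates the cache to that many entries; the old
 * cache contents are dropped when the buffer is swapped.
 */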
4839
4840 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4841 static union trace_eval_map_item *
4842 update_eval_map(union trace_eval_map_item *ptr)
4843 {
4844         if (!ptr->map.eval_string) {
4845                 if (ptr->tail.next) {
4846                         ptr = ptr->tail.next;
4847                         /* Set ptr to the next real item (skip head) */
4848                         ptr++;
4849                 } else
4850                         return NULL;
4851         }
4852         return ptr;
4853 }
4854
4855 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4856 {
4857         union trace_eval_map_item *ptr = v;
4858
4859         /*
4860          * Paranoid! If ptr points to end, we don't want to increment past it.
4861          * This really should never happen.
4862          */
4863         ptr = update_eval_map(ptr);
4864         if (WARN_ON_ONCE(!ptr))
4865                 return NULL;
4866
4867         ptr++;
4868
4869         (*pos)++;
4870
4871         ptr = update_eval_map(ptr);
4872
4873         return ptr;
4874 }
4875
4876 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4877 {
4878         union trace_eval_map_item *v;
4879         loff_t l = 0;
4880
4881         mutex_lock(&trace_eval_mutex);
4882
4883         v = trace_eval_maps;
4884         if (v)
4885                 v++;
4886
4887         while (v && l < *pos) {
4888                 v = eval_map_next(m, v, &l);
4889         }
4890
4891         return v;
4892 }
4893
4894 static void eval_map_stop(struct seq_file *m, void *v)
4895 {
4896         mutex_unlock(&trace_eval_mutex);
4897 }
4898
4899 static int eval_map_show(struct seq_file *m, void *v)
4900 {
4901         union trace_eval_map_item *ptr = v;
4902
4903         seq_printf(m, "%s %ld (%s)\n",
4904                    ptr->map.eval_string, ptr->map.eval_value,
4905                    ptr->map.system);
4906
4907         return 0;
4908 }
4909
4910 static const struct seq_operations tracing_eval_map_seq_ops = {
4911         .start          = eval_map_start,
4912         .next           = eval_map_next,
4913         .stop           = eval_map_stop,
4914         .show           = eval_map_show,
4915 };
4916
4917 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4918 {
4919         if (tracing_disabled)
4920                 return -ENODEV;
4921
4922         return seq_open(filp, &tracing_eval_map_seq_ops);
4923 }
4924
4925 static const struct file_operations tracing_eval_map_fops = {
4926         .open           = tracing_eval_map_open,
4927         .read           = seq_read,
4928         .llseek         = seq_lseek,
4929         .release        = seq_release,
4930 };
4931
4932 static inline union trace_eval_map_item *
4933 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4934 {
4935         /* Return tail of array given the head */
4936         return ptr + ptr->head.length + 1;
4937 }
4938
4939 static void
4940 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4941                            int len)
4942 {
4943         struct trace_eval_map **stop;
4944         struct trace_eval_map **map;
4945         union trace_eval_map_item *map_array;
4946         union trace_eval_map_item *ptr;
4947
4948         stop = start + len;
4949
4950         /*
4951          * The trace_eval_maps contains the map plus a head and tail item,
4952          * where the head holds the module and length of array, and the
4953          * tail holds a pointer to the next list.
4954          */
4955         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4956         if (!map_array) {
4957                 pr_warn("Unable to allocate trace eval mapping\n");
4958                 return;
4959         }
4960
4961         mutex_lock(&trace_eval_mutex);
4962
4963         if (!trace_eval_maps)
4964                 trace_eval_maps = map_array;
4965         else {
4966                 ptr = trace_eval_maps;
4967                 for (;;) {
4968                         ptr = trace_eval_jmp_to_tail(ptr);
4969                         if (!ptr->tail.next)
4970                                 break;
4971                         ptr = ptr->tail.next;
4972
4973                 }
4974                 ptr->tail.next = map_array;
4975         }
4976         map_array->head.mod = mod;
4977         map_array->head.length = len;
4978         map_array++;
4979
4980         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4981                 map_array->map = **map;
4982                 map_array++;
4983         }
4984         memset(map_array, 0, sizeof(*map_array));
4985
4986         mutex_unlock(&trace_eval_mutex);
4987 }
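/*
 * Note (not in the original file): the array built above is laid out as
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * The final memset() zeroes the tail element, so tail.next terminates the
 * list until another module's array is chained onto it, and
 * trace_eval_jmp_to_tail() finds the tail by skipping head.length + 1
 * entries past the head.
 */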
4988
4989 static void trace_create_eval_file(struct dentry *d_tracer)
4990 {
4991         trace_create_file("eval_map", 0444, d_tracer,
4992                           NULL, &tracing_eval_map_fops);
4993 }
4994
4995 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
4996 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
4997 static inline void trace_insert_eval_map_file(struct module *mod,
4998                               struct trace_eval_map **start, int len) { }
4999 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5000
5001 static void trace_insert_eval_map(struct module *mod,
5002                                   struct trace_eval_map **start, int len)
5003 {
5004         struct trace_eval_map **map;
5005
5006         if (len <= 0)
5007                 return;
5008
5009         map = start;
5010
5011         trace_event_eval_update(map, len);
5012
5013         trace_insert_eval_map_file(mod, start, len);
5014 }
5015
5016 static ssize_t
5017 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5018                        size_t cnt, loff_t *ppos)
5019 {
5020         struct trace_array *tr = filp->private_data;
5021         char buf[MAX_TRACER_SIZE+2];
5022         int r;
5023
5024         mutex_lock(&trace_types_lock);
5025         r = sprintf(buf, "%s\n", tr->current_trace->name);
5026         mutex_unlock(&trace_types_lock);
5027
5028         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5029 }
5030
5031 int tracer_init(struct tracer *t, struct trace_array *tr)
5032 {
5033         tracing_reset_online_cpus(&tr->trace_buffer);
5034         return t->init(tr);
5035 }
5036
5037 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5038 {
5039         int cpu;
5040
5041         for_each_tracing_cpu(cpu)
5042                 per_cpu_ptr(buf->data, cpu)->entries = val;
5043 }
5044
5045 #ifdef CONFIG_TRACER_MAX_TRACE
5046 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5047 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5048                                         struct trace_buffer *size_buf, int cpu_id)
5049 {
5050         int cpu, ret = 0;
5051
5052         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5053                 for_each_tracing_cpu(cpu) {
5054                         ret = ring_buffer_resize(trace_buf->buffer,
5055                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5056                         if (ret < 0)
5057                                 break;
5058                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5059                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5060                 }
5061         } else {
5062                 ret = ring_buffer_resize(trace_buf->buffer,
5063                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5064                 if (ret == 0)
5065                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5066                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5067         }
5068
5069         return ret;
5070 }
5071 #endif /* CONFIG_TRACER_MAX_TRACE */
5072
5073 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5074                                         unsigned long size, int cpu)
5075 {
5076         int ret;
5077
5078         /*
5079          * If kernel or user changes the size of the ring buffer
5080          * we use the size that was given, and we can forget about
5081          * expanding it later.
5082          */
5083         ring_buffer_expanded = true;
5084
5085         /* May be called before buffers are initialized */
5086         if (!tr->trace_buffer.buffer)
5087                 return 0;
5088
5089         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5090         if (ret < 0)
5091                 return ret;
5092
5093 #ifdef CONFIG_TRACER_MAX_TRACE
5094         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5095             !tr->current_trace->use_max_tr)
5096                 goto out;
5097
5098         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5099         if (ret < 0) {
5100                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5101                                                      &tr->trace_buffer, cpu);
5102                 if (r < 0) {
5103                         /*
5104                          * AARGH! We are left with different
5105                          * size max buffer!!!!
5106                          * The max buffer is our "snapshot" buffer.
5107                          * When a tracer needs a snapshot (one of the
5108                          * latency tracers), it swaps the max buffer
5109                          * with the saved snapshot. We succeeded in
5110                          * updating the size of the main buffer, but failed to
5111                          * update the size of the max buffer. But when we tried
5112                          * to reset the main buffer to the original size, we
5113                          * failed there too. This is very unlikely to
5114                          * happen, but if it does, warn and kill all
5115                          * tracing.
5116                          */
5117                         WARN_ON(1);
5118                         tracing_disabled = 1;
5119                 }
5120                 return ret;
5121         }
5122
5123         if (cpu == RING_BUFFER_ALL_CPUS)
5124                 set_buffer_entries(&tr->max_buffer, size);
5125         else
5126                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5127
5128  out:
5129 #endif /* CONFIG_TRACER_MAX_TRACE */
5130
5131         if (cpu == RING_BUFFER_ALL_CPUS)
5132                 set_buffer_entries(&tr->trace_buffer, size);
5133         else
5134                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5135
5136         return ret;
5137 }
5138
5139 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5140                                           unsigned long size, int cpu_id)
5141 {
5142         int ret = size;
5143
5144         mutex_lock(&trace_types_lock);
5145
5146         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5147                 /* make sure this cpu is enabled in the mask */
5148                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5149                         ret = -EINVAL;
5150                         goto out;
5151                 }
5152         }
5153
5154         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5155         if (ret < 0)
5156                 ret = -ENOMEM;
5157
5158 out:
5159         mutex_unlock(&trace_types_lock);
5160
5161         return ret;
5162 }
5163
5164
5165 /**
5166  * tracing_update_buffers - used by tracing facility to expand ring buffers
5167  *
5168  * To save memory when tracing is never used on a system that has it
5169  * configured in, the ring buffers are set to a minimum size. Once a
5170  * user starts to use the tracing facility, they need to grow to
5171  * their default size.
5172  *
5173  * This function is to be called when a tracer is about to be used.
5174  */
5175 int tracing_update_buffers(void)
5176 {
5177         int ret = 0;
5178
5179         mutex_lock(&trace_types_lock);
5180         if (!ring_buffer_expanded)
5181                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5182                                                 RING_BUFFER_ALL_CPUS);
5183         mutex_unlock(&trace_types_lock);
5184
5185         return ret;
5186 }
5187
5188 struct trace_option_dentry;
5189
5190 static void
5191 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5192
5193 /*
5194  * Used to clear out the tracer before deletion of an instance.
5195  * Must have trace_types_lock held.
5196  */
5197 static void tracing_set_nop(struct trace_array *tr)
5198 {
5199         if (tr->current_trace == &nop_trace)
5200                 return;
5201
5202         tr->current_trace->enabled--;
5203
5204         if (tr->current_trace->reset)
5205                 tr->current_trace->reset(tr);
5206
5207         tr->current_trace = &nop_trace;
5208 }
5209
5210 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5211 {
5212         /* Only enable if the directory has been created already. */
5213         if (!tr->dir)
5214                 return;
5215
5216         create_trace_option_files(tr, t);
5217 }
5218
5219 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5220 {
5221         struct tracer *t;
5222 #ifdef CONFIG_TRACER_MAX_TRACE
5223         bool had_max_tr;
5224 #endif
5225         int ret = 0;
5226
5227         mutex_lock(&trace_types_lock);
5228
5229         if (!ring_buffer_expanded) {
5230                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5231                                                 RING_BUFFER_ALL_CPUS);
5232                 if (ret < 0)
5233                         goto out;
5234                 ret = 0;
5235         }
5236
5237         for (t = trace_types; t; t = t->next) {
5238                 if (strcmp(t->name, buf) == 0)
5239                         break;
5240         }
5241         if (!t) {
5242                 ret = -EINVAL;
5243                 goto out;
5244         }
5245         if (t == tr->current_trace)
5246                 goto out;
5247
5248         /* Some tracers are only allowed for the top level buffer */
5249         if (!trace_ok_for_array(t, tr)) {
5250                 ret = -EINVAL;
5251                 goto out;
5252         }
5253
5254         /* If trace pipe files are being read, we can't change the tracer */
5255         if (tr->current_trace->ref) {
5256                 ret = -EBUSY;
5257                 goto out;
5258         }
5259
5260         trace_branch_disable();
5261
5262         tr->current_trace->enabled--;
5263
5264         if (tr->current_trace->reset)
5265                 tr->current_trace->reset(tr);
5266
5267         /* Current trace needs to be nop_trace before synchronize_sched */
5268         tr->current_trace = &nop_trace;
5269
5270 #ifdef CONFIG_TRACER_MAX_TRACE
5271         had_max_tr = tr->allocated_snapshot;
5272
5273         if (had_max_tr && !t->use_max_tr) {
5274                 /*
5275                  * We need to make sure that the update_max_tr sees that
5276                  * current_trace changed to nop_trace to keep it from
5277                  * swapping the buffers after we resize it.
5278                  * The update_max_tr is called with interrupts disabled,
5279                  * so a synchronize_sched() is sufficient.
5280                  */
5281                 synchronize_sched();
5282                 free_snapshot(tr);
5283         }
5284 #endif
5285
5286 #ifdef CONFIG_TRACER_MAX_TRACE
5287         if (t->use_max_tr && !had_max_tr) {
5288                 ret = alloc_snapshot(tr);
5289                 if (ret < 0)
5290                         goto out;
5291         }
5292 #endif
5293
5294         if (t->init) {
5295                 ret = tracer_init(t, tr);
5296                 if (ret)
5297                         goto out;
5298         }
5299
5300         tr->current_trace = t;
5301         tr->current_trace->enabled++;
5302         trace_branch_enable(tr);
5303  out:
5304         mutex_unlock(&trace_types_lock);
5305
5306         return ret;
5307 }
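/*
 * Illustrative sketch, not part of the original file: selecting a tracer
 * from user space by writing its name to current_tracer, which lands in
 * tracing_set_tracer() above.  The tracefs mount point is an assumption;
 * the name must match a tracer registered in trace_types ("nop" is always
 * present, others depend on the kernel configuration).
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_current_tracer(const char *name)
{
	int fd = open("/sys/kernel/debug/tracing/current_tracer", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, name, strlen(name));
	close(fd);
	return ret < 0 ? -1 : 0;
}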
5308
5309 static ssize_t
5310 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5311                         size_t cnt, loff_t *ppos)
5312 {
5313         struct trace_array *tr = filp->private_data;
5314         char buf[MAX_TRACER_SIZE+1];
5315         int i;
5316         size_t ret;
5317         int err;
5318
5319         ret = cnt;
5320
5321         if (cnt > MAX_TRACER_SIZE)
5322                 cnt = MAX_TRACER_SIZE;
5323
5324         if (copy_from_user(buf, ubuf, cnt))
5325                 return -EFAULT;
5326
5327         buf[cnt] = 0;
5328
5329         /* strip trailing whitespace. */
5330         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5331                 buf[i] = 0;
5332
5333         err = tracing_set_tracer(tr, buf);
5334         if (err)
5335                 return err;
5336
5337         *ppos += ret;
5338
5339         return ret;
5340 }
5341
5342 static ssize_t
5343 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5344                    size_t cnt, loff_t *ppos)
5345 {
5346         char buf[64];
5347         int r;
5348
5349         r = snprintf(buf, sizeof(buf), "%ld\n",
5350                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5351         if (r > sizeof(buf))
5352                 r = sizeof(buf);
5353         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5354 }
5355
5356 static ssize_t
5357 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5358                     size_t cnt, loff_t *ppos)
5359 {
5360         unsigned long val;
5361         int ret;
5362
5363         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5364         if (ret)
5365                 return ret;
5366
5367         *ptr = val * 1000;
5368
5369         return cnt;
5370 }
5371
5372 static ssize_t
5373 tracing_thresh_read(struct file *filp, char __user *ubuf,
5374                     size_t cnt, loff_t *ppos)
5375 {
5376         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5377 }
5378
5379 static ssize_t
5380 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5381                      size_t cnt, loff_t *ppos)
5382 {
5383         struct trace_array *tr = filp->private_data;
5384         int ret;
5385
5386         mutex_lock(&trace_types_lock);
5387         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5388         if (ret < 0)
5389                 goto out;
5390
5391         if (tr->current_trace->update_thresh) {
5392                 ret = tr->current_trace->update_thresh(tr);
5393                 if (ret < 0)
5394                         goto out;
5395         }
5396
5397         ret = cnt;
5398 out:
5399         mutex_unlock(&trace_types_lock);
5400
5401         return ret;
5402 }
5403
5404 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5405
5406 static ssize_t
5407 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5408                      size_t cnt, loff_t *ppos)
5409 {
5410         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5411 }
5412
5413 static ssize_t
5414 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5415                       size_t cnt, loff_t *ppos)
5416 {
5417         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5418 }
5419
5420 #endif
5421
5422 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5423 {
5424         struct trace_array *tr = inode->i_private;
5425         struct trace_iterator *iter;
5426         int ret = 0;
5427
5428         if (tracing_disabled)
5429                 return -ENODEV;
5430
5431         if (trace_array_get(tr) < 0)
5432                 return -ENODEV;
5433
5434         mutex_lock(&trace_types_lock);
5435
5436         /* create a buffer to store the information to pass to userspace */
5437         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5438         if (!iter) {
5439                 ret = -ENOMEM;
5440                 __trace_array_put(tr);
5441                 goto out;
5442         }
5443
5444         trace_seq_init(&iter->seq);
5445         iter->trace = tr->current_trace;
5446
5447         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5448                 ret = -ENOMEM;
5449                 goto fail;
5450         }
5451
5452         /* trace pipe does not show start of buffer */
5453         cpumask_setall(iter->started);
5454
5455         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5456                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5457
5458         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5459         if (trace_clocks[tr->clock_id].in_ns)
5460                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5461
5462         iter->tr = tr;
5463         iter->trace_buffer = &tr->trace_buffer;
5464         iter->cpu_file = tracing_get_cpu(inode);
5465         mutex_init(&iter->mutex);
5466         filp->private_data = iter;
5467
5468         if (iter->trace->pipe_open)
5469                 iter->trace->pipe_open(iter);
5470
5471         nonseekable_open(inode, filp);
5472
5473         tr->current_trace->ref++;
5474 out:
5475         mutex_unlock(&trace_types_lock);
5476         return ret;
5477
5478 fail:
5479         kfree(iter->trace);
5480         kfree(iter);
5481         __trace_array_put(tr);
5482         mutex_unlock(&trace_types_lock);
5483         return ret;
5484 }
5485
5486 static int tracing_release_pipe(struct inode *inode, struct file *file)
5487 {
5488         struct trace_iterator *iter = file->private_data;
5489         struct trace_array *tr = inode->i_private;
5490
5491         mutex_lock(&trace_types_lock);
5492
5493         tr->current_trace->ref--;
5494
5495         if (iter->trace->pipe_close)
5496                 iter->trace->pipe_close(iter);
5497
5498         mutex_unlock(&trace_types_lock);
5499
5500         free_cpumask_var(iter->started);
5501         mutex_destroy(&iter->mutex);
5502         kfree(iter);
5503
5504         trace_array_put(tr);
5505
5506         return 0;
5507 }
5508
5509 static unsigned int
5510 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5511 {
5512         struct trace_array *tr = iter->tr;
5513
5514         /* Iterators are static; they should be filled or empty */
5515         if (trace_buffer_iter(iter, iter->cpu_file))
5516                 return POLLIN | POLLRDNORM;
5517
5518         if (tr->trace_flags & TRACE_ITER_BLOCK)
5519                 /*
5520                  * Always select as readable when in blocking mode
5521                  */
5522                 return POLLIN | POLLRDNORM;
5523         else
5524                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5525                                              filp, poll_table);
5526 }
5527
5528 static unsigned int
5529 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5530 {
5531         struct trace_iterator *iter = filp->private_data;
5532
5533         return trace_poll(iter, filp, poll_table);
5534 }
5535
5536 /* Must be called with iter->mutex held. */
5537 static int tracing_wait_pipe(struct file *filp)
5538 {
5539         struct trace_iterator *iter = filp->private_data;
5540         int ret;
5541
5542         while (trace_empty(iter)) {
5543
5544                 if ((filp->f_flags & O_NONBLOCK)) {
5545                         return -EAGAIN;
5546                 }
5547
5548                 /*
5549                  * We block until we read something and tracing is disabled.
5550                  * We still block if tracing is disabled, but we have never
5551                  * read anything. This allows a user to cat this file, and
5552                  * then enable tracing. But after we have read something,
5553                  * we give an EOF when tracing is again disabled.
5554                  *
5555                  * iter->pos will be 0 if we haven't read anything.
5556                  */
5557                 if (!tracing_is_on() && iter->pos)
5558                         break;
5559
5560                 mutex_unlock(&iter->mutex);
5561
5562                 ret = wait_on_pipe(iter, false);
5563
5564                 mutex_lock(&iter->mutex);
5565
5566                 if (ret)
5567                         return ret;
5568         }
5569
5570         return 1;
5571 }
5572
5573 /*
5574  * Consumer reader.
5575  */
5576 static ssize_t
5577 tracing_read_pipe(struct file *filp, char __user *ubuf,
5578                   size_t cnt, loff_t *ppos)
5579 {
5580         struct trace_iterator *iter = filp->private_data;
5581         ssize_t sret;
5582
5583         /*
5584          * Avoid more than one consumer on a single file descriptor
5585          * This is just a matter of trace coherency; the ring buffer itself
5586          * is protected.
5587          */
5588         mutex_lock(&iter->mutex);
5589
5590         /* return any leftover data */
5591         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5592         if (sret != -EBUSY)
5593                 goto out;
5594
5595         trace_seq_init(&iter->seq);
5596
5597         if (iter->trace->read) {
5598                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5599                 if (sret)
5600                         goto out;
5601         }
5602
5603 waitagain:
5604         sret = tracing_wait_pipe(filp);
5605         if (sret <= 0)
5606                 goto out;
5607
5608         /* stop when tracing is finished */
5609         if (trace_empty(iter)) {
5610                 sret = 0;
5611                 goto out;
5612         }
5613
5614         if (cnt >= PAGE_SIZE)
5615                 cnt = PAGE_SIZE - 1;
5616
5617         /* reset all but tr, trace, and overruns */
5618         memset(&iter->seq, 0,
5619                sizeof(struct trace_iterator) -
5620                offsetof(struct trace_iterator, seq));
5621         cpumask_clear(iter->started);
5622         iter->pos = -1;
5623
5624         trace_event_read_lock();
5625         trace_access_lock(iter->cpu_file);
5626         while (trace_find_next_entry_inc(iter) != NULL) {
5627                 enum print_line_t ret;
5628                 int save_len = iter->seq.seq.len;
5629
5630                 ret = print_trace_line(iter);
5631                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5632                         /* don't print partial lines */
5633                         iter->seq.seq.len = save_len;
5634                         break;
5635                 }
5636                 if (ret != TRACE_TYPE_NO_CONSUME)
5637                         trace_consume(iter);
5638
5639                 if (trace_seq_used(&iter->seq) >= cnt)
5640                         break;
5641
5642                 /*
5643                  * Setting the full flag means we reached the trace_seq buffer
5644                  * size and should have hit the partial output condition above.
5645                  * One of the trace_seq_* functions is not being used properly.
5646                  */
5647                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5648                           iter->ent->type);
5649         }
5650         trace_access_unlock(iter->cpu_file);
5651         trace_event_read_unlock();
5652
5653         /* Now copy what we have to the user */
5654         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5655         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5656                 trace_seq_init(&iter->seq);
5657
5658         /*
5659          * If there was nothing to send to the user, in spite of consuming trace
5660          * entries, go back to wait for more entries.
5661          */
5662         if (sret == -EBUSY)
5663                 goto waitagain;
5664
5665 out:
5666         mutex_unlock(&iter->mutex);
5667
5668         return sret;
5669 }
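/*
 * Illustrative sketch, not part of the original file: a minimal user-space
 * consumer for the trace_pipe file served by tracing_read_pipe() above.
 * The tracefs mount point is an assumption.  Reads are destructive (the
 * entries are consumed), and in blocking mode read() sleeps until data is
 * available.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	if (pfd.fd < 0) {
		perror("open");
		return 1;
	}
	pfd.events = POLLIN;

	for (;;) {
		if (poll(&pfd, 1, -1) < 0)
			break;
		n = read(pfd.fd, buf, sizeof(buf));
		if (n <= 0)
			break;
		fwrite(buf, 1, n, stdout);
	}
	close(pfd.fd);
	return 0;
}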
5670
5671 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5672                                      unsigned int idx)
5673 {
5674         __free_page(spd->pages[idx]);
5675 }
5676
5677 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5678         .can_merge              = 0,
5679         .confirm                = generic_pipe_buf_confirm,
5680         .release                = generic_pipe_buf_release,
5681         .steal                  = generic_pipe_buf_steal,
5682         .get                    = generic_pipe_buf_get,
5683 };
5684
5685 static size_t
5686 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5687 {
5688         size_t count;
5689         int save_len;
5690         int ret;
5691
5692         /* Seq buffer is page-sized, exactly what we need. */
5693         for (;;) {
5694                 save_len = iter->seq.seq.len;
5695                 ret = print_trace_line(iter);
5696
5697                 if (trace_seq_has_overflowed(&iter->seq)) {
5698                         iter->seq.seq.len = save_len;
5699                         break;
5700                 }
5701
5702                 /*
5703                  * This should not be hit, because it should only
5704                  * be set if the iter->seq overflowed. But check it
5705                  * anyway to be safe.
5706                  */
5707                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5708                         iter->seq.seq.len = save_len;
5709                         break;
5710                 }
5711
5712                 count = trace_seq_used(&iter->seq) - save_len;
5713                 if (rem < count) {
5714                         rem = 0;
5715                         iter->seq.seq.len = save_len;
5716                         break;
5717                 }
5718
5719                 if (ret != TRACE_TYPE_NO_CONSUME)
5720                         trace_consume(iter);
5721                 rem -= count;
5722                 if (!trace_find_next_entry_inc(iter))   {
5723                         rem = 0;
5724                         iter->ent = NULL;
5725                         break;
5726                 }
5727         }
5728
5729         return rem;
5730 }
5731
5732 static ssize_t tracing_splice_read_pipe(struct file *filp,
5733                                         loff_t *ppos,
5734                                         struct pipe_inode_info *pipe,
5735                                         size_t len,
5736                                         unsigned int flags)
5737 {
5738         struct page *pages_def[PIPE_DEF_BUFFERS];
5739         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5740         struct trace_iterator *iter = filp->private_data;
5741         struct splice_pipe_desc spd = {
5742                 .pages          = pages_def,
5743                 .partial        = partial_def,
5744                 .nr_pages       = 0, /* This gets updated below. */
5745                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5746                 .ops            = &tracing_pipe_buf_ops,
5747                 .spd_release    = tracing_spd_release_pipe,
5748         };
5749         ssize_t ret;
5750         size_t rem;
5751         unsigned int i;
5752
5753         if (splice_grow_spd(pipe, &spd))
5754                 return -ENOMEM;
5755
5756         mutex_lock(&iter->mutex);
5757
5758         if (iter->trace->splice_read) {
5759                 ret = iter->trace->splice_read(iter, filp,
5760                                                ppos, pipe, len, flags);
5761                 if (ret)
5762                         goto out_err;
5763         }
5764
5765         ret = tracing_wait_pipe(filp);
5766         if (ret <= 0)
5767                 goto out_err;
5768
5769         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5770                 ret = -EFAULT;
5771                 goto out_err;
5772         }
5773
5774         trace_event_read_lock();
5775         trace_access_lock(iter->cpu_file);
5776
5777         /* Fill as many pages as possible. */
5778         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5779                 spd.pages[i] = alloc_page(GFP_KERNEL);
5780                 if (!spd.pages[i])
5781                         break;
5782
5783                 rem = tracing_fill_pipe_page(rem, iter);
5784
5785                 /* Copy the data into the page, so we can start over. */
5786                 ret = trace_seq_to_buffer(&iter->seq,
5787                                           page_address(spd.pages[i]),
5788                                           trace_seq_used(&iter->seq));
5789                 if (ret < 0) {
5790                         __free_page(spd.pages[i]);
5791                         break;
5792                 }
5793                 spd.partial[i].offset = 0;
5794                 spd.partial[i].len = trace_seq_used(&iter->seq);
5795
5796                 trace_seq_init(&iter->seq);
5797         }
5798
5799         trace_access_unlock(iter->cpu_file);
5800         trace_event_read_unlock();
5801         mutex_unlock(&iter->mutex);
5802
5803         spd.nr_pages = i;
5804
5805         if (i)
5806                 ret = splice_to_pipe(pipe, &spd);
5807         else
5808                 ret = 0;
5809 out:
5810         splice_shrink_spd(&spd);
5811         return ret;
5812
5813 out_err:
5814         mutex_unlock(&iter->mutex);
5815         goto out;
5816 }
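/*
 * Note (not in the original file): the splice path above avoids an extra
 * user-space copy.  Each iteration formats entries into iter->seq, copies
 * the result into a freshly allocated page and hands the filled pages to
 * splice_to_pipe(); tracing_fill_pipe_page() backs off to the last complete
 * line rather than splice a partially printed one.
 */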
5817
5818 static ssize_t
5819 tracing_entries_read(struct file *filp, char __user *ubuf,
5820                      size_t cnt, loff_t *ppos)
5821 {
5822         struct inode *inode = file_inode(filp);
5823         struct trace_array *tr = inode->i_private;
5824         int cpu = tracing_get_cpu(inode);
5825         char buf[64];
5826         int r = 0;
5827         ssize_t ret;
5828
5829         mutex_lock(&trace_types_lock);
5830
5831         if (cpu == RING_BUFFER_ALL_CPUS) {
5832                 int cpu, buf_size_same;
5833                 unsigned long size;
5834
5835                 size = 0;
5836                 buf_size_same = 1;
5837                 /* check if all cpu sizes are same */
5838                 for_each_tracing_cpu(cpu) {
5839                         /* fill in the size from first enabled cpu */
5840                         if (size == 0)
5841                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5842                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5843                                 buf_size_same = 0;
5844                                 break;
5845                         }
5846                 }
5847
5848                 if (buf_size_same) {
5849                         if (!ring_buffer_expanded)
5850                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5851                                             size >> 10,
5852                                             trace_buf_size >> 10);
5853                         else
5854                                 r = sprintf(buf, "%lu\n", size >> 10);
5855                 } else
5856                         r = sprintf(buf, "X\n");
5857         } else
5858                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5859
5860         mutex_unlock(&trace_types_lock);
5861
5862         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5863         return ret;
5864 }
5865
5866 static ssize_t
5867 tracing_entries_write(struct file *filp, const char __user *ubuf,
5868                       size_t cnt, loff_t *ppos)
5869 {
5870         struct inode *inode = file_inode(filp);
5871         struct trace_array *tr = inode->i_private;
5872         unsigned long val;
5873         int ret;
5874
5875         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5876         if (ret)
5877                 return ret;
5878
5879         /* must have at least 1 entry */
5880         if (!val)
5881                 return -EINVAL;
5882
5883         /* value is in KB */
5884         val <<= 10;
5885         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5886         if (ret < 0)
5887                 return ret;
5888
5889         *ppos += cnt;
5890
5891         return cnt;
5892 }
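/*
 * Note (not in the original file): the value read and written here is in
 * kilobytes per CPU.  On the all-CPU file, the read shows a single number
 * when every per-cpu buffer has the same size, "X" when they differ, and
 * an "(expanded: N)" hint while the boot-time minimal buffer has not yet
 * grown to trace_buf_size.
 */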
5893
5894 static ssize_t
5895 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5896                                 size_t cnt, loff_t *ppos)
5897 {
5898         struct trace_array *tr = filp->private_data;
5899         char buf[64];
5900         int r, cpu;
5901         unsigned long size = 0, expanded_size = 0;
5902
5903         mutex_lock(&trace_types_lock);
5904         for_each_tracing_cpu(cpu) {
5905                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5906                 if (!ring_buffer_expanded)
5907                         expanded_size += trace_buf_size >> 10;
5908         }
5909         if (ring_buffer_expanded)
5910                 r = sprintf(buf, "%lu\n", size);
5911         else
5912                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5913         mutex_unlock(&trace_types_lock);
5914
5915         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5916 }
5917
5918 static ssize_t
5919 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5920                           size_t cnt, loff_t *ppos)
5921 {
5922         /*
5923          * There is no need to read what the user has written; this function
5924          * only exists so that "echo" into this file does not return an error.
5925          */
5926
5927         *ppos += cnt;
5928
5929         return cnt;
5930 }
5931
5932 static int
5933 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5934 {
5935         struct trace_array *tr = inode->i_private;
5936
5937         /* disable tracing ? */
5938         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5939                 tracer_tracing_off(tr);
5940         /* resize the ring buffer to 0 */
5941         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5942
5943         trace_array_put(tr);
5944
5945         return 0;
5946 }
5947
5948 static ssize_t
5949 tracing_mark_write(struct file *filp, const char __user *ubuf,
5950                                         size_t cnt, loff_t *fpos)
5951 {
5952         struct trace_array *tr = filp->private_data;
5953         struct ring_buffer_event *event;
5954         struct ring_buffer *buffer;
5955         struct print_entry *entry;
5956         unsigned long irq_flags;
5957         const char faulted[] = "<faulted>";
5958         ssize_t written;
5959         int size;
5960         int len;
5961
5962 /* Used in tracing_mark_raw_write() as well */
5963 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5964
5965         if (tracing_disabled)
5966                 return -EINVAL;
5967
5968         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5969                 return -EINVAL;
5970
5971         if (cnt > TRACE_BUF_SIZE)
5972                 cnt = TRACE_BUF_SIZE;
5973
5974         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5975
5976         local_save_flags(irq_flags);
5977         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5978
5979         /* If less than "<faulted>", then make sure we can still add that */
5980         if (cnt < FAULTED_SIZE)
5981                 size += FAULTED_SIZE - cnt;
5982
5983         buffer = tr->trace_buffer.buffer;
5984         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5985                                             irq_flags, preempt_count());
5986         if (unlikely(!event))
5987                 /* Ring buffer disabled, return as if not open for write */
5988                 return -EBADF;
5989
5990         entry = ring_buffer_event_data(event);
5991         entry->ip = _THIS_IP_;
5992
5993         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5994         if (len) {
5995                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5996                 cnt = FAULTED_SIZE;
5997                 written = -EFAULT;
5998         } else
5999                 written = cnt;
6000         len = cnt;
6001
6002         if (entry->buf[cnt - 1] != '\n') {
6003                 entry->buf[cnt] = '\n';
6004                 entry->buf[cnt + 1] = '\0';
6005         } else
6006                 entry->buf[cnt] = '\0';
6007
6008         __buffer_unlock_commit(buffer, event);
6009
6010         if (written > 0)
6011                 *fpos += written;
6012
6013         return written;
6014 }
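/*
 * Illustrative sketch, not part of the original file: dropping an annotation
 * into the ring buffer via trace_marker, which is handled by
 * tracing_mark_write() above.  The tracefs path is an assumption; the kernel
 * appends a newline if the message does not end in one, and writes larger
 * than TRACE_BUF_SIZE are truncated.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int trace_marker_write(const char *msg)
{
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, msg, strlen(msg));
	close(fd);
	return ret < 0 ? -1 : 0;
}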
6015
6016 /* Limit it for now to 3K (including tag) */
6017 #define RAW_DATA_MAX_SIZE (1024*3)
6018
6019 static ssize_t
6020 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6021                                         size_t cnt, loff_t *fpos)
6022 {
6023         struct trace_array *tr = filp->private_data;
6024         struct ring_buffer_event *event;
6025         struct ring_buffer *buffer;
6026         struct raw_data_entry *entry;
6027         const char faulted[] = "<faulted>";
6028         unsigned long irq_flags;
6029         ssize_t written;
6030         int size;
6031         int len;
6032
6033 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6034
6035         if (tracing_disabled)
6036                 return -EINVAL;
6037
6038         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6039                 return -EINVAL;
6040
6041         /* The marker must at least have a tag id */
6042         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6043                 return -EINVAL;
6044
6045         if (cnt > TRACE_BUF_SIZE)
6046                 cnt = TRACE_BUF_SIZE;
6047
6048         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6049
6050         local_save_flags(irq_flags);
6051         size = sizeof(*entry) + cnt;
6052         if (cnt < FAULT_SIZE_ID)
6053                 size += FAULT_SIZE_ID - cnt;
6054
6055         buffer = tr->trace_buffer.buffer;
6056         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6057                                             irq_flags, preempt_count());
6058         if (!event)
6059                 /* Ring buffer disabled, return as if not open for write */
6060                 return -EBADF;
6061
6062         entry = ring_buffer_event_data(event);
6063
6064         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6065         if (len) {
6066                 entry->id = -1;
6067                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6068                 written = -EFAULT;
6069         } else
6070                 written = cnt;
6071
6072         __buffer_unlock_commit(buffer, event);
6073
6074         if (written > 0)
6075                 *fpos += written;
6076
6077         return written;
6078 }
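/*
 * Illustrative sketch, not part of the original file: emitting a raw marker
 * through trace_marker_raw as handled by tracing_mark_raw_write() above.
 * The record starts with an unsigned int tag id followed by an arbitrary
 * payload and must stay within RAW_DATA_MAX_SIZE.  The tracefs path is an
 * assumption.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int trace_marker_raw_write(unsigned int id,
				  const void *payload, size_t payload_len)
{
	char rec[64];
	int fd;
	ssize_t ret;

	if (payload_len > sizeof(rec) - sizeof(id))
		return -1;

	memcpy(rec, &id, sizeof(id));
	memcpy(rec + sizeof(id), payload, payload_len);

	fd = open("/sys/kernel/debug/tracing/trace_marker_raw", O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, rec, sizeof(id) + payload_len);
	close(fd);
	return ret < 0 ? -1 : 0;
}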
6079
6080 static int tracing_clock_show(struct seq_file *m, void *v)
6081 {
6082         struct trace_array *tr = m->private;
6083         int i;
6084
6085         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6086                 seq_printf(m,
6087                         "%s%s%s%s", i ? " " : "",
6088                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6089                         i == tr->clock_id ? "]" : "");
6090         seq_putc(m, '\n');
6091
6092         return 0;
6093 }
6094
6095 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6096 {
6097         int i;
6098
6099         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6100                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6101                         break;
6102         }
6103         if (i == ARRAY_SIZE(trace_clocks))
6104                 return -EINVAL;
6105
6106         mutex_lock(&trace_types_lock);
6107
6108         tr->clock_id = i;
6109
6110         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6111
6112         /*
6113          * New clock may not be consistent with the previous clock.
6114          * Reset the buffer so that it doesn't have incomparable timestamps.
6115          */
6116         tracing_reset_online_cpus(&tr->trace_buffer);
6117
6118 #ifdef CONFIG_TRACER_MAX_TRACE
6119         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6120                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6121         tracing_reset_online_cpus(&tr->max_buffer);
6122 #endif
6123
6124         mutex_unlock(&trace_types_lock);
6125
6126         return 0;
6127 }
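/*
 * Note (not in the original file): reading trace_clock lists the available
 * clocks with the active one in brackets (e.g. "[local] global counter"),
 * and writing one of those names switches to it.  Because timestamps taken
 * with different clocks are not comparable, the switch also resets the
 * existing buffer contents.
 */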
6128
6129 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6130                                    size_t cnt, loff_t *fpos)
6131 {
6132         struct seq_file *m = filp->private_data;
6133         struct trace_array *tr = m->private;
6134         char buf[64];
6135         const char *clockstr;
6136         int ret;
6137
6138         if (cnt >= sizeof(buf))
6139                 return -EINVAL;
6140
6141         if (copy_from_user(buf, ubuf, cnt))
6142                 return -EFAULT;
6143
6144         buf[cnt] = 0;
6145
6146         clockstr = strstrip(buf);
6147
6148         ret = tracing_set_clock(tr, clockstr);
6149         if (ret)
6150                 return ret;
6151
6152         *fpos += cnt;
6153
6154         return cnt;
6155 }
6156
6157 static int tracing_clock_open(struct inode *inode, struct file *file)
6158 {
6159         struct trace_array *tr = inode->i_private;
6160         int ret;
6161
6162         if (tracing_disabled)
6163                 return -ENODEV;
6164
6165         if (trace_array_get(tr))
6166                 return -ENODEV;
6167
6168         ret = single_open(file, tracing_clock_show, inode->i_private);
6169         if (ret < 0)
6170                 trace_array_put(tr);
6171
6172         return ret;
6173 }
6174
6175 struct ftrace_buffer_info {
6176         struct trace_iterator   iter;
6177         void                    *spare;
6178         unsigned int            spare_cpu;
6179         unsigned int            read;
6180 };
6181
6182 #ifdef CONFIG_TRACER_SNAPSHOT
6183 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6184 {
6185         struct trace_array *tr = inode->i_private;
6186         struct trace_iterator *iter;
6187         struct seq_file *m;
6188         int ret = 0;
6189
6190         if (trace_array_get(tr) < 0)
6191                 return -ENODEV;
6192
6193         if (file->f_mode & FMODE_READ) {
6194                 iter = __tracing_open(inode, file, true);
6195                 if (IS_ERR(iter))
6196                         ret = PTR_ERR(iter);
6197         } else {
6198                 /* Writes still need the seq_file to hold the private data */
6199                 ret = -ENOMEM;
6200                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6201                 if (!m)
6202                         goto out;
6203                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6204                 if (!iter) {
6205                         kfree(m);
6206                         goto out;
6207                 }
6208                 ret = 0;
6209
6210                 iter->tr = tr;
6211                 iter->trace_buffer = &tr->max_buffer;
6212                 iter->cpu_file = tracing_get_cpu(inode);
6213                 m->private = iter;
6214                 file->private_data = m;
6215         }
6216 out:
6217         if (ret < 0)
6218                 trace_array_put(tr);
6219
6220         return ret;
6221 }
6222
6223 static ssize_t
6224 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6225                        loff_t *ppos)
6226 {
6227         struct seq_file *m = filp->private_data;
6228         struct trace_iterator *iter = m->private;
6229         struct trace_array *tr = iter->tr;
6230         unsigned long val;
6231         int ret;
6232
6233         ret = tracing_update_buffers();
6234         if (ret < 0)
6235                 return ret;
6236
6237         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6238         if (ret)
6239                 return ret;
6240
6241         mutex_lock(&trace_types_lock);
6242
6243         if (tr->current_trace->use_max_tr) {
6244                 ret = -EBUSY;
6245                 goto out;
6246         }
6247
6248         switch (val) {
6249         case 0:
6250                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6251                         ret = -EINVAL;
6252                         break;
6253                 }
6254                 if (tr->allocated_snapshot)
6255                         free_snapshot(tr);
6256                 break;
6257         case 1:
6258 /* Only allow per-cpu swap if the ring buffer supports it */
6259 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6260                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6261                         ret = -EINVAL;
6262                         break;
6263                 }
6264 #endif
6265                 if (!tr->allocated_snapshot) {
6266                         ret = alloc_snapshot(tr);
6267                         if (ret < 0)
6268                                 break;
6269                 }
6270                 local_irq_disable();
6271                 /* Now, we're going to swap */
6272                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6273                         update_max_tr(tr, current, smp_processor_id());
6274                 else
6275                         update_max_tr_single(tr, current, iter->cpu_file);
6276                 local_irq_enable();
6277                 break;
6278         default:
6279                 if (tr->allocated_snapshot) {
6280                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6281                                 tracing_reset_online_cpus(&tr->max_buffer);
6282                         else
6283                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6284                 }
6285                 break;
6286         }
6287
6288         if (ret >= 0) {
6289                 *ppos += cnt;
6290                 ret = cnt;
6291         }
6292 out:
6293         mutex_unlock(&trace_types_lock);
6294         return ret;
6295 }
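/*
 * Note (not in the original file): the value written above selects the
 * snapshot action: 0 frees the snapshot buffer (all-CPU file only), 1
 * allocates it if needed and swaps it with the live buffer (per-CPU swap
 * only where the ring buffer supports it), and any other value clears the
 * snapshot contents without freeing the buffer.
 */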
6296
6297 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6298 {
6299         struct seq_file *m = file->private_data;
6300         int ret;
6301
6302         ret = tracing_release(inode, file);
6303
6304         if (file->f_mode & FMODE_READ)
6305                 return ret;
6306
6307         /* If write only, the seq_file is just a stub */
6308         if (m)
6309                 kfree(m->private);
6310         kfree(m);
6311
6312         return 0;
6313 }
6314
6315 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6316 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6317                                     size_t count, loff_t *ppos);
6318 static int tracing_buffers_release(struct inode *inode, struct file *file);
6319 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6320                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6321
6322 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6323 {
6324         struct ftrace_buffer_info *info;
6325         int ret;
6326
6327         ret = tracing_buffers_open(inode, filp);
6328         if (ret < 0)
6329                 return ret;
6330
6331         info = filp->private_data;
6332
6333         if (info->iter.trace->use_max_tr) {
6334                 tracing_buffers_release(inode, filp);
6335                 return -EBUSY;
6336         }
6337
6338         info->iter.snapshot = true;
6339         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6340
6341         return ret;
6342 }
6343
6344 #endif /* CONFIG_TRACER_SNAPSHOT */
6345
6346
6347 static const struct file_operations tracing_thresh_fops = {
6348         .open           = tracing_open_generic,
6349         .read           = tracing_thresh_read,
6350         .write          = tracing_thresh_write,
6351         .llseek         = generic_file_llseek,
6352 };
6353
6354 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6355 static const struct file_operations tracing_max_lat_fops = {
6356         .open           = tracing_open_generic,
6357         .read           = tracing_max_lat_read,
6358         .write          = tracing_max_lat_write,
6359         .llseek         = generic_file_llseek,
6360 };
6361 #endif
6362
6363 static const struct file_operations set_tracer_fops = {
6364         .open           = tracing_open_generic,
6365         .read           = tracing_set_trace_read,
6366         .write          = tracing_set_trace_write,
6367         .llseek         = generic_file_llseek,
6368 };
6369
6370 static const struct file_operations tracing_pipe_fops = {
6371         .open           = tracing_open_pipe,
6372         .poll           = tracing_poll_pipe,
6373         .read           = tracing_read_pipe,
6374         .splice_read    = tracing_splice_read_pipe,
6375         .release        = tracing_release_pipe,
6376         .llseek         = no_llseek,
6377 };
6378
6379 static const struct file_operations tracing_entries_fops = {
6380         .open           = tracing_open_generic_tr,
6381         .read           = tracing_entries_read,
6382         .write          = tracing_entries_write,
6383         .llseek         = generic_file_llseek,
6384         .release        = tracing_release_generic_tr,
6385 };
6386
6387 static const struct file_operations tracing_total_entries_fops = {
6388         .open           = tracing_open_generic_tr,
6389         .read           = tracing_total_entries_read,
6390         .llseek         = generic_file_llseek,
6391         .release        = tracing_release_generic_tr,
6392 };
6393
6394 static const struct file_operations tracing_free_buffer_fops = {
6395         .open           = tracing_open_generic_tr,
6396         .write          = tracing_free_buffer_write,
6397         .release        = tracing_free_buffer_release,
6398 };
6399
6400 static const struct file_operations tracing_mark_fops = {
6401         .open           = tracing_open_generic_tr,
6402         .write          = tracing_mark_write,
6403         .llseek         = generic_file_llseek,
6404         .release        = tracing_release_generic_tr,
6405 };
6406
6407 static const struct file_operations tracing_mark_raw_fops = {
6408         .open           = tracing_open_generic_tr,
6409         .write          = tracing_mark_raw_write,
6410         .llseek         = generic_file_llseek,
6411         .release        = tracing_release_generic_tr,
6412 };
6413
6414 static const struct file_operations trace_clock_fops = {
6415         .open           = tracing_clock_open,
6416         .read           = seq_read,
6417         .llseek         = seq_lseek,
6418         .release        = tracing_single_release_tr,
6419         .write          = tracing_clock_write,
6420 };
6421
6422 #ifdef CONFIG_TRACER_SNAPSHOT
6423 static const struct file_operations snapshot_fops = {
6424         .open           = tracing_snapshot_open,
6425         .read           = seq_read,
6426         .write          = tracing_snapshot_write,
6427         .llseek         = tracing_lseek,
6428         .release        = tracing_snapshot_release,
6429 };
6430
6431 static const struct file_operations snapshot_raw_fops = {
6432         .open           = snapshot_raw_open,
6433         .read           = tracing_buffers_read,
6434         .release        = tracing_buffers_release,
6435         .splice_read    = tracing_buffers_splice_read,
6436         .llseek         = no_llseek,
6437 };
6438
6439 #endif /* CONFIG_TRACER_SNAPSHOT */
6440
6441 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6442 {
6443         struct trace_array *tr = inode->i_private;
6444         struct ftrace_buffer_info *info;
6445         int ret;
6446
6447         if (tracing_disabled)
6448                 return -ENODEV;
6449
6450         if (trace_array_get(tr) < 0)
6451                 return -ENODEV;
6452
6453         info = kzalloc(sizeof(*info), GFP_KERNEL);
6454         if (!info) {
6455                 trace_array_put(tr);
6456                 return -ENOMEM;
6457         }
6458
6459         mutex_lock(&trace_types_lock);
6460
6461         info->iter.tr           = tr;
6462         info->iter.cpu_file     = tracing_get_cpu(inode);
6463         info->iter.trace        = tr->current_trace;
6464         info->iter.trace_buffer = &tr->trace_buffer;
6465         info->spare             = NULL;
6466         /* Force reading ring buffer for first read */
6467         info->read              = (unsigned int)-1;
6468
6469         filp->private_data = info;
6470
6471         tr->current_trace->ref++;
6472
6473         mutex_unlock(&trace_types_lock);
6474
6475         ret = nonseekable_open(inode, filp);
6476         if (ret < 0)
6477                 trace_array_put(tr);
6478
6479         return ret;
6480 }
6481
6482 static unsigned int
6483 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6484 {
6485         struct ftrace_buffer_info *info = filp->private_data;
6486         struct trace_iterator *iter = &info->iter;
6487
6488         return trace_poll(iter, filp, poll_table);
6489 }
6490
6491 static ssize_t
6492 tracing_buffers_read(struct file *filp, char __user *ubuf,
6493                      size_t count, loff_t *ppos)
6494 {
6495         struct ftrace_buffer_info *info = filp->private_data;
6496         struct trace_iterator *iter = &info->iter;
6497         ssize_t ret;
6498         ssize_t size;
6499
6500         if (!count)
6501                 return 0;
6502
6503 #ifdef CONFIG_TRACER_MAX_TRACE
6504         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6505                 return -EBUSY;
6506 #endif
6507
6508         if (!info->spare) {
6509                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6510                                                           iter->cpu_file);
6511                 info->spare_cpu = iter->cpu_file;
6512         }
6513         if (!info->spare)
6514                 return -ENOMEM;
6515
6516         /* Do we have previous read data to read? */
6517         if (info->read < PAGE_SIZE)
6518                 goto read;
6519
6520  again:
6521         trace_access_lock(iter->cpu_file);
6522         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6523                                     &info->spare,
6524                                     count,
6525                                     iter->cpu_file, 0);
6526         trace_access_unlock(iter->cpu_file);
6527
6528         if (ret < 0) {
6529                 if (trace_empty(iter)) {
6530                         if ((filp->f_flags & O_NONBLOCK))
6531                                 return -EAGAIN;
6532
6533                         ret = wait_on_pipe(iter, false);
6534                         if (ret)
6535                                 return ret;
6536
6537                         goto again;
6538                 }
6539                 return 0;
6540         }
6541
6542         info->read = 0;
6543  read:
6544         size = PAGE_SIZE - info->read;
6545         if (size > count)
6546                 size = count;
6547
6548         ret = copy_to_user(ubuf, info->spare + info->read, size);
6549         if (ret == size)
6550                 return -EFAULT;
6551
6552         size -= ret;
6553
6554         *ppos += size;
6555         info->read += size;
6556
6557         return size;
6558 }
6559
6560 static int tracing_buffers_release(struct inode *inode, struct file *file)
6561 {
6562         struct ftrace_buffer_info *info = file->private_data;
6563         struct trace_iterator *iter = &info->iter;
6564
6565         mutex_lock(&trace_types_lock);
6566
6567         iter->tr->current_trace->ref--;
6568
6569         __trace_array_put(iter->tr);
6570
6571         if (info->spare)
6572                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6573                                            info->spare_cpu, info->spare);
6574         kfree(info);
6575
6576         mutex_unlock(&trace_types_lock);
6577
6578         return 0;
6579 }
6580
6581 struct buffer_ref {
6582         struct ring_buffer      *buffer;
6583         void                    *page;
6584         int                     cpu;
6585         int                     ref;
6586 };
6587
6588 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6589                                     struct pipe_buffer *buf)
6590 {
6591         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6592
6593         if (--ref->ref)
6594                 return;
6595
6596         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6597         kfree(ref);
6598         buf->private = 0;
6599 }
6600
6601 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6602                                 struct pipe_buffer *buf)
6603 {
6604         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6605
6606         ref->ref++;
6607 }
6608
6609 /* Pipe buffer operations for a buffer. */
6610 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6611         .can_merge              = 0,
6612         .confirm                = generic_pipe_buf_confirm,
6613         .release                = buffer_pipe_buf_release,
6614         .steal                  = generic_pipe_buf_steal,
6615         .get                    = buffer_pipe_buf_get,
6616 };
6617
6618 /*
6619  * Callback from splice_to_pipe(), if we need to release some pages
6620  * at the end of the spd in case we errored out while filling the pipe.
6621  */
6622 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6623 {
6624         struct buffer_ref *ref =
6625                 (struct buffer_ref *)spd->partial[i].private;
6626
6627         if (--ref->ref)
6628                 return;
6629
6630         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6631         kfree(ref);
6632         spd->partial[i].private = 0;
6633 }
6634
6635 static ssize_t
6636 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6637                             struct pipe_inode_info *pipe, size_t len,
6638                             unsigned int flags)
6639 {
6640         struct ftrace_buffer_info *info = file->private_data;
6641         struct trace_iterator *iter = &info->iter;
6642         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6643         struct page *pages_def[PIPE_DEF_BUFFERS];
6644         struct splice_pipe_desc spd = {
6645                 .pages          = pages_def,
6646                 .partial        = partial_def,
6647                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6648                 .ops            = &buffer_pipe_buf_ops,
6649                 .spd_release    = buffer_spd_release,
6650         };
6651         struct buffer_ref *ref;
6652         int entries, size, i;
6653         ssize_t ret = 0;
6654
6655 #ifdef CONFIG_TRACER_MAX_TRACE
6656         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6657                 return -EBUSY;
6658 #endif
6659
6660         if (*ppos & (PAGE_SIZE - 1))
6661                 return -EINVAL;
6662
6663         if (len & (PAGE_SIZE - 1)) {
6664                 if (len < PAGE_SIZE)
6665                         return -EINVAL;
6666                 len &= PAGE_MASK;
6667         }
6668
6669         if (splice_grow_spd(pipe, &spd))
6670                 return -ENOMEM;
6671
6672  again:
6673         trace_access_lock(iter->cpu_file);
6674         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6675
6676         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6677                 struct page *page;
6678                 int r;
6679
6680                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6681                 if (!ref) {
6682                         ret = -ENOMEM;
6683                         break;
6684                 }
6685
6686                 ref->ref = 1;
6687                 ref->buffer = iter->trace_buffer->buffer;
6688                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6689                 if (!ref->page) {
6690                         ret = -ENOMEM;
6691                         kfree(ref);
6692                         break;
6693                 }
6694                 ref->cpu = iter->cpu_file;
6695
6696                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6697                                           len, iter->cpu_file, 1);
6698                 if (r < 0) {
6699                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6700                                                    ref->page);
6701                         kfree(ref);
6702                         break;
6703                 }
6704
6705                 /*
6706                  * Zero out any leftover data; this page is going
6707                  * to user land.
6708                  */
6709                 size = ring_buffer_page_len(ref->page);
6710                 if (size < PAGE_SIZE)
6711                         memset(ref->page + size, 0, PAGE_SIZE - size);
6712
6713                 page = virt_to_page(ref->page);
6714
6715                 spd.pages[i] = page;
6716                 spd.partial[i].len = PAGE_SIZE;
6717                 spd.partial[i].offset = 0;
6718                 spd.partial[i].private = (unsigned long)ref;
6719                 spd.nr_pages++;
6720                 *ppos += PAGE_SIZE;
6721
6722                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6723         }
6724
6725         trace_access_unlock(iter->cpu_file);
6726         spd.nr_pages = i;
6727
6728         /* did we read anything? */
6729         if (!spd.nr_pages) {
6730                 if (ret)
6731                         goto out;
6732
6733                 ret = -EAGAIN;
6734                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6735                         goto out;
6736
6737                 ret = wait_on_pipe(iter, true);
6738                 if (ret)
6739                         goto out;
6740
6741                 goto again;
6742         }
6743
6744         ret = splice_to_pipe(pipe, &spd);
6745 out:
6746         splice_shrink_spd(&spd);
6747
6748         return ret;
6749 }
6750
6751 static const struct file_operations tracing_buffers_fops = {
6752         .open           = tracing_buffers_open,
6753         .read           = tracing_buffers_read,
6754         .poll           = tracing_buffers_poll,
6755         .release        = tracing_buffers_release,
6756         .splice_read    = tracing_buffers_splice_read,
6757         .llseek         = no_llseek,
6758 };
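/*
 * Illustrative user-space sketch (the tracefs mount point and the cpu0
 * file are assumptions, and this program is only an example of driving
 * the splice path above, not part of the tracing API): since splice(2)
 * needs a pipe on one side, raw ring-buffer pages are spliced into an
 * anonymous pipe first and then read out normally.  The loop streams
 * until splice() fails or the program is interrupted.
 */
#if 0	/* example only; build separately as a stand-alone program */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char page[65536];
	int raw, pfd[2];
	ssize_t n;

	raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
		   O_RDONLY);
	if (raw < 0 || pipe(pfd) < 0)
		return 1;

	/* Move whole pages from the ring buffer into the pipe... */
	while (splice(raw, NULL, pfd[1], NULL, sizeof(page), 0) > 0) {
		/* ...then drain the pipe with an ordinary read. */
		n = read(pfd[0], page, sizeof(page));
		if (n <= 0)
			break;
		fwrite(page, 1, n, stdout);
	}
	return 0;
}
#endif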
6759
6760 static ssize_t
6761 tracing_stats_read(struct file *filp, char __user *ubuf,
6762                    size_t count, loff_t *ppos)
6763 {
6764         struct inode *inode = file_inode(filp);
6765         struct trace_array *tr = inode->i_private;
6766         struct trace_buffer *trace_buf = &tr->trace_buffer;
6767         int cpu = tracing_get_cpu(inode);
6768         struct trace_seq *s;
6769         unsigned long cnt;
6770         unsigned long long t;
6771         unsigned long usec_rem;
6772
6773         s = kmalloc(sizeof(*s), GFP_KERNEL);
6774         if (!s)
6775                 return -ENOMEM;
6776
6777         trace_seq_init(s);
6778
6779         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6780         trace_seq_printf(s, "entries: %ld\n", cnt);
6781
6782         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6783         trace_seq_printf(s, "overrun: %ld\n", cnt);
6784
6785         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6786         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6787
6788         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6789         trace_seq_printf(s, "bytes: %ld\n", cnt);
6790
6791         if (trace_clocks[tr->clock_id].in_ns) {
6792                 /* local or global for trace_clock */
6793                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6794                 usec_rem = do_div(t, USEC_PER_SEC);
6795                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6796                                                                 t, usec_rem);
6797
6798                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6799                 usec_rem = do_div(t, USEC_PER_SEC);
6800                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6801         } else {
6802                 /* counter or tsc mode for trace_clock */
6803                 trace_seq_printf(s, "oldest event ts: %llu\n",
6804                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6805
6806                 trace_seq_printf(s, "now ts: %llu\n",
6807                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6808         }
6809
6810         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6811         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6812
6813         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6814         trace_seq_printf(s, "read events: %ld\n", cnt);
6815
6816         count = simple_read_from_buffer(ubuf, count, ppos,
6817                                         s->buffer, trace_seq_used(s));
6818
6819         kfree(s);
6820
6821         return count;
6822 }
6823
6824 static const struct file_operations tracing_stats_fops = {
6825         .open           = tracing_open_generic_tr,
6826         .read           = tracing_stats_read,
6827         .llseek         = generic_file_llseek,
6828         .release        = tracing_release_generic_tr,
6829 };
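/*
 * For reference, reading a per_cpu/cpuN/stats file yields one
 * "key: value" line per counter printed above; the numbers below are
 * purely illustrative:
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7902
 *	oldest event ts: 52219.583721
 *	now ts: 52656.247152
 *	dropped events: 0
 *	read events: 107
 */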
6830
6831 #ifdef CONFIG_DYNAMIC_FTRACE
6832
6833 static ssize_t
6834 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6835                   size_t cnt, loff_t *ppos)
6836 {
6837         unsigned long *p = filp->private_data;
6838         char buf[64]; /* Not too big for a shallow stack */
6839         int r;
6840
6841         r = scnprintf(buf, 63, "%ld", *p);
6842         buf[r++] = '\n';
6843
6844         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6845 }
6846
6847 static const struct file_operations tracing_dyn_info_fops = {
6848         .open           = tracing_open_generic,
6849         .read           = tracing_read_dyn_info,
6850         .llseek         = generic_file_llseek,
6851 };
6852 #endif /* CONFIG_DYNAMIC_FTRACE */
6853
6854 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6855 static void
6856 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6857                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6858                 void *data)
6859 {
6860         tracing_snapshot_instance(tr);
6861 }
6862
6863 static void
6864 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6865                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6866                       void *data)
6867 {
6868         struct ftrace_func_mapper *mapper = data;
6869         long *count = NULL;
6870
6871         if (mapper)
6872                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6873
6874         if (count) {
6875
6876                 if (*count <= 0)
6877                         return;
6878
6879                 (*count)--;
6880         }
6881
6882         tracing_snapshot_instance(tr);
6883 }
6884
6885 static int
6886 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6887                       struct ftrace_probe_ops *ops, void *data)
6888 {
6889         struct ftrace_func_mapper *mapper = data;
6890         long *count = NULL;
6891
6892         seq_printf(m, "%ps:", (void *)ip);
6893
6894         seq_puts(m, "snapshot");
6895
6896         if (mapper)
6897                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6898
6899         if (count)
6900                 seq_printf(m, ":count=%ld\n", *count);
6901         else
6902                 seq_puts(m, ":unlimited\n");
6903
6904         return 0;
6905 }
6906
6907 static int
6908 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6909                      unsigned long ip, void *init_data, void **data)
6910 {
6911         struct ftrace_func_mapper *mapper = *data;
6912
6913         if (!mapper) {
6914                 mapper = allocate_ftrace_func_mapper();
6915                 if (!mapper)
6916                         return -ENOMEM;
6917                 *data = mapper;
6918         }
6919
6920         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6921 }
6922
6923 static void
6924 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6925                      unsigned long ip, void *data)
6926 {
6927         struct ftrace_func_mapper *mapper = data;
6928
6929         if (!ip) {
6930                 if (!mapper)
6931                         return;
6932                 free_ftrace_func_mapper(mapper, NULL);
6933                 return;
6934         }
6935
6936         ftrace_func_mapper_remove_ip(mapper, ip);
6937 }
6938
6939 static struct ftrace_probe_ops snapshot_probe_ops = {
6940         .func                   = ftrace_snapshot,
6941         .print                  = ftrace_snapshot_print,
6942 };
6943
6944 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6945         .func                   = ftrace_count_snapshot,
6946         .print                  = ftrace_snapshot_print,
6947         .init                   = ftrace_snapshot_init,
6948         .free                   = ftrace_snapshot_free,
6949 };
6950
6951 static int
6952 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6953                                char *glob, char *cmd, char *param, int enable)
6954 {
6955         struct ftrace_probe_ops *ops;
6956         void *count = (void *)-1;
6957         char *number;
6958         int ret;
6959
6960         if (!tr)
6961                 return -ENODEV;
6962
6963         /* hash funcs only work with set_ftrace_filter */
6964         if (!enable)
6965                 return -EINVAL;
6966
6967         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6968
6969         if (glob[0] == '!')
6970                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6971
6972         if (!param)
6973                 goto out_reg;
6974
6975         number = strsep(&param, ":");
6976
6977         if (!strlen(number))
6978                 goto out_reg;
6979
6980         /*
6981          * We use the callback data field (which is a pointer)
6982          * as our counter.
6983          */
6984         ret = kstrtoul(number, 0, (unsigned long *)&count);
6985         if (ret)
6986                 return ret;
6987
6988  out_reg:
6989         ret = alloc_snapshot(tr);
6990         if (ret < 0)
6991                 goto out;
6992
6993         ret = register_ftrace_function_probe(glob, tr, ops, count);
6994
6995  out:
6996         return ret < 0 ? ret : 0;
6997 }
6998
6999 static struct ftrace_func_command ftrace_snapshot_cmd = {
7000         .name                   = "snapshot",
7001         .func                   = ftrace_trace_snapshot_callback,
7002 };
7003
7004 static __init int register_snapshot_cmd(void)
7005 {
7006         return register_ftrace_command(&ftrace_snapshot_cmd);
7007 }
7008 #else
7009 static inline __init int register_snapshot_cmd(void) { return 0; }
7010 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
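/*
 * Usage sketch for the "snapshot" command registered above (the
 * function name is an arbitrary example; syntax as documented for
 * set_ftrace_filter):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *	echo 'schedule:snapshot:3' >> set_ftrace_filter   # only the first 3 hits
 *	echo '!schedule:snapshot' >> set_ftrace_filter    # remove the probe
 *
 * The optional count is parsed by ftrace_trace_snapshot_callback() into
 * the probe's data pointer, which ftrace_count_snapshot() decrements on
 * each hit.
 */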
7011
7012 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7013 {
7014         if (WARN_ON(!tr->dir))
7015                 return ERR_PTR(-ENODEV);
7016
7017         /* Top directory uses NULL as the parent */
7018         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7019                 return NULL;
7020
7021         /* All sub buffers have a descriptor */
7022         return tr->dir;
7023 }
7024
7025 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7026 {
7027         struct dentry *d_tracer;
7028
7029         if (tr->percpu_dir)
7030                 return tr->percpu_dir;
7031
7032         d_tracer = tracing_get_dentry(tr);
7033         if (IS_ERR(d_tracer))
7034                 return NULL;
7035
7036         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7037
7038         WARN_ONCE(!tr->percpu_dir,
7039                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7040
7041         return tr->percpu_dir;
7042 }
7043
7044 static struct dentry *
7045 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7046                       void *data, long cpu, const struct file_operations *fops)
7047 {
7048         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7049
7050         if (ret) /* See tracing_get_cpu() */
7051                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7052         return ret;
7053 }
7054
7055 static void
7056 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7057 {
7058         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7059         struct dentry *d_cpu;
7060         char cpu_dir[30]; /* 30 characters should be more than enough */
7061
7062         if (!d_percpu)
7063                 return;
7064
7065         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7066         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7067         if (!d_cpu) {
7068                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7069                 return;
7070         }
7071
7072         /* per cpu trace_pipe */
7073         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7074                                 tr, cpu, &tracing_pipe_fops);
7075
7076         /* per cpu trace */
7077         trace_create_cpu_file("trace", 0644, d_cpu,
7078                                 tr, cpu, &tracing_fops);
7079
7080         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7081                                 tr, cpu, &tracing_buffers_fops);
7082
7083         trace_create_cpu_file("stats", 0444, d_cpu,
7084                                 tr, cpu, &tracing_stats_fops);
7085
7086         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7087                                 tr, cpu, &tracing_entries_fops);
7088
7089 #ifdef CONFIG_TRACER_SNAPSHOT
7090         trace_create_cpu_file("snapshot", 0644, d_cpu,
7091                                 tr, cpu, &snapshot_fops);
7092
7093         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7094                                 tr, cpu, &snapshot_raw_fops);
7095 #endif
7096 }
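/*
 * Resulting per-CPU layout for one CPU (the snapshot files only exist
 * with CONFIG_TRACER_SNAPSHOT):
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot
 *	per_cpu/cpu0/snapshot_raw
 */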
7097
7098 #ifdef CONFIG_FTRACE_SELFTEST
7099 /* Let selftest have access to static functions in this file */
7100 #include "trace_selftest.c"
7101 #endif
7102
7103 static ssize_t
7104 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7105                         loff_t *ppos)
7106 {
7107         struct trace_option_dentry *topt = filp->private_data;
7108         char *buf;
7109
7110         if (topt->flags->val & topt->opt->bit)
7111                 buf = "1\n";
7112         else
7113                 buf = "0\n";
7114
7115         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7116 }
7117
7118 static ssize_t
7119 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7120                          loff_t *ppos)
7121 {
7122         struct trace_option_dentry *topt = filp->private_data;
7123         unsigned long val;
7124         int ret;
7125
7126         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7127         if (ret)
7128                 return ret;
7129
7130         if (val != 0 && val != 1)
7131                 return -EINVAL;
7132
7133         if (!!(topt->flags->val & topt->opt->bit) != val) {
7134                 mutex_lock(&trace_types_lock);
7135                 ret = __set_tracer_option(topt->tr, topt->flags,
7136                                           topt->opt, !val);
7137                 mutex_unlock(&trace_types_lock);
7138                 if (ret)
7139                         return ret;
7140         }
7141
7142         *ppos += cnt;
7143
7144         return cnt;
7145 }
7146
7147
7148 static const struct file_operations trace_options_fops = {
7149         .open = tracing_open_generic,
7150         .read = trace_options_read,
7151         .write = trace_options_write,
7152         .llseek = generic_file_llseek,
7153 };
7154
7155 /*
7156  * In order to pass in both the trace_array descriptor as well as the index
7157  * to the flag that the trace option file represents, the trace_array
7158  * has a character array of trace_flags_index[], which holds the index
7159  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7160  * The address of this character array is passed to the flag option file
7161  * read/write callbacks.
7162  *
7163  * In order to extract both the index and the trace_array descriptor,
7164  * get_tr_index() uses the following algorithm.
7165  *
7166  *   idx = *ptr;
7167  *
7168  * As the pointer points directly at the index value (remember,
7169  * index[1] == 1), dereferencing it yields the index.
7170  *
7171  * Then, to get the trace_array descriptor, we subtract that index
7172  * from the pointer, which gets us back to the start of the index array:
7173  *
7174  *   ptr - idx == &index[0]
7175  *
7176  * Then a simple container_of() from that pointer gets us to the
7177  * trace_array descriptor.
7178  */
7179 static void get_tr_index(void *data, struct trace_array **ptr,
7180                          unsigned int *pindex)
7181 {
7182         *pindex = *(unsigned char *)data;
7183
7184         *ptr = container_of(data - *pindex, struct trace_array,
7185                             trace_flags_index);
7186 }
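/*
 * Worked example of the encoding above (the index value is chosen
 * arbitrarily): if a core option file was created with
 * data == &tr->trace_flags_index[5], then that byte holds 5, so
 * *pindex == 5 and data - 5 == &tr->trace_flags_index[0]; the
 * container_of() then recovers the enclosing trace_array.
 */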
7187
7188 static ssize_t
7189 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7190                         loff_t *ppos)
7191 {
7192         void *tr_index = filp->private_data;
7193         struct trace_array *tr;
7194         unsigned int index;
7195         char *buf;
7196
7197         get_tr_index(tr_index, &tr, &index);
7198
7199         if (tr->trace_flags & (1 << index))
7200                 buf = "1\n";
7201         else
7202                 buf = "0\n";
7203
7204         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7205 }
7206
7207 static ssize_t
7208 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7209                          loff_t *ppos)
7210 {
7211         void *tr_index = filp->private_data;
7212         struct trace_array *tr;
7213         unsigned int index;
7214         unsigned long val;
7215         int ret;
7216
7217         get_tr_index(tr_index, &tr, &index);
7218
7219         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7220         if (ret)
7221                 return ret;
7222
7223         if (val != 0 && val != 1)
7224                 return -EINVAL;
7225
7226         mutex_lock(&trace_types_lock);
7227         ret = set_tracer_flag(tr, 1 << index, val);
7228         mutex_unlock(&trace_types_lock);
7229
7230         if (ret < 0)
7231                 return ret;
7232
7233         *ppos += cnt;
7234
7235         return cnt;
7236 }
7237
7238 static const struct file_operations trace_options_core_fops = {
7239         .open = tracing_open_generic,
7240         .read = trace_options_core_read,
7241         .write = trace_options_core_write,
7242         .llseek = generic_file_llseek,
7243 };
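/*
 * Both kinds of option file end up under the per-array "options"
 * directory.  A hedged usage sketch (which names exist depends on the
 * flags and tracers compiled in):
 *
 *	echo 1 > options/sym-offset    # core flag, handled by trace_options_core_write()
 *	echo 0 > options/sym-offset
 *
 * Tracer-specific options created by create_trace_option_file() below
 * are toggled the same way but go through trace_options_write() instead.
 */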
7244
7245 struct dentry *trace_create_file(const char *name,
7246                                  umode_t mode,
7247                                  struct dentry *parent,
7248                                  void *data,
7249                                  const struct file_operations *fops)
7250 {
7251         struct dentry *ret;
7252
7253         ret = tracefs_create_file(name, mode, parent, data, fops);
7254         if (!ret)
7255                 pr_warn("Could not create tracefs '%s' entry\n", name);
7256
7257         return ret;
7258 }
7259
7260
7261 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7262 {
7263         struct dentry *d_tracer;
7264
7265         if (tr->options)
7266                 return tr->options;
7267
7268         d_tracer = tracing_get_dentry(tr);
7269         if (IS_ERR(d_tracer))
7270                 return NULL;
7271
7272         tr->options = tracefs_create_dir("options", d_tracer);
7273         if (!tr->options) {
7274                 pr_warn("Could not create tracefs directory 'options'\n");
7275                 return NULL;
7276         }
7277
7278         return tr->options;
7279 }
7280
7281 static void
7282 create_trace_option_file(struct trace_array *tr,
7283                          struct trace_option_dentry *topt,
7284                          struct tracer_flags *flags,
7285                          struct tracer_opt *opt)
7286 {
7287         struct dentry *t_options;
7288
7289         t_options = trace_options_init_dentry(tr);
7290         if (!t_options)
7291                 return;
7292
7293         topt->flags = flags;
7294         topt->opt = opt;
7295         topt->tr = tr;
7296
7297         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7298                                     &trace_options_fops);
7299
7300 }
7301
7302 static void
7303 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7304 {
7305         struct trace_option_dentry *topts;
7306         struct trace_options *tr_topts;
7307         struct tracer_flags *flags;
7308         struct tracer_opt *opts;
7309         int cnt;
7310         int i;
7311
7312         if (!tracer)
7313                 return;
7314
7315         flags = tracer->flags;
7316
7317         if (!flags || !flags->opts)
7318                 return;
7319
7320         /*
7321          * If this is an instance, only create flags for tracers
7322          * the instance may have.
7323          */
7324         if (!trace_ok_for_array(tracer, tr))
7325                 return;
7326
7327         for (i = 0; i < tr->nr_topts; i++) {
7328                 /* Make sure there are no duplicate flags. */
7329                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7330                         return;
7331         }
7332
7333         opts = flags->opts;
7334
7335         for (cnt = 0; opts[cnt].name; cnt++)
7336                 ;
7337
7338         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7339         if (!topts)
7340                 return;
7341
7342         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7343                             GFP_KERNEL);
7344         if (!tr_topts) {
7345                 kfree(topts);
7346                 return;
7347         }
7348
7349         tr->topts = tr_topts;
7350         tr->topts[tr->nr_topts].tracer = tracer;
7351         tr->topts[tr->nr_topts].topts = topts;
7352         tr->nr_topts++;
7353
7354         for (cnt = 0; opts[cnt].name; cnt++) {
7355                 create_trace_option_file(tr, &topts[cnt], flags,
7356                                          &opts[cnt]);
7357                 WARN_ONCE(topts[cnt].entry == NULL,
7358                           "Failed to create trace option: %s",
7359                           opts[cnt].name);
7360         }
7361 }
7362
7363 static struct dentry *
7364 create_trace_option_core_file(struct trace_array *tr,
7365                               const char *option, long index)
7366 {
7367         struct dentry *t_options;
7368
7369         t_options = trace_options_init_dentry(tr);
7370         if (!t_options)
7371                 return NULL;
7372
7373         return trace_create_file(option, 0644, t_options,
7374                                  (void *)&tr->trace_flags_index[index],
7375                                  &trace_options_core_fops);
7376 }
7377
7378 static void create_trace_options_dir(struct trace_array *tr)
7379 {
7380         struct dentry *t_options;
7381         bool top_level = tr == &global_trace;
7382         int i;
7383
7384         t_options = trace_options_init_dentry(tr);
7385         if (!t_options)
7386                 return;
7387
7388         for (i = 0; trace_options[i]; i++) {
7389                 if (top_level ||
7390                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7391                         create_trace_option_core_file(tr, trace_options[i], i);
7392         }
7393 }
7394
7395 static ssize_t
7396 rb_simple_read(struct file *filp, char __user *ubuf,
7397                size_t cnt, loff_t *ppos)
7398 {
7399         struct trace_array *tr = filp->private_data;
7400         char buf[64];
7401         int r;
7402
7403         r = tracer_tracing_is_on(tr);
7404         r = sprintf(buf, "%d\n", r);
7405
7406         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7407 }
7408
7409 static ssize_t
7410 rb_simple_write(struct file *filp, const char __user *ubuf,
7411                 size_t cnt, loff_t *ppos)
7412 {
7413         struct trace_array *tr = filp->private_data;
7414         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7415         unsigned long val;
7416         int ret;
7417
7418         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7419         if (ret)
7420                 return ret;
7421
7422         if (buffer) {
7423                 mutex_lock(&trace_types_lock);
7424                 if (val) {
7425                         tracer_tracing_on(tr);
7426                         if (tr->current_trace->start)
7427                                 tr->current_trace->start(tr);
7428                 } else {
7429                         tracer_tracing_off(tr);
7430                         if (tr->current_trace->stop)
7431                                 tr->current_trace->stop(tr);
7432                 }
7433                 mutex_unlock(&trace_types_lock);
7434         }
7435
7436         (*ppos)++;
7437
7438         return cnt;
7439 }
7440
7441 static const struct file_operations rb_simple_fops = {
7442         .open           = tracing_open_generic_tr,
7443         .read           = rb_simple_read,
7444         .write          = rb_simple_write,
7445         .release        = tracing_release_generic_tr,
7446         .llseek         = default_llseek,
7447 };
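/*
 * These ops back the per-array "tracing_on" file created in
 * init_tracer_tracefs() below.  Quick usage sketch (the path assumes
 * the usual tracefs mount):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on    # stop writing to the ring buffer
 *	echo 1 > /sys/kernel/tracing/tracing_on    # resume
 *
 * As rb_simple_write() shows, the current tracer's ->start()/->stop()
 * callbacks are also invoked when they exist.
 */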
7448
7449 struct dentry *trace_instance_dir;
7450
7451 static void
7452 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7453
7454 static int
7455 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7456 {
7457         enum ring_buffer_flags rb_flags;
7458
7459         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7460
7461         buf->tr = tr;
7462
7463         buf->buffer = ring_buffer_alloc(size, rb_flags);
7464         if (!buf->buffer)
7465                 return -ENOMEM;
7466
7467         buf->data = alloc_percpu(struct trace_array_cpu);
7468         if (!buf->data) {
7469                 ring_buffer_free(buf->buffer);
7470                 return -ENOMEM;
7471         }
7472
7473         /* Allocate the first page for all buffers */
7474         set_buffer_entries(&tr->trace_buffer,
7475                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7476
7477         return 0;
7478 }
7479
7480 static int allocate_trace_buffers(struct trace_array *tr, int size)
7481 {
7482         int ret;
7483
7484         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7485         if (ret)
7486                 return ret;
7487
7488 #ifdef CONFIG_TRACER_MAX_TRACE
7489         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7490                                     allocate_snapshot ? size : 1);
7491         if (WARN_ON(ret)) {
7492                 ring_buffer_free(tr->trace_buffer.buffer);
7493                 free_percpu(tr->trace_buffer.data);
7494                 return -ENOMEM;
7495         }
7496         tr->allocated_snapshot = allocate_snapshot;
7497
7498         /*
7499          * Only the top level trace array gets its snapshot allocated
7500          * from the kernel command line.
7501          */
7502         allocate_snapshot = false;
7503 #endif
7504         return 0;
7505 }
7506
7507 static void free_trace_buffer(struct trace_buffer *buf)
7508 {
7509         if (buf->buffer) {
7510                 ring_buffer_free(buf->buffer);
7511                 buf->buffer = NULL;
7512                 free_percpu(buf->data);
7513                 buf->data = NULL;
7514         }
7515 }
7516
7517 static void free_trace_buffers(struct trace_array *tr)
7518 {
7519         if (!tr)
7520                 return;
7521
7522         free_trace_buffer(&tr->trace_buffer);
7523
7524 #ifdef CONFIG_TRACER_MAX_TRACE
7525         free_trace_buffer(&tr->max_buffer);
7526 #endif
7527 }
7528
7529 static void init_trace_flags_index(struct trace_array *tr)
7530 {
7531         int i;
7532
7533         /* Used by the trace options files */
7534         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7535                 tr->trace_flags_index[i] = i;
7536 }
7537
7538 static void __update_tracer_options(struct trace_array *tr)
7539 {
7540         struct tracer *t;
7541
7542         for (t = trace_types; t; t = t->next)
7543                 add_tracer_options(tr, t);
7544 }
7545
7546 static void update_tracer_options(struct trace_array *tr)
7547 {
7548         mutex_lock(&trace_types_lock);
7549         __update_tracer_options(tr);
7550         mutex_unlock(&trace_types_lock);
7551 }
7552
7553 static int instance_mkdir(const char *name)
7554 {
7555         struct trace_array *tr;
7556         int ret;
7557
7558         mutex_lock(&trace_types_lock);
7559
7560         ret = -EEXIST;
7561         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7562                 if (tr->name && strcmp(tr->name, name) == 0)
7563                         goto out_unlock;
7564         }
7565
7566         ret = -ENOMEM;
7567         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7568         if (!tr)
7569                 goto out_unlock;
7570
7571         tr->name = kstrdup(name, GFP_KERNEL);
7572         if (!tr->name)
7573                 goto out_free_tr;
7574
7575         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7576                 goto out_free_tr;
7577
7578         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7579
7580         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7581
7582         raw_spin_lock_init(&tr->start_lock);
7583
7584         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7585
7586         tr->current_trace = &nop_trace;
7587
7588         INIT_LIST_HEAD(&tr->systems);
7589         INIT_LIST_HEAD(&tr->events);
7590
7591         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7592                 goto out_free_tr;
7593
7594         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7595         if (!tr->dir)
7596                 goto out_free_tr;
7597
7598         ret = event_trace_add_tracer(tr->dir, tr);
7599         if (ret) {
7600                 tracefs_remove_recursive(tr->dir);
7601                 goto out_free_tr;
7602         }
7603
7604         ftrace_init_trace_array(tr);
7605
7606         init_tracer_tracefs(tr, tr->dir);
7607         init_trace_flags_index(tr);
7608         __update_tracer_options(tr);
7609
7610         list_add(&tr->list, &ftrace_trace_arrays);
7611
7612         mutex_unlock(&trace_types_lock);
7613
7614         return 0;
7615
7616  out_free_tr:
7617         free_trace_buffers(tr);
7618         free_cpumask_var(tr->tracing_cpumask);
7619         kfree(tr->name);
7620         kfree(tr);
7621
7622  out_unlock:
7623         mutex_unlock(&trace_types_lock);
7624
7625         return ret;
7626
7627 }
7628
7629 static int instance_rmdir(const char *name)
7630 {
7631         struct trace_array *tr;
7632         int found = 0;
7633         int ret;
7634         int i;
7635
7636         mutex_lock(&trace_types_lock);
7637
7638         ret = -ENODEV;
7639         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7640                 if (tr->name && strcmp(tr->name, name) == 0) {
7641                         found = 1;
7642                         break;
7643                 }
7644         }
7645         if (!found)
7646                 goto out_unlock;
7647
7648         ret = -EBUSY;
7649         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7650                 goto out_unlock;
7651
7652         list_del(&tr->list);
7653
7654         /* Disable all the flags that were enabled coming in */
7655         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7656                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7657                         set_tracer_flag(tr, 1 << i, 0);
7658         }
7659
7660         tracing_set_nop(tr);
7661         clear_ftrace_function_probes(tr);
7662         event_trace_del_tracer(tr);
7663         ftrace_clear_pids(tr);
7664         ftrace_destroy_function_files(tr);
7665         tracefs_remove_recursive(tr->dir);
7666         free_trace_buffers(tr);
7667
7668         for (i = 0; i < tr->nr_topts; i++) {
7669                 kfree(tr->topts[i].topts);
7670         }
7671         kfree(tr->topts);
7672
7673         kfree(tr->name);
7674         kfree(tr);
7675
7676         ret = 0;
7677
7678  out_unlock:
7679         mutex_unlock(&trace_types_lock);
7680
7681         return ret;
7682 }
7683
7684 static __init void create_trace_instances(struct dentry *d_tracer)
7685 {
7686         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7687                                                          instance_mkdir,
7688                                                          instance_rmdir);
7689         if (WARN_ON(!trace_instance_dir))
7690                 return;
7691 }
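/*
 * The instances directory maps plain mkdir/rmdir onto the callbacks
 * above, so no dedicated tool is needed to create or tear down a
 * sub-buffer (the instance name is an arbitrary example):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while any of the instance's files are
 * still held open, per the reference checks in instance_rmdir().
 */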
7692
7693 static void
7694 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7695 {
7696         int cpu;
7697
7698         trace_create_file("available_tracers", 0444, d_tracer,
7699                         tr, &show_traces_fops);
7700
7701         trace_create_file("current_tracer", 0644, d_tracer,
7702                         tr, &set_tracer_fops);
7703
7704         trace_create_file("tracing_cpumask", 0644, d_tracer,
7705                           tr, &tracing_cpumask_fops);
7706
7707         trace_create_file("trace_options", 0644, d_tracer,
7708                           tr, &tracing_iter_fops);
7709
7710         trace_create_file("trace", 0644, d_tracer,
7711                           tr, &tracing_fops);
7712
7713         trace_create_file("trace_pipe", 0444, d_tracer,
7714                           tr, &tracing_pipe_fops);
7715
7716         trace_create_file("buffer_size_kb", 0644, d_tracer,
7717                           tr, &tracing_entries_fops);
7718
7719         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7720                           tr, &tracing_total_entries_fops);
7721
7722         trace_create_file("free_buffer", 0200, d_tracer,
7723                           tr, &tracing_free_buffer_fops);
7724
7725         trace_create_file("trace_marker", 0220, d_tracer,
7726                           tr, &tracing_mark_fops);
7727
7728         trace_create_file("trace_marker_raw", 0220, d_tracer,
7729                           tr, &tracing_mark_raw_fops);
7730
7731         trace_create_file("trace_clock", 0644, d_tracer, tr,
7732                           &trace_clock_fops);
7733
7734         trace_create_file("tracing_on", 0644, d_tracer,
7735                           tr, &rb_simple_fops);
7736
7737         create_trace_options_dir(tr);
7738
7739 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7740         trace_create_file("tracing_max_latency", 0644, d_tracer,
7741                         &tr->max_latency, &tracing_max_lat_fops);
7742 #endif
7743
7744         if (ftrace_create_function_files(tr, d_tracer))
7745                 WARN(1, "Could not allocate function filter files");
7746
7747 #ifdef CONFIG_TRACER_SNAPSHOT
7748         trace_create_file("snapshot", 0644, d_tracer,
7749                           tr, &snapshot_fops);
7750 #endif
7751
7752         for_each_tracing_cpu(cpu)
7753                 tracing_init_tracefs_percpu(tr, cpu);
7754
7755         ftrace_init_tracefs(tr, d_tracer);
7756 }
7757
7758 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7759 {
7760         struct vfsmount *mnt;
7761         struct file_system_type *type;
7762
7763         /*
7764          * To maintain backward compatibility for tools that mount
7765          * debugfs to get to the tracing facility, tracefs is automatically
7766          * mounted to the debugfs/tracing directory.
7767          */
7768         type = get_fs_type("tracefs");
7769         if (!type)
7770                 return NULL;
7771         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7772         put_filesystem(type);
7773         if (IS_ERR(mnt))
7774                 return NULL;
7775         mntget(mnt);
7776
7777         return mnt;
7778 }
7779
7780 /**
7781  * tracing_init_dentry - initialize top level trace array
7782  *
7783  * This is called when creating files or directories in the tracing
7784  * directory. It is called via fs_initcall() by any of the boot up code
7785  * and expects to return the dentry of the top level tracing directory.
7786  */
7787 struct dentry *tracing_init_dentry(void)
7788 {
7789         struct trace_array *tr = &global_trace;
7790
7791         /* The top level trace array uses NULL as parent */
7792         if (tr->dir)
7793                 return NULL;
7794
7795         if (WARN_ON(!tracefs_initialized()) ||
7796                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7797                  WARN_ON(!debugfs_initialized())))
7798                 return ERR_PTR(-ENODEV);
7799
7800         /*
7801          * As there may still be users that expect the tracing
7802          * files to exist in debugfs/tracing, we must automount
7803          * the tracefs file system there, so older tools still
7804          * work with the newer kernel.
7805          */
7806         tr->dir = debugfs_create_automount("tracing", NULL,
7807                                            trace_automount, NULL);
7808         if (!tr->dir) {
7809                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7810                 return ERR_PTR(-ENOMEM);
7811         }
7812
7813         return NULL;
7814 }
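/*
 * In practice the automount above means both of the following reach the
 * same top-level tracing directory (mount points are the conventional
 * defaults, not requirements):
 *
 *	mount -t tracefs nodev /sys/kernel/tracing
 *	ls /sys/kernel/debug/tracing      # automounted via trace_automount()
 */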
7815
7816 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7817 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7818
7819 static void __init trace_eval_init(void)
7820 {
7821         int len;
7822
7823         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7824         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7825 }
7826
7827 #ifdef CONFIG_MODULES
7828 static void trace_module_add_evals(struct module *mod)
7829 {
7830         if (!mod->num_trace_evals)
7831                 return;
7832
7833         /*
7834          * Modules with bad taint do not have events created; do
7835          * not bother with their eval maps either.
7836          */
7837         if (trace_module_has_bad_taint(mod))
7838                 return;
7839
7840         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7841 }
7842
7843 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7844 static void trace_module_remove_evals(struct module *mod)
7845 {
7846         union trace_eval_map_item *map;
7847         union trace_eval_map_item **last = &trace_eval_maps;
7848
7849         if (!mod->num_trace_evals)
7850                 return;
7851
7852         mutex_lock(&trace_eval_mutex);
7853
7854         map = trace_eval_maps;
7855
7856         while (map) {
7857                 if (map->head.mod == mod)
7858                         break;
7859                 map = trace_eval_jmp_to_tail(map);
7860                 last = &map->tail.next;
7861                 map = map->tail.next;
7862         }
7863         if (!map)
7864                 goto out;
7865
7866         *last = trace_eval_jmp_to_tail(map)->tail.next;
7867         kfree(map);
7868  out:
7869         mutex_unlock(&trace_eval_mutex);
7870 }
7871 #else
7872 static inline void trace_module_remove_evals(struct module *mod) { }
7873 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7874
7875 static int trace_module_notify(struct notifier_block *self,
7876                                unsigned long val, void *data)
7877 {
7878         struct module *mod = data;
7879
7880         switch (val) {
7881         case MODULE_STATE_COMING:
7882                 trace_module_add_evals(mod);
7883                 break;
7884         case MODULE_STATE_GOING:
7885                 trace_module_remove_evals(mod);
7886                 break;
7887         }
7888
7889         return 0;
7890 }
7891
7892 static struct notifier_block trace_module_nb = {
7893         .notifier_call = trace_module_notify,
7894         .priority = 0,
7895 };
7896 #endif /* CONFIG_MODULES */
7897
7898 static __init int tracer_init_tracefs(void)
7899 {
7900         struct dentry *d_tracer;
7901
7902         trace_access_lock_init();
7903
7904         d_tracer = tracing_init_dentry();
7905         if (IS_ERR(d_tracer))
7906                 return 0;
7907
7908         init_tracer_tracefs(&global_trace, d_tracer);
7909         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7910
7911         trace_create_file("tracing_thresh", 0644, d_tracer,
7912                         &global_trace, &tracing_thresh_fops);
7913
7914         trace_create_file("README", 0444, d_tracer,
7915                         NULL, &tracing_readme_fops);
7916
7917         trace_create_file("saved_cmdlines", 0444, d_tracer,
7918                         NULL, &tracing_saved_cmdlines_fops);
7919
7920         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7921                           NULL, &tracing_saved_cmdlines_size_fops);
7922
7923         trace_eval_init();
7924
7925         trace_create_eval_file(d_tracer);
7926
7927 #ifdef CONFIG_MODULES
7928         register_module_notifier(&trace_module_nb);
7929 #endif
7930
7931 #ifdef CONFIG_DYNAMIC_FTRACE
7932         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7933                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7934 #endif
7935
7936         create_trace_instances(d_tracer);
7937
7938         update_tracer_options(&global_trace);
7939
7940         return 0;
7941 }
7942
7943 static int trace_panic_handler(struct notifier_block *this,
7944                                unsigned long event, void *unused)
7945 {
7946         if (ftrace_dump_on_oops)
7947                 ftrace_dump(ftrace_dump_on_oops);
7948         return NOTIFY_OK;
7949 }
7950
7951 static struct notifier_block trace_panic_notifier = {
7952         .notifier_call  = trace_panic_handler,
7953         .next           = NULL,
7954         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7955 };
7956
7957 static int trace_die_handler(struct notifier_block *self,
7958                              unsigned long val,
7959                              void *data)
7960 {
7961         switch (val) {
7962         case DIE_OOPS:
7963                 if (ftrace_dump_on_oops)
7964                         ftrace_dump(ftrace_dump_on_oops);
7965                 break;
7966         default:
7967                 break;
7968         }
7969         return NOTIFY_OK;
7970 }
7971
7972 static struct notifier_block trace_die_notifier = {
7973         .notifier_call = trace_die_handler,
7974         .priority = 200
7975 };
7976
7977 /*
7978  * printk is set to a max of 1024; we really don't need it that big.
7979  * Nothing should be printing 1000 characters anyway.
7980  */
7981 #define TRACE_MAX_PRINT         1000
7982
7983 /*
7984  * Define KERN_TRACE here so that we have one place to modify
7985  * it if we decide to change what log level the ftrace dump
7986  * should be at.
7987  */
7988 #define KERN_TRACE              KERN_EMERG
7989
7990 void
7991 trace_printk_seq(struct trace_seq *s)
7992 {
7993         /* Probably should print a warning here. */
7994         if (s->seq.len >= TRACE_MAX_PRINT)
7995                 s->seq.len = TRACE_MAX_PRINT;
7996
7997         /*
7998          * More paranoid code. Although the buffer size is set to
7999          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8000          * an extra layer of protection.
8001          */
8002         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8003                 s->seq.len = s->seq.size - 1;
8004
8005         /* Should be zero terminated, but we are paranoid. */
8006         s->buffer[s->seq.len] = 0;
8007
8008         printk(KERN_TRACE "%s", s->buffer);
8009
8010         trace_seq_init(s);
8011 }
8012
8013 void trace_init_global_iter(struct trace_iterator *iter)
8014 {
8015         iter->tr = &global_trace;
8016         iter->trace = iter->tr->current_trace;
8017         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8018         iter->trace_buffer = &global_trace.trace_buffer;
8019
8020         if (iter->trace && iter->trace->open)
8021                 iter->trace->open(iter);
8022
8023         /* Annotate start of buffers if we had overruns */
8024         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8025                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8026
8027         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8028         if (trace_clocks[iter->tr->clock_id].in_ns)
8029                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8030 }
8031
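/**
 * ftrace_dump - dump the ftrace ring buffer to the console
 * @oops_dump_mode: DUMP_ALL dumps the buffers of every CPU, DUMP_ORIG
 *                  dumps only the current CPU's buffer (the one that
 *                  triggered the oops), and DUMP_NONE does nothing.
 *
 * Called from the panic and die notifiers above (and from sysrq-z).
 * Tracing is turned off first; it can be re-enabled afterwards with
 * "echo 1 > tracing_on".
 */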
8032 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8033 {
8034         /* use static because iter can be a bit big for the stack */
8035         static struct trace_iterator iter;
8036         static atomic_t dump_running;
8037         struct trace_array *tr = &global_trace;
8038         unsigned int old_userobj;
8039         unsigned long flags;
8040         int cnt = 0, cpu;
8041
8042         /* Only allow one dump user at a time. */
8043         if (atomic_inc_return(&dump_running) != 1) {
8044                 atomic_dec(&dump_running);
8045                 return;
8046         }
8047
8048         /*
8049          * Always turn off tracing when we dump.
8050          * We don't need to show trace output of what happens
8051          * between multiple crashes.
8052          *
8053          * If the user does a sysrq-z, then they can re-enable
8054          * tracing with echo 1 > tracing_on.
8055          */
8056         tracing_off();
8057
8058         local_irq_save(flags);
8059
8060         /* Simulate the iterator */
8061         trace_init_global_iter(&iter);
8062
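        /*
         * Bump each CPU's "disabled" counter so that no new events get
         * recorded into the buffers while we walk them and print them.
         */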
8063         for_each_tracing_cpu(cpu) {
8064                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8065         }
8066
8067         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8068
8069         /* don't look at user memory in panic mode */
8070         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8071
8072         switch (oops_dump_mode) {
8073         case DUMP_ALL:
8074                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8075                 break;
8076         case DUMP_ORIG:
8077                 iter.cpu_file = raw_smp_processor_id();
8078                 break;
8079         case DUMP_NONE:
8080                 goto out_enable;
8081         default:
8082                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8083                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8084         }
8085
8086         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8087
8088         /* Did function tracer already get disabled? */
8089         if (ftrace_is_dead()) {
8090                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8091                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8092         }
8093
8094         /*
8095          * We need to stop all tracing on all CPUs to read
8096          * the next buffer. This is a bit expensive, but is
8097          * not done often. We print everything we can read,
8098          * and then release the locks again.
8099          */
8100
8101         while (!trace_empty(&iter)) {
8102
8103                 if (!cnt)
8104                         printk(KERN_TRACE "---------------------------------\n");
8105
8106                 cnt++;
8107
8108                 /* reset all but tr, trace, and overruns */
8109                 memset(&iter.seq, 0,
8110                        sizeof(struct trace_iterator) -
8111                        offsetof(struct trace_iterator, seq));
8112                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8113                 iter.pos = -1;
8114
8115                 if (trace_find_next_entry_inc(&iter) != NULL) {
8116                         int ret;
8117
8118                         ret = print_trace_line(&iter);
8119                         if (ret != TRACE_TYPE_NO_CONSUME)
8120                                 trace_consume(&iter);
8121                 }
8122                 touch_nmi_watchdog();
8123
8124                 trace_printk_seq(&iter.seq);
8125         }
8126
8127         if (!cnt)
8128                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8129         else
8130                 printk(KERN_TRACE "---------------------------------\n");
8131
8132  out_enable:
8133         tr->trace_flags |= old_userobj;
8134
8135         for_each_tracing_cpu(cpu) {
8136                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8137         }
8138         atomic_dec(&dump_running);
8139         local_irq_restore(flags);
8140 }
8141 EXPORT_SYMBOL_GPL(ftrace_dump);
8142
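/*
 * Allocate the global trace buffers and supporting cpumasks, install
 * the nop tracer as the bootstrap current_trace, and register the
 * panic and die notifiers.  Called once at boot from early_trace_init().
 */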
8143 __init static int tracer_alloc_buffers(void)
8144 {
8145         int ring_buf_size;
8146         int ret = -ENOMEM;
8147
8148         /*
8149          * Make sure we don't accidentally add more trace options
8150          * than we have bits for.
8151          */
8152         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8153
8154         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8155                 goto out;
8156
8157         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8158                 goto out_free_buffer_mask;
8159
8160         /* Only allocate trace_printk buffers if a trace_printk exists */
8161         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8162                 /* Must be called before global_trace.buffer is allocated */
8163                 trace_printk_init_buffers();
8164
8165         /* To save memory, keep the ring buffer size to its minimum */
8166         if (ring_buffer_expanded)
8167                 ring_buf_size = trace_buf_size;
8168         else
8169                 ring_buf_size = 1;
8170
8171         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8172         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8173
8174         raw_spin_lock_init(&global_trace.start_lock);
8175
8176         /*
8177          * The prepare callback allocates some memory for the ring buffer. We
8178          * don't free the buffer if the CPU goes down. If we were to free
8179          * the buffer, then the user would lose any trace that was in the
8180          * buffer. The memory will be removed once the "instance" is removed.
8181          */
8182         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8183                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8184                                       NULL);
8185         if (ret < 0)
8186                 goto out_free_cpumask;
8187         /* Used for event triggers */
8188         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8189         if (!temp_buffer)
8190                 goto out_rm_hp_state;
8191
8192         if (trace_create_savedcmd() < 0)
8193                 goto out_free_temp_buffer;
8194
8195         /* TODO: make the number of buffers hot pluggable with CPUs */
8196         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8197                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8198                 WARN_ON(1);
8199                 goto out_free_savedcmd;
8200         }
8201
8202         if (global_trace.buffer_disabled)
8203                 tracing_off();
8204
8205         if (trace_boot_clock) {
8206                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8207                 if (ret < 0)
8208                         pr_warn("Trace clock %s not defined, going back to default\n",
8209                                 trace_boot_clock);
8210         }
8211
8212         /*
8213          * register_tracer() might reference current_trace, so it
8214          * needs to be set before we register anything. This is
8215          * just a bootstrap of current_trace anyway.
8216          */
8217         global_trace.current_trace = &nop_trace;
8218
8219         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8220
8221         ftrace_init_global_array_ops(&global_trace);
8222
8223         init_trace_flags_index(&global_trace);
8224
8225         register_tracer(&nop_trace);
8226
8227         /* Function tracing may start here (via kernel command line) */
8228         init_function_trace();
8229
8230         /* All seems OK, enable tracing */
8231         tracing_disabled = 0;
8232
8233         atomic_notifier_chain_register(&panic_notifier_list,
8234                                        &trace_panic_notifier);
8235
8236         register_die_notifier(&trace_die_notifier);
8237
8238         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8239
8240         INIT_LIST_HEAD(&global_trace.systems);
8241         INIT_LIST_HEAD(&global_trace.events);
8242         list_add(&global_trace.list, &ftrace_trace_arrays);
8243
8244         apply_trace_boot_options();
8245
8246         register_snapshot_cmd();
8247
8248         return 0;
8249
8250 out_free_savedcmd:
8251         free_saved_cmdlines_buffer(savedcmd);
8252 out_free_temp_buffer:
8253         ring_buffer_free(temp_buffer);
8254 out_rm_hp_state:
8255         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8256 out_free_cpumask:
8257         free_cpumask_var(global_trace.tracing_cpumask);
8258 out_free_buffer_mask:
8259         free_cpumask_var(tracing_buffer_mask);
8260 out:
8261         return ret;
8262 }
8263
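/*
 * Called early in start_kernel() so that tracing (including
 * trace_printk()) is available as soon as possible during boot.
 */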
8264 void __init early_trace_init(void)
8265 {
8266         if (tracepoint_printk) {
8267                 tracepoint_print_iter =
8268                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8269                 if (WARN_ON(!tracepoint_print_iter))
8270                         tracepoint_printk = 0;
8271                 else
8272                         static_key_enable(&tracepoint_printk_key.key);
8273         }
8274         tracer_alloc_buffers();
8275 }
8276
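/*
 * Called from start_kernel() after early_trace_init(), once it is
 * safe to set up the trace event infrastructure.
 */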
8277 void __init trace_init(void)
8278 {
8279         trace_event_init();
8280 }
8281
8282 __init static int clear_boot_tracer(void)
8283 {
8284         /*
8285          * The buffer that holds the default bootup tracer name is in
8286          * an init section and will be freed. This function is called
8287          * at late_initcall time; if the boot tracer was never
8288          * registered, clear the pointer so that a later registration
8289          * does not access the buffer that is about to be freed.
8290          */
8291         if (!default_bootup_tracer)
8292                 return 0;
8293
8294         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8295                default_bootup_tracer);
8296         default_bootup_tracer = NULL;
8297
8298         return 0;
8299 }
8300
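/*
 * tracer_init_tracefs() runs at fs_initcall time, once filesystem
 * infrastructure such as tracefs is available; clear_boot_tracer()
 * runs at late_initcall time, after every built-in tracer has had a
 * chance to register.
 */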
8301 fs_initcall(tracer_init_tracefs);
8302 late_initcall(clear_boot_tracer);