1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will peek into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk(), could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 and is set back to zero only if the
95  * initialization of the tracer is successful. That is the only
96  * place that clears it.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
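
/*
 * Illustrative usage (not part of this file): the dump can be enabled
 * at boot with "ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu"
 * on the kernel command line (see the __setup handler below), or at
 * run time through the sysctl file mentioned above, e.g.
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */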
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
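
/*
 * Illustrative layout sketch (hypothetical, for N saved maps): a saved
 * array as described above would look roughly like
 *
 *	[0]     head  (mod, length = N)
 *	[1..N]  map   (the saved trace_eval_map entries)
 *	[N+1]   tail  (next -> next saved array, end -> NULL)
 */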
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
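
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * a tracing hook would typically bail out early when the current task
 * is filtered away, e.g.
 *
 *	if (trace_ignore_this_task(pid_list, current))
 *		return;
 *	... record the event ...
 */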
363
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * When adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, as tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
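
/*
 * Illustrative sketch (hypothetical callers, not part of this file):
 * fork and exit handlers keep the list in sync with task lifetime
 * roughly like this:
 *
 *	trace_filter_add_remove_task(pid_list, parent, child);  (on fork)
 *	trace_filter_add_remove_task(pid_list, NULL, task);     (on exit)
 */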
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid already is +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
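
/*
 * Illustrative sketch (hypothetical, not part of this file): the three
 * helpers above are meant to back a seq_file interface, where pid_list,
 * p_stop() and the file wiring are supplied by the caller:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations example_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */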
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate a new array. The write is an all or nothing
492          * operation. Always create a new array when adding new pids by
493          * the user. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
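
/*
 * Illustrative usage (not part of this file): trace_pid_write() backs
 * the write() handlers of pid filter files such as the tracefs
 * "set_event_pid" file, where a space or newline separated list of
 * pids replaces the current list, e.g.
 *
 *	echo "123 456" > set_event_pid
 */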
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so it can be used in fast paths, such
592  * as by the irqsoff tracer. But it may be inaccurate due to races. If
593  * you need to know the accurate state, use tracing_is_on(), which is a
594  * little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value (16384 entries),
613  * so that if a dump on oops happens, we do not have to wait
614  * for a huge amount of output. Anyway, this is configurable at
615  * both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
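
/*
 * Illustrative sketch (hypothetical reader path, not part of this
 * file): consumers pair these primitives around reading one cpu
 * buffer, or pass RING_BUFFER_ALL_CPUS to serialize against everyone:
 *
 *	trace_access_lock(cpu);
 *	... consume events from the cpu buffer ...
 *	trace_access_unlock(cpu);
 */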
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
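
/*
 * Illustrative usage (not part of this file): callers normally do not
 * use __trace_puts() directly but go through the trace_puts() macro,
 * which picks __trace_bputs() for compile-time constant strings and
 * __trace_puts() otherwise, e.g.
 *
 *	trace_puts("reached the slow path\n");
 */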
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * tracing_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot either with
935  * tracing_snapshot_alloc(), or manually with:
936  *      echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
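
/*
 * Illustrative sketch (hypothetical module code, not part of this
 * file): allocate the snapshot buffer once from a context that may
 * sleep, then trigger snapshots wherever the condition of interest
 * is detected:
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		if (condition_hit)
 *			tracing_snapshot();
 *	}
 */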
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to tracing_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races of where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr : the trace array to know if ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
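
/*
 * Illustrative sketch of the expansion above (assuming TRACE_FLAGS were
 * C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"), ...):
 * with "C(a, b) b" the array becomes
 *
 *	static const char *trace_options[] = {
 *		"print-parent", "sym-offset", ..., NULL
 *	};
 *
 * while other definitions of C() (in trace.h) build the matching enum
 * of bit positions from the same list.
 */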
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
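
/*
 * Illustrative usage (not part of this file): one of the clocks above
 * is selected either with the trace_clock= boot option handled earlier
 * in this file, or at run time through the tracefs "trace_clock" file,
 * e.g.
 *
 *	echo global > trace_clock
 */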
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by  space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * The parser is not finished with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
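
/*
 * Illustrative sketch (hypothetical write() handler, not part of this
 * file): a typical consumer allocates a parser, pulls one token per
 * call, and frees the buffer when done:
 *
 *	struct trace_parser parser;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, size))
 *		return -ENOMEM;
 *	ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (ret >= 0 && trace_parser_loaded(&parser))
 *		... act on parser.buffer ...
 *	trace_parser_put(&parser);
 */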
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this tasks comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break it. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
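
/*
 * Illustrative sketch (hypothetical names): a built-in tracer is normally
 * registered from its own initcall, roughly as below. "example_tracer" and
 * "example_trace_init" are placeholders, not symbols from this file.
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */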
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 static int *tgid_map;
1713
1714 #define SAVED_CMDLINES_DEFAULT 128
1715 #define NO_CMDLINE_MAP UINT_MAX
1716 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1717 struct saved_cmdlines_buffer {
1718         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1719         unsigned *map_cmdline_to_pid;
1720         unsigned cmdline_num;
1721         int cmdline_idx;
1722         char *saved_cmdlines;
1723 };
1724 static struct saved_cmdlines_buffer *savedcmd;
1725
1726 /* temporarily disable recording */
1727 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1728
1729 static inline char *get_saved_cmdlines(int idx)
1730 {
1731         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 }
1733
1734 static inline void set_cmdline(int idx, const char *cmdline)
1735 {
1736         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 }
1738
1739 static int allocate_cmdlines_buffer(unsigned int val,
1740                                     struct saved_cmdlines_buffer *s)
1741 {
1742         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1743                                         GFP_KERNEL);
1744         if (!s->map_cmdline_to_pid)
1745                 return -ENOMEM;
1746
1747         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1748         if (!s->saved_cmdlines) {
1749                 kfree(s->map_cmdline_to_pid);
1750                 return -ENOMEM;
1751         }
1752
1753         s->cmdline_idx = 0;
1754         s->cmdline_num = val;
1755         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1756                sizeof(s->map_pid_to_cmdline));
1757         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1758                val * sizeof(*s->map_cmdline_to_pid));
1759
1760         return 0;
1761 }
1762
1763 static int trace_create_savedcmd(void)
1764 {
1765         int ret;
1766
1767         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1768         if (!savedcmd)
1769                 return -ENOMEM;
1770
1771         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1772         if (ret < 0) {
1773                 kfree(savedcmd);
1774                 savedcmd = NULL;
1775                 return -ENOMEM;
1776         }
1777
1778         return 0;
1779 }
1780
1781 int is_tracing_stopped(void)
1782 {
1783         return global_trace.stop_count;
1784 }
1785
1786 /**
1787  * tracing_start - quick start of the tracer
1788  *
1789  * If tracing is enabled but was stopped by tracing_stop,
1790  * this will start the tracer back up.
1791  */
1792 void tracing_start(void)
1793 {
1794         struct ring_buffer *buffer;
1795         unsigned long flags;
1796
1797         if (tracing_disabled)
1798                 return;
1799
1800         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1801         if (--global_trace.stop_count) {
1802                 if (global_trace.stop_count < 0) {
1803                         /* Someone screwed up their debugging */
1804                         WARN_ON_ONCE(1);
1805                         global_trace.stop_count = 0;
1806                 }
1807                 goto out;
1808         }
1809
1810         /* Prevent the buffers from switching */
1811         arch_spin_lock(&global_trace.max_lock);
1812
1813         buffer = global_trace.trace_buffer.buffer;
1814         if (buffer)
1815                 ring_buffer_record_enable(buffer);
1816
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818         buffer = global_trace.max_buffer.buffer;
1819         if (buffer)
1820                 ring_buffer_record_enable(buffer);
1821 #endif
1822
1823         arch_spin_unlock(&global_trace.max_lock);
1824
1825  out:
1826         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 }
1828
1829 static void tracing_start_tr(struct trace_array *tr)
1830 {
1831         struct ring_buffer *buffer;
1832         unsigned long flags;
1833
1834         if (tracing_disabled)
1835                 return;
1836
1837         /* If global, we need to also start the max tracer */
1838         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1839                 return tracing_start();
1840
1841         raw_spin_lock_irqsave(&tr->start_lock, flags);
1842
1843         if (--tr->stop_count) {
1844                 if (tr->stop_count < 0) {
1845                         /* Someone screwed up their debugging */
1846                         WARN_ON_ONCE(1);
1847                         tr->stop_count = 0;
1848                 }
1849                 goto out;
1850         }
1851
1852         buffer = tr->trace_buffer.buffer;
1853         if (buffer)
1854                 ring_buffer_record_enable(buffer);
1855
1856  out:
1857         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1858 }
1859
1860 /**
1861  * tracing_stop - quick stop of the tracer
1862  *
1863  * Light weight way to stop tracing. Use in conjunction with
1864  * tracing_start.
1865  */
1866 void tracing_stop(void)
1867 {
1868         struct ring_buffer *buffer;
1869         unsigned long flags;
1870
1871         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1872         if (global_trace.stop_count++)
1873                 goto out;
1874
1875         /* Prevent the buffers from switching */
1876         arch_spin_lock(&global_trace.max_lock);
1877
1878         buffer = global_trace.trace_buffer.buffer;
1879         if (buffer)
1880                 ring_buffer_record_disable(buffer);
1881
1882 #ifdef CONFIG_TRACER_MAX_TRACE
1883         buffer = global_trace.max_buffer.buffer;
1884         if (buffer)
1885                 ring_buffer_record_disable(buffer);
1886 #endif
1887
1888         arch_spin_unlock(&global_trace.max_lock);
1889
1890  out:
1891         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 }
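
/*
 * Illustrative usage: tracing_stop() and tracing_start() bracket a region
 * where the buffers must not be written to, e.g. while dumping them. The
 * stop count nests, so stop/start pairs may themselves be nested:
 *
 *	tracing_stop();
 *	... inspect or dump the ring buffers ...
 *	tracing_start();
 */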
1893
1894 static void tracing_stop_tr(struct trace_array *tr)
1895 {
1896         struct ring_buffer *buffer;
1897         unsigned long flags;
1898
1899         /* If global, we need to also stop the max tracer */
1900         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1901                 return tracing_stop();
1902
1903         raw_spin_lock_irqsave(&tr->start_lock, flags);
1904         if (tr->stop_count++)
1905                 goto out;
1906
1907         buffer = tr->trace_buffer.buffer;
1908         if (buffer)
1909                 ring_buffer_record_disable(buffer);
1910
1911  out:
1912         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 }
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
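
/*
 * Worked example with hypothetical numbers: if pid 1234 is saved into
 * slot 5, then map_pid_to_cmdline[1234] == 5, map_cmdline_to_pid[5] == 1234,
 * and saved_cmdlines[5 * TASK_COMM_LEN] holds its comm. When slot 5 is
 * later recycled for another pid, map_pid_to_cmdline[1234] is reset to
 * NO_CMDLINE_MAP so the old pid does not pick up the new comm.
 */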
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
1994
1995 int trace_find_tgid(int pid)
1996 {
1997         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
1998                 return 0;
1999
2000         return tgid_map[pid];
2001 }
2002
2003 static int trace_save_tgid(struct task_struct *tsk)
2004 {
2005         if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
2006                 return 0;
2007
2008         tgid_map[tsk->pid] = tsk->tgid;
2009         return 1;
2010 }
2011
2012 static bool tracing_record_taskinfo_skip(int flags)
2013 {
2014         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2015                 return true;
2016         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2017                 return true;
2018         if (!__this_cpu_read(trace_taskinfo_save))
2019                 return true;
2020         return false;
2021 }
2022
2023 /**
2024  * tracing_record_taskinfo - record the task info of a task
2025  *
2026  * @task  - task to record
2027  * @flags - TRACE_RECORD_CMDLINE for recording comm
2028  *        - TRACE_RECORD_TGID for recording tgid
2029  */
2030 void tracing_record_taskinfo(struct task_struct *task, int flags)
2031 {
2032         if (tracing_record_taskinfo_skip(flags))
2033                 return;
2034         if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
2035                 return;
2036         if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
2037                 return;
2038
2039         __this_cpu_write(trace_taskinfo_save, false);
2040 }
2041
2042 /**
2043  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2044  *
2045  * @prev - previous task during sched_switch
2046  * @next - next task during sched_switch
2047  * @flags - TRACE_RECORD_CMDLINE for recording comm
2048  *          TRACE_RECORD_TGID for recording tgid
2049  */
2050 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2051                                           struct task_struct *next, int flags)
2052 {
2053         if (tracing_record_taskinfo_skip(flags))
2054                 return;
2055
2056         if ((flags & TRACE_RECORD_CMDLINE) &&
2057             (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
2058                 return;
2059
2060         if ((flags & TRACE_RECORD_TGID) &&
2061             (!trace_save_tgid(prev) || !trace_save_tgid(next)))
2062                 return;
2063
2064         __this_cpu_write(trace_taskinfo_save, false);
2065 }
2066
2067 /* Helpers to record a specific task information */
2068 void tracing_record_cmdline(struct task_struct *task)
2069 {
2070         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2071 }
2072
2073 void tracing_record_tgid(struct task_struct *task)
2074 {
2075         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2076 }
2077
2078 /*
2079  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2080  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2081  * simplifies those functions and keeps them in sync.
2082  */
2083 enum print_line_t trace_handle_return(struct trace_seq *s)
2084 {
2085         return trace_seq_has_overflowed(s) ?
2086                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2087 }
2088 EXPORT_SYMBOL_GPL(trace_handle_return);
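
/*
 * Illustrative sketch (hypothetical name "example_output"): an event's
 * output callback typically writes into iter->seq and then uses
 * trace_handle_return() to turn a possible overflow into the right
 * return value.
 *
 *	static enum print_line_t example_output(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */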
2089
2090 void
2091 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2092                              int pc)
2093 {
2094         struct task_struct *tsk = current;
2095
2096         entry->preempt_count            = pc & 0xff;
2097         entry->pid                      = (tsk) ? tsk->pid : 0;
2098         entry->flags =
2099 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2100                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2101 #else
2102                 TRACE_FLAG_IRQS_NOSUPPORT |
2103 #endif
2104                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2105                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2106                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2107                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2108                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2109 }
2110 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2111
2112 struct ring_buffer_event *
2113 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2114                           int type,
2115                           unsigned long len,
2116                           unsigned long flags, int pc)
2117 {
2118         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2119 }
2120
2121 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2122 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2123 static int trace_buffered_event_ref;
2124
2125 /**
2126  * trace_buffered_event_enable - enable buffering events
2127  *
2128  * When events are being filtered, it is quicker to use a temporary
2129  * buffer to write the event data into if there's a likely chance
2130  * that it will not be committed. Discarding an event from the ring
2131  * buffer is not as fast as committing it, and is much slower than
2132  * copying the data into a commit.
2133  *
2134  * When an event is to be filtered, allocate per cpu buffers to
2135  * write the event data into, and if the event is filtered and discarded
2136  * it is simply dropped, otherwise, the entire data is to be committed
2137  * in one shot.
2138  */
2139 void trace_buffered_event_enable(void)
2140 {
2141         struct ring_buffer_event *event;
2142         struct page *page;
2143         int cpu;
2144
2145         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2146
2147         if (trace_buffered_event_ref++)
2148                 return;
2149
2150         for_each_tracing_cpu(cpu) {
2151                 page = alloc_pages_node(cpu_to_node(cpu),
2152                                         GFP_KERNEL | __GFP_NORETRY, 0);
2153                 if (!page)
2154                         goto failed;
2155
2156                 event = page_address(page);
2157                 memset(event, 0, sizeof(*event));
2158
2159                 per_cpu(trace_buffered_event, cpu) = event;
2160
2161                 preempt_disable();
2162                 if (cpu == smp_processor_id() &&
2163                     this_cpu_read(trace_buffered_event) !=
2164                     per_cpu(trace_buffered_event, cpu))
2165                         WARN_ON_ONCE(1);
2166                 preempt_enable();
2167         }
2168
2169         return;
2170  failed:
2171         trace_buffered_event_disable();
2172 }
2173
2174 static void enable_trace_buffered_event(void *data)
2175 {
2176         /* Probably not needed, but do it anyway */
2177         smp_rmb();
2178         this_cpu_dec(trace_buffered_event_cnt);
2179 }
2180
2181 static void disable_trace_buffered_event(void *data)
2182 {
2183         this_cpu_inc(trace_buffered_event_cnt);
2184 }
2185
2186 /**
2187  * trace_buffered_event_disable - disable buffering events
2188  *
2189  * When a filter is removed, it is faster to not use the buffered
2190  * events, and to commit directly into the ring buffer. Free up
2191  * the temp buffers when there are no more users. This requires
2192  * special synchronization with current events.
2193  */
2194 void trace_buffered_event_disable(void)
2195 {
2196         int cpu;
2197
2198         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2199
2200         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2201                 return;
2202
2203         if (--trace_buffered_event_ref)
2204                 return;
2205
2206         preempt_disable();
2207         /* For each CPU, set the buffer as used. */
2208         smp_call_function_many(tracing_buffer_mask,
2209                                disable_trace_buffered_event, NULL, 1);
2210         preempt_enable();
2211
2212         /* Wait for all current users to finish */
2213         synchronize_sched();
2214
2215         for_each_tracing_cpu(cpu) {
2216                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2217                 per_cpu(trace_buffered_event, cpu) = NULL;
2218         }
2219         /*
2220          * Make sure trace_buffered_event is NULL before clearing
2221          * trace_buffered_event_cnt.
2222          */
2223         smp_wmb();
2224
2225         preempt_disable();
2226         /* Do the work on each cpu */
2227         smp_call_function_many(tracing_buffer_mask,
2228                                enable_trace_buffered_event, NULL, 1);
2229         preempt_enable();
2230 }
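
/*
 * Illustrative usage: callers pair these two functions under event_mutex,
 * typically when an event filter is attached and later removed:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable() call, again under
 * event_mutex, once the filter is torn down.
 */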
2231
2232 static struct ring_buffer *temp_buffer;
2233
2234 struct ring_buffer_event *
2235 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2236                           struct trace_event_file *trace_file,
2237                           int type, unsigned long len,
2238                           unsigned long flags, int pc)
2239 {
2240         struct ring_buffer_event *entry;
2241         int val;
2242
2243         *current_rb = trace_file->tr->trace_buffer.buffer;
2244
2245         if ((trace_file->flags &
2246              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2247             (entry = this_cpu_read(trace_buffered_event))) {
2248                 /* Try to use the per cpu buffer first */
2249                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2250                 if (val == 1) {
2251                         trace_event_setup(entry, type, flags, pc);
2252                         entry->array[0] = len;
2253                         return entry;
2254                 }
2255                 this_cpu_dec(trace_buffered_event_cnt);
2256         }
2257
2258         entry = __trace_buffer_lock_reserve(*current_rb,
2259                                             type, len, flags, pc);
2260         /*
2261          * If tracing is off, but we have triggers enabled
2262          * we still need to look at the event data. Use the temp_buffer
2263          * to store the trace event for the trigger to use. It's recursion
2264          * safe and will not be recorded anywhere.
2265          */
2266         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2267                 *current_rb = temp_buffer;
2268                 entry = __trace_buffer_lock_reserve(*current_rb,
2269                                                     type, len, flags, pc);
2270         }
2271         return entry;
2272 }
2273 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2274
2275 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2276 static DEFINE_MUTEX(tracepoint_printk_mutex);
2277
2278 static void output_printk(struct trace_event_buffer *fbuffer)
2279 {
2280         struct trace_event_call *event_call;
2281         struct trace_event *event;
2282         unsigned long flags;
2283         struct trace_iterator *iter = tracepoint_print_iter;
2284
2285         /* We should never get here if iter is NULL */
2286         if (WARN_ON_ONCE(!iter))
2287                 return;
2288
2289         event_call = fbuffer->trace_file->event_call;
2290         if (!event_call || !event_call->event.funcs ||
2291             !event_call->event.funcs->trace)
2292                 return;
2293
2294         event = &fbuffer->trace_file->event_call->event;
2295
2296         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2297         trace_seq_init(&iter->seq);
2298         iter->ent = fbuffer->entry;
2299         event_call->event.funcs->trace(iter, 0, event);
2300         trace_seq_putc(&iter->seq, 0);
2301         printk("%s", iter->seq.buffer);
2302
2303         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2304 }
2305
2306 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2307                              void __user *buffer, size_t *lenp,
2308                              loff_t *ppos)
2309 {
2310         int save_tracepoint_printk;
2311         int ret;
2312
2313         mutex_lock(&tracepoint_printk_mutex);
2314         save_tracepoint_printk = tracepoint_printk;
2315
2316         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2317
2318         /*
2319          * This will force exiting early, as tracepoint_printk
2320          * is always zero when tracepoint_print_iter is not allocated
2321          */
2322         if (!tracepoint_print_iter)
2323                 tracepoint_printk = 0;
2324
2325         if (save_tracepoint_printk == tracepoint_printk)
2326                 goto out;
2327
2328         if (tracepoint_printk)
2329                 static_key_enable(&tracepoint_printk_key.key);
2330         else
2331                 static_key_disable(&tracepoint_printk_key.key);
2332
2333  out:
2334         mutex_unlock(&tracepoint_printk_mutex);
2335
2336         return ret;
2337 }
2338
2339 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2340 {
2341         if (static_key_false(&tracepoint_printk_key.key))
2342                 output_printk(fbuffer);
2343
2344         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2345                                     fbuffer->event, fbuffer->entry,
2346                                     fbuffer->flags, fbuffer->pc);
2347 }
2348 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2349
2350 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2351                                      struct ring_buffer *buffer,
2352                                      struct ring_buffer_event *event,
2353                                      unsigned long flags, int pc,
2354                                      struct pt_regs *regs)
2355 {
2356         __buffer_unlock_commit(buffer, event);
2357
2358         /*
2359          * If regs is not set, then skip the following callers:
2360          *   trace_buffer_unlock_commit_regs
2361          *   event_trigger_unlock_commit
2362          *   trace_event_buffer_commit
2363          *   trace_event_raw_event_sched_switch
2364          * Note, we can still get here via blktrace, wakeup tracer
2365          * and mmiotrace, but that's ok if they lose a function or
2366          * two. They are not that meaningful.
2367          */
2368         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2369         ftrace_trace_userstack(buffer, flags, pc);
2370 }
2371
2372 /*
2373  * Similar to trace_buffer_unlock_commit_regs(), but does not dump the stack.
2374  */
2375 void
2376 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2377                                    struct ring_buffer_event *event)
2378 {
2379         __buffer_unlock_commit(buffer, event);
2380 }
2381
2382 static void
2383 trace_process_export(struct trace_export *export,
2384                struct ring_buffer_event *event)
2385 {
2386         struct trace_entry *entry;
2387         unsigned int size = 0;
2388
2389         entry = ring_buffer_event_data(event);
2390         size = ring_buffer_event_length(event);
2391         export->write(entry, size);
2392 }
2393
2394 static DEFINE_MUTEX(ftrace_export_lock);
2395
2396 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2397
2398 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2399
2400 static inline void ftrace_exports_enable(void)
2401 {
2402         static_branch_enable(&ftrace_exports_enabled);
2403 }
2404
2405 static inline void ftrace_exports_disable(void)
2406 {
2407         static_branch_disable(&ftrace_exports_enabled);
2408 }
2409
2410 void ftrace_exports(struct ring_buffer_event *event)
2411 {
2412         struct trace_export *export;
2413
2414         preempt_disable_notrace();
2415
2416         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2417         while (export) {
2418                 trace_process_export(export, event);
2419                 export = rcu_dereference_raw_notrace(export->next);
2420         }
2421
2422         preempt_enable_notrace();
2423 }
2424
2425 static inline void
2426 add_trace_export(struct trace_export **list, struct trace_export *export)
2427 {
2428         rcu_assign_pointer(export->next, *list);
2429         /*
2430          * We are entering export into the list but another
2431          * CPU might be walking that list. We need to make sure
2432          * the export->next pointer is valid before another CPU sees
2433          * the export pointer included into the list.
2434          */
2435         rcu_assign_pointer(*list, export);
2436 }
2437
2438 static inline int
2439 rm_trace_export(struct trace_export **list, struct trace_export *export)
2440 {
2441         struct trace_export **p;
2442
2443         for (p = list; *p != NULL; p = &(*p)->next)
2444                 if (*p == export)
2445                         break;
2446
2447         if (*p != export)
2448                 return -1;
2449
2450         rcu_assign_pointer(*p, (*p)->next);
2451
2452         return 0;
2453 }
2454
2455 static inline void
2456 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2457 {
2458         if (*list == NULL)
2459                 ftrace_exports_enable();
2460
2461         add_trace_export(list, export);
2462 }
2463
2464 static inline int
2465 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2466 {
2467         int ret;
2468
2469         ret = rm_trace_export(list, export);
2470         if (*list == NULL)
2471                 ftrace_exports_disable();
2472
2473         return ret;
2474 }
2475
2476 int register_ftrace_export(struct trace_export *export)
2477 {
2478         if (WARN_ON_ONCE(!export->write))
2479                 return -1;
2480
2481         mutex_lock(&ftrace_export_lock);
2482
2483         add_ftrace_export(&ftrace_exports_list, export);
2484
2485         mutex_unlock(&ftrace_export_lock);
2486
2487         return 0;
2488 }
2489 EXPORT_SYMBOL_GPL(register_ftrace_export);
2490
2491 int unregister_ftrace_export(struct trace_export *export)
2492 {
2493         int ret;
2494
2495         mutex_lock(&ftrace_export_lock);
2496
2497         ret = rm_ftrace_export(&ftrace_exports_list, export);
2498
2499         mutex_unlock(&ftrace_export_lock);
2500
2501         return ret;
2502 }
2503 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
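
/*
 * Illustrative sketch (hypothetical names "example_export_write" and
 * "example_export"): a trace export supplies a write() callback, which
 * trace_process_export() above invokes with the raw trace entry and its
 * size.
 *
 *	static void example_export_write(const void *entry, unsigned int size)
 *	{
 *		... forward the raw entry, e.g. to an STM device ...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write = example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */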
2504
2505 void
2506 trace_function(struct trace_array *tr,
2507                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2508                int pc)
2509 {
2510         struct trace_event_call *call = &event_function;
2511         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2512         struct ring_buffer_event *event;
2513         struct ftrace_entry *entry;
2514
2515         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2516                                             flags, pc);
2517         if (!event)
2518                 return;
2519         entry   = ring_buffer_event_data(event);
2520         entry->ip                       = ip;
2521         entry->parent_ip                = parent_ip;
2522
2523         if (!call_filter_check_discard(call, entry, buffer, event)) {
2524                 if (static_branch_unlikely(&ftrace_exports_enabled))
2525                         ftrace_exports(event);
2526                 __buffer_unlock_commit(buffer, event);
2527         }
2528 }
2529
2530 #ifdef CONFIG_STACKTRACE
2531
2532 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2533 struct ftrace_stack {
2534         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2535 };
2536
2537 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2538 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2539
2540 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2541                                  unsigned long flags,
2542                                  int skip, int pc, struct pt_regs *regs)
2543 {
2544         struct trace_event_call *call = &event_kernel_stack;
2545         struct ring_buffer_event *event;
2546         struct stack_entry *entry;
2547         struct stack_trace trace;
2548         int use_stack;
2549         int size = FTRACE_STACK_ENTRIES;
2550
2551         trace.nr_entries        = 0;
2552         trace.skip              = skip;
2553
2554         /*
2555          * Add two, for this function and the call to save_stack_trace().
2556          * If regs is set, then these functions will not be in the way.
2557          */
2558         if (!regs)
2559                 trace.skip += 2;
2560
2561         /*
2562          * Since events can happen in NMIs there's no safe way to
2563          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2564          * or NMI comes in, it will just have to use the default
2565          * FTRACE_STACK_ENTRIES.
2566          */
2567         preempt_disable_notrace();
2568
2569         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2570         /*
2571          * We don't need any atomic variables, just a barrier.
2572          * If an interrupt comes in, we don't care, because it would
2573          * have exited and put the counter back to what we want.
2574          * We just need a barrier to keep gcc from moving things
2575          * around.
2576          */
2577         barrier();
2578         if (use_stack == 1) {
2579                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2580                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2581
2582                 if (regs)
2583                         save_stack_trace_regs(regs, &trace);
2584                 else
2585                         save_stack_trace(&trace);
2586
2587                 if (trace.nr_entries > size)
2588                         size = trace.nr_entries;
2589         } else
2590                 /* From now on, use_stack is a boolean */
2591                 use_stack = 0;
2592
2593         size *= sizeof(unsigned long);
2594
2595         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2596                                             sizeof(*entry) + size, flags, pc);
2597         if (!event)
2598                 goto out;
2599         entry = ring_buffer_event_data(event);
2600
2601         memset(&entry->caller, 0, size);
2602
2603         if (use_stack)
2604                 memcpy(&entry->caller, trace.entries,
2605                        trace.nr_entries * sizeof(unsigned long));
2606         else {
2607                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2608                 trace.entries           = entry->caller;
2609                 if (regs)
2610                         save_stack_trace_regs(regs, &trace);
2611                 else
2612                         save_stack_trace(&trace);
2613         }
2614
2615         entry->size = trace.nr_entries;
2616
2617         if (!call_filter_check_discard(call, entry, buffer, event))
2618                 __buffer_unlock_commit(buffer, event);
2619
2620  out:
2621         /* Again, don't let gcc optimize things here */
2622         barrier();
2623         __this_cpu_dec(ftrace_stack_reserve);
2624         preempt_enable_notrace();
2625
2626 }
2627
2628 static inline void ftrace_trace_stack(struct trace_array *tr,
2629                                       struct ring_buffer *buffer,
2630                                       unsigned long flags,
2631                                       int skip, int pc, struct pt_regs *regs)
2632 {
2633         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2634                 return;
2635
2636         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2637 }
2638
2639 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2640                    int pc)
2641 {
2642         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2643
2644         if (rcu_is_watching()) {
2645                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2646                 return;
2647         }
2648
2649         /*
2650          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2651          * but if the above rcu_is_watching() failed, then the NMI
2652          * triggered someplace critical, and rcu_irq_enter() should
2653          * not be called from NMI.
2654          */
2655         if (unlikely(in_nmi()))
2656                 return;
2657
2658         /*
2659          * It is possible that a function is being traced in a
2660          * location that RCU is not watching. A call to
2661          * rcu_irq_enter() will make sure that it is, but there's
2662          * a few internal rcu functions that could be traced
2663          * where that wont work either. In those cases, we just
2664          * do nothing.
2665          */
2666         if (unlikely(rcu_irq_enter_disabled()))
2667                 return;
2668
2669         rcu_irq_enter_irqson();
2670         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2671         rcu_irq_exit_irqson();
2672 }
2673
2674 /**
2675  * trace_dump_stack - record a stack back trace in the trace buffer
2676  * @skip: Number of functions to skip (helper handlers)
2677  */
2678 void trace_dump_stack(int skip)
2679 {
2680         unsigned long flags;
2681
2682         if (tracing_disabled || tracing_selftest_running)
2683                 return;
2684
2685         local_save_flags(flags);
2686
2687         /*
2688          * Skip 3 more; that seems to get us to the caller of
2689          * this function.
2690          */
2691         skip += 3;
2692         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2693                              flags, skip, preempt_count(), NULL);
2694 }
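
/*
 * Illustrative usage: debug code can record the current stack into the
 * trace buffer with
 *
 *	trace_dump_stack(0);
 *
 * where a non-zero argument skips that many additional callers from the
 * top of the recorded trace.
 */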
2695
2696 static DEFINE_PER_CPU(int, user_stack_count);
2697
2698 void
2699 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2700 {
2701         struct trace_event_call *call = &event_user_stack;
2702         struct ring_buffer_event *event;
2703         struct userstack_entry *entry;
2704         struct stack_trace trace;
2705
2706         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2707                 return;
2708
2709         /*
2710          * NMIs cannot handle page faults, even with fixups.
2711          * Saving the user stack can (and often does) fault.
2712          */
2713         if (unlikely(in_nmi()))
2714                 return;
2715
2716         /*
2717          * prevent recursion, since the user stack tracing may
2718          * trigger other kernel events.
2719          */
2720         preempt_disable();
2721         if (__this_cpu_read(user_stack_count))
2722                 goto out;
2723
2724         __this_cpu_inc(user_stack_count);
2725
2726         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2727                                             sizeof(*entry), flags, pc);
2728         if (!event)
2729                 goto out_drop_count;
2730         entry   = ring_buffer_event_data(event);
2731
2732         entry->tgid             = current->tgid;
2733         memset(&entry->caller, 0, sizeof(entry->caller));
2734
2735         trace.nr_entries        = 0;
2736         trace.max_entries       = FTRACE_STACK_ENTRIES;
2737         trace.skip              = 0;
2738         trace.entries           = entry->caller;
2739
2740         save_stack_trace_user(&trace);
2741         if (!call_filter_check_discard(call, entry, buffer, event))
2742                 __buffer_unlock_commit(buffer, event);
2743
2744  out_drop_count:
2745         __this_cpu_dec(user_stack_count);
2746  out:
2747         preempt_enable();
2748 }
2749
2750 #ifdef UNUSED
2751 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2752 {
2753         ftrace_trace_userstack(tr, flags, preempt_count());
2754 }
2755 #endif /* UNUSED */
2756
2757 #endif /* CONFIG_STACKTRACE */
2758
2759 /* created for use with alloc_percpu */
2760 struct trace_buffer_struct {
2761         int nesting;
2762         char buffer[4][TRACE_BUF_SIZE];
2763 };
2764
2765 static struct trace_buffer_struct *trace_percpu_buffer;
2766
2767 /*
2768  * This allows for lockless recording.  If we're nested too deeply, then
2769  * this returns NULL.
2770  */
2771 static char *get_trace_buf(void)
2772 {
2773         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2774
2775         if (!buffer || buffer->nesting >= 4)
2776                 return NULL;
2777
2778         return &buffer->buffer[buffer->nesting++][0];
2779 }
2780
2781 static void put_trace_buf(void)
2782 {
2783         this_cpu_dec(trace_percpu_buffer->nesting);
2784 }
2785
2786 static int alloc_percpu_trace_buffer(void)
2787 {
2788         struct trace_buffer_struct *buffers;
2789
2790         buffers = alloc_percpu(struct trace_buffer_struct);
2791         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2792                 return -ENOMEM;
2793
2794         trace_percpu_buffer = buffers;
2795         return 0;
2796 }
2797
2798 static int buffers_allocated;
2799
2800 void trace_printk_init_buffers(void)
2801 {
2802         if (buffers_allocated)
2803                 return;
2804
2805         if (alloc_percpu_trace_buffer())
2806                 return;
2807
2808         /* trace_printk() is for debug use only. Don't use it in production. */
2809
2810         pr_warn("\n");
2811         pr_warn("**********************************************************\n");
2812         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2813         pr_warn("**                                                      **\n");
2814         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2815         pr_warn("**                                                      **\n");
2816         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2817         pr_warn("** unsafe for production use.                           **\n");
2818         pr_warn("**                                                      **\n");
2819         pr_warn("** If you see this message and you are not debugging    **\n");
2820         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2821         pr_warn("**                                                      **\n");
2822         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2823         pr_warn("**********************************************************\n");
2824
2825         /* Expand the buffers to set size */
2826         tracing_update_buffers();
2827
2828         buffers_allocated = 1;
2829
2830         /*
2831          * trace_printk_init_buffers() can be called by modules.
2832          * If that happens, then we need to start cmdline recording
2833          * directly here. If the global_trace.buffer is already
2834          * allocated here, then this was called by module code.
2835          */
2836         if (global_trace.trace_buffer.buffer)
2837                 tracing_start_cmdline_record();
2838 }
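
/*
 * Illustrative usage: once the buffers above are allocated, debug code can
 * write into the trace buffer with
 *
 *	trace_printk("reached %s, val=%d\n", __func__, val);
 *
 * where "val" is a hypothetical local variable. The output is read back
 * through the trace file in tracefs, not the console.
 */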
2839
2840 void trace_printk_start_comm(void)
2841 {
2842         /* Start tracing comms if trace printk is set */
2843         if (!buffers_allocated)
2844                 return;
2845         tracing_start_cmdline_record();
2846 }
2847
2848 static void trace_printk_start_stop_comm(int enabled)
2849 {
2850         if (!buffers_allocated)
2851                 return;
2852
2853         if (enabled)
2854                 tracing_start_cmdline_record();
2855         else
2856                 tracing_stop_cmdline_record();
2857 }
2858
2859 /**
2860  * trace_vbprintk - write a binary message to the tracing buffer
2861  *
2862  */
2863 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2864 {
2865         struct trace_event_call *call = &event_bprint;
2866         struct ring_buffer_event *event;
2867         struct ring_buffer *buffer;
2868         struct trace_array *tr = &global_trace;
2869         struct bprint_entry *entry;
2870         unsigned long flags;
2871         char *tbuffer;
2872         int len = 0, size, pc;
2873
2874         if (unlikely(tracing_selftest_running || tracing_disabled))
2875                 return 0;
2876
2877         /* Don't pollute graph traces with trace_vprintk internals */
2878         pause_graph_tracing();
2879
2880         pc = preempt_count();
2881         preempt_disable_notrace();
2882
2883         tbuffer = get_trace_buf();
2884         if (!tbuffer) {
2885                 len = 0;
2886                 goto out_nobuffer;
2887         }
2888
2889         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2890
2891         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2892                 goto out;
2893
2894         local_save_flags(flags);
2895         size = sizeof(*entry) + sizeof(u32) * len;
2896         buffer = tr->trace_buffer.buffer;
2897         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2898                                             flags, pc);
2899         if (!event)
2900                 goto out;
2901         entry = ring_buffer_event_data(event);
2902         entry->ip                       = ip;
2903         entry->fmt                      = fmt;
2904
2905         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2906         if (!call_filter_check_discard(call, entry, buffer, event)) {
2907                 __buffer_unlock_commit(buffer, event);
2908                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2909         }
2910
2911 out:
2912         put_trace_buf();
2913
2914 out_nobuffer:
2915         preempt_enable_notrace();
2916         unpause_graph_tracing();
2917
2918         return len;
2919 }
2920 EXPORT_SYMBOL_GPL(trace_vbprintk);
2921
2922 static int
2923 __trace_array_vprintk(struct ring_buffer *buffer,
2924                       unsigned long ip, const char *fmt, va_list args)
2925 {
2926         struct trace_event_call *call = &event_print;
2927         struct ring_buffer_event *event;
2928         int len = 0, size, pc;
2929         struct print_entry *entry;
2930         unsigned long flags;
2931         char *tbuffer;
2932
2933         if (tracing_disabled || tracing_selftest_running)
2934                 return 0;
2935
2936         /* Don't pollute graph traces with trace_vprintk internals */
2937         pause_graph_tracing();
2938
2939         pc = preempt_count();
2940         preempt_disable_notrace();
2941
2942
2943         tbuffer = get_trace_buf();
2944         if (!tbuffer) {
2945                 len = 0;
2946                 goto out_nobuffer;
2947         }
2948
2949         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2950
2951         local_save_flags(flags);
2952         size = sizeof(*entry) + len + 1;
2953         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2954                                             flags, pc);
2955         if (!event)
2956                 goto out;
2957         entry = ring_buffer_event_data(event);
2958         entry->ip = ip;
2959
2960         memcpy(&entry->buf, tbuffer, len + 1);
2961         if (!call_filter_check_discard(call, entry, buffer, event)) {
2962                 __buffer_unlock_commit(buffer, event);
2963                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2964         }
2965
2966 out:
2967         put_trace_buf();
2968
2969 out_nobuffer:
2970         preempt_enable_notrace();
2971         unpause_graph_tracing();
2972
2973         return len;
2974 }
2975
2976 int trace_array_vprintk(struct trace_array *tr,
2977                         unsigned long ip, const char *fmt, va_list args)
2978 {
2979         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2980 }
2981
2982 int trace_array_printk(struct trace_array *tr,
2983                        unsigned long ip, const char *fmt, ...)
2984 {
2985         int ret;
2986         va_list ap;
2987
2988         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2989                 return 0;
2990
2991         va_start(ap, fmt);
2992         ret = trace_array_vprintk(tr, ip, fmt, ap);
2993         va_end(ap);
2994         return ret;
2995 }
2996
2997 int trace_array_printk_buf(struct ring_buffer *buffer,
2998                            unsigned long ip, const char *fmt, ...)
2999 {
3000         int ret;
3001         va_list ap;
3002
3003         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3004                 return 0;
3005
3006         va_start(ap, fmt);
3007         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3008         va_end(ap);
3009         return ret;
3010 }
3011
3012 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3013 {
3014         return trace_array_vprintk(&global_trace, ip, fmt, args);
3015 }
3016 EXPORT_SYMBOL_GPL(trace_vprintk);
3017
3018 static void trace_iterator_increment(struct trace_iterator *iter)
3019 {
3020         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3021
3022         iter->idx++;
3023         if (buf_iter)
3024                 ring_buffer_read(buf_iter, NULL);
3025 }
3026
3027 static struct trace_entry *
3028 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3029                 unsigned long *lost_events)
3030 {
3031         struct ring_buffer_event *event;
3032         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3033
3034         if (buf_iter)
3035                 event = ring_buffer_iter_peek(buf_iter, ts);
3036         else
3037                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3038                                          lost_events);
3039
3040         if (event) {
3041                 iter->ent_size = ring_buffer_event_length(event);
3042                 return ring_buffer_event_data(event);
3043         }
3044         iter->ent_size = 0;
3045         return NULL;
3046 }
3047
3048 static struct trace_entry *
3049 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3050                   unsigned long *missing_events, u64 *ent_ts)
3051 {
3052         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3053         struct trace_entry *ent, *next = NULL;
3054         unsigned long lost_events = 0, next_lost = 0;
3055         int cpu_file = iter->cpu_file;
3056         u64 next_ts = 0, ts;
3057         int next_cpu = -1;
3058         int next_size = 0;
3059         int cpu;
3060
3061         /*
3062          * If we are in a per_cpu trace file, don't bother iterating over
3063          * all cpus; just peek at that cpu directly.
3064          */
3065         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3066                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3067                         return NULL;
3068                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3069                 if (ent_cpu)
3070                         *ent_cpu = cpu_file;
3071
3072                 return ent;
3073         }
3074
3075         for_each_tracing_cpu(cpu) {
3076
3077                 if (ring_buffer_empty_cpu(buffer, cpu))
3078                         continue;
3079
3080                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3081
3082                 /*
3083                  * Pick the entry with the smallest timestamp:
3084                  */
3085                 if (ent && (!next || ts < next_ts)) {
3086                         next = ent;
3087                         next_cpu = cpu;
3088                         next_ts = ts;
3089                         next_lost = lost_events;
3090                         next_size = iter->ent_size;
3091                 }
3092         }
3093
3094         iter->ent_size = next_size;
3095
3096         if (ent_cpu)
3097                 *ent_cpu = next_cpu;
3098
3099         if (ent_ts)
3100                 *ent_ts = next_ts;
3101
3102         if (missing_events)
3103                 *missing_events = next_lost;
3104
3105         return next;
3106 }
3107
3108 /* Find the next real entry, without updating the iterator itself */
3109 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3110                                           int *ent_cpu, u64 *ent_ts)
3111 {
3112         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3113 }
3114
3115 /* Find the next real entry, and increment the iterator to the next entry */
3116 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3117 {
3118         iter->ent = __find_next_entry(iter, &iter->cpu,
3119                                       &iter->lost_events, &iter->ts);
3120
3121         if (iter->ent)
3122                 trace_iterator_increment(iter);
3123
3124         return iter->ent ? iter : NULL;
3125 }
3126
3127 static void trace_consume(struct trace_iterator *iter)
3128 {
3129         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3130                             &iter->lost_events);
3131 }
3132
3133 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3134 {
3135         struct trace_iterator *iter = m->private;
3136         int i = (int)*pos;
3137         void *ent;
3138
3139         WARN_ON_ONCE(iter->leftover);
3140
3141         (*pos)++;
3142
3143         /* can't go backwards */
3144         if (iter->idx > i)
3145                 return NULL;
3146
3147         if (iter->idx < 0)
3148                 ent = trace_find_next_entry_inc(iter);
3149         else
3150                 ent = iter;
3151
3152         while (ent && iter->idx < i)
3153                 ent = trace_find_next_entry_inc(iter);
3154
3155         iter->pos = *pos;
3156
3157         return ent;
3158 }
3159
3160 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3161 {
3162         struct ring_buffer_event *event;
3163         struct ring_buffer_iter *buf_iter;
3164         unsigned long entries = 0;
3165         u64 ts;
3166
3167         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3168
3169         buf_iter = trace_buffer_iter(iter, cpu);
3170         if (!buf_iter)
3171                 return;
3172
3173         ring_buffer_iter_reset(buf_iter);
3174
3175         /*
3176          * We could have the case with the max latency tracers
3177          * that a reset never took place on a cpu. This is evident
3178          * by the timestamp being before the start of the buffer.
3179          */
3180         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3181                 if (ts >= iter->trace_buffer->time_start)
3182                         break;
3183                 entries++;
3184                 ring_buffer_read(buf_iter, NULL);
3185         }
3186
3187         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3188 }
3189
3190 /*
3191  * The current tracer is copied to avoid taking a global lock
3192  * all around.
3193  */
3194 static void *s_start(struct seq_file *m, loff_t *pos)
3195 {
3196         struct trace_iterator *iter = m->private;
3197         struct trace_array *tr = iter->tr;
3198         int cpu_file = iter->cpu_file;
3199         void *p = NULL;
3200         loff_t l = 0;
3201         int cpu;
3202
3203         /*
3204          * copy the tracer to avoid using a global lock all around.
3205          * iter->trace is a copy of current_trace, the pointer to the
3206          * name may be used instead of a strcmp(), as iter->trace->name
3207          * will point to the same string as current_trace->name.
3208          */
3209         mutex_lock(&trace_types_lock);
3210         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3211                 *iter->trace = *tr->current_trace;
3212         mutex_unlock(&trace_types_lock);
3213
3214 #ifdef CONFIG_TRACER_MAX_TRACE
3215         if (iter->snapshot && iter->trace->use_max_tr)
3216                 return ERR_PTR(-EBUSY);
3217 #endif
3218
3219         if (!iter->snapshot)
3220                 atomic_inc(&trace_record_taskinfo_disabled);
3221
3222         if (*pos != iter->pos) {
3223                 iter->ent = NULL;
3224                 iter->cpu = 0;
3225                 iter->idx = -1;
3226
3227                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3228                         for_each_tracing_cpu(cpu)
3229                                 tracing_iter_reset(iter, cpu);
3230                 } else
3231                         tracing_iter_reset(iter, cpu_file);
3232
3233                 iter->leftover = 0;
3234                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3235                         ;
3236
3237         } else {
3238                 /*
3239                  * If we overflowed the seq_file before, then we want
3240                  * to just reuse the trace_seq buffer again.
3241                  */
3242                 if (iter->leftover)
3243                         p = iter;
3244                 else {
3245                         l = *pos - 1;
3246                         p = s_next(m, p, &l);
3247                 }
3248         }
3249
3250         trace_event_read_lock();
3251         trace_access_lock(cpu_file);
3252         return p;
3253 }
3254
3255 static void s_stop(struct seq_file *m, void *p)
3256 {
3257         struct trace_iterator *iter = m->private;
3258
3259 #ifdef CONFIG_TRACER_MAX_TRACE
3260         if (iter->snapshot && iter->trace->use_max_tr)
3261                 return;
3262 #endif
3263
3264         if (!iter->snapshot)
3265                 atomic_dec(&trace_record_taskinfo_disabled);
3266
3267         trace_access_unlock(iter->cpu_file);
3268         trace_event_read_unlock();
3269 }
3270
3271 static void
3272 get_total_entries(struct trace_buffer *buf,
3273                   unsigned long *total, unsigned long *entries)
3274 {
3275         unsigned long count;
3276         int cpu;
3277
3278         *total = 0;
3279         *entries = 0;
3280
3281         for_each_tracing_cpu(cpu) {
3282                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3283                 /*
3284                  * If this buffer has skipped entries, then we hold all
3285                  * entries for the trace and we need to ignore the
3286                  * ones before the time stamp.
3287                  */
3288                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3289                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3290                         /* total is the same as the entries */
3291                         *total += count;
3292                 } else
3293                         *total += count +
3294                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3295                 *entries += count;
3296         }
3297 }
3298
3299 static void print_lat_help_header(struct seq_file *m)
3300 {
3301         seq_puts(m, "#                  _------=> CPU#            \n"
3302                     "#                 / _-----=> irqs-off        \n"
3303                     "#                | / _----=> need-resched    \n"
3304                     "#                || / _---=> hardirq/softirq \n"
3305                     "#                ||| / _--=> preempt-depth   \n"
3306                     "#                |||| /     delay            \n"
3307                     "#  cmd     pid   ||||| time  |   caller      \n"
3308                     "#     \\   /      |||||  \\    |   /         \n");
3309 }
3310
3311 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3312 {
3313         unsigned long total;
3314         unsigned long entries;
3315
3316         get_total_entries(buf, &total, &entries);
3317         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3318                    entries, total, num_online_cpus());
3319         seq_puts(m, "#\n");
3320 }
3321
3322 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3323                                    unsigned int flags)
3324 {
3325         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3326
3327         print_event_info(buf, m);
3328
3329         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3330         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3331 }
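/*
 * For illustration only (spacing is approximate and the numbers are made
 * up), the two helpers above produce a header such as:
 *
 *   # entries-in-buffer/entries-written: 205/205   #P:4
 *   #
 *   #           TASK-PID   CPU#     TIMESTAMP  FUNCTION
 *   #              | |       |          |         |
 *
 * With TRACE_ITER_RECORD_TGID set, an extra TGID column is inserted
 * between CPU# and TIMESTAMP.
 */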
3332
3333 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3334                                        unsigned int flags)
3335 {
3336         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3337
3338         seq_printf(m, "#                          %s  _-----=> irqs-off\n",         tgid ? "          " : "");
3339         seq_printf(m, "#                          %s / _----=> need-resched\n",     tgid ? "          " : "");
3340         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",  tgid ? "          " : "");
3341         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",    tgid ? "          " : "");
3342         seq_printf(m, "#                          %s||| /     delay\n",             tgid ? "          " : "");
3343         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3344         seq_printf(m, "#              | |       | %s||||       |         |\n",      tgid ? "     |    " : "");
3345 }
3346
3347 void
3348 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3349 {
3350         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3351         struct trace_buffer *buf = iter->trace_buffer;
3352         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3353         struct tracer *type = iter->trace;
3354         unsigned long entries;
3355         unsigned long total;
3356         const char *name = "preemption";
3357
3358         name = type->name;
3359
3360         get_total_entries(buf, &total, &entries);
3361
3362         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3363                    name, UTS_RELEASE);
3364         seq_puts(m, "# -----------------------------------"
3365                  "---------------------------------\n");
3366         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3367                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3368                    nsecs_to_usecs(data->saved_latency),
3369                    entries,
3370                    total,
3371                    buf->cpu,
3372 #if defined(CONFIG_PREEMPT_NONE)
3373                    "server",
3374 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3375                    "desktop",
3376 #elif defined(CONFIG_PREEMPT)
3377                    "preempt",
3378 #else
3379                    "unknown",
3380 #endif
3381                    /* These are reserved for later use */
3382                    0, 0, 0, 0);
3383 #ifdef CONFIG_SMP
3384         seq_printf(m, " #P:%d)\n", num_online_cpus());
3385 #else
3386         seq_puts(m, ")\n");
3387 #endif
3388         seq_puts(m, "#    -----------------\n");
3389         seq_printf(m, "#    | task: %.16s-%d "
3390                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3391                    data->comm, data->pid,
3392                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3393                    data->policy, data->rt_priority);
3394         seq_puts(m, "#    -----------------\n");
3395
3396         if (data->critical_start) {
3397                 seq_puts(m, "#  => started at: ");
3398                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3399                 trace_print_seq(m, &iter->seq);
3400                 seq_puts(m, "\n#  => ended at:   ");
3401                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3402                 trace_print_seq(m, &iter->seq);
3403                 seq_puts(m, "\n#\n");
3404         }
3405
3406         seq_puts(m, "#\n");
3407 }
3408
3409 static void test_cpu_buff_start(struct trace_iterator *iter)
3410 {
3411         struct trace_seq *s = &iter->seq;
3412         struct trace_array *tr = iter->tr;
3413
3414         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3415                 return;
3416
3417         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3418                 return;
3419
3420         if (cpumask_available(iter->started) &&
3421             cpumask_test_cpu(iter->cpu, iter->started))
3422                 return;
3423
3424         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3425                 return;
3426
3427         if (cpumask_available(iter->started))
3428                 cpumask_set_cpu(iter->cpu, iter->started);
3429
3430         /* Don't print started cpu buffer for the first entry of the trace */
3431         if (iter->idx > 1)
3432                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3433                                 iter->cpu);
3434 }
3435
3436 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3437 {
3438         struct trace_array *tr = iter->tr;
3439         struct trace_seq *s = &iter->seq;
3440         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3441         struct trace_entry *entry;
3442         struct trace_event *event;
3443
3444         entry = iter->ent;
3445
3446         test_cpu_buff_start(iter);
3447
3448         event = ftrace_find_event(entry->type);
3449
3450         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3451                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3452                         trace_print_lat_context(iter);
3453                 else
3454                         trace_print_context(iter);
3455         }
3456
3457         if (trace_seq_has_overflowed(s))
3458                 return TRACE_TYPE_PARTIAL_LINE;
3459
3460         if (event)
3461                 return event->funcs->trace(iter, sym_flags, event);
3462
3463         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3464
3465         return trace_handle_return(s);
3466 }
3467
3468 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3469 {
3470         struct trace_array *tr = iter->tr;
3471         struct trace_seq *s = &iter->seq;
3472         struct trace_entry *entry;
3473         struct trace_event *event;
3474
3475         entry = iter->ent;
3476
3477         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3478                 trace_seq_printf(s, "%d %d %llu ",
3479                                  entry->pid, iter->cpu, iter->ts);
3480
3481         if (trace_seq_has_overflowed(s))
3482                 return TRACE_TYPE_PARTIAL_LINE;
3483
3484         event = ftrace_find_event(entry->type);
3485         if (event)
3486                 return event->funcs->raw(iter, 0, event);
3487
3488         trace_seq_printf(s, "%d ?\n", entry->type);
3489
3490         return trace_handle_return(s);
3491 }
3492
3493 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3494 {
3495         struct trace_array *tr = iter->tr;
3496         struct trace_seq *s = &iter->seq;
3497         unsigned char newline = '\n';
3498         struct trace_entry *entry;
3499         struct trace_event *event;
3500
3501         entry = iter->ent;
3502
3503         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3504                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3505                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3506                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3507                 if (trace_seq_has_overflowed(s))
3508                         return TRACE_TYPE_PARTIAL_LINE;
3509         }
3510
3511         event = ftrace_find_event(entry->type);
3512         if (event) {
3513                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3514                 if (ret != TRACE_TYPE_HANDLED)
3515                         return ret;
3516         }
3517
3518         SEQ_PUT_FIELD(s, newline);
3519
3520         return trace_handle_return(s);
3521 }
3522
3523 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3524 {
3525         struct trace_array *tr = iter->tr;
3526         struct trace_seq *s = &iter->seq;
3527         struct trace_entry *entry;
3528         struct trace_event *event;
3529
3530         entry = iter->ent;
3531
3532         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3533                 SEQ_PUT_FIELD(s, entry->pid);
3534                 SEQ_PUT_FIELD(s, iter->cpu);
3535                 SEQ_PUT_FIELD(s, iter->ts);
3536                 if (trace_seq_has_overflowed(s))
3537                         return TRACE_TYPE_PARTIAL_LINE;
3538         }
3539
3540         event = ftrace_find_event(entry->type);
3541         return event ? event->funcs->binary(iter, 0, event) :
3542                 TRACE_TYPE_HANDLED;
3543 }
3544
3545 int trace_empty(struct trace_iterator *iter)
3546 {
3547         struct ring_buffer_iter *buf_iter;
3548         int cpu;
3549
3550         /* If we are looking at one CPU buffer, only check that one */
3551         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3552                 cpu = iter->cpu_file;
3553                 buf_iter = trace_buffer_iter(iter, cpu);
3554                 if (buf_iter) {
3555                         if (!ring_buffer_iter_empty(buf_iter))
3556                                 return 0;
3557                 } else {
3558                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3559                                 return 0;
3560                 }
3561                 return 1;
3562         }
3563
3564         for_each_tracing_cpu(cpu) {
3565                 buf_iter = trace_buffer_iter(iter, cpu);
3566                 if (buf_iter) {
3567                         if (!ring_buffer_iter_empty(buf_iter))
3568                                 return 0;
3569                 } else {
3570                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3571                                 return 0;
3572                 }
3573         }
3574
3575         return 1;
3576 }
3577
3578 /*  Called with trace_event_read_lock() held. */
3579 enum print_line_t print_trace_line(struct trace_iterator *iter)
3580 {
3581         struct trace_array *tr = iter->tr;
3582         unsigned long trace_flags = tr->trace_flags;
3583         enum print_line_t ret;
3584
3585         if (iter->lost_events) {
3586                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3587                                  iter->cpu, iter->lost_events);
3588                 if (trace_seq_has_overflowed(&iter->seq))
3589                         return TRACE_TYPE_PARTIAL_LINE;
3590         }
3591
3592         if (iter->trace && iter->trace->print_line) {
3593                 ret = iter->trace->print_line(iter);
3594                 if (ret != TRACE_TYPE_UNHANDLED)
3595                         return ret;
3596         }
3597
3598         if (iter->ent->type == TRACE_BPUTS &&
3599                         trace_flags & TRACE_ITER_PRINTK &&
3600                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3601                 return trace_print_bputs_msg_only(iter);
3602
3603         if (iter->ent->type == TRACE_BPRINT &&
3604                         trace_flags & TRACE_ITER_PRINTK &&
3605                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3606                 return trace_print_bprintk_msg_only(iter);
3607
3608         if (iter->ent->type == TRACE_PRINT &&
3609                         trace_flags & TRACE_ITER_PRINTK &&
3610                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3611                 return trace_print_printk_msg_only(iter);
3612
3613         if (trace_flags & TRACE_ITER_BIN)
3614                 return print_bin_fmt(iter);
3615
3616         if (trace_flags & TRACE_ITER_HEX)
3617                 return print_hex_fmt(iter);
3618
3619         if (trace_flags & TRACE_ITER_RAW)
3620                 return print_raw_fmt(iter);
3621
3622         return print_trace_fmt(iter);
3623 }
3624
3625 void trace_latency_header(struct seq_file *m)
3626 {
3627         struct trace_iterator *iter = m->private;
3628         struct trace_array *tr = iter->tr;
3629
3630         /* print nothing if the buffers are empty */
3631         if (trace_empty(iter))
3632                 return;
3633
3634         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3635                 print_trace_header(m, iter);
3636
3637         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3638                 print_lat_help_header(m);
3639 }
3640
3641 void trace_default_header(struct seq_file *m)
3642 {
3643         struct trace_iterator *iter = m->private;
3644         struct trace_array *tr = iter->tr;
3645         unsigned long trace_flags = tr->trace_flags;
3646
3647         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3648                 return;
3649
3650         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3651                 /* print nothing if the buffers are empty */
3652                 if (trace_empty(iter))
3653                         return;
3654                 print_trace_header(m, iter);
3655                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3656                         print_lat_help_header(m);
3657         } else {
3658                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3659                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3660                                 print_func_help_header_irq(iter->trace_buffer,
3661                                                            m, trace_flags);
3662                         else
3663                                 print_func_help_header(iter->trace_buffer, m,
3664                                                        trace_flags);
3665                 }
3666         }
3667 }
3668
3669 static void test_ftrace_alive(struct seq_file *m)
3670 {
3671         if (!ftrace_is_dead())
3672                 return;
3673         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3674                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3675 }
3676
3677 #ifdef CONFIG_TRACER_MAX_TRACE
3678 static void show_snapshot_main_help(struct seq_file *m)
3679 {
3680         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3681                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3682                     "#                      Takes a snapshot of the main buffer.\n"
3683                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3684                     "#                      (Doesn't have to be '2'; works with any number that\n"
3685                     "#                       is not a '0' or '1')\n");
3686 }
3687
3688 static void show_snapshot_percpu_help(struct seq_file *m)
3689 {
3690         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3691 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3692         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3693                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3694 #else
3695         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3696                     "#                     Must use main snapshot file to allocate.\n");
3697 #endif
3698         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3699                     "#                      (Doesn't have to be '2'; works with any number that\n"
3700                     "#                       is not a '0' or '1')\n");
3701 }
3702
3703 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3704 {
3705         if (iter->tr->allocated_snapshot)
3706                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3707         else
3708                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3709
3710         seq_puts(m, "# Snapshot commands:\n");
3711         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3712                 show_snapshot_main_help(m);
3713         else
3714                 show_snapshot_percpu_help(m);
3715 }
3716 #else
3717 /* Should never be called */
3718 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3719 #endif
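/*
 * Illustrative walk-through of the commands listed in the help text above
 * (paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 1 > snapshot    # allocate if needed and take a snapshot of the
 *                        # main buffer
 *   cat snapshot         # read the frozen copy while tracing continues
 *   echo 2 > snapshot    # clear the snapshot buffer, keep the allocation
 *   echo 0 > snapshot    # free the snapshot buffer again
 */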
3720
3721 static int s_show(struct seq_file *m, void *v)
3722 {
3723         struct trace_iterator *iter = v;
3724         int ret;
3725
3726         if (iter->ent == NULL) {
3727                 if (iter->tr) {
3728                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3729                         seq_puts(m, "#\n");
3730                         test_ftrace_alive(m);
3731                 }
3732                 if (iter->snapshot && trace_empty(iter))
3733                         print_snapshot_help(m, iter);
3734                 else if (iter->trace && iter->trace->print_header)
3735                         iter->trace->print_header(m);
3736                 else
3737                         trace_default_header(m);
3738
3739         } else if (iter->leftover) {
3740                 /*
3741                  * If we filled the seq_file buffer earlier, we
3742                  * want to just show it now.
3743                  */
3744                 ret = trace_print_seq(m, &iter->seq);
3745
3746                 /* ret should this time be zero, but you never know */
3747                 iter->leftover = ret;
3748
3749         } else {
3750                 print_trace_line(iter);
3751                 ret = trace_print_seq(m, &iter->seq);
3752                 /*
3753                  * If we overflow the seq_file buffer, then it will
3754                  * ask us for this data again at start up.
3755                  * Use that instead.
3756                  *  ret is 0 if seq_file write succeeded.
3757                  *        -1 otherwise.
3758                  */
3759                 iter->leftover = ret;
3760         }
3761
3762         return 0;
3763 }
3764
3765 /*
3766  * Should be used after trace_array_get(); trace_types_lock
3767  * ensures that i_cdev was already initialized.
3768  */
3769 static inline int tracing_get_cpu(struct inode *inode)
3770 {
3771         if (inode->i_cdev) /* See trace_create_cpu_file() */
3772                 return (long)inode->i_cdev - 1;
3773         return RING_BUFFER_ALL_CPUS;
3774 }
3775
3776 static const struct seq_operations tracer_seq_ops = {
3777         .start          = s_start,
3778         .next           = s_next,
3779         .stop           = s_stop,
3780         .show           = s_show,
3781 };
3782
3783 static struct trace_iterator *
3784 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3785 {
3786         struct trace_array *tr = inode->i_private;
3787         struct trace_iterator *iter;
3788         int cpu;
3789
3790         if (tracing_disabled)
3791                 return ERR_PTR(-ENODEV);
3792
3793         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3794         if (!iter)
3795                 return ERR_PTR(-ENOMEM);
3796
3797         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3798                                     GFP_KERNEL);
3799         if (!iter->buffer_iter)
3800                 goto release;
3801
3802         /*
3803          * We make a copy of the current tracer to avoid concurrent
3804          * changes to it while we are reading.
3805          */
3806         mutex_lock(&trace_types_lock);
3807         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3808         if (!iter->trace)
3809                 goto fail;
3810
3811         *iter->trace = *tr->current_trace;
3812
3813         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3814                 goto fail;
3815
3816         iter->tr = tr;
3817
3818 #ifdef CONFIG_TRACER_MAX_TRACE
3819         /* Currently only the top directory has a snapshot */
3820         if (tr->current_trace->print_max || snapshot)
3821                 iter->trace_buffer = &tr->max_buffer;
3822         else
3823 #endif
3824                 iter->trace_buffer = &tr->trace_buffer;
3825         iter->snapshot = snapshot;
3826         iter->pos = -1;
3827         iter->cpu_file = tracing_get_cpu(inode);
3828         mutex_init(&iter->mutex);
3829
3830         /* Notify the tracer early; before we stop tracing. */
3831         if (iter->trace && iter->trace->open)
3832                 iter->trace->open(iter);
3833
3834         /* Annotate start of buffers if we had overruns */
3835         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3836                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3837
3838         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3839         if (trace_clocks[tr->clock_id].in_ns)
3840                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3841
3842         /* stop the trace while dumping if we are not opening "snapshot" */
3843         if (!iter->snapshot)
3844                 tracing_stop_tr(tr);
3845
3846         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3847                 for_each_tracing_cpu(cpu) {
3848                         iter->buffer_iter[cpu] =
3849                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3850                 }
3851                 ring_buffer_read_prepare_sync();
3852                 for_each_tracing_cpu(cpu) {
3853                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3854                         tracing_iter_reset(iter, cpu);
3855                 }
3856         } else {
3857                 cpu = iter->cpu_file;
3858                 iter->buffer_iter[cpu] =
3859                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3860                 ring_buffer_read_prepare_sync();
3861                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3862                 tracing_iter_reset(iter, cpu);
3863         }
3864
3865         mutex_unlock(&trace_types_lock);
3866
3867         return iter;
3868
3869  fail:
3870         mutex_unlock(&trace_types_lock);
3871         kfree(iter->trace);
3872         kfree(iter->buffer_iter);
3873 release:
3874         seq_release_private(inode, file);
3875         return ERR_PTR(-ENOMEM);
3876 }
3877
3878 int tracing_open_generic(struct inode *inode, struct file *filp)
3879 {
3880         if (tracing_disabled)
3881                 return -ENODEV;
3882
3883         filp->private_data = inode->i_private;
3884         return 0;
3885 }
3886
3887 bool tracing_is_disabled(void)
3888 {
3889         return tracing_disabled ? true : false;
3890 }
3891
3892 /*
3893  * Open and update trace_array ref count.
3894  * Must have the current trace_array passed to it.
3895  */
3896 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3897 {
3898         struct trace_array *tr = inode->i_private;
3899
3900         if (tracing_disabled)
3901                 return -ENODEV;
3902
3903         if (trace_array_get(tr) < 0)
3904                 return -ENODEV;
3905
3906         filp->private_data = inode->i_private;
3907
3908         return 0;
3909 }
3910
3911 static int tracing_release(struct inode *inode, struct file *file)
3912 {
3913         struct trace_array *tr = inode->i_private;
3914         struct seq_file *m = file->private_data;
3915         struct trace_iterator *iter;
3916         int cpu;
3917
3918         if (!(file->f_mode & FMODE_READ)) {
3919                 trace_array_put(tr);
3920                 return 0;
3921         }
3922
3923         /* Writes do not use seq_file */
3924         iter = m->private;
3925         mutex_lock(&trace_types_lock);
3926
3927         for_each_tracing_cpu(cpu) {
3928                 if (iter->buffer_iter[cpu])
3929                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3930         }
3931
3932         if (iter->trace && iter->trace->close)
3933                 iter->trace->close(iter);
3934
3935         if (!iter->snapshot)
3936                 /* reenable tracing if it was previously enabled */
3937                 tracing_start_tr(tr);
3938
3939         __trace_array_put(tr);
3940
3941         mutex_unlock(&trace_types_lock);
3942
3943         mutex_destroy(&iter->mutex);
3944         free_cpumask_var(iter->started);
3945         kfree(iter->trace);
3946         kfree(iter->buffer_iter);
3947         seq_release_private(inode, file);
3948
3949         return 0;
3950 }
3951
3952 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3953 {
3954         struct trace_array *tr = inode->i_private;
3955
3956         trace_array_put(tr);
3957         return 0;
3958 }
3959
3960 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3961 {
3962         struct trace_array *tr = inode->i_private;
3963
3964         trace_array_put(tr);
3965
3966         return single_release(inode, file);
3967 }
3968
3969 static int tracing_open(struct inode *inode, struct file *file)
3970 {
3971         struct trace_array *tr = inode->i_private;
3972         struct trace_iterator *iter;
3973         int ret = 0;
3974
3975         if (trace_array_get(tr) < 0)
3976                 return -ENODEV;
3977
3978         /* If this file was open for write, then erase contents */
3979         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3980                 int cpu = tracing_get_cpu(inode);
3981
3982                 if (cpu == RING_BUFFER_ALL_CPUS)
3983                         tracing_reset_online_cpus(&tr->trace_buffer);
3984                 else
3985                         tracing_reset(&tr->trace_buffer, cpu);
3986         }
3987
3988         if (file->f_mode & FMODE_READ) {
3989                 iter = __tracing_open(inode, file, false);
3990                 if (IS_ERR(iter))
3991                         ret = PTR_ERR(iter);
3992                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3993                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3994         }
3995
3996         if (ret < 0)
3997                 trace_array_put(tr);
3998
3999         return ret;
4000 }
4001
4002 /*
4003  * Some tracers are not suitable for instance buffers.
4004  * A tracer is always available for the global array (toplevel)
4005  * or if it explicitly states that it is.
4006  */
4007 static bool
4008 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4009 {
4010         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4011 }
4012
4013 /* Find the next tracer that this trace array may use */
4014 static struct tracer *
4015 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4016 {
4017         while (t && !trace_ok_for_array(t, tr))
4018                 t = t->next;
4019
4020         return t;
4021 }
4022
4023 static void *
4024 t_next(struct seq_file *m, void *v, loff_t *pos)
4025 {
4026         struct trace_array *tr = m->private;
4027         struct tracer *t = v;
4028
4029         (*pos)++;
4030
4031         if (t)
4032                 t = get_tracer_for_array(tr, t->next);
4033
4034         return t;
4035 }
4036
4037 static void *t_start(struct seq_file *m, loff_t *pos)
4038 {
4039         struct trace_array *tr = m->private;
4040         struct tracer *t;
4041         loff_t l = 0;
4042
4043         mutex_lock(&trace_types_lock);
4044
4045         t = get_tracer_for_array(tr, trace_types);
4046         for (; t && l < *pos; t = t_next(m, t, &l))
4047                         ;
4048                 ;
4049         return t;
4050 }
4051
4052 static void t_stop(struct seq_file *m, void *p)
4053 {
4054         mutex_unlock(&trace_types_lock);
4055 }
4056
4057 static int t_show(struct seq_file *m, void *v)
4058 {
4059         struct tracer *t = v;
4060
4061         if (!t)
4062                 return 0;
4063
4064         seq_puts(m, t->name);
4065         if (t->next)
4066                 seq_putc(m, ' ');
4067         else
4068                 seq_putc(m, '\n');
4069
4070         return 0;
4071 }
4072
4073 static const struct seq_operations show_traces_seq_ops = {
4074         .start          = t_start,
4075         .next           = t_next,
4076         .stop           = t_stop,
4077         .show           = t_show,
4078 };
4079
4080 static int show_traces_open(struct inode *inode, struct file *file)
4081 {
4082         struct trace_array *tr = inode->i_private;
4083         struct seq_file *m;
4084         int ret;
4085
4086         if (tracing_disabled)
4087                 return -ENODEV;
4088
4089         ret = seq_open(file, &show_traces_seq_ops);
4090         if (ret)
4091                 return ret;
4092
4093         m = file->private_data;
4094         m->private = tr;
4095
4096         return 0;
4097 }
4098
4099 static ssize_t
4100 tracing_write_stub(struct file *filp, const char __user *ubuf,
4101                    size_t count, loff_t *ppos)
4102 {
4103         return count;
4104 }
4105
4106 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4107 {
4108         int ret;
4109
4110         if (file->f_mode & FMODE_READ)
4111                 ret = seq_lseek(file, offset, whence);
4112         else
4113                 file->f_pos = ret = 0;
4114
4115         return ret;
4116 }
4117
4118 static const struct file_operations tracing_fops = {
4119         .open           = tracing_open,
4120         .read           = seq_read,
4121         .write          = tracing_write_stub,
4122         .llseek         = tracing_lseek,
4123         .release        = tracing_release,
4124 };
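/*
 * For illustration, the file_operations above back the per-instance
 * "trace" file, which is normally driven from a shell:
 *
 *   cat trace      # static, non-consuming view of the buffer contents
 *   echo > trace   # the O_TRUNC open from the redirect clears the buffer
 *                  # (see the FMODE_WRITE/O_TRUNC check in tracing_open())
 *
 * The written bytes themselves are swallowed by tracing_write_stub(), so
 * only the truncate has an effect.
 */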
4125
4126 static const struct file_operations show_traces_fops = {
4127         .open           = show_traces_open,
4128         .read           = seq_read,
4129         .release        = seq_release,
4130         .llseek         = seq_lseek,
4131 };
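/*
 * Example output of the "available_tracers" file wired up above; the
 * actual list depends on which tracers the kernel was built with:
 *
 *   cat available_tracers
 *   blk function_graph function nop
 *
 * t_show() separates the names with spaces and terminates the list with a
 * single newline.
 */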
4132
4133 /*
4134  * The tracer itself will not take this lock, but we still want
4135  * to provide a consistent cpumask to user-space:
4136  */
4137 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4138
4139 /*
4140  * Temporary storage for the character representation of the
4141  * CPU bitmask (and one more byte for the newline):
4142  */
4143 static char mask_str[NR_CPUS + 1];
4144
4145 static ssize_t
4146 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4147                      size_t count, loff_t *ppos)
4148 {
4149         struct trace_array *tr = file_inode(filp)->i_private;
4150         int len;
4151
4152         mutex_lock(&tracing_cpumask_update_lock);
4153
4154         len = snprintf(mask_str, count, "%*pb\n",
4155                        cpumask_pr_args(tr->tracing_cpumask));
4156         if (len >= count) {
4157                 count = -EINVAL;
4158                 goto out_err;
4159         }
4160         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4161
4162 out_err:
4163         mutex_unlock(&tracing_cpumask_update_lock);
4164
4165         return count;
4166 }
4167
4168 static ssize_t
4169 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4170                       size_t count, loff_t *ppos)
4171 {
4172         struct trace_array *tr = file_inode(filp)->i_private;
4173         cpumask_var_t tracing_cpumask_new;
4174         int err, cpu;
4175
4176         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4177                 return -ENOMEM;
4178
4179         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4180         if (err)
4181                 goto err_unlock;
4182
4183         mutex_lock(&tracing_cpumask_update_lock);
4184
4185         local_irq_disable();
4186         arch_spin_lock(&tr->max_lock);
4187         for_each_tracing_cpu(cpu) {
4188                 /*
4189                  * Increase/decrease the disabled counter if we are
4190                  * about to flip a bit in the cpumask:
4191                  */
4192                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4193                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4194                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4195                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4196                 }
4197                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4198                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4199                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4200                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4201                 }
4202         }
4203         arch_spin_unlock(&tr->max_lock);
4204         local_irq_enable();
4205
4206         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4207
4208         mutex_unlock(&tracing_cpumask_update_lock);
4209         free_cpumask_var(tracing_cpumask_new);
4210
4211         return count;
4212
4213 err_unlock:
4214         free_cpumask_var(tracing_cpumask_new);
4215
4216         return err;
4217 }
4218
4219 static const struct file_operations tracing_cpumask_fops = {
4220         .open           = tracing_open_generic_tr,
4221         .read           = tracing_cpumask_read,
4222         .write          = tracing_cpumask_write,
4223         .release        = tracing_release_generic_tr,
4224         .llseek         = generic_file_llseek,
4225 };
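/*
 * Illustrative use of "tracing_cpumask"; the value is a hex CPU mask as
 * accepted by cpumask_parse_user():
 *
 *   cat tracing_cpumask        # e.g. "f" on a 4-CPU box: all CPUs traced
 *   echo 3 > tracing_cpumask   # trace only CPUs 0 and 1
 *
 * Bits that are cleared have recording disabled on their per-cpu ring
 * buffer via ring_buffer_record_disable_cpu() above.
 */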
4226
4227 static int tracing_trace_options_show(struct seq_file *m, void *v)
4228 {
4229         struct tracer_opt *trace_opts;
4230         struct trace_array *tr = m->private;
4231         u32 tracer_flags;
4232         int i;
4233
4234         mutex_lock(&trace_types_lock);
4235         tracer_flags = tr->current_trace->flags->val;
4236         trace_opts = tr->current_trace->flags->opts;
4237
4238         for (i = 0; trace_options[i]; i++) {
4239                 if (tr->trace_flags & (1 << i))
4240                         seq_printf(m, "%s\n", trace_options[i]);
4241                 else
4242                         seq_printf(m, "no%s\n", trace_options[i]);
4243         }
4244
4245         for (i = 0; trace_opts[i].name; i++) {
4246                 if (tracer_flags & trace_opts[i].bit)
4247                         seq_printf(m, "%s\n", trace_opts[i].name);
4248                 else
4249                         seq_printf(m, "no%s\n", trace_opts[i].name);
4250         }
4251         mutex_unlock(&trace_types_lock);
4252
4253         return 0;
4254 }
4255
4256 static int __set_tracer_option(struct trace_array *tr,
4257                                struct tracer_flags *tracer_flags,
4258                                struct tracer_opt *opts, int neg)
4259 {
4260         struct tracer *trace = tracer_flags->trace;
4261         int ret;
4262
4263         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4264         if (ret)
4265                 return ret;
4266
4267         if (neg)
4268                 tracer_flags->val &= ~opts->bit;
4269         else
4270                 tracer_flags->val |= opts->bit;
4271         return 0;
4272 }
4273
4274 /* Try to assign a tracer specific option */
4275 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4276 {
4277         struct tracer *trace = tr->current_trace;
4278         struct tracer_flags *tracer_flags = trace->flags;
4279         struct tracer_opt *opts = NULL;
4280         int i;
4281
4282         for (i = 0; tracer_flags->opts[i].name; i++) {
4283                 opts = &tracer_flags->opts[i];
4284
4285                 if (strcmp(cmp, opts->name) == 0)
4286                         return __set_tracer_option(tr, trace->flags, opts, neg);
4287         }
4288
4289         return -EINVAL;
4290 }
4291
4292 /* Some tracers require overwrite to stay enabled */
4293 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4294 {
4295         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4296                 return -1;
4297
4298         return 0;
4299 }
4300
4301 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4302 {
4303         /* do nothing if flag is already set */
4304         if (!!(tr->trace_flags & mask) == !!enabled)
4305                 return 0;
4306
4307         /* Give the tracer a chance to approve the change */
4308         if (tr->current_trace->flag_changed)
4309                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4310                         return -EINVAL;
4311
4312         if (enabled)
4313                 tr->trace_flags |= mask;
4314         else
4315                 tr->trace_flags &= ~mask;
4316
4317         if (mask == TRACE_ITER_RECORD_CMD)
4318                 trace_event_enable_cmd_record(enabled);
4319
4320         if (mask == TRACE_ITER_RECORD_TGID) {
4321                 if (!tgid_map)
4322                         tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4323                                            GFP_KERNEL);
4324                 if (!tgid_map) {
4325                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4326                         return -ENOMEM;
4327                 }
4328
4329                 trace_event_enable_tgid_record(enabled);
4330         }
4331
4332         if (mask == TRACE_ITER_EVENT_FORK)
4333                 trace_event_follow_fork(tr, enabled);
4334
4335         if (mask == TRACE_ITER_FUNC_FORK)
4336                 ftrace_pid_follow_fork(tr, enabled);
4337
4338         if (mask == TRACE_ITER_OVERWRITE) {
4339                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4340 #ifdef CONFIG_TRACER_MAX_TRACE
4341                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4342 #endif
4343         }
4344
4345         if (mask == TRACE_ITER_PRINTK) {
4346                 trace_printk_start_stop_comm(enabled);
4347                 trace_printk_control(enabled);
4348         }
4349
4350         return 0;
4351 }
4352
4353 static int trace_set_options(struct trace_array *tr, char *option)
4354 {
4355         char *cmp;
4356         int neg = 0;
4357         int ret = -ENODEV;
4358         int i;
4359         size_t orig_len = strlen(option);
4360
4361         cmp = strstrip(option);
4362
4363         if (strncmp(cmp, "no", 2) == 0) {
4364                 neg = 1;
4365                 cmp += 2;
4366         }
4367
4368         mutex_lock(&trace_types_lock);
4369
4370         for (i = 0; trace_options[i]; i++) {
4371                 if (strcmp(cmp, trace_options[i]) == 0) {
4372                         ret = set_tracer_flag(tr, 1 << i, !neg);
4373                         break;
4374                 }
4375         }
4376
4377         /* If no option could be set, test the specific tracer options */
4378         if (!trace_options[i])
4379                 ret = set_tracer_option(tr, cmp, neg);
4380
4381         mutex_unlock(&trace_types_lock);
4382
4383         /*
4384          * If the first trailing whitespace is replaced with '\0' by strstrip,
4385          * turn it back into a space.
4386          */
4387         if (orig_len > strlen(option))
4388                 option[strlen(option)] = ' ';
4389
4390         return ret;
4391 }
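/*
 * Example of the strings trace_set_options() expects; "print-parent" is
 * used here only for illustration, any name from trace_options[] or from
 * the current tracer's private flags is handled the same way:
 *
 *   echo noprint-parent > trace_options   # clear a flag
 *   echo print-parent   > trace_options   # set it again
 */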
4392
4393 static void __init apply_trace_boot_options(void)
4394 {
4395         char *buf = trace_boot_options_buf;
4396         char *option;
4397
4398         while (true) {
4399                 option = strsep(&buf, ",");
4400
4401                 if (!option)
4402                         break;
4403
4404                 if (*option)
4405                         trace_set_options(&global_trace, option);
4406
4407                 /* Put back the comma to allow this to be called again */
4408                 if (buf)
4409                         *(buf - 1) = ',';
4410         }
4411 }
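/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command line parameter; a boot line such as (illustrative option names):
 *
 *   trace_options=sym-offset,noirq-info
 *
 * ends up feeding each comma-separated token to trace_set_options(), just
 * as if it had been written to the trace_options file at run time.
 */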
4412
4413 static ssize_t
4414 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4415                         size_t cnt, loff_t *ppos)
4416 {
4417         struct seq_file *m = filp->private_data;
4418         struct trace_array *tr = m->private;
4419         char buf[64];
4420         int ret;
4421
4422         if (cnt >= sizeof(buf))
4423                 return -EINVAL;
4424
4425         if (copy_from_user(buf, ubuf, cnt))
4426                 return -EFAULT;
4427
4428         buf[cnt] = 0;
4429
4430         ret = trace_set_options(tr, buf);
4431         if (ret < 0)
4432                 return ret;
4433
4434         *ppos += cnt;
4435
4436         return cnt;
4437 }
4438
4439 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4440 {
4441         struct trace_array *tr = inode->i_private;
4442         int ret;
4443
4444         if (tracing_disabled)
4445                 return -ENODEV;
4446
4447         if (trace_array_get(tr) < 0)
4448                 return -ENODEV;
4449
4450         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4451         if (ret < 0)
4452                 trace_array_put(tr);
4453
4454         return ret;
4455 }
4456
4457 static const struct file_operations tracing_iter_fops = {
4458         .open           = tracing_trace_options_open,
4459         .read           = seq_read,
4460         .llseek         = seq_lseek,
4461         .release        = tracing_single_release_tr,
4462         .write          = tracing_trace_options_write,
4463 };
4464
4465 static const char readme_msg[] =
4466         "tracing mini-HOWTO:\n\n"
4467         "# echo 0 > tracing_on : quick way to disable tracing\n"
4468         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4469         " Important files:\n"
4470         "  trace\t\t\t- The static contents of the buffer\n"
4471         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4472         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4473         "  current_tracer\t- function and latency tracers\n"
4474         "  available_tracers\t- list of configured tracers for current_tracer\n"
4475         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4476         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4477         "  trace_clock\t\t- change the clock used to order events\n"
4478         "       local:   Per cpu clock but may not be synced across CPUs\n"
4479         "      global:   Synced across CPUs but slows tracing down.\n"
4480         "     counter:   Not a clock, but just an increment\n"
4481         "      uptime:   Jiffy counter from time of boot\n"
4482         "        perf:   Same clock that perf events use\n"
4483 #ifdef CONFIG_X86_64
4484         "     x86-tsc:   TSC cycle counter\n"
4485 #endif
4486         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4487         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4488         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4489         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4490         "\t\t\t  Remove sub-buffer with rmdir\n"
4491         "  trace_options\t\t- Set format or modify how tracing happens\n"
4492         "\t\t\t  Disable an option by prefixing the\n"
4493         "\t\t\t  option name with 'no'\n"
4494         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4495 #ifdef CONFIG_DYNAMIC_FTRACE
4496         "\n  available_filter_functions - list of functions that can be filtered on\n"
4497         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4498         "\t\t\t  functions\n"
4499         "\t     accepts: func_full_name or glob-matching-pattern\n"
4500         "\t     modules: Can select a group via module\n"
4501         "\t      Format: :mod:<module-name>\n"
4502         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4503         "\t    triggers: a command to perform when function is hit\n"
4504         "\t      Format: <function>:<trigger>[:count]\n"
4505         "\t     trigger: traceon, traceoff\n"
4506         "\t\t      enable_event:<system>:<event>\n"
4507         "\t\t      disable_event:<system>:<event>\n"
4508 #ifdef CONFIG_STACKTRACE
4509         "\t\t      stacktrace\n"
4510 #endif
4511 #ifdef CONFIG_TRACER_SNAPSHOT
4512         "\t\t      snapshot\n"
4513 #endif
4514         "\t\t      dump\n"
4515         "\t\t      cpudump\n"
4516         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4517         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4518         "\t     The first one will disable tracing every time do_fault is hit\n"
4519         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4520         "\t       The first time do_trap is hit and it disables tracing, the\n"
4521         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4522         "\t       the counter will not decrement. It only decrements when the\n"
4523         "\t       trigger did work\n"
4524         "\t     To remove trigger without count:\n"
4525         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4526         "\t     To remove trigger with a count:\n"
4527         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4528         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4529         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4530         "\t    modules: Can select a group via module command :mod:\n"
4531         "\t    Does not accept triggers\n"
4532 #endif /* CONFIG_DYNAMIC_FTRACE */
4533 #ifdef CONFIG_FUNCTION_TRACER
4534         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4535         "\t\t    (function)\n"
4536 #endif
4537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4538         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4539         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4540         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4541 #endif
4542 #ifdef CONFIG_TRACER_SNAPSHOT
4543         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4544         "\t\t\t  snapshot buffer. Read the contents for more\n"
4545         "\t\t\t  information\n"
4546 #endif
4547 #ifdef CONFIG_STACK_TRACER
4548         "  stack_trace\t\t- Shows the max stack trace when active\n"
4549         "  stack_max_size\t- Shows current max stack size that was traced\n"
4550         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4551         "\t\t\t  new trace)\n"
4552 #ifdef CONFIG_DYNAMIC_FTRACE
4553         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4554         "\t\t\t  traces\n"
4555 #endif
4556 #endif /* CONFIG_STACK_TRACER */
4557 #ifdef CONFIG_KPROBE_EVENTS
4558         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4559         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4560 #endif
4561 #ifdef CONFIG_UPROBE_EVENTS
4562         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4563         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4564 #endif
4565 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4566         "\t  accepts: event-definitions (one definition per line)\n"
4567         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4568         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4569         "\t           -:[<group>/]<event>\n"
4570 #ifdef CONFIG_KPROBE_EVENTS
4571         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4572         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4573 #endif
4574 #ifdef CONFIG_UPROBE_EVENTS
4575         "\t    place: <path>:<offset>\n"
4576 #endif
4577         "\t     args: <name>=fetcharg[:type]\n"
4578         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4579         "\t           $stack<index>, $stack, $retval, $comm\n"
4580         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4581         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4582 #endif
4583         "  events/\t\t- Directory containing all trace event subsystems:\n"
4584         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4585         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4586         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4587         "\t\t\t  events\n"
4588         "      filter\t\t- If set, only events passing filter are traced\n"
4589         "  events/<system>/<event>/\t- Directory containing control files for\n"
4590         "\t\t\t  <event>:\n"
4591         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4592         "      filter\t\t- If set, only events passing filter are traced\n"
4593         "      trigger\t\t- If set, a command to perform when event is hit\n"
4594         "\t    Format: <trigger>[:count][if <filter>]\n"
4595         "\t   trigger: traceon, traceoff\n"
4596         "\t            enable_event:<system>:<event>\n"
4597         "\t            disable_event:<system>:<event>\n"
4598 #ifdef CONFIG_HIST_TRIGGERS
4599         "\t            enable_hist:<system>:<event>\n"
4600         "\t            disable_hist:<system>:<event>\n"
4601 #endif
4602 #ifdef CONFIG_STACKTRACE
4603         "\t\t    stacktrace\n"
4604 #endif
4605 #ifdef CONFIG_TRACER_SNAPSHOT
4606         "\t\t    snapshot\n"
4607 #endif
4608 #ifdef CONFIG_HIST_TRIGGERS
4609         "\t\t    hist (see below)\n"
4610 #endif
4611         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4612         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4613         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4614         "\t                  events/block/block_unplug/trigger\n"
4615         "\t   The first disables tracing every time block_unplug is hit.\n"
4616         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4617         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4618         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4619         "\t   Like function triggers, the counter is only decremented if it\n"
4620         "\t    enabled or disabled tracing.\n"
4621         "\t   To remove a trigger without a count:\n"
4622         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4623         "\t   To remove a trigger with a count:\n"
4624         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4625         "\t   Filters can be ignored when removing a trigger.\n"
4626 #ifdef CONFIG_HIST_TRIGGERS
4627         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4628         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4629         "\t            [:values=<field1[,field2,...]>]\n"
4630         "\t            [:sort=<field1[,field2,...]>]\n"
4631         "\t            [:size=#entries]\n"
4632         "\t            [:pause][:continue][:clear]\n"
4633         "\t            [:name=histname1]\n"
4634         "\t            [if <filter>]\n\n"
4635         "\t    When a matching event is hit, an entry is added to a hash\n"
4636         "\t    table using the key(s) and value(s) named, and the value of a\n"
4637         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4638         "\t    correspond to fields in the event's format description.  Keys\n"
4639         "\t    can be any field, or the special string 'stacktrace'.\n"
4640         "\t    Compound keys consisting of up to two fields can be specified\n"
4641         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4642         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4643         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4644         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4645         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4646         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4647         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4648         "\t    its histogram data will be shared with other triggers of the\n"
4649         "\t    same name, and trigger hits will update this common data.\n\n"
4650         "\t    Reading the 'hist' file for the event will dump the hash\n"
4651         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4652         "\t    triggers attached to an event, there will be a table for each\n"
4653         "\t    trigger in the output.  The table displayed for a named\n"
4654         "\t    trigger will be the same as any other instance having the\n"
4655         "\t    same name.  The default format used to display a given field\n"
4656         "\t    can be modified by appending any of the following modifiers\n"
4657         "\t    to the field name, as applicable:\n\n"
4658         "\t            .hex        display a number as a hex value\n"
4659         "\t            .sym        display an address as a symbol\n"
4660         "\t            .sym-offset display an address as a symbol and offset\n"
4661         "\t            .execname   display a common_pid as a program name\n"
4662         "\t            .syscall    display a syscall id as a syscall name\n"
4663         "\t            .log2       display log2 value rather than raw number\n\n"
4664         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4665         "\t    trigger or to start a hist trigger but not log any events\n"
4666         "\t    until told to do so.  'continue' can be used to start or\n"
4667         "\t    restart a paused hist trigger.\n\n"
4668         "\t    The 'clear' parameter will clear the contents of a running\n"
4669         "\t    hist trigger and leave its current paused/active state\n"
4670         "\t    unchanged.\n\n"
4671         "\t    The enable_hist and disable_hist triggers can be used to\n"
4672         "\t    have one event conditionally start and stop another event's\n"
4673         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4674         "\t    the enable_event and disable_event triggers.\n"
4675 #endif
4676 ;
4677
4678 static ssize_t
4679 tracing_readme_read(struct file *filp, char __user *ubuf,
4680                        size_t cnt, loff_t *ppos)
4681 {
4682         return simple_read_from_buffer(ubuf, cnt, ppos,
4683                                         readme_msg, strlen(readme_msg));
4684 }
4685
4686 static const struct file_operations tracing_readme_fops = {
4687         .open           = tracing_open_generic,
4688         .read           = tracing_readme_read,
4689         .llseek         = generic_file_llseek,
4690 };
4691
4692 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4693 {
4694         unsigned int *ptr = v;
4695
4696         if (*pos || m->count)
4697                 ptr++;
4698
4699         (*pos)++;
4700
4701         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4702              ptr++) {
4703                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4704                         continue;
4705
4706                 return ptr;
4707         }
4708
4709         return NULL;
4710 }
4711
4712 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4713 {
4714         void *v;
4715         loff_t l = 0;
4716
4717         preempt_disable();
4718         arch_spin_lock(&trace_cmdline_lock);
4719
4720         v = &savedcmd->map_cmdline_to_pid[0];
4721         while (l <= *pos) {
4722                 v = saved_cmdlines_next(m, v, &l);
4723                 if (!v)
4724                         return NULL;
4725         }
4726
4727         return v;
4728 }
4729
4730 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4731 {
4732         arch_spin_unlock(&trace_cmdline_lock);
4733         preempt_enable();
4734 }
4735
4736 static int saved_cmdlines_show(struct seq_file *m, void *v)
4737 {
4738         char buf[TASK_COMM_LEN];
4739         unsigned int *pid = v;
4740
4741         __trace_find_cmdline(*pid, buf);
4742         seq_printf(m, "%d %s\n", *pid, buf);
4743         return 0;
4744 }
4745
4746 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4747         .start          = saved_cmdlines_start,
4748         .next           = saved_cmdlines_next,
4749         .stop           = saved_cmdlines_stop,
4750         .show           = saved_cmdlines_show,
4751 };
4752
4753 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4754 {
4755         if (tracing_disabled)
4756                 return -ENODEV;
4757
4758         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4759 }
4760
4761 static const struct file_operations tracing_saved_cmdlines_fops = {
4762         .open           = tracing_saved_cmdlines_open,
4763         .read           = seq_read,
4764         .llseek         = seq_lseek,
4765         .release        = seq_release,
4766 };
4767
4768 static ssize_t
4769 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4770                                  size_t cnt, loff_t *ppos)
4771 {
4772         char buf[64];
4773         int r;
4774
4775         arch_spin_lock(&trace_cmdline_lock);
4776         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4777         arch_spin_unlock(&trace_cmdline_lock);
4778
4779         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4780 }
4781
4782 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4783 {
4784         kfree(s->saved_cmdlines);
4785         kfree(s->map_cmdline_to_pid);
4786         kfree(s);
4787 }
4788
4789 static int tracing_resize_saved_cmdlines(unsigned int val)
4790 {
4791         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4792
4793         s = kmalloc(sizeof(*s), GFP_KERNEL);
4794         if (!s)
4795                 return -ENOMEM;
4796
4797         if (allocate_cmdlines_buffer(val, s) < 0) {
4798                 kfree(s);
4799                 return -ENOMEM;
4800         }
4801
4802         arch_spin_lock(&trace_cmdline_lock);
4803         savedcmd_temp = savedcmd;
4804         savedcmd = s;
4805         arch_spin_unlock(&trace_cmdline_lock);
4806         free_saved_cmdlines_buffer(savedcmd_temp);
4807
4808         return 0;
4809 }
4810
4811 static ssize_t
4812 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4813                                   size_t cnt, loff_t *ppos)
4814 {
4815         unsigned long val;
4816         int ret;
4817
4818         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4819         if (ret)
4820                 return ret;
4821
4822         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4823         if (!val || val > PID_MAX_DEFAULT)
4824                 return -EINVAL;
4825
4826         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4827         if (ret < 0)
4828                 return ret;
4829
4830         *ppos += cnt;
4831
4832         return cnt;
4833 }
4834
4835 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4836         .open           = tracing_open_generic,
4837         .read           = tracing_saved_cmdlines_size_read,
4838         .write          = tracing_saved_cmdlines_size_write,
4839 };
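
/*
 * Illustrative sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 *
 * The "128" shown is just the usual default; accepted values range from
 * 1 to PID_MAX_DEFAULT, as enforced by the write handler above.
 */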
4840
4841 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4842 static union trace_eval_map_item *
4843 update_eval_map(union trace_eval_map_item *ptr)
4844 {
4845         if (!ptr->map.eval_string) {
4846                 if (ptr->tail.next) {
4847                         ptr = ptr->tail.next;
4848                         /* Set ptr to the next real item (skip head) */
4849                         ptr++;
4850                 } else
4851                         return NULL;
4852         }
4853         return ptr;
4854 }
4855
4856 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4857 {
4858         union trace_eval_map_item *ptr = v;
4859
4860         /*
4861          * Paranoid! If ptr points to end, we don't want to increment past it.
4862          * This really should never happen.
4863          */
4864         ptr = update_eval_map(ptr);
4865         if (WARN_ON_ONCE(!ptr))
4866                 return NULL;
4867
4868         ptr++;
4869
4870         (*pos)++;
4871
4872         ptr = update_eval_map(ptr);
4873
4874         return ptr;
4875 }
4876
4877 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4878 {
4879         union trace_eval_map_item *v;
4880         loff_t l = 0;
4881
4882         mutex_lock(&trace_eval_mutex);
4883
4884         v = trace_eval_maps;
4885         if (v)
4886                 v++;
4887
4888         while (v && l < *pos) {
4889                 v = eval_map_next(m, v, &l);
4890         }
4891
4892         return v;
4893 }
4894
4895 static void eval_map_stop(struct seq_file *m, void *v)
4896 {
4897         mutex_unlock(&trace_eval_mutex);
4898 }
4899
4900 static int eval_map_show(struct seq_file *m, void *v)
4901 {
4902         union trace_eval_map_item *ptr = v;
4903
4904         seq_printf(m, "%s %ld (%s)\n",
4905                    ptr->map.eval_string, ptr->map.eval_value,
4906                    ptr->map.system);
4907
4908         return 0;
4909 }
4910
4911 static const struct seq_operations tracing_eval_map_seq_ops = {
4912         .start          = eval_map_start,
4913         .next           = eval_map_next,
4914         .stop           = eval_map_stop,
4915         .show           = eval_map_show,
4916 };
4917
4918 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4919 {
4920         if (tracing_disabled)
4921                 return -ENODEV;
4922
4923         return seq_open(filp, &tracing_eval_map_seq_ops);
4924 }
4925
4926 static const struct file_operations tracing_eval_map_fops = {
4927         .open           = tracing_eval_map_open,
4928         .read           = seq_read,
4929         .llseek         = seq_lseek,
4930         .release        = seq_release,
4931 };
4932
4933 static inline union trace_eval_map_item *
4934 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4935 {
4936         /* Return tail of array given the head */
4937         return ptr + ptr->head.length + 1;
4938 }
4939
4940 static void
4941 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4942                            int len)
4943 {
4944         struct trace_eval_map **stop;
4945         struct trace_eval_map **map;
4946         union trace_eval_map_item *map_array;
4947         union trace_eval_map_item *ptr;
4948
4949         stop = start + len;
4950
4951         /*
4952          * The trace_eval_maps contains the map plus a head and tail item,
4953          * where the head holds the module and length of array, and the
4954          * tail holds a pointer to the next list.
4955          */
4956         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4957         if (!map_array) {
4958                 pr_warn("Unable to allocate trace eval mapping\n");
4959                 return;
4960         }
4961
4962         mutex_lock(&trace_eval_mutex);
4963
4964         if (!trace_eval_maps)
4965                 trace_eval_maps = map_array;
4966         else {
4967                 ptr = trace_eval_maps;
4968                 for (;;) {
4969                         ptr = trace_eval_jmp_to_tail(ptr);
4970                         if (!ptr->tail.next)
4971                                 break;
4972                         ptr = ptr->tail.next;
4973
4974                 }
4975                 ptr->tail.next = map_array;
4976         }
4977         map_array->head.mod = mod;
4978         map_array->head.length = len;
4979         map_array++;
4980
4981         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4982                 map_array->map = **map;
4983                 map_array++;
4984         }
4985         memset(map_array, 0, sizeof(*map_array));
4986
4987         mutex_unlock(&trace_eval_mutex);
4988 }
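
/*
 * Layout sketch of one map_array allocation built above (len maps plus
 * a head and a tail item):
 *
 *   [ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * The zeroed element at the end acts as the tail; its tail.next is
 * filled in when the next module's maps are appended.
 */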
4989
4990 static void trace_create_eval_file(struct dentry *d_tracer)
4991 {
4992         trace_create_file("eval_map", 0444, d_tracer,
4993                           NULL, &tracing_eval_map_fops);
4994 }
4995
4996 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
4997 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
4998 static inline void trace_insert_eval_map_file(struct module *mod,
4999                               struct trace_eval_map **start, int len) { }
5000 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5001
5002 static void trace_insert_eval_map(struct module *mod,
5003                                   struct trace_eval_map **start, int len)
5004 {
5005         struct trace_eval_map **map;
5006
5007         if (len <= 0)
5008                 return;
5009
5010         map = start;
5011
5012         trace_event_eval_update(map, len);
5013
5014         trace_insert_eval_map_file(mod, start, len);
5015 }
5016
5017 static ssize_t
5018 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5019                        size_t cnt, loff_t *ppos)
5020 {
5021         struct trace_array *tr = filp->private_data;
5022         char buf[MAX_TRACER_SIZE+2];
5023         int r;
5024
5025         mutex_lock(&trace_types_lock);
5026         r = sprintf(buf, "%s\n", tr->current_trace->name);
5027         mutex_unlock(&trace_types_lock);
5028
5029         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5030 }
5031
5032 int tracer_init(struct tracer *t, struct trace_array *tr)
5033 {
5034         tracing_reset_online_cpus(&tr->trace_buffer);
5035         return t->init(tr);
5036 }
5037
5038 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5039 {
5040         int cpu;
5041
5042         for_each_tracing_cpu(cpu)
5043                 per_cpu_ptr(buf->data, cpu)->entries = val;
5044 }
5045
5046 #ifdef CONFIG_TRACER_MAX_TRACE
5047 /* resize @trace_buf's per-cpu entries to match @size_buf's per-cpu entries */
5048 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5049                                         struct trace_buffer *size_buf, int cpu_id)
5050 {
5051         int cpu, ret = 0;
5052
5053         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5054                 for_each_tracing_cpu(cpu) {
5055                         ret = ring_buffer_resize(trace_buf->buffer,
5056                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5057                         if (ret < 0)
5058                                 break;
5059                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5060                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5061                 }
5062         } else {
5063                 ret = ring_buffer_resize(trace_buf->buffer,
5064                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5065                 if (ret == 0)
5066                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5067                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5068         }
5069
5070         return ret;
5071 }
5072 #endif /* CONFIG_TRACER_MAX_TRACE */
5073
5074 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5075                                         unsigned long size, int cpu)
5076 {
5077         int ret;
5078
5079         /*
5080          * If kernel or user changes the size of the ring buffer
5081          * we use the size that was given, and we can forget about
5082          * expanding it later.
5083          */
5084         ring_buffer_expanded = true;
5085
5086         /* May be called before buffers are initialized */
5087         if (!tr->trace_buffer.buffer)
5088                 return 0;
5089
5090         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5091         if (ret < 0)
5092                 return ret;
5093
5094 #ifdef CONFIG_TRACER_MAX_TRACE
5095         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5096             !tr->current_trace->use_max_tr)
5097                 goto out;
5098
5099         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5100         if (ret < 0) {
5101                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5102                                                      &tr->trace_buffer, cpu);
5103                 if (r < 0) {
5104                         /*
5105                          * AARGH! We are left with different
5106                          * size max buffer!!!!
5107                          * The max buffer is our "snapshot" buffer.
5108                          * When a tracer needs a snapshot (one of the
5109                          * latency tracers), it swaps the max buffer
5110                  * with the saved snapshot. We succeeded in updating
5111                  * the size of the main buffer, but failed to update
5112                  * the size of the max buffer. But when we tried
5113                          * to reset the main buffer to the original size, we
5114                          * failed there too. This is very unlikely to
5115                          * happen, but if it does, warn and kill all
5116                          * tracing.
5117                          */
5118                         WARN_ON(1);
5119                         tracing_disabled = 1;
5120                 }
5121                 return ret;
5122         }
5123
5124         if (cpu == RING_BUFFER_ALL_CPUS)
5125                 set_buffer_entries(&tr->max_buffer, size);
5126         else
5127                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5128
5129  out:
5130 #endif /* CONFIG_TRACER_MAX_TRACE */
5131
5132         if (cpu == RING_BUFFER_ALL_CPUS)
5133                 set_buffer_entries(&tr->trace_buffer, size);
5134         else
5135                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5136
5137         return ret;
5138 }
5139
5140 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5141                                           unsigned long size, int cpu_id)
5142 {
5143         int ret = size;
5144
5145         mutex_lock(&trace_types_lock);
5146
5147         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5148                 /* make sure this cpu is enabled in the mask */
5149                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5150                         ret = -EINVAL;
5151                         goto out;
5152                 }
5153         }
5154
5155         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5156         if (ret < 0)
5157                 ret = -ENOMEM;
5158
5159 out:
5160         mutex_unlock(&trace_types_lock);
5161
5162         return ret;
5163 }
5164
5165
5166 /**
5167  * tracing_update_buffers - used by tracing facility to expand ring buffers
5168  *
5169  * To save memory on systems where tracing is configured in but never
5170  * used, the ring buffers are set to a minimum size.  Once a user
5171  * starts to use the tracing facility, the buffers need to grow to
5172  * their default size.
5173  *
5174  * This function is to be called when a tracer is about to be used.
5175  */
5176 int tracing_update_buffers(void)
5177 {
5178         int ret = 0;
5179
5180         mutex_lock(&trace_types_lock);
5181         if (!ring_buffer_expanded)
5182                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5183                                                 RING_BUFFER_ALL_CPUS);
5184         mutex_unlock(&trace_types_lock);
5185
5186         return ret;
5187 }
5188
5189 struct trace_option_dentry;
5190
5191 static void
5192 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5193
5194 /*
5195  * Used to clear out the tracer before deletion of an instance.
5196  * Must have trace_types_lock held.
5197  */
5198 static void tracing_set_nop(struct trace_array *tr)
5199 {
5200         if (tr->current_trace == &nop_trace)
5201                 return;
5202
5203         tr->current_trace->enabled--;
5204
5205         if (tr->current_trace->reset)
5206                 tr->current_trace->reset(tr);
5207
5208         tr->current_trace = &nop_trace;
5209 }
5210
5211 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5212 {
5213         /* Only enable if the directory has been created already. */
5214         if (!tr->dir)
5215                 return;
5216
5217         create_trace_option_files(tr, t);
5218 }
5219
5220 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5221 {
5222         struct tracer *t;
5223 #ifdef CONFIG_TRACER_MAX_TRACE
5224         bool had_max_tr;
5225 #endif
5226         int ret = 0;
5227
5228         mutex_lock(&trace_types_lock);
5229
5230         if (!ring_buffer_expanded) {
5231                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5232                                                 RING_BUFFER_ALL_CPUS);
5233                 if (ret < 0)
5234                         goto out;
5235                 ret = 0;
5236         }
5237
5238         for (t = trace_types; t; t = t->next) {
5239                 if (strcmp(t->name, buf) == 0)
5240                         break;
5241         }
5242         if (!t) {
5243                 ret = -EINVAL;
5244                 goto out;
5245         }
5246         if (t == tr->current_trace)
5247                 goto out;
5248
5249         /* Some tracers are only allowed for the top level buffer */
5250         if (!trace_ok_for_array(t, tr)) {
5251                 ret = -EINVAL;
5252                 goto out;
5253         }
5254
5255         /* If trace pipe files are being read, we can't change the tracer */
5256         if (tr->current_trace->ref) {
5257                 ret = -EBUSY;
5258                 goto out;
5259         }
5260
5261         trace_branch_disable();
5262
5263         tr->current_trace->enabled--;
5264
5265         if (tr->current_trace->reset)
5266                 tr->current_trace->reset(tr);
5267
5268         /* Current trace needs to be nop_trace before synchronize_sched */
5269         tr->current_trace = &nop_trace;
5270
5271 #ifdef CONFIG_TRACER_MAX_TRACE
5272         had_max_tr = tr->allocated_snapshot;
5273
5274         if (had_max_tr && !t->use_max_tr) {
5275                 /*
5276                  * We need to make sure that the update_max_tr sees that
5277                  * current_trace changed to nop_trace to keep it from
5278                  * swapping the buffers after we resize it.
5279                  * update_max_tr() is called with interrupts disabled,
5280                  * so a synchronize_sched() is sufficient.
5281                  */
5282                 synchronize_sched();
5283                 free_snapshot(tr);
5284         }
5285 #endif
5286
5287 #ifdef CONFIG_TRACER_MAX_TRACE
5288         if (t->use_max_tr && !had_max_tr) {
5289                 ret = alloc_snapshot(tr);
5290                 if (ret < 0)
5291                         goto out;
5292         }
5293 #endif
5294
5295         if (t->init) {
5296                 ret = tracer_init(t, tr);
5297                 if (ret)
5298                         goto out;
5299         }
5300
5301         tr->current_trace = t;
5302         tr->current_trace->enabled++;
5303         trace_branch_enable(tr);
5304  out:
5305         mutex_unlock(&trace_types_lock);
5306
5307         return ret;
5308 }
5309
5310 static ssize_t
5311 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5312                         size_t cnt, loff_t *ppos)
5313 {
5314         struct trace_array *tr = filp->private_data;
5315         char buf[MAX_TRACER_SIZE+1];
5316         int i;
5317         size_t ret;
5318         int err;
5319
5320         ret = cnt;
5321
5322         if (cnt > MAX_TRACER_SIZE)
5323                 cnt = MAX_TRACER_SIZE;
5324
5325         if (copy_from_user(buf, ubuf, cnt))
5326                 return -EFAULT;
5327
5328         buf[cnt] = 0;
5329
5330         /* strip trailing whitespace. */
5331         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5332                 buf[i] = 0;
5333
5334         err = tracing_set_tracer(tr, buf);
5335         if (err)
5336                 return err;
5337
5338         *ppos += ret;
5339
5340         return ret;
5341 }
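
/*
 * Illustrative sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo function > current_tracer
 *   # echo nop > current_tracer
 *
 * The name must match a tracer registered in trace_types; otherwise
 * tracing_set_tracer() returns -EINVAL and the write fails.
 */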
5342
5343 static ssize_t
5344 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5345                    size_t cnt, loff_t *ppos)
5346 {
5347         char buf[64];
5348         int r;
5349
5350         r = snprintf(buf, sizeof(buf), "%ld\n",
5351                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5352         if (r > sizeof(buf))
5353                 r = sizeof(buf);
5354         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5355 }
5356
5357 static ssize_t
5358 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5359                     size_t cnt, loff_t *ppos)
5360 {
5361         unsigned long val;
5362         int ret;
5363
5364         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5365         if (ret)
5366                 return ret;
5367
5368         *ptr = val * 1000;
5369
5370         return cnt;
5371 }
5372
5373 static ssize_t
5374 tracing_thresh_read(struct file *filp, char __user *ubuf,
5375                     size_t cnt, loff_t *ppos)
5376 {
5377         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5378 }
5379
5380 static ssize_t
5381 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5382                      size_t cnt, loff_t *ppos)
5383 {
5384         struct trace_array *tr = filp->private_data;
5385         int ret;
5386
5387         mutex_lock(&trace_types_lock);
5388         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5389         if (ret < 0)
5390                 goto out;
5391
5392         if (tr->current_trace->update_thresh) {
5393                 ret = tr->current_trace->update_thresh(tr);
5394                 if (ret < 0)
5395                         goto out;
5396         }
5397
5398         ret = cnt;
5399 out:
5400         mutex_unlock(&trace_types_lock);
5401
5402         return ret;
5403 }
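
/*
 * Illustrative sketch: tracing_thresh is read and written in
 * microseconds, while the helpers above store nanoseconds internally.
 *
 *   # echo 100 > tracing_thresh      (report latencies above 100 usecs)
 *   # echo 0 > tracing_thresh        (disable the threshold)
 */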
5404
5405 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5406
5407 static ssize_t
5408 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5409                      size_t cnt, loff_t *ppos)
5410 {
5411         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5412 }
5413
5414 static ssize_t
5415 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5416                       size_t cnt, loff_t *ppos)
5417 {
5418         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5419 }
5420
5421 #endif
5422
5423 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5424 {
5425         struct trace_array *tr = inode->i_private;
5426         struct trace_iterator *iter;
5427         int ret = 0;
5428
5429         if (tracing_disabled)
5430                 return -ENODEV;
5431
5432         if (trace_array_get(tr) < 0)
5433                 return -ENODEV;
5434
5435         mutex_lock(&trace_types_lock);
5436
5437         /* create a buffer to store the information to pass to userspace */
5438         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5439         if (!iter) {
5440                 ret = -ENOMEM;
5441                 __trace_array_put(tr);
5442                 goto out;
5443         }
5444
5445         trace_seq_init(&iter->seq);
5446         iter->trace = tr->current_trace;
5447
5448         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5449                 ret = -ENOMEM;
5450                 goto fail;
5451         }
5452
5453         /* trace pipe does not show start of buffer */
5454         cpumask_setall(iter->started);
5455
5456         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5457                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5458
5459         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5460         if (trace_clocks[tr->clock_id].in_ns)
5461                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5462
5463         iter->tr = tr;
5464         iter->trace_buffer = &tr->trace_buffer;
5465         iter->cpu_file = tracing_get_cpu(inode);
5466         mutex_init(&iter->mutex);
5467         filp->private_data = iter;
5468
5469         if (iter->trace->pipe_open)
5470                 iter->trace->pipe_open(iter);
5471
5472         nonseekable_open(inode, filp);
5473
5474         tr->current_trace->ref++;
5475 out:
5476         mutex_unlock(&trace_types_lock);
5477         return ret;
5478
5479 fail:
5480         kfree(iter->trace);
5481         kfree(iter);
5482         __trace_array_put(tr);
5483         mutex_unlock(&trace_types_lock);
5484         return ret;
5485 }
5486
5487 static int tracing_release_pipe(struct inode *inode, struct file *file)
5488 {
5489         struct trace_iterator *iter = file->private_data;
5490         struct trace_array *tr = inode->i_private;
5491
5492         mutex_lock(&trace_types_lock);
5493
5494         tr->current_trace->ref--;
5495
5496         if (iter->trace->pipe_close)
5497                 iter->trace->pipe_close(iter);
5498
5499         mutex_unlock(&trace_types_lock);
5500
5501         free_cpumask_var(iter->started);
5502         mutex_destroy(&iter->mutex);
5503         kfree(iter);
5504
5505         trace_array_put(tr);
5506
5507         return 0;
5508 }
5509
5510 static unsigned int
5511 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5512 {
5513         struct trace_array *tr = iter->tr;
5514
5515         /* Iterators are static; they should be filled or empty */
5516         if (trace_buffer_iter(iter, iter->cpu_file))
5517                 return POLLIN | POLLRDNORM;
5518
5519         if (tr->trace_flags & TRACE_ITER_BLOCK)
5520                 /*
5521                  * Always select as readable when in blocking mode
5522                  */
5523                 return POLLIN | POLLRDNORM;
5524         else
5525                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5526                                              filp, poll_table);
5527 }
5528
5529 static unsigned int
5530 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5531 {
5532         struct trace_iterator *iter = filp->private_data;
5533
5534         return trace_poll(iter, filp, poll_table);
5535 }
5536
5537 /* Must be called with iter->mutex held. */
5538 static int tracing_wait_pipe(struct file *filp)
5539 {
5540         struct trace_iterator *iter = filp->private_data;
5541         int ret;
5542
5543         while (trace_empty(iter)) {
5544
5545                 if ((filp->f_flags & O_NONBLOCK)) {
5546                         return -EAGAIN;
5547                 }
5548
5549                 /*
5550                  * We block until we have read something and tracing has been
5551                  * disabled. If tracing is disabled but we have never read
5552                  * anything, we keep blocking; this allows a user to cat this
5553                  * file and then enable tracing. But after we have read
5554                  * something, we give an EOF when tracing is disabled again.
5555                  *
5556                  * iter->pos will be 0 if we haven't read anything.
5557                  */
5558                 if (!tracing_is_on() && iter->pos)
5559                         break;
5560
5561                 mutex_unlock(&iter->mutex);
5562
5563                 ret = wait_on_pipe(iter, false);
5564
5565                 mutex_lock(&iter->mutex);
5566
5567                 if (ret)
5568                         return ret;
5569         }
5570
5571         return 1;
5572 }
5573
5574 /*
5575  * Consumer reader.
5576  */
5577 static ssize_t
5578 tracing_read_pipe(struct file *filp, char __user *ubuf,
5579                   size_t cnt, loff_t *ppos)
5580 {
5581         struct trace_iterator *iter = filp->private_data;
5582         ssize_t sret;
5583
5584         /*
5585          * Avoid more than one consumer on a single file descriptor.
5586          * This is just a matter of trace coherency; the ring buffer itself
5587          * is protected.
5588          */
5589         mutex_lock(&iter->mutex);
5590
5591         /* return any leftover data */
5592         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5593         if (sret != -EBUSY)
5594                 goto out;
5595
5596         trace_seq_init(&iter->seq);
5597
5598         if (iter->trace->read) {
5599                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5600                 if (sret)
5601                         goto out;
5602         }
5603
5604 waitagain:
5605         sret = tracing_wait_pipe(filp);
5606         if (sret <= 0)
5607                 goto out;
5608
5609         /* stop when tracing is finished */
5610         if (trace_empty(iter)) {
5611                 sret = 0;
5612                 goto out;
5613         }
5614
5615         if (cnt >= PAGE_SIZE)
5616                 cnt = PAGE_SIZE - 1;
5617
5618         /* reset all but tr, trace, and overruns */
5619         memset(&iter->seq, 0,
5620                sizeof(struct trace_iterator) -
5621                offsetof(struct trace_iterator, seq));
5622         cpumask_clear(iter->started);
5623         iter->pos = -1;
5624
5625         trace_event_read_lock();
5626         trace_access_lock(iter->cpu_file);
5627         while (trace_find_next_entry_inc(iter) != NULL) {
5628                 enum print_line_t ret;
5629                 int save_len = iter->seq.seq.len;
5630
5631                 ret = print_trace_line(iter);
5632                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5633                         /* don't print partial lines */
5634                         iter->seq.seq.len = save_len;
5635                         break;
5636                 }
5637                 if (ret != TRACE_TYPE_NO_CONSUME)
5638                         trace_consume(iter);
5639
5640                 if (trace_seq_used(&iter->seq) >= cnt)
5641                         break;
5642
5643                 /*
5644                  * Setting the full flag means we reached the trace_seq buffer
5645                  * size and should have left via the partial-line condition above.
5646                  * One of the trace_seq_* functions is not used properly.
5647                  */
5648                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5649                           iter->ent->type);
5650         }
5651         trace_access_unlock(iter->cpu_file);
5652         trace_event_read_unlock();
5653
5654         /* Now copy what we have to the user */
5655         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5656         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5657                 trace_seq_init(&iter->seq);
5658
5659         /*
5660          * If there was nothing to send to user, in spite of consuming trace
5661          * entries, go back to wait for more entries.
5662          */
5663         if (sret == -EBUSY)
5664                 goto waitagain;
5665
5666 out:
5667         mutex_unlock(&iter->mutex);
5668
5669         return sret;
5670 }
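
/*
 * Illustrative sketch: trace_pipe is a blocking, consuming reader, so
 * entries returned here are removed from the ring buffer.
 *
 *   # cat trace_pipe                 (blocks until entries arrive,
 *                                     unless opened O_NONBLOCK)
 *   # cat trace_pipe > trace.log &   (stream entries in the background)
 */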
5671
5672 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5673                                      unsigned int idx)
5674 {
5675         __free_page(spd->pages[idx]);
5676 }
5677
5678 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5679         .can_merge              = 0,
5680         .confirm                = generic_pipe_buf_confirm,
5681         .release                = generic_pipe_buf_release,
5682         .steal                  = generic_pipe_buf_steal,
5683         .get                    = generic_pipe_buf_get,
5684 };
5685
5686 static size_t
5687 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5688 {
5689         size_t count;
5690         int save_len;
5691         int ret;
5692
5693         /* Seq buffer is page-sized, exactly what we need. */
5694         for (;;) {
5695                 save_len = iter->seq.seq.len;
5696                 ret = print_trace_line(iter);
5697
5698                 if (trace_seq_has_overflowed(&iter->seq)) {
5699                         iter->seq.seq.len = save_len;
5700                         break;
5701                 }
5702
5703                 /*
5704                  * This should not be hit, because a partial line should only
5705                  * be returned if iter->seq overflowed. But check it
5706                  * anyway to be safe.
5707                  */
5708                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5709                         iter->seq.seq.len = save_len;
5710                         break;
5711                 }
5712
5713                 count = trace_seq_used(&iter->seq) - save_len;
5714                 if (rem < count) {
5715                         rem = 0;
5716                         iter->seq.seq.len = save_len;
5717                         break;
5718                 }
5719
5720                 if (ret != TRACE_TYPE_NO_CONSUME)
5721                         trace_consume(iter);
5722                 rem -= count;
5723                 if (!trace_find_next_entry_inc(iter))   {
5724                         rem = 0;
5725                         iter->ent = NULL;
5726                         break;
5727                 }
5728         }
5729
5730         return rem;
5731 }
5732
5733 static ssize_t tracing_splice_read_pipe(struct file *filp,
5734                                         loff_t *ppos,
5735                                         struct pipe_inode_info *pipe,
5736                                         size_t len,
5737                                         unsigned int flags)
5738 {
5739         struct page *pages_def[PIPE_DEF_BUFFERS];
5740         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5741         struct trace_iterator *iter = filp->private_data;
5742         struct splice_pipe_desc spd = {
5743                 .pages          = pages_def,
5744                 .partial        = partial_def,
5745                 .nr_pages       = 0, /* This gets updated below. */
5746                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5747                 .ops            = &tracing_pipe_buf_ops,
5748                 .spd_release    = tracing_spd_release_pipe,
5749         };
5750         ssize_t ret;
5751         size_t rem;
5752         unsigned int i;
5753
5754         if (splice_grow_spd(pipe, &spd))
5755                 return -ENOMEM;
5756
5757         mutex_lock(&iter->mutex);
5758
5759         if (iter->trace->splice_read) {
5760                 ret = iter->trace->splice_read(iter, filp,
5761                                                ppos, pipe, len, flags);
5762                 if (ret)
5763                         goto out_err;
5764         }
5765
5766         ret = tracing_wait_pipe(filp);
5767         if (ret <= 0)
5768                 goto out_err;
5769
5770         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5771                 ret = -EFAULT;
5772                 goto out_err;
5773         }
5774
5775         trace_event_read_lock();
5776         trace_access_lock(iter->cpu_file);
5777
5778         /* Fill as many pages as possible. */
5779         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5780                 spd.pages[i] = alloc_page(GFP_KERNEL);
5781                 if (!spd.pages[i])
5782                         break;
5783
5784                 rem = tracing_fill_pipe_page(rem, iter);
5785
5786                 /* Copy the data into the page, so we can start over. */
5787                 ret = trace_seq_to_buffer(&iter->seq,
5788                                           page_address(spd.pages[i]),
5789                                           trace_seq_used(&iter->seq));
5790                 if (ret < 0) {
5791                         __free_page(spd.pages[i]);
5792                         break;
5793                 }
5794                 spd.partial[i].offset = 0;
5795                 spd.partial[i].len = trace_seq_used(&iter->seq);
5796
5797                 trace_seq_init(&iter->seq);
5798         }
5799
5800         trace_access_unlock(iter->cpu_file);
5801         trace_event_read_unlock();
5802         mutex_unlock(&iter->mutex);
5803
5804         spd.nr_pages = i;
5805
5806         if (i)
5807                 ret = splice_to_pipe(pipe, &spd);
5808         else
5809                 ret = 0;
5810 out:
5811         splice_shrink_spd(&spd);
5812         return ret;
5813
5814 out_err:
5815         mutex_unlock(&iter->mutex);
5816         goto out;
5817 }
5818
5819 static ssize_t
5820 tracing_entries_read(struct file *filp, char __user *ubuf,
5821                      size_t cnt, loff_t *ppos)
5822 {
5823         struct inode *inode = file_inode(filp);
5824         struct trace_array *tr = inode->i_private;
5825         int cpu = tracing_get_cpu(inode);
5826         char buf[64];
5827         int r = 0;
5828         ssize_t ret;
5829
5830         mutex_lock(&trace_types_lock);
5831
5832         if (cpu == RING_BUFFER_ALL_CPUS) {
5833                 int cpu, buf_size_same;
5834                 unsigned long size;
5835
5836                 size = 0;
5837                 buf_size_same = 1;
5838                 /* check if all cpu sizes are same */
5839                 for_each_tracing_cpu(cpu) {
5840                         /* fill in the size from first enabled cpu */
5841                         if (size == 0)
5842                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5843                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5844                                 buf_size_same = 0;
5845                                 break;
5846                         }
5847                 }
5848
5849                 if (buf_size_same) {
5850                         if (!ring_buffer_expanded)
5851                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5852                                             size >> 10,
5853                                             trace_buf_size >> 10);
5854                         else
5855                                 r = sprintf(buf, "%lu\n", size >> 10);
5856                 } else
5857                         r = sprintf(buf, "X\n");
5858         } else
5859                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5860
5861         mutex_unlock(&trace_types_lock);
5862
5863         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5864         return ret;
5865 }
5866
5867 static ssize_t
5868 tracing_entries_write(struct file *filp, const char __user *ubuf,
5869                       size_t cnt, loff_t *ppos)
5870 {
5871         struct inode *inode = file_inode(filp);
5872         struct trace_array *tr = inode->i_private;
5873         unsigned long val;
5874         int ret;
5875
5876         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5877         if (ret)
5878                 return ret;
5879
5880         /* must have at least 1 entry */
5881         if (!val)
5882                 return -EINVAL;
5883
5884         /* value is in KB */
5885         val <<= 10;
5886         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5887         if (ret < 0)
5888                 return ret;
5889
5890         *ppos += cnt;
5891
5892         return cnt;
5893 }
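
/*
 * Illustrative sketch (paths relative to the tracefs mount point):
 * buffer_size_kb takes a per-CPU size in kilobytes; the write path
 * above shifts the value left by 10 before resizing.
 *
 *   # echo 4096 > buffer_size_kb                  (all CPUs)
 *   # echo 1024 > per_cpu/cpu1/buffer_size_kb     (a single CPU)
 */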
5894
5895 static ssize_t
5896 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5897                                 size_t cnt, loff_t *ppos)
5898 {
5899         struct trace_array *tr = filp->private_data;
5900         char buf[64];
5901         int r, cpu;
5902         unsigned long size = 0, expanded_size = 0;
5903
5904         mutex_lock(&trace_types_lock);
5905         for_each_tracing_cpu(cpu) {
5906                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5907                 if (!ring_buffer_expanded)
5908                         expanded_size += trace_buf_size >> 10;
5909         }
5910         if (ring_buffer_expanded)
5911                 r = sprintf(buf, "%lu\n", size);
5912         else
5913                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5914         mutex_unlock(&trace_types_lock);
5915
5916         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5917 }
5918
5919 static ssize_t
5920 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5921                           size_t cnt, loff_t *ppos)
5922 {
5923         /*
5924          * There is no need to read what the user has written; this function
5925          * exists only so that "echo" into this file does not return an error.
5926          */
5927
5928         *ppos += cnt;
5929
5930         return cnt;
5931 }
5932
5933 static int
5934 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5935 {
5936         struct trace_array *tr = inode->i_private;
5937
5938         /* disable tracing? */
5939         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5940                 tracer_tracing_off(tr);
5941         /* resize the ring buffer to 0 */
5942         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5943
5944         trace_array_put(tr);
5945
5946         return 0;
5947 }
5948
5949 static ssize_t
5950 tracing_mark_write(struct file *filp, const char __user *ubuf,
5951                                         size_t cnt, loff_t *fpos)
5952 {
5953         struct trace_array *tr = filp->private_data;
5954         struct ring_buffer_event *event;
5955         struct ring_buffer *buffer;
5956         struct print_entry *entry;
5957         unsigned long irq_flags;
5958         const char faulted[] = "<faulted>";
5959         ssize_t written;
5960         int size;
5961         int len;
5962
5963 /* Used in tracing_mark_raw_write() as well */
5964 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5965
5966         if (tracing_disabled)
5967                 return -EINVAL;
5968
5969         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5970                 return -EINVAL;
5971
5972         if (cnt > TRACE_BUF_SIZE)
5973                 cnt = TRACE_BUF_SIZE;
5974
5975         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5976
5977         local_save_flags(irq_flags);
5978         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5979
5980         /* If less than "<faulted>", then make sure we can still add that */
5981         if (cnt < FAULTED_SIZE)
5982                 size += FAULTED_SIZE - cnt;
5983
5984         buffer = tr->trace_buffer.buffer;
5985         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5986                                             irq_flags, preempt_count());
5987         if (unlikely(!event))
5988                 /* Ring buffer disabled, return as if not open for write */
5989                 return -EBADF;
5990
5991         entry = ring_buffer_event_data(event);
5992         entry->ip = _THIS_IP_;
5993
5994         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5995         if (len) {
5996                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5997                 cnt = FAULTED_SIZE;
5998                 written = -EFAULT;
5999         } else
6000                 written = cnt;
6001         len = cnt;
6002
6003         if (entry->buf[cnt - 1] != '\n') {
6004                 entry->buf[cnt] = '\n';
6005                 entry->buf[cnt + 1] = '\0';
6006         } else
6007                 entry->buf[cnt] = '\0';
6008
6009         __buffer_unlock_commit(buffer, event);
6010
6011         if (written > 0)
6012                 *fpos += written;
6013
6014         return written;
6015 }
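
/*
 * Illustrative sketch: user space annotates the trace by writing plain
 * text into trace_marker; a trailing newline is added if it is missing.
 *
 *   # echo "hello from user space" > trace_marker
 *
 * Writes larger than TRACE_BUF_SIZE are truncated, and a faulting copy
 * records the string "<faulted>" instead of the user data.
 */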
6016
6017 /* Limit it for now to 3K (including tag) */
6018 #define RAW_DATA_MAX_SIZE (1024*3)
6019
6020 static ssize_t
6021 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6022                                         size_t cnt, loff_t *fpos)
6023 {
6024         struct trace_array *tr = filp->private_data;
6025         struct ring_buffer_event *event;
6026         struct ring_buffer *buffer;
6027         struct raw_data_entry *entry;
6028         const char faulted[] = "<faulted>";
6029         unsigned long irq_flags;
6030         ssize_t written;
6031         int size;
6032         int len;
6033
6034 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6035
6036         if (tracing_disabled)
6037                 return -EINVAL;
6038
6039         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6040                 return -EINVAL;
6041
6042         /* The marker must at least have a tag id */
6043         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6044                 return -EINVAL;
6045
6046         if (cnt > TRACE_BUF_SIZE)
6047                 cnt = TRACE_BUF_SIZE;
6048
6049         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6050
6051         local_save_flags(irq_flags);
6052         size = sizeof(*entry) + cnt;
6053         if (cnt < FAULT_SIZE_ID)
6054                 size += FAULT_SIZE_ID - cnt;
6055
6056         buffer = tr->trace_buffer.buffer;
6057         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6058                                             irq_flags, preempt_count());
6059         if (!event)
6060                 /* Ring buffer disabled, return as if not open for write */
6061                 return -EBADF;
6062
6063         entry = ring_buffer_event_data(event);
6064
6065         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6066         if (len) {
6067                 entry->id = -1;
6068                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6069                 written = -EFAULT;
6070         } else
6071                 written = cnt;
6072
6073         __buffer_unlock_commit(buffer, event);
6074
6075         if (written > 0)
6076                 *fpos += written;
6077
6078         return written;
6079 }
6080
6081 static int tracing_clock_show(struct seq_file *m, void *v)
6082 {
6083         struct trace_array *tr = m->private;
6084         int i;
6085
6086         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6087                 seq_printf(m,
6088                         "%s%s%s%s", i ? " " : "",
6089                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6090                         i == tr->clock_id ? "]" : "");
6091         seq_putc(m, '\n');
6092
6093         return 0;
6094 }
6095
6096 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6097 {
6098         int i;
6099
6100         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6101                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6102                         break;
6103         }
6104         if (i == ARRAY_SIZE(trace_clocks))
6105                 return -EINVAL;
6106
6107         mutex_lock(&trace_types_lock);
6108
6109         tr->clock_id = i;
6110
6111         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6112
6113         /*
6114          * New clock may not be consistent with the previous clock.
6115          * Reset the buffer so that it doesn't have incomparable timestamps.
6116          */
6117         tracing_reset_online_cpus(&tr->trace_buffer);
6118
6119 #ifdef CONFIG_TRACER_MAX_TRACE
6120         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6121                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6122         tracing_reset_online_cpus(&tr->max_buffer);
6123 #endif
6124
6125         mutex_unlock(&trace_types_lock);
6126
6127         return 0;
6128 }
6129
6130 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6131                                    size_t cnt, loff_t *fpos)
6132 {
6133         struct seq_file *m = filp->private_data;
6134         struct trace_array *tr = m->private;
6135         char buf[64];
6136         const char *clockstr;
6137         int ret;
6138
6139         if (cnt >= sizeof(buf))
6140                 return -EINVAL;
6141
6142         if (copy_from_user(buf, ubuf, cnt))
6143                 return -EFAULT;
6144
6145         buf[cnt] = 0;
6146
6147         clockstr = strstrip(buf);
6148
6149         ret = tracing_set_clock(tr, clockstr);
6150         if (ret)
6151                 return ret;
6152
6153         *fpos += cnt;
6154
6155         return cnt;
6156 }
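
/*
 * Illustrative sketch: reading trace_clock lists the available clocks
 * with the current one in brackets; writing a listed name selects it
 * and resets the buffers (see tracing_set_clock() above).
 *
 *   # cat trace_clock
 *   [local] global counter ...
 *   # echo global > trace_clock
 */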
6157
6158 static int tracing_clock_open(struct inode *inode, struct file *file)
6159 {
6160         struct trace_array *tr = inode->i_private;
6161         int ret;
6162
6163         if (tracing_disabled)
6164                 return -ENODEV;
6165
6166         if (trace_array_get(tr))
6167                 return -ENODEV;
6168
6169         ret = single_open(file, tracing_clock_show, inode->i_private);
6170         if (ret < 0)
6171                 trace_array_put(tr);
6172
6173         return ret;
6174 }
6175
6176 struct ftrace_buffer_info {
6177         struct trace_iterator   iter;
6178         void                    *spare;
6179         unsigned int            spare_cpu;
6180         unsigned int            read;
6181 };
6182
6183 #ifdef CONFIG_TRACER_SNAPSHOT
6184 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6185 {
6186         struct trace_array *tr = inode->i_private;
6187         struct trace_iterator *iter;
6188         struct seq_file *m;
6189         int ret = 0;
6190
6191         if (trace_array_get(tr) < 0)
6192                 return -ENODEV;
6193
6194         if (file->f_mode & FMODE_READ) {
6195                 iter = __tracing_open(inode, file, true);
6196                 if (IS_ERR(iter))
6197                         ret = PTR_ERR(iter);
6198         } else {
6199                 /* Writes still need the seq_file to hold the private data */
6200                 ret = -ENOMEM;
6201                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6202                 if (!m)
6203                         goto out;
6204                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6205                 if (!iter) {
6206                         kfree(m);
6207                         goto out;
6208                 }
6209                 ret = 0;
6210
6211                 iter->tr = tr;
6212                 iter->trace_buffer = &tr->max_buffer;
6213                 iter->cpu_file = tracing_get_cpu(inode);
6214                 m->private = iter;
6215                 file->private_data = m;
6216         }
6217 out:
6218         if (ret < 0)
6219                 trace_array_put(tr);
6220
6221         return ret;
6222 }
6223
6224 static ssize_t
6225 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6226                        loff_t *ppos)
6227 {
6228         struct seq_file *m = filp->private_data;
6229         struct trace_iterator *iter = m->private;
6230         struct trace_array *tr = iter->tr;
6231         unsigned long val;
6232         int ret;
6233
6234         ret = tracing_update_buffers();
6235         if (ret < 0)
6236                 return ret;
6237
6238         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6239         if (ret)
6240                 return ret;
6241
6242         mutex_lock(&trace_types_lock);
6243
6244         if (tr->current_trace->use_max_tr) {
6245                 ret = -EBUSY;
6246                 goto out;
6247         }
6248
6249         switch (val) {
6250         case 0:
6251                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6252                         ret = -EINVAL;
6253                         break;
6254                 }
6255                 if (tr->allocated_snapshot)
6256                         free_snapshot(tr);
6257                 break;
6258         case 1:
6259 /* Only allow per-cpu swap if the ring buffer supports it */
6260 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6261                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6262                         ret = -EINVAL;
6263                         break;
6264                 }
6265 #endif
6266                 if (!tr->allocated_snapshot) {
6267                         ret = alloc_snapshot(tr);
6268                         if (ret < 0)
6269                                 break;
6270                 }
6271                 local_irq_disable();
6272                 /* Now, we're going to swap */
6273                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6274                         update_max_tr(tr, current, smp_processor_id());
6275                 else
6276                         update_max_tr_single(tr, current, iter->cpu_file);
6277                 local_irq_enable();
6278                 break;
6279         default:
6280                 if (tr->allocated_snapshot) {
6281                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6282                                 tracing_reset_online_cpus(&tr->max_buffer);
6283                         else
6284                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6285                 }
6286                 break;
6287         }
6288
6289         if (ret >= 0) {
6290                 *ppos += cnt;
6291                 ret = cnt;
6292         }
6293 out:
6294         mutex_unlock(&trace_types_lock);
6295         return ret;
6296 }
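
/*
 * Illustrative sketch, matching the switch in tracing_snapshot_write()
 * above (requires CONFIG_TRACER_SNAPSHOT):
 *
 *   # echo 1 > snapshot      (allocate if needed, then swap in a snapshot)
 *   # cat snapshot           (read the snapshotted buffer)
 *   # echo 2 > snapshot      (clear the snapshot without freeing it)
 *   # echo 0 > snapshot      (free the snapshot buffer)
 */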
6297
6298 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6299 {
6300         struct seq_file *m = file->private_data;
6301         int ret;
6302
6303         ret = tracing_release(inode, file);
6304
6305         if (file->f_mode & FMODE_READ)
6306                 return ret;
6307
6308         /* If write only, the seq_file is just a stub */
6309         if (m)
6310                 kfree(m->private);
6311         kfree(m);
6312
6313         return 0;
6314 }
6315
6316 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6317 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6318                                     size_t count, loff_t *ppos);
6319 static int tracing_buffers_release(struct inode *inode, struct file *file);
6320 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6321                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6322
6323 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6324 {
6325         struct ftrace_buffer_info *info;
6326         int ret;
6327
6328         ret = tracing_buffers_open(inode, filp);
6329         if (ret < 0)
6330                 return ret;
6331
6332         info = filp->private_data;
6333
6334         if (info->iter.trace->use_max_tr) {
6335                 tracing_buffers_release(inode, filp);
6336                 return -EBUSY;
6337         }
6338
6339         info->iter.snapshot = true;
6340         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6341
6342         return ret;
6343 }
6344
6345 #endif /* CONFIG_TRACER_SNAPSHOT */
6346
6347
6348 static const struct file_operations tracing_thresh_fops = {
6349         .open           = tracing_open_generic,
6350         .read           = tracing_thresh_read,
6351         .write          = tracing_thresh_write,
6352         .llseek         = generic_file_llseek,
6353 };
6354
6355 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6356 static const struct file_operations tracing_max_lat_fops = {
6357         .open           = tracing_open_generic,
6358         .read           = tracing_max_lat_read,
6359         .write          = tracing_max_lat_write,
6360         .llseek         = generic_file_llseek,
6361 };
6362 #endif
6363
6364 static const struct file_operations set_tracer_fops = {
6365         .open           = tracing_open_generic,
6366         .read           = tracing_set_trace_read,
6367         .write          = tracing_set_trace_write,
6368         .llseek         = generic_file_llseek,
6369 };
6370
6371 static const struct file_operations tracing_pipe_fops = {
6372         .open           = tracing_open_pipe,
6373         .poll           = tracing_poll_pipe,
6374         .read           = tracing_read_pipe,
6375         .splice_read    = tracing_splice_read_pipe,
6376         .release        = tracing_release_pipe,
6377         .llseek         = no_llseek,
6378 };
6379
6380 static const struct file_operations tracing_entries_fops = {
6381         .open           = tracing_open_generic_tr,
6382         .read           = tracing_entries_read,
6383         .write          = tracing_entries_write,
6384         .llseek         = generic_file_llseek,
6385         .release        = tracing_release_generic_tr,
6386 };
6387
6388 static const struct file_operations tracing_total_entries_fops = {
6389         .open           = tracing_open_generic_tr,
6390         .read           = tracing_total_entries_read,
6391         .llseek         = generic_file_llseek,
6392         .release        = tracing_release_generic_tr,
6393 };
6394
6395 static const struct file_operations tracing_free_buffer_fops = {
6396         .open           = tracing_open_generic_tr,
6397         .write          = tracing_free_buffer_write,
6398         .release        = tracing_free_buffer_release,
6399 };
6400
6401 static const struct file_operations tracing_mark_fops = {
6402         .open           = tracing_open_generic_tr,
6403         .write          = tracing_mark_write,
6404         .llseek         = generic_file_llseek,
6405         .release        = tracing_release_generic_tr,
6406 };
6407
6408 static const struct file_operations tracing_mark_raw_fops = {
6409         .open           = tracing_open_generic_tr,
6410         .write          = tracing_mark_raw_write,
6411         .llseek         = generic_file_llseek,
6412         .release        = tracing_release_generic_tr,
6413 };
6414
6415 static const struct file_operations trace_clock_fops = {
6416         .open           = tracing_clock_open,
6417         .read           = seq_read,
6418         .llseek         = seq_lseek,
6419         .release        = tracing_single_release_tr,
6420         .write          = tracing_clock_write,
6421 };
6422
6423 #ifdef CONFIG_TRACER_SNAPSHOT
6424 static const struct file_operations snapshot_fops = {
6425         .open           = tracing_snapshot_open,
6426         .read           = seq_read,
6427         .write          = tracing_snapshot_write,
6428         .llseek         = tracing_lseek,
6429         .release        = tracing_snapshot_release,
6430 };
6431
6432 static const struct file_operations snapshot_raw_fops = {
6433         .open           = snapshot_raw_open,
6434         .read           = tracing_buffers_read,
6435         .release        = tracing_buffers_release,
6436         .splice_read    = tracing_buffers_splice_read,
6437         .llseek         = no_llseek,
6438 };
6439
6440 #endif /* CONFIG_TRACER_SNAPSHOT */
6441
6442 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6443 {
6444         struct trace_array *tr = inode->i_private;
6445         struct ftrace_buffer_info *info;
6446         int ret;
6447
6448         if (tracing_disabled)
6449                 return -ENODEV;
6450
6451         if (trace_array_get(tr) < 0)
6452                 return -ENODEV;
6453
6454         info = kzalloc(sizeof(*info), GFP_KERNEL);
6455         if (!info) {
6456                 trace_array_put(tr);
6457                 return -ENOMEM;
6458         }
6459
6460         mutex_lock(&trace_types_lock);
6461
6462         info->iter.tr           = tr;
6463         info->iter.cpu_file     = tracing_get_cpu(inode);
6464         info->iter.trace        = tr->current_trace;
6465         info->iter.trace_buffer = &tr->trace_buffer;
6466         info->spare             = NULL;
6467         /* Force reading ring buffer for first read */
6468         info->read              = (unsigned int)-1;
6469
6470         filp->private_data = info;
6471
6472         tr->current_trace->ref++;
6473
6474         mutex_unlock(&trace_types_lock);
6475
6476         ret = nonseekable_open(inode, filp);
6477         if (ret < 0)
6478                 trace_array_put(tr);
6479
6480         return ret;
6481 }
6482
6483 static unsigned int
6484 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6485 {
6486         struct ftrace_buffer_info *info = filp->private_data;
6487         struct trace_iterator *iter = &info->iter;
6488
6489         return trace_poll(iter, filp, poll_table);
6490 }
6491
6492 static ssize_t
6493 tracing_buffers_read(struct file *filp, char __user *ubuf,
6494                      size_t count, loff_t *ppos)
6495 {
6496         struct ftrace_buffer_info *info = filp->private_data;
6497         struct trace_iterator *iter = &info->iter;
6498         ssize_t ret;
6499         ssize_t size;
6500
6501         if (!count)
6502                 return 0;
6503
6504 #ifdef CONFIG_TRACER_MAX_TRACE
6505         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6506                 return -EBUSY;
6507 #endif
6508
6509         if (!info->spare) {
6510                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6511                                                           iter->cpu_file);
6512                 info->spare_cpu = iter->cpu_file;
6513         }
6514         if (!info->spare)
6515                 return -ENOMEM;
6516
6517         /* Do we have previous read data to read? */
6518         if (info->read < PAGE_SIZE)
6519                 goto read;
6520
6521  again:
6522         trace_access_lock(iter->cpu_file);
6523         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6524                                     &info->spare,
6525                                     count,
6526                                     iter->cpu_file, 0);
6527         trace_access_unlock(iter->cpu_file);
6528
6529         if (ret < 0) {
6530                 if (trace_empty(iter)) {
6531                         if ((filp->f_flags & O_NONBLOCK))
6532                                 return -EAGAIN;
6533
6534                         ret = wait_on_pipe(iter, false);
6535                         if (ret)
6536                                 return ret;
6537
6538                         goto again;
6539                 }
6540                 return 0;
6541         }
6542
6543         info->read = 0;
6544  read:
6545         size = PAGE_SIZE - info->read;
6546         if (size > count)
6547                 size = count;
6548
6549         ret = copy_to_user(ubuf, info->spare + info->read, size);
6550         if (ret == size)
6551                 return -EFAULT;
6552
6553         size -= ret;
6554
6555         *ppos += size;
6556         info->read += size;
6557
6558         return size;
6559 }
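
/*
 * Illustrative user-space sketch (not built): reading raw ring-buffer
 * pages from a per-cpu trace_pipe_raw file, the read() path implemented
 * above.  Data arrives as whole binary pages meant for tools that parse
 * the ring-buffer page format.  The mount point, cpu number and 4K page
 * size are assumptions.
 */
#if 0
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define PAGE_SZ 4096

int main(void)
{
	char page[PAGE_SZ];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		  O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open trace_pipe_raw");
		return 1;
	}

	for (;;) {
		n = read(fd, page, sizeof(page));
		if (n > 0) {
			fwrite(page, 1, n, stdout);	/* raw page data */
		} else if (n < 0 && errno == EAGAIN) {
			usleep(100 * 1000);		/* buffer empty */
		} else {
			break;
		}
	}

	close(fd);
	return 0;
}
#endif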
6560
6561 static int tracing_buffers_release(struct inode *inode, struct file *file)
6562 {
6563         struct ftrace_buffer_info *info = file->private_data;
6564         struct trace_iterator *iter = &info->iter;
6565
6566         mutex_lock(&trace_types_lock);
6567
6568         iter->tr->current_trace->ref--;
6569
6570         __trace_array_put(iter->tr);
6571
6572         if (info->spare)
6573                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6574                                            info->spare_cpu, info->spare);
6575         kfree(info);
6576
6577         mutex_unlock(&trace_types_lock);
6578
6579         return 0;
6580 }
6581
6582 struct buffer_ref {
6583         struct ring_buffer      *buffer;
6584         void                    *page;
6585         int                     cpu;
6586         int                     ref;
6587 };
6588
6589 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6590                                     struct pipe_buffer *buf)
6591 {
6592         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6593
6594         if (--ref->ref)
6595                 return;
6596
6597         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6598         kfree(ref);
6599         buf->private = 0;
6600 }
6601
6602 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6603                                 struct pipe_buffer *buf)
6604 {
6605         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6606
6607         ref->ref++;
6608 }
6609
6610 /* Pipe buffer operations for a buffer. */
6611 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6612         .can_merge              = 0,
6613         .confirm                = generic_pipe_buf_confirm,
6614         .release                = buffer_pipe_buf_release,
6615         .steal                  = generic_pipe_buf_steal,
6616         .get                    = buffer_pipe_buf_get,
6617 };
6618
6619 /*
6620  * Callback from splice_to_pipe(): release any pages still referenced
6621  * in the spd if we errored out while filling the pipe.
6622  */
6623 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6624 {
6625         struct buffer_ref *ref =
6626                 (struct buffer_ref *)spd->partial[i].private;
6627
6628         if (--ref->ref)
6629                 return;
6630
6631         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6632         kfree(ref);
6633         spd->partial[i].private = 0;
6634 }
6635
6636 static ssize_t
6637 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6638                             struct pipe_inode_info *pipe, size_t len,
6639                             unsigned int flags)
6640 {
6641         struct ftrace_buffer_info *info = file->private_data;
6642         struct trace_iterator *iter = &info->iter;
6643         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6644         struct page *pages_def[PIPE_DEF_BUFFERS];
6645         struct splice_pipe_desc spd = {
6646                 .pages          = pages_def,
6647                 .partial        = partial_def,
6648                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6649                 .ops            = &buffer_pipe_buf_ops,
6650                 .spd_release    = buffer_spd_release,
6651         };
6652         struct buffer_ref *ref;
6653         int entries, size, i;
6654         ssize_t ret = 0;
6655
6656 #ifdef CONFIG_TRACER_MAX_TRACE
6657         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6658                 return -EBUSY;
6659 #endif
6660
6661         if (*ppos & (PAGE_SIZE - 1))
6662                 return -EINVAL;
6663
6664         if (len & (PAGE_SIZE - 1)) {
6665                 if (len < PAGE_SIZE)
6666                         return -EINVAL;
6667                 len &= PAGE_MASK;
6668         }
6669
6670         if (splice_grow_spd(pipe, &spd))
6671                 return -ENOMEM;
6672
6673  again:
6674         trace_access_lock(iter->cpu_file);
6675         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6676
6677         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6678                 struct page *page;
6679                 int r;
6680
6681                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6682                 if (!ref) {
6683                         ret = -ENOMEM;
6684                         break;
6685                 }
6686
6687                 ref->ref = 1;
6688                 ref->buffer = iter->trace_buffer->buffer;
6689                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6690                 if (!ref->page) {
6691                         ret = -ENOMEM;
6692                         kfree(ref);
6693                         break;
6694                 }
6695                 ref->cpu = iter->cpu_file;
6696
6697                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6698                                           len, iter->cpu_file, 1);
6699                 if (r < 0) {
6700                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6701                                                    ref->page);
6702                         kfree(ref);
6703                         break;
6704                 }
6705
6706                 /*
6707                  * Zero out any leftover data, since this page is
6708                  * headed to user land.
6709                  */
6710                 size = ring_buffer_page_len(ref->page);
6711                 if (size < PAGE_SIZE)
6712                         memset(ref->page + size, 0, PAGE_SIZE - size);
6713
6714                 page = virt_to_page(ref->page);
6715
6716                 spd.pages[i] = page;
6717                 spd.partial[i].len = PAGE_SIZE;
6718                 spd.partial[i].offset = 0;
6719                 spd.partial[i].private = (unsigned long)ref;
6720                 spd.nr_pages++;
6721                 *ppos += PAGE_SIZE;
6722
6723                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6724         }
6725
6726         trace_access_unlock(iter->cpu_file);
6727         spd.nr_pages = i;
6728
6729         /* did we read anything? */
6730         if (!spd.nr_pages) {
6731                 if (ret)
6732                         goto out;
6733
6734                 ret = -EAGAIN;
6735                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6736                         goto out;
6737
6738                 ret = wait_on_pipe(iter, true);
6739                 if (ret)
6740                         goto out;
6741
6742                 goto again;
6743         }
6744
6745         ret = splice_to_pipe(pipe, &spd);
6746 out:
6747         splice_shrink_spd(&spd);
6748
6749         return ret;
6750 }
6751
6752 static const struct file_operations tracing_buffers_fops = {
6753         .open           = tracing_buffers_open,
6754         .read           = tracing_buffers_read,
6755         .poll           = tracing_buffers_poll,
6756         .release        = tracing_buffers_release,
6757         .splice_read    = tracing_buffers_splice_read,
6758         .llseek         = no_llseek,
6759 };
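
/*
 * Illustrative user-space sketch (not built): draining a per-cpu
 * trace_pipe_raw file with splice(2), which exercises
 * tracing_buffers_splice_read() above and moves whole ring-buffer pages
 * without copying them through user space.  The mount point, cpu number
 * and output file name are assumptions.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int raw, out, p[2];
	ssize_t n;

	raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (raw < 0 || out < 0 || pipe(p) < 0) {
		perror("setup");
		return 1;
	}

	/* Length must be at least one page; it is rounded down to whole pages. */
	while ((n = splice(raw, NULL, p[1], NULL, 64 * 4096,
			   SPLICE_F_NONBLOCK)) > 0) {
		if (splice(p[0], NULL, out, NULL, n, 0) < 0) {
			perror("splice to file");
			break;
		}
	}

	close(raw);
	close(out);
	return 0;
}
#endif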
6760
6761 static ssize_t
6762 tracing_stats_read(struct file *filp, char __user *ubuf,
6763                    size_t count, loff_t *ppos)
6764 {
6765         struct inode *inode = file_inode(filp);
6766         struct trace_array *tr = inode->i_private;
6767         struct trace_buffer *trace_buf = &tr->trace_buffer;
6768         int cpu = tracing_get_cpu(inode);
6769         struct trace_seq *s;
6770         unsigned long cnt;
6771         unsigned long long t;
6772         unsigned long usec_rem;
6773
6774         s = kmalloc(sizeof(*s), GFP_KERNEL);
6775         if (!s)
6776                 return -ENOMEM;
6777
6778         trace_seq_init(s);
6779
6780         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6781         trace_seq_printf(s, "entries: %ld\n", cnt);
6782
6783         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6784         trace_seq_printf(s, "overrun: %ld\n", cnt);
6785
6786         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6787         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6788
6789         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6790         trace_seq_printf(s, "bytes: %ld\n", cnt);
6791
6792         if (trace_clocks[tr->clock_id].in_ns) {
6793                 /* local or global for trace_clock */
6794                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6795                 usec_rem = do_div(t, USEC_PER_SEC);
6796                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6797                                                                 t, usec_rem);
6798
6799                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6800                 usec_rem = do_div(t, USEC_PER_SEC);
6801                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6802         } else {
6803                 /* counter or tsc mode for trace_clock */
6804                 trace_seq_printf(s, "oldest event ts: %llu\n",
6805                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6806
6807                 trace_seq_printf(s, "now ts: %llu\n",
6808                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6809         }
6810
6811         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6812         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6813
6814         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6815         trace_seq_printf(s, "read events: %ld\n", cnt);
6816
6817         count = simple_read_from_buffer(ubuf, count, ppos,
6818                                         s->buffer, trace_seq_used(s));
6819
6820         kfree(s);
6821
6822         return count;
6823 }
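
/*
 * Minimal stand-alone sketch (not built) of the timestamp formatting the
 * stats read above performs with ns2usecs() and do_div(): a nanosecond
 * timestamp is reduced to microseconds, then split into seconds and a
 * six-digit microsecond remainder.
 */
#if 0
#include <inttypes.h>
#include <stdio.h>

static void print_ts(uint64_t ns)
{
	uint64_t usecs = ns / 1000;		/* ns2usecs() */
	uint64_t secs  = usecs / 1000000;	/* do_div() quotient */
	uint64_t rem   = usecs % 1000000;	/* do_div() remainder */

	printf("%5" PRIu64 ".%06" PRIu64 "\n", secs, rem);
}

int main(void)
{
	print_ts(1234567890123ULL);	/* prints " 1234.567890" */
	return 0;
}
#endif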
6824
6825 static const struct file_operations tracing_stats_fops = {
6826         .open           = tracing_open_generic_tr,
6827         .read           = tracing_stats_read,
6828         .llseek         = generic_file_llseek,
6829         .release        = tracing_release_generic_tr,
6830 };
6831
6832 #ifdef CONFIG_DYNAMIC_FTRACE
6833
6834 static ssize_t
6835 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6836                   size_t cnt, loff_t *ppos)
6837 {
6838         unsigned long *p = filp->private_data;
6839         char buf[64]; /* Not too big for a shallow stack */
6840         int r;
6841
6842         r = scnprintf(buf, 63, "%ld", *p);
6843         buf[r++] = '\n';
6844
6845         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6846 }
6847
6848 static const struct file_operations tracing_dyn_info_fops = {
6849         .open           = tracing_open_generic,
6850         .read           = tracing_read_dyn_info,
6851         .llseek         = generic_file_llseek,
6852 };
6853 #endif /* CONFIG_DYNAMIC_FTRACE */
6854
6855 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6856 static void
6857 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6858                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6859                 void *data)
6860 {
6861         tracing_snapshot_instance(tr);
6862 }
6863
6864 static void
6865 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6866                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6867                       void *data)
6868 {
6869         struct ftrace_func_mapper *mapper = data;
6870         long *count = NULL;
6871
6872         if (mapper)
6873                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6874
6875         if (count) {
6876
6877                 if (*count <= 0)
6878                         return;
6879
6880                 (*count)--;
6881         }
6882
6883         tracing_snapshot_instance(tr);
6884 }
6885
6886 static int
6887 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6888                       struct ftrace_probe_ops *ops, void *data)
6889 {
6890         struct ftrace_func_mapper *mapper = data;
6891         long *count = NULL;
6892
6893         seq_printf(m, "%ps:", (void *)ip);
6894
6895         seq_puts(m, "snapshot");
6896
6897         if (mapper)
6898                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6899
6900         if (count)
6901                 seq_printf(m, ":count=%ld\n", *count);
6902         else
6903                 seq_puts(m, ":unlimited\n");
6904
6905         return 0;
6906 }
6907
6908 static int
6909 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6910                      unsigned long ip, void *init_data, void **data)
6911 {
6912         struct ftrace_func_mapper *mapper = *data;
6913
6914         if (!mapper) {
6915                 mapper = allocate_ftrace_func_mapper();
6916                 if (!mapper)
6917                         return -ENOMEM;
6918                 *data = mapper;
6919         }
6920
6921         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6922 }
6923
6924 static void
6925 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6926                      unsigned long ip, void *data)
6927 {
6928         struct ftrace_func_mapper *mapper = data;
6929
6930         if (!ip) {
6931                 if (!mapper)
6932                         return;
6933                 free_ftrace_func_mapper(mapper, NULL);
6934                 return;
6935         }
6936
6937         ftrace_func_mapper_remove_ip(mapper, ip);
6938 }
6939
6940 static struct ftrace_probe_ops snapshot_probe_ops = {
6941         .func                   = ftrace_snapshot,
6942         .print                  = ftrace_snapshot_print,
6943 };
6944
6945 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6946         .func                   = ftrace_count_snapshot,
6947         .print                  = ftrace_snapshot_print,
6948         .init                   = ftrace_snapshot_init,
6949         .free                   = ftrace_snapshot_free,
6950 };
6951
6952 static int
6953 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6954                                char *glob, char *cmd, char *param, int enable)
6955 {
6956         struct ftrace_probe_ops *ops;
6957         void *count = (void *)-1;
6958         char *number;
6959         int ret;
6960
6961         if (!tr)
6962                 return -ENODEV;
6963
6964         /* hash funcs only work with set_ftrace_filter */
6965         if (!enable)
6966                 return -EINVAL;
6967
6968         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6969
6970         if (glob[0] == '!')
6971                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6972
6973         if (!param)
6974                 goto out_reg;
6975
6976         number = strsep(&param, ":");
6977
6978         if (!strlen(number))
6979                 goto out_reg;
6980
6981         /*
6982          * We use the callback data field (which is a pointer)
6983          * as our counter.
6984          */
6985         ret = kstrtoul(number, 0, (unsigned long *)&count);
6986         if (ret)
6987                 return ret;
6988
6989  out_reg:
6990         ret = alloc_snapshot(tr);
6991         if (ret < 0)
6992                 goto out;
6993
6994         ret = register_ftrace_function_probe(glob, tr, ops, count);
6995
6996  out:
6997         return ret < 0 ? ret : 0;
6998 }
6999
7000 static struct ftrace_func_command ftrace_snapshot_cmd = {
7001         .name                   = "snapshot",
7002         .func                   = ftrace_trace_snapshot_callback,
7003 };
7004
7005 static __init int register_snapshot_cmd(void)
7006 {
7007         return register_ftrace_command(&ftrace_snapshot_cmd);
7008 }
7009 #else
7010 static inline __init int register_snapshot_cmd(void) { return 0; }
7011 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
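
/*
 * Illustrative user-space sketch (not built): arming the "snapshot"
 * function command registered above by writing a "<func>:snapshot[:count]"
 * line to set_ftrace_filter, which is what ends up in
 * ftrace_trace_snapshot_callback().  The function name "schedule", the
 * count and the mount point are placeholders.  Prefixing the line with
 * '!' removes the probe again.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Take a snapshot the first 3 times schedule() is hit. */
	const char *cmd = "schedule:snapshot:3";
	int fd;

	fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
	if (fd < 0) {
		perror("open set_ftrace_filter");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write snapshot command");
	close(fd);
	return 0;
}
#endif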
7012
7013 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7014 {
7015         if (WARN_ON(!tr->dir))
7016                 return ERR_PTR(-ENODEV);
7017
7018         /* Top directory uses NULL as the parent */
7019         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7020                 return NULL;
7021
7022         /* All sub buffers have a descriptor */
7023         return tr->dir;
7024 }
7025
7026 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7027 {
7028         struct dentry *d_tracer;
7029
7030         if (tr->percpu_dir)
7031                 return tr->percpu_dir;
7032
7033         d_tracer = tracing_get_dentry(tr);
7034         if (IS_ERR(d_tracer))
7035                 return NULL;
7036
7037         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7038
7039         WARN_ONCE(!tr->percpu_dir,
7040                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7041
7042         return tr->percpu_dir;
7043 }
7044
7045 static struct dentry *
7046 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7047                       void *data, long cpu, const struct file_operations *fops)
7048 {
7049         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7050
7051         if (ret) /* See tracing_get_cpu() */
7052                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7053         return ret;
7054 }
7055
7056 static void
7057 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7058 {
7059         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7060         struct dentry *d_cpu;
7061         char cpu_dir[30]; /* 30 characters should be more than enough */
7062
7063         if (!d_percpu)
7064                 return;
7065
7066         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7067         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7068         if (!d_cpu) {
7069                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7070                 return;
7071         }
7072
7073         /* per cpu trace_pipe */
7074         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7075                                 tr, cpu, &tracing_pipe_fops);
7076
7077         /* per cpu trace */
7078         trace_create_cpu_file("trace", 0644, d_cpu,
7079                                 tr, cpu, &tracing_fops);
7080
7081         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7082                                 tr, cpu, &tracing_buffers_fops);
7083
7084         trace_create_cpu_file("stats", 0444, d_cpu,
7085                                 tr, cpu, &tracing_stats_fops);
7086
7087         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7088                                 tr, cpu, &tracing_entries_fops);
7089
7090 #ifdef CONFIG_TRACER_SNAPSHOT
7091         trace_create_cpu_file("snapshot", 0644, d_cpu,
7092                                 tr, cpu, &snapshot_fops);
7093
7094         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7095                                 tr, cpu, &snapshot_raw_fops);
7096 #endif
7097 }
7098
7099 #ifdef CONFIG_FTRACE_SELFTEST
7100 /* Let selftest have access to static functions in this file */
7101 #include "trace_selftest.c"
7102 #endif
7103
7104 static ssize_t
7105 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7106                         loff_t *ppos)
7107 {
7108         struct trace_option_dentry *topt = filp->private_data;
7109         char *buf;
7110
7111         if (topt->flags->val & topt->opt->bit)
7112                 buf = "1\n";
7113         else
7114                 buf = "0\n";
7115
7116         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7117 }
7118
7119 static ssize_t
7120 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7121                          loff_t *ppos)
7122 {
7123         struct trace_option_dentry *topt = filp->private_data;
7124         unsigned long val;
7125         int ret;
7126
7127         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7128         if (ret)
7129                 return ret;
7130
7131         if (val != 0 && val != 1)
7132                 return -EINVAL;
7133
7134         if (!!(topt->flags->val & topt->opt->bit) != val) {
7135                 mutex_lock(&trace_types_lock);
7136                 ret = __set_tracer_option(topt->tr, topt->flags,
7137                                           topt->opt, !val);
7138                 mutex_unlock(&trace_types_lock);
7139                 if (ret)
7140                         return ret;
7141         }
7142
7143         *ppos += cnt;
7144
7145         return cnt;
7146 }
7147
7148
7149 static const struct file_operations trace_options_fops = {
7150         .open = tracing_open_generic,
7151         .read = trace_options_read,
7152         .write = trace_options_write,
7153         .llseek = generic_file_llseek,
7154 };
7155
7156 /*
7157  * In order to pass in both the trace_array descriptor as well as the index
7158  * of the flag that a trace option file represents, the trace_array
7159  * has a character array trace_flags_index[], where each element holds the
7160  * index of the bit for the flag it represents: index[0] == 0,
7161  * index[1] == 1, etc.  The address of the element for a given flag is
7162  * what gets passed to that flag's option file read/write callbacks.
7163  *
7164  * To extract both the index and the trace_array descriptor,
7165  * get_tr_index() proceeds in three steps.
7166  *
7167  *   idx = *ptr;
7168  *
7169  * Dereferencing the pointer yields the index, because each element
7170  * stores its own position (remember, index[1] == 1).
7171  *
7172  * Subtracting that index from the pointer then lands on the start of
7173  * the array itself:
7174  *
7175  *   ptr - idx == &index[0]
7176  *
7177  * Finally, a simple container_of() from that pointer gets us back to
7178  * the trace_array descriptor.
7179  */
7180 static void get_tr_index(void *data, struct trace_array **ptr,
7181                          unsigned int *pindex)
7182 {
7183         *pindex = *(unsigned char *)data;
7184
7185         *ptr = container_of(data - *pindex, struct trace_array,
7186                             trace_flags_index);
7187 }
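
/*
 * Minimal stand-alone sketch (not built) of the recovery performed by
 * get_tr_index() above, using a hypothetical struct: because each array
 * element stores its own position, one pointer is enough to get back both
 * the index and the containing structure.
 */
#if 0
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_array {
	const char *name;
	unsigned char flags_index[8];		/* flags_index[i] == i */
};

int main(void)
{
	struct demo_array tr = { .name = "demo" };
	struct demo_array *back;
	unsigned int i, idx;
	void *data;

	for (i = 0; i < 8; i++)
		tr.flags_index[i] = i;

	/* What a flag option file would get as its private data. */
	data = &tr.flags_index[5];

	/* The two steps get_tr_index() performs. */
	idx = *(unsigned char *)data;
	back = container_of((char *)data - idx, struct demo_array, flags_index);

	printf("index=%u name=%s\n", idx, back->name);	/* index=5 name=demo */
	return 0;
}
#endif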
7188
7189 static ssize_t
7190 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7191                         loff_t *ppos)
7192 {
7193         void *tr_index = filp->private_data;
7194         struct trace_array *tr;
7195         unsigned int index;
7196         char *buf;
7197
7198         get_tr_index(tr_index, &tr, &index);
7199
7200         if (tr->trace_flags & (1 << index))
7201                 buf = "1\n";
7202         else
7203                 buf = "0\n";
7204
7205         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7206 }
7207
7208 static ssize_t
7209 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7210                          loff_t *ppos)
7211 {
7212         void *tr_index = filp->private_data;
7213         struct trace_array *tr;
7214         unsigned int index;
7215         unsigned long val;
7216         int ret;
7217
7218         get_tr_index(tr_index, &tr, &index);
7219
7220         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7221         if (ret)
7222                 return ret;
7223
7224         if (val != 0 && val != 1)
7225                 return -EINVAL;
7226
7227         mutex_lock(&trace_types_lock);
7228         ret = set_tracer_flag(tr, 1 << index, val);
7229         mutex_unlock(&trace_types_lock);
7230
7231         if (ret < 0)
7232                 return ret;
7233
7234         *ppos += cnt;
7235
7236         return cnt;
7237 }
7238
7239 static const struct file_operations trace_options_core_fops = {
7240         .open = tracing_open_generic,
7241         .read = trace_options_core_read,
7242         .write = trace_options_core_write,
7243         .llseek = generic_file_llseek,
7244 };
7245
7246 struct dentry *trace_create_file(const char *name,
7247                                  umode_t mode,
7248                                  struct dentry *parent,
7249                                  void *data,
7250                                  const struct file_operations *fops)
7251 {
7252         struct dentry *ret;
7253
7254         ret = tracefs_create_file(name, mode, parent, data, fops);
7255         if (!ret)
7256                 pr_warn("Could not create tracefs '%s' entry\n", name);
7257
7258         return ret;
7259 }
7260
7261
7262 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7263 {
7264         struct dentry *d_tracer;
7265
7266         if (tr->options)
7267                 return tr->options;
7268
7269         d_tracer = tracing_get_dentry(tr);
7270         if (IS_ERR(d_tracer))
7271                 return NULL;
7272
7273         tr->options = tracefs_create_dir("options", d_tracer);
7274         if (!tr->options) {
7275                 pr_warn("Could not create tracefs directory 'options'\n");
7276                 return NULL;
7277         }
7278
7279         return tr->options;
7280 }
7281
7282 static void
7283 create_trace_option_file(struct trace_array *tr,
7284                          struct trace_option_dentry *topt,
7285                          struct tracer_flags *flags,
7286                          struct tracer_opt *opt)
7287 {
7288         struct dentry *t_options;
7289
7290         t_options = trace_options_init_dentry(tr);
7291         if (!t_options)
7292                 return;
7293
7294         topt->flags = flags;
7295         topt->opt = opt;
7296         topt->tr = tr;
7297
7298         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7299                                     &trace_options_fops);
7300
7301 }
7302
7303 static void
7304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7305 {
7306         struct trace_option_dentry *topts;
7307         struct trace_options *tr_topts;
7308         struct tracer_flags *flags;
7309         struct tracer_opt *opts;
7310         int cnt;
7311         int i;
7312
7313         if (!tracer)
7314                 return;
7315
7316         flags = tracer->flags;
7317
7318         if (!flags || !flags->opts)
7319                 return;
7320
7321         /*
7322          * If this is an instance, only create flags for tracers
7323          * the instance may have.
7324          */
7325         if (!trace_ok_for_array(tracer, tr))
7326                 return;
7327
7328         for (i = 0; i < tr->nr_topts; i++) {
7329                 /* Make sure there are no duplicate flags. */
7330                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7331                         return;
7332         }
7333
7334         opts = flags->opts;
7335
7336         for (cnt = 0; opts[cnt].name; cnt++)
7337                 ;
7338
7339         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7340         if (!topts)
7341                 return;
7342
7343         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7344                             GFP_KERNEL);
7345         if (!tr_topts) {
7346                 kfree(topts);
7347                 return;
7348         }
7349
7350         tr->topts = tr_topts;
7351         tr->topts[tr->nr_topts].tracer = tracer;
7352         tr->topts[tr->nr_topts].topts = topts;
7353         tr->nr_topts++;
7354
7355         for (cnt = 0; opts[cnt].name; cnt++) {
7356                 create_trace_option_file(tr, &topts[cnt], flags,
7357                                          &opts[cnt]);
7358                 WARN_ONCE(topts[cnt].entry == NULL,
7359                           "Failed to create trace option: %s",
7360                           opts[cnt].name);
7361         }
7362 }
7363
7364 static struct dentry *
7365 create_trace_option_core_file(struct trace_array *tr,
7366                               const char *option, long index)
7367 {
7368         struct dentry *t_options;
7369
7370         t_options = trace_options_init_dentry(tr);
7371         if (!t_options)
7372                 return NULL;
7373
7374         return trace_create_file(option, 0644, t_options,
7375                                  (void *)&tr->trace_flags_index[index],
7376                                  &trace_options_core_fops);
7377 }
7378
7379 static void create_trace_options_dir(struct trace_array *tr)
7380 {
7381         struct dentry *t_options;
7382         bool top_level = tr == &global_trace;
7383         int i;
7384
7385         t_options = trace_options_init_dentry(tr);
7386         if (!t_options)
7387                 return;
7388
7389         for (i = 0; trace_options[i]; i++) {
7390                 if (top_level ||
7391                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7392                         create_trace_option_core_file(tr, trace_options[i], i);
7393         }
7394 }
7395
7396 static ssize_t
7397 rb_simple_read(struct file *filp, char __user *ubuf,
7398                size_t cnt, loff_t *ppos)
7399 {
7400         struct trace_array *tr = filp->private_data;
7401         char buf[64];
7402         int r;
7403
7404         r = tracer_tracing_is_on(tr);
7405         r = sprintf(buf, "%d\n", r);
7406
7407         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7408 }
7409
7410 static ssize_t
7411 rb_simple_write(struct file *filp, const char __user *ubuf,
7412                 size_t cnt, loff_t *ppos)
7413 {
7414         struct trace_array *tr = filp->private_data;
7415         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7416         unsigned long val;
7417         int ret;
7418
7419         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7420         if (ret)
7421                 return ret;
7422
7423         if (buffer) {
7424                 mutex_lock(&trace_types_lock);
7425                 if (val) {
7426                         tracer_tracing_on(tr);
7427                         if (tr->current_trace->start)
7428                                 tr->current_trace->start(tr);
7429                 } else {
7430                         tracer_tracing_off(tr);
7431                         if (tr->current_trace->stop)
7432                                 tr->current_trace->stop(tr);
7433                 }
7434                 mutex_unlock(&trace_types_lock);
7435         }
7436
7437         (*ppos)++;
7438
7439         return cnt;
7440 }
7441
7442 static const struct file_operations rb_simple_fops = {
7443         .open           = tracing_open_generic_tr,
7444         .read           = rb_simple_read,
7445         .write          = rb_simple_write,
7446         .release        = tracing_release_generic_tr,
7447         .llseek         = default_llseek,
7448 };
7449
7450 struct dentry *trace_instance_dir;
7451
7452 static void
7453 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7454
7455 static int
7456 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7457 {
7458         enum ring_buffer_flags rb_flags;
7459
7460         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7461
7462         buf->tr = tr;
7463
7464         buf->buffer = ring_buffer_alloc(size, rb_flags);
7465         if (!buf->buffer)
7466                 return -ENOMEM;
7467
7468         buf->data = alloc_percpu(struct trace_array_cpu);
7469         if (!buf->data) {
7470                 ring_buffer_free(buf->buffer);
7471                 return -ENOMEM;
7472         }
7473
7474         /* Allocate the first page for all buffers */
7475         set_buffer_entries(&tr->trace_buffer,
7476                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7477
7478         return 0;
7479 }
7480
7481 static int allocate_trace_buffers(struct trace_array *tr, int size)
7482 {
7483         int ret;
7484
7485         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7486         if (ret)
7487                 return ret;
7488
7489 #ifdef CONFIG_TRACER_MAX_TRACE
7490         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7491                                     allocate_snapshot ? size : 1);
7492         if (WARN_ON(ret)) {
7493                 ring_buffer_free(tr->trace_buffer.buffer);
7494                 free_percpu(tr->trace_buffer.data);
7495                 return -ENOMEM;
7496         }
7497         tr->allocated_snapshot = allocate_snapshot;
7498
7499         /*
7500          * Only the top level trace array gets its snapshot allocated
7501          * from the kernel command line.
7502          */
7503         allocate_snapshot = false;
7504 #endif
7505         return 0;
7506 }
7507
7508 static void free_trace_buffer(struct trace_buffer *buf)
7509 {
7510         if (buf->buffer) {
7511                 ring_buffer_free(buf->buffer);
7512                 buf->buffer = NULL;
7513                 free_percpu(buf->data);
7514                 buf->data = NULL;
7515         }
7516 }
7517
7518 static void free_trace_buffers(struct trace_array *tr)
7519 {
7520         if (!tr)
7521                 return;
7522
7523         free_trace_buffer(&tr->trace_buffer);
7524
7525 #ifdef CONFIG_TRACER_MAX_TRACE
7526         free_trace_buffer(&tr->max_buffer);
7527 #endif
7528 }
7529
7530 static void init_trace_flags_index(struct trace_array *tr)
7531 {
7532         int i;
7533
7534         /* Used by the trace options files */
7535         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7536                 tr->trace_flags_index[i] = i;
7537 }
7538
7539 static void __update_tracer_options(struct trace_array *tr)
7540 {
7541         struct tracer *t;
7542
7543         for (t = trace_types; t; t = t->next)
7544                 add_tracer_options(tr, t);
7545 }
7546
7547 static void update_tracer_options(struct trace_array *tr)
7548 {
7549         mutex_lock(&trace_types_lock);
7550         __update_tracer_options(tr);
7551         mutex_unlock(&trace_types_lock);
7552 }
7553
7554 static int instance_mkdir(const char *name)
7555 {
7556         struct trace_array *tr;
7557         int ret;
7558
7559         mutex_lock(&trace_types_lock);
7560
7561         ret = -EEXIST;
7562         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7563                 if (tr->name && strcmp(tr->name, name) == 0)
7564                         goto out_unlock;
7565         }
7566
7567         ret = -ENOMEM;
7568         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7569         if (!tr)
7570                 goto out_unlock;
7571
7572         tr->name = kstrdup(name, GFP_KERNEL);
7573         if (!tr->name)
7574                 goto out_free_tr;
7575
7576         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7577                 goto out_free_tr;
7578
7579         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7580
7581         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7582
7583         raw_spin_lock_init(&tr->start_lock);
7584
7585         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7586
7587         tr->current_trace = &nop_trace;
7588
7589         INIT_LIST_HEAD(&tr->systems);
7590         INIT_LIST_HEAD(&tr->events);
7591
7592         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7593                 goto out_free_tr;
7594
7595         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7596         if (!tr->dir)
7597                 goto out_free_tr;
7598
7599         ret = event_trace_add_tracer(tr->dir, tr);
7600         if (ret) {
7601                 tracefs_remove_recursive(tr->dir);
7602                 goto out_free_tr;
7603         }
7604
7605         ftrace_init_trace_array(tr);
7606
7607         init_tracer_tracefs(tr, tr->dir);
7608         init_trace_flags_index(tr);
7609         __update_tracer_options(tr);
7610
7611         list_add(&tr->list, &ftrace_trace_arrays);
7612
7613         mutex_unlock(&trace_types_lock);
7614
7615         return 0;
7616
7617  out_free_tr:
7618         free_trace_buffers(tr);
7619         free_cpumask_var(tr->tracing_cpumask);
7620         kfree(tr->name);
7621         kfree(tr);
7622
7623  out_unlock:
7624         mutex_unlock(&trace_types_lock);
7625
7626         return ret;
7627
7628 }
7629
7630 static int instance_rmdir(const char *name)
7631 {
7632         struct trace_array *tr;
7633         int found = 0;
7634         int ret;
7635         int i;
7636
7637         mutex_lock(&trace_types_lock);
7638
7639         ret = -ENODEV;
7640         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7641                 if (tr->name && strcmp(tr->name, name) == 0) {
7642                         found = 1;
7643                         break;
7644                 }
7645         }
7646         if (!found)
7647                 goto out_unlock;
7648
7649         ret = -EBUSY;
7650         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7651                 goto out_unlock;
7652
7653         list_del(&tr->list);
7654
7655         /* Disable all the flags that were enabled coming in */
7656         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7657                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7658                         set_tracer_flag(tr, 1 << i, 0);
7659         }
7660
7661         tracing_set_nop(tr);
7662         clear_ftrace_function_probes(tr);
7663         event_trace_del_tracer(tr);
7664         ftrace_clear_pids(tr);
7665         ftrace_destroy_function_files(tr);
7666         tracefs_remove_recursive(tr->dir);
7667         free_trace_buffers(tr);
7668
7669         for (i = 0; i < tr->nr_topts; i++) {
7670                 kfree(tr->topts[i].topts);
7671         }
7672         kfree(tr->topts);
7673
7674         kfree(tr->name);
7675         kfree(tr);
7676
7677         ret = 0;
7678
7679  out_unlock:
7680         mutex_unlock(&trace_types_lock);
7681
7682         return ret;
7683 }
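
/*
 * Illustrative user-space sketch (not built): instances are created and
 * destroyed with plain mkdir()/rmdir() in the tracefs "instances"
 * directory, which lands in instance_mkdir()/instance_rmdir() above.
 * The mount point and the instance name "foo" are placeholders; both
 * calls require sufficient privileges.
 */
#if 0
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	/* Creates a new trace_array with its own buffers and files. */
	if (mkdir("/sys/kernel/tracing/instances/foo", 0755) < 0)
		perror("mkdir instance");

	/* ... use instances/foo/trace, trace_pipe, events/, etc. ... */

	/* Tears it down again; fails with EBUSY while still referenced. */
	if (rmdir("/sys/kernel/tracing/instances/foo") < 0)
		perror("rmdir instance");

	return 0;
}
#endif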
7684
7685 static __init void create_trace_instances(struct dentry *d_tracer)
7686 {
7687         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7688                                                          instance_mkdir,
7689                                                          instance_rmdir);
7690         if (WARN_ON(!trace_instance_dir))
7691                 return;
7692 }
7693
7694 static void
7695 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7696 {
7697         int cpu;
7698
7699         trace_create_file("available_tracers", 0444, d_tracer,
7700                         tr, &show_traces_fops);
7701
7702         trace_create_file("current_tracer", 0644, d_tracer,
7703                         tr, &set_tracer_fops);
7704
7705         trace_create_file("tracing_cpumask", 0644, d_tracer,
7706                           tr, &tracing_cpumask_fops);
7707
7708         trace_create_file("trace_options", 0644, d_tracer,
7709                           tr, &tracing_iter_fops);
7710
7711         trace_create_file("trace", 0644, d_tracer,
7712                           tr, &tracing_fops);
7713
7714         trace_create_file("trace_pipe", 0444, d_tracer,
7715                           tr, &tracing_pipe_fops);
7716
7717         trace_create_file("buffer_size_kb", 0644, d_tracer,
7718                           tr, &tracing_entries_fops);
7719
7720         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7721                           tr, &tracing_total_entries_fops);
7722
7723         trace_create_file("free_buffer", 0200, d_tracer,
7724                           tr, &tracing_free_buffer_fops);
7725
7726         trace_create_file("trace_marker", 0220, d_tracer,
7727                           tr, &tracing_mark_fops);
7728
7729         trace_create_file("trace_marker_raw", 0220, d_tracer,
7730                           tr, &tracing_mark_raw_fops);
7731
7732         trace_create_file("trace_clock", 0644, d_tracer, tr,
7733                           &trace_clock_fops);
7734
7735         trace_create_file("tracing_on", 0644, d_tracer,
7736                           tr, &rb_simple_fops);
7737
7738         create_trace_options_dir(tr);
7739
7740 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7741         trace_create_file("tracing_max_latency", 0644, d_tracer,
7742                         &tr->max_latency, &tracing_max_lat_fops);
7743 #endif
7744
7745         if (ftrace_create_function_files(tr, d_tracer))
7746                 WARN(1, "Could not allocate function filter files");
7747
7748 #ifdef CONFIG_TRACER_SNAPSHOT
7749         trace_create_file("snapshot", 0644, d_tracer,
7750                           tr, &snapshot_fops);
7751 #endif
7752
7753         for_each_tracing_cpu(cpu)
7754                 tracing_init_tracefs_percpu(tr, cpu);
7755
7756         ftrace_init_tracefs(tr, d_tracer);
7757 }
7758
7759 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7760 {
7761         struct vfsmount *mnt;
7762         struct file_system_type *type;
7763
7764         /*
7765          * To maintain backward compatibility for tools that mount
7766          * debugfs to get to the tracing facility, tracefs is automatically
7767          * mounted to the debugfs/tracing directory.
7768          */
7769         type = get_fs_type("tracefs");
7770         if (!type)
7771                 return NULL;
7772         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7773         put_filesystem(type);
7774         if (IS_ERR(mnt))
7775                 return NULL;
7776         mntget(mnt);
7777
7778         return mnt;
7779 }
7780
7781 /**
7782  * tracing_init_dentry - initialize top level trace array
7783  *
7784  * This is called when creating files or directories in the tracing
7785  * directory, and via fs_initcall() by the boot-up code. It is expected
7786  * to return the dentry of the top level tracing directory.
7787  */
7788 struct dentry *tracing_init_dentry(void)
7789 {
7790         struct trace_array *tr = &global_trace;
7791
7792         /* The top level trace array uses NULL as parent */
7793         if (tr->dir)
7794                 return NULL;
7795
7796         if (WARN_ON(!tracefs_initialized()) ||
7797                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7798                  WARN_ON(!debugfs_initialized())))
7799                 return ERR_PTR(-ENODEV);
7800
7801         /*
7802          * As there may still be users that expect the tracing
7803          * files to exist in debugfs/tracing, we must automount
7804          * the tracefs file system there, so older tools still
7805          * work with the newer kernel.
7806          */
7807         tr->dir = debugfs_create_automount("tracing", NULL,
7808                                            trace_automount, NULL);
7809         if (!tr->dir) {
7810                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7811                 return ERR_PTR(-ENOMEM);
7812         }
7813
7814         return NULL;
7815 }
7816
7817 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7818 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7819
7820 static void __init trace_eval_init(void)
7821 {
7822         int len;
7823
7824         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7825         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7826 }
7827
7828 #ifdef CONFIG_MODULES
7829 static void trace_module_add_evals(struct module *mod)
7830 {
7831         if (!mod->num_trace_evals)
7832                 return;
7833
7834         /*
7835          * Modules with a bad taint do not have events created; do
7836          * not bother with their eval maps either.
7837          */
7838         if (trace_module_has_bad_taint(mod))
7839                 return;
7840
7841         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7842 }
7843
7844 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7845 static void trace_module_remove_evals(struct module *mod)
7846 {
7847         union trace_eval_map_item *map;
7848         union trace_eval_map_item **last = &trace_eval_maps;
7849
7850         if (!mod->num_trace_evals)
7851                 return;
7852
7853         mutex_lock(&trace_eval_mutex);
7854
7855         map = trace_eval_maps;
7856
7857         while (map) {
7858                 if (map->head.mod == mod)
7859                         break;
7860                 map = trace_eval_jmp_to_tail(map);
7861                 last = &map->tail.next;
7862                 map = map->tail.next;
7863         }
7864         if (!map)
7865                 goto out;
7866
7867         *last = trace_eval_jmp_to_tail(map)->tail.next;
7868         kfree(map);
7869  out:
7870         mutex_unlock(&trace_eval_mutex);
7871 }
7872 #else
7873 static inline void trace_module_remove_evals(struct module *mod) { }
7874 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
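
/*
 * Minimal stand-alone sketch (not built) of the pointer-to-pointer idiom
 * trace_module_remove_evals() uses above: keeping the address of the
 * previous "next" slot means the matching node can be unlinked without
 * special-casing the head of the list.
 */
#if 0
#include <stdio.h>

struct node {
	int val;
	struct node *next;
};

/* Unlink the first node whose value matches @val. */
static void unlink_val(struct node **head, int val)
{
	struct node **last = head;
	struct node *n = *head;

	while (n && n->val != val) {
		last = &n->next;
		n = n->next;
	}
	if (n)
		*last = n->next;	/* bypass the node, wherever it sits */
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *head = &a, *n;

	unlink_val(&head, 1);		/* removing the head needs no special case */
	for (n = head; n; n = n->next)
		printf("%d ", n->val);	/* prints "2 3" */
	printf("\n");
	return 0;
}
#endif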
7875
7876 static int trace_module_notify(struct notifier_block *self,
7877                                unsigned long val, void *data)
7878 {
7879         struct module *mod = data;
7880
7881         switch (val) {
7882         case MODULE_STATE_COMING:
7883                 trace_module_add_evals(mod);
7884                 break;
7885         case MODULE_STATE_GOING:
7886                 trace_module_remove_evals(mod);
7887                 break;
7888         }
7889
7890         return 0;
7891 }
7892
7893 static struct notifier_block trace_module_nb = {
7894         .notifier_call = trace_module_notify,
7895         .priority = 0,
7896 };
7897 #endif /* CONFIG_MODULES */
7898
7899 static __init int tracer_init_tracefs(void)
7900 {
7901         struct dentry *d_tracer;
7902
7903         trace_access_lock_init();
7904
7905         d_tracer = tracing_init_dentry();
7906         if (IS_ERR(d_tracer))
7907                 return 0;
7908
7909         init_tracer_tracefs(&global_trace, d_tracer);
7910         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7911
7912         trace_create_file("tracing_thresh", 0644, d_tracer,
7913                         &global_trace, &tracing_thresh_fops);
7914
7915         trace_create_file("README", 0444, d_tracer,
7916                         NULL, &tracing_readme_fops);
7917
7918         trace_create_file("saved_cmdlines", 0444, d_tracer,
7919                         NULL, &tracing_saved_cmdlines_fops);
7920
7921         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7922                           NULL, &tracing_saved_cmdlines_size_fops);
7923
7924         trace_eval_init();
7925
7926         trace_create_eval_file(d_tracer);
7927
7928 #ifdef CONFIG_MODULES
7929         register_module_notifier(&trace_module_nb);
7930 #endif
7931
7932 #ifdef CONFIG_DYNAMIC_FTRACE
7933         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7934                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7935 #endif
7936
7937         create_trace_instances(d_tracer);
7938
7939         update_tracer_options(&global_trace);
7940
7941         return 0;
7942 }
7943
7944 static int trace_panic_handler(struct notifier_block *this,
7945                                unsigned long event, void *unused)
7946 {
7947         if (ftrace_dump_on_oops)
7948                 ftrace_dump(ftrace_dump_on_oops);
7949         return NOTIFY_OK;
7950 }
7951
7952 static struct notifier_block trace_panic_notifier = {
7953         .notifier_call  = trace_panic_handler,
7954         .next           = NULL,
7955         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7956 };
7957
7958 static int trace_die_handler(struct notifier_block *self,
7959                              unsigned long val,
7960                              void *data)
7961 {
7962         switch (val) {
7963         case DIE_OOPS:
7964                 if (ftrace_dump_on_oops)
7965                         ftrace_dump(ftrace_dump_on_oops);
7966                 break;
7967         default:
7968                 break;
7969         }
7970         return NOTIFY_OK;
7971 }
7972
7973 static struct notifier_block trace_die_notifier = {
7974         .notifier_call = trace_die_handler,
7975         .priority = 200
7976 };
7977
7978 /*
7979  * printk is set to a max of 1024; we really don't need it that big.
7980  * Nothing should be printing 1000 characters anyway.
7981  */
7982 #define TRACE_MAX_PRINT         1000
7983
7984 /*
7985  * Define KERN_TRACE here so that we have one place to modify
7986  * it if we decide to change what log level the ftrace dump
7987  * should be printed at.
7988  */
7989 #define KERN_TRACE              KERN_EMERG
7990
7991 void
7992 trace_printk_seq(struct trace_seq *s)
7993 {
7994         /* Probably should print a warning here. */
7995         if (s->seq.len >= TRACE_MAX_PRINT)
7996                 s->seq.len = TRACE_MAX_PRINT;
7997
7998         /*
7999          * More paranoid code. Although the buffer size is set to
8000          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8001          * an extra layer of protection.
8002          */
8003         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8004                 s->seq.len = s->seq.size - 1;
8005
8006         /* Should be NUL-terminated, but we are paranoid. */
8007         s->buffer[s->seq.len] = 0;
8008
8009         printk(KERN_TRACE "%s", s->buffer);
8010
8011         trace_seq_init(s);
8012 }
8013
8014 void trace_init_global_iter(struct trace_iterator *iter)
8015 {
8016         iter->tr = &global_trace;
8017         iter->trace = iter->tr->current_trace;
8018         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8019         iter->trace_buffer = &global_trace.trace_buffer;
8020
8021         if (iter->trace && iter->trace->open)
8022                 iter->trace->open(iter);
8023
8024         /* Annotate start of buffers if we had overruns */
8025         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8026                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8027
8028         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8029         if (trace_clocks[iter->tr->clock_id].in_ns)
8030                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8031 }
8032
8033 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8034 {
8035         /* use static because iter can be a bit big for the stack */
8036         static struct trace_iterator iter;
8037         static atomic_t dump_running;
8038         struct trace_array *tr = &global_trace;
8039         unsigned int old_userobj;
8040         unsigned long flags;
8041         int cnt = 0, cpu;
8042
8043         /* Only allow one dump user at a time. */
8044         if (atomic_inc_return(&dump_running) != 1) {
8045                 atomic_dec(&dump_running);
8046                 return;
8047         }
8048
8049         /*
8050          * Always turn off tracing when we dump.
8051          * We don't need to show trace output of what happens
8052          * between multiple crashes.
8053          *
8054          * If the user does a sysrq-z, then they can re-enable
8055          * tracing with echo 1 > tracing_on.
8056          */
8057         tracing_off();
8058
8059         local_irq_save(flags);
8060
8061         /* Simulate the iterator */
8062         trace_init_global_iter(&iter);
8063
8064         for_each_tracing_cpu(cpu) {
8065                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8066         }
8067
8068         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8069
8070         /* don't look at user memory in panic mode */
8071         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8072
8073         switch (oops_dump_mode) {
8074         case DUMP_ALL:
8075                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8076                 break;
8077         case DUMP_ORIG:
8078                 iter.cpu_file = raw_smp_processor_id();
8079                 break;
8080         case DUMP_NONE:
8081                 goto out_enable;
8082         default:
8083                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8084                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8085         }
8086
8087         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8088
8089         /* Did function tracer already get disabled? */
8090         if (ftrace_is_dead()) {
8091                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8092                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8093         }
8094
8095         /*
8096          * We need to stop all tracing on all CPUs to read
8097          * the next buffer. This is a bit expensive, but it is
8098          * not done often. We read everything we can,
8099          * and then release the locks again.
8100          */
8101
8102         while (!trace_empty(&iter)) {
8103
8104                 if (!cnt)
8105                         printk(KERN_TRACE "---------------------------------\n");
8106
8107                 cnt++;
8108
8109                 /* reset all but tr, trace, and overruns */
8110                 memset(&iter.seq, 0,
8111                        sizeof(struct trace_iterator) -
8112                        offsetof(struct trace_iterator, seq));
8113                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8114                 iter.pos = -1;
8115
8116                 if (trace_find_next_entry_inc(&iter) != NULL) {
8117                         int ret;
8118
8119                         ret = print_trace_line(&iter);
8120                         if (ret != TRACE_TYPE_NO_CONSUME)
8121                                 trace_consume(&iter);
8122                 }
8123                 touch_nmi_watchdog();
8124
8125                 trace_printk_seq(&iter.seq);
8126         }
8127
8128         if (!cnt)
8129                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8130         else
8131                 printk(KERN_TRACE "---------------------------------\n");
8132
8133  out_enable:
8134         tr->trace_flags |= old_userobj;
8135
8136         for_each_tracing_cpu(cpu) {
8137                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8138         }
8139         atomic_dec(&dump_running);
8140         local_irq_restore(flags);
8141 }
8142 EXPORT_SYMBOL_GPL(ftrace_dump);
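
/*
 * Since ftrace_dump() is exported (GPL), a module can also dump the
 * global buffer from its own error path; a minimal sketch, where
 * bad_state stands in for a hypothetical error condition:
 *
 *	if (WARN_ON(bad_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * More commonly the dump is driven by the panic/die notifiers above,
 * controlled by ftrace_dump_on_oops (kernel command line
 * "ftrace_dump_on_oops[=orig_cpu]" or the kernel.ftrace_dump_on_oops
 * sysctl).
 */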
8143
8144 __init static int tracer_alloc_buffers(void)
8145 {
8146         int ring_buf_size;
8147         int ret = -ENOMEM;
8148
8149         /*
8150          * Make sure we don't accidentally add more trace options
8151          * than we have bits for.
8152          */
8153         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8154
8155         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8156                 goto out;
8157
8158         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8159                 goto out_free_buffer_mask;
8160
8161         /* Only allocate trace_printk buffers if a trace_printk exists */
8162         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8163                 /* Must be called before global_trace.buffer is allocated */
8164                 trace_printk_init_buffers();
8165
8166         /* To save memory, keep the ring buffer size to its minimum */
8167         if (ring_buffer_expanded)
8168                 ring_buf_size = trace_buf_size;
8169         else
8170                 ring_buf_size = 1;
8171
8172         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8173         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8174
8175         raw_spin_lock_init(&global_trace.start_lock);
8176
8177         /*
8178          * The prepare callback allocates some memory for the ring buffer. We
8179          * don't free the buffer if the CPU goes down. If we were to free
8180          * the buffer, then the user would lose any trace that was in the
8181          * buffer. The memory will be removed once the "instance" is removed.
8182          */
8183         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8184                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8185                                       NULL);
8186         if (ret < 0)
8187                 goto out_free_cpumask;
8188         /* Used for event triggers */
8189         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8190         if (!temp_buffer)
8191                 goto out_rm_hp_state;
8192
8193         if (trace_create_savedcmd() < 0)
8194                 goto out_free_temp_buffer;
8195
8196         /* TODO: make the number of buffers hot pluggable with CPUs */
8197         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8198                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8199                 WARN_ON(1);
8200                 goto out_free_savedcmd;
8201         }
8202
8203         if (global_trace.buffer_disabled)
8204                 tracing_off();
8205
8206         if (trace_boot_clock) {
8207                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8208                 if (ret < 0)
8209                         pr_warn("Trace clock %s not defined, going back to default\n",
8210                                 trace_boot_clock);
8211         }
8212
8213         /*
8214          * register_tracer() might reference current_trace, so it
8215          * needs to be set before we register anything. This is
8216          * just a bootstrap of current_trace anyway.
8217          */
8218         global_trace.current_trace = &nop_trace;
8219
8220         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8221
8222         ftrace_init_global_array_ops(&global_trace);
8223
8224         init_trace_flags_index(&global_trace);
8225
8226         register_tracer(&nop_trace);
8227
8228         /* Function tracing may start here (via kernel command line) */
8229         init_function_trace();
8230
8231         /* All seems OK, enable tracing */
8232         tracing_disabled = 0;
8233
8234         atomic_notifier_chain_register(&panic_notifier_list,
8235                                        &trace_panic_notifier);
8236
8237         register_die_notifier(&trace_die_notifier);
8238
8239         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8240
8241         INIT_LIST_HEAD(&global_trace.systems);
8242         INIT_LIST_HEAD(&global_trace.events);
8243         list_add(&global_trace.list, &ftrace_trace_arrays);
8244
8245         apply_trace_boot_options();
8246
8247         register_snapshot_cmd();
8248
8249         return 0;
8250
8251 out_free_savedcmd:
8252         free_saved_cmdlines_buffer(savedcmd);
8253 out_free_temp_buffer:
8254         ring_buffer_free(temp_buffer);
8255 out_rm_hp_state:
8256         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8257 out_free_cpumask:
8258         free_cpumask_var(global_trace.tracing_cpumask);
8259 out_free_buffer_mask:
8260         free_cpumask_var(tracing_buffer_mask);
8261 out:
8262         return ret;
8263 }
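
/*
 * Note the unwind order above: each out_* label releases exactly what
 * was set up before the failing step, in reverse order of allocation
 * (saved cmdlines, temp_buffer, the CPU hotplug state, then the two
 * cpumasks), so a partial failure leaves nothing allocated behind.
 */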
8264
8265 void __init early_trace_init(void)
8266 {
8267         if (tracepoint_printk) {
8268                 tracepoint_print_iter =
8269                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8270                 if (WARN_ON(!tracepoint_print_iter))
8271                         tracepoint_printk = 0;
8272                 else
8273                         static_key_enable(&tracepoint_printk_key.key);
8274         }
8275         tracer_alloc_buffers();
8276 }
8277
8278 void __init trace_init(void)
8279 {
8280         trace_event_init();
8281 }
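
/*
 * Both entry points are invoked from start_kernel() during early boot:
 * early_trace_init() runs early enough that trace_printk() and any
 * command-line-selected tracer have buffers available, while
 * trace_init() later brings up the trace event infrastructure via
 * trace_event_init().
 */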
8282
8283 __init static int clear_boot_tracer(void)
8284 {
8285         /*
8286          * The default bootup tracer name lives in an init section that
8287          * is freed after boot. This function is called at late_initcall
8288          * time. If the boot tracer has not been registered by then,
8289          * clear the pointer out, to prevent a later registration from
8290          * accessing memory that is about to be freed.
8291          */
8292         if (!default_bootup_tracer)
8293                 return 0;
8294
8295         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8296                default_bootup_tracer);
8297         default_bootup_tracer = NULL;
8298
8299         return 0;
8300 }
8301
8302 fs_initcall(tracer_init_tracefs);
8303 late_initcall(clear_boot_tracer);