]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - kernel/trace/trace.c
tracing: Fix crash from reading trace_pipe with sendfile
[karo-tx-linux.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will lurk into the ring-buffer to count the
57  * entries inserted during the selftest although some concurrent
58  * insertions into the ring-buffer such as trace_printk could occurred
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78         .val = 0,
79         .opts = dummy_tracer_opt
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 cpumask_var_t __read_mostly     tracing_buffer_mask;
104
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputing it to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120
121 enum ftrace_dump_mode ftrace_dump_on_oops;
122
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125
126 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
127 /* Map of enums to their values, for "enum_map" file */
128 struct trace_enum_map_head {
129         struct module                   *mod;
130         unsigned long                   length;
131 };
132
133 union trace_enum_map_item;
134
135 struct trace_enum_map_tail {
136         /*
137          * "end" is first and points to NULL as it must be different
138          * than "mod" or "enum_string"
139          */
140         union trace_enum_map_item       *next;
141         const char                      *end;   /* points to NULL */
142 };
143
144 static DEFINE_MUTEX(trace_enum_mutex);
145
146 /*
147  * The trace_enum_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved enum_map items.
152  */
153 union trace_enum_map_item {
154         struct trace_enum_map           map;
155         struct trace_enum_map_head      head;
156         struct trace_enum_map_tail      tail;
157 };
158
159 static union trace_enum_map_item *trace_enum_maps;
160 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
161
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163
164 #define MAX_TRACER_SIZE         100
165 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
166 static char *default_bootup_tracer;
167
168 static bool allocate_snapshot;
169
170 static int __init set_cmdline_ftrace(char *str)
171 {
172         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
173         default_bootup_tracer = bootup_tracer_buf;
174         /* We are using ftrace early, expand it */
175         ring_buffer_expanded = true;
176         return 1;
177 }
178 __setup("ftrace=", set_cmdline_ftrace);
179
180 static int __init set_ftrace_dump_on_oops(char *str)
181 {
182         if (*str++ != '=' || !*str) {
183                 ftrace_dump_on_oops = DUMP_ALL;
184                 return 1;
185         }
186
187         if (!strcmp("orig_cpu", str)) {
188                 ftrace_dump_on_oops = DUMP_ORIG;
189                 return 1;
190         }
191
192         return 0;
193 }
194 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
195
196 static int __init stop_trace_on_warning(char *str)
197 {
198         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
199                 __disable_trace_on_warning = 1;
200         return 1;
201 }
202 __setup("traceoff_on_warning", stop_trace_on_warning);
203
204 static int __init boot_alloc_snapshot(char *str)
205 {
206         allocate_snapshot = true;
207         /* We also need the main ring buffer expanded */
208         ring_buffer_expanded = true;
209         return 1;
210 }
211 __setup("alloc_snapshot", boot_alloc_snapshot);
212
213
214 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
215
216 static int __init set_trace_boot_options(char *str)
217 {
218         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
219         return 0;
220 }
221 __setup("trace_options=", set_trace_boot_options);
222
223 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
224 static char *trace_boot_clock __initdata;
225
226 static int __init set_trace_boot_clock(char *str)
227 {
228         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
229         trace_boot_clock = trace_boot_clock_buf;
230         return 0;
231 }
232 __setup("trace_clock=", set_trace_boot_clock);
233
234 static int __init set_tracepoint_printk(char *str)
235 {
236         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
237                 tracepoint_printk = 1;
238         return 1;
239 }
240 __setup("tp_printk", set_tracepoint_printk);
241
242 unsigned long long ns2usecs(cycle_t nsec)
243 {
244         nsec += 500;
245         do_div(nsec, 1000);
246         return nsec;
247 }
248
249 /* trace_flags holds trace_options default values */
250 #define TRACE_DEFAULT_FLAGS                                             \
251         (FUNCTION_DEFAULT_FLAGS |                                       \
252          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
253          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
254          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
255          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
256
257 /* trace_options that are only supported by global_trace */
258 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
259                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
260
261
262 /*
263  * The global_trace is the descriptor that holds the tracing
264  * buffers for the live tracing. For each CPU, it contains
265  * a link list of pages that will store trace entries. The
266  * page descriptor of the pages in the memory is used to hold
267  * the link list by linking the lru item in the page descriptor
268  * to each of the pages in the buffer per CPU.
269  *
270  * For each active CPU there is a data field that holds the
271  * pages for the buffer for that CPU. Each CPU has the same number
272  * of pages allocated for its buffer.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 void trace_array_put(struct trace_array *this_tr)
305 {
306         mutex_lock(&trace_types_lock);
307         __trace_array_put(this_tr);
308         mutex_unlock(&trace_types_lock);
309 }
310
311 int filter_check_discard(struct trace_event_file *file, void *rec,
312                          struct ring_buffer *buffer,
313                          struct ring_buffer_event *event)
314 {
315         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
316             !filter_match_preds(file->filter, rec)) {
317                 ring_buffer_discard_commit(buffer, event);
318                 return 1;
319         }
320
321         return 0;
322 }
323 EXPORT_SYMBOL_GPL(filter_check_discard);
324
325 int call_filter_check_discard(struct trace_event_call *call, void *rec,
326                               struct ring_buffer *buffer,
327                               struct ring_buffer_event *event)
328 {
329         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330             !filter_match_preds(call->filter, rec)) {
331                 ring_buffer_discard_commit(buffer, event);
332                 return 1;
333         }
334
335         return 0;
336 }
337 EXPORT_SYMBOL_GPL(call_filter_check_discard);
338
339 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
340 {
341         u64 ts;
342
343         /* Early boot up does not have a buffer yet */
344         if (!buf->buffer)
345                 return trace_clock_local();
346
347         ts = ring_buffer_time_stamp(buf->buffer, cpu);
348         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
349
350         return ts;
351 }
352
353 cycle_t ftrace_now(int cpu)
354 {
355         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
356 }
357
358 /**
359  * tracing_is_enabled - Show if global_trace has been disabled
360  *
361  * Shows if the global trace has been enabled or not. It uses the
362  * mirror flag "buffer_disabled" to be used in fast paths such as for
363  * the irqsoff tracer. But it may be inaccurate due to races. If you
364  * need to know the accurate state, use tracing_is_on() which is a little
365  * slower, but accurate.
366  */
367 int tracing_is_enabled(void)
368 {
369         /*
370          * For quick access (irqsoff uses this in fast path), just
371          * return the mirror variable of the state of the ring buffer.
372          * It's a little racy, but we don't really care.
373          */
374         smp_rmb();
375         return !global_trace.buffer_disabled;
376 }
377
378 /*
379  * trace_buf_size is the size in bytes that is allocated
380  * for a buffer. Note, the number of bytes is always rounded
381  * to page size.
382  *
383  * This number is purposely set to a low number of 16384.
384  * If the dump on oops happens, it will be much appreciated
385  * to not have to wait for all that output. Anyway this can be
386  * boot time and run time configurable.
387  */
388 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
389
390 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
391
392 /* trace_types holds a link list of available tracers. */
393 static struct tracer            *trace_types __read_mostly;
394
395 /*
396  * trace_types_lock is used to protect the trace_types list.
397  */
398 DEFINE_MUTEX(trace_types_lock);
399
400 /*
401  * serialize the access of the ring buffer
402  *
403  * ring buffer serializes readers, but it is low level protection.
404  * The validity of the events (which returns by ring_buffer_peek() ..etc)
405  * are not protected by ring buffer.
406  *
407  * The content of events may become garbage if we allow other process consumes
408  * these events concurrently:
409  *   A) the page of the consumed events may become a normal page
410  *      (not reader page) in ring buffer, and this page will be rewrited
411  *      by events producer.
412  *   B) The page of the consumed events may become a page for splice_read,
413  *      and this page will be returned to system.
414  *
415  * These primitives allow multi process access to different cpu ring buffer
416  * concurrently.
417  *
418  * These primitives don't distinguish read-only and read-consume access.
419  * Multi read-only access are also serialized.
420  */
421
422 #ifdef CONFIG_SMP
423 static DECLARE_RWSEM(all_cpu_access_lock);
424 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
425
426 static inline void trace_access_lock(int cpu)
427 {
428         if (cpu == RING_BUFFER_ALL_CPUS) {
429                 /* gain it for accessing the whole ring buffer. */
430                 down_write(&all_cpu_access_lock);
431         } else {
432                 /* gain it for accessing a cpu ring buffer. */
433
434                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
435                 down_read(&all_cpu_access_lock);
436
437                 /* Secondly block other access to this @cpu ring buffer. */
438                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
439         }
440 }
441
442 static inline void trace_access_unlock(int cpu)
443 {
444         if (cpu == RING_BUFFER_ALL_CPUS) {
445                 up_write(&all_cpu_access_lock);
446         } else {
447                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
448                 up_read(&all_cpu_access_lock);
449         }
450 }
451
452 static inline void trace_access_lock_init(void)
453 {
454         int cpu;
455
456         for_each_possible_cpu(cpu)
457                 mutex_init(&per_cpu(cpu_access_lock, cpu));
458 }
459
460 #else
461
462 static DEFINE_MUTEX(access_lock);
463
464 static inline void trace_access_lock(int cpu)
465 {
466         (void)cpu;
467         mutex_lock(&access_lock);
468 }
469
470 static inline void trace_access_unlock(int cpu)
471 {
472         (void)cpu;
473         mutex_unlock(&access_lock);
474 }
475
476 static inline void trace_access_lock_init(void)
477 {
478 }
479
480 #endif
481
482 #ifdef CONFIG_STACKTRACE
483 static void __ftrace_trace_stack(struct ring_buffer *buffer,
484                                  unsigned long flags,
485                                  int skip, int pc, struct pt_regs *regs);
486 static inline void ftrace_trace_stack(struct trace_array *tr,
487                                       struct ring_buffer *buffer,
488                                       unsigned long flags,
489                                       int skip, int pc, struct pt_regs *regs);
490
491 #else
492 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
493                                         unsigned long flags,
494                                         int skip, int pc, struct pt_regs *regs)
495 {
496 }
497 static inline void ftrace_trace_stack(struct trace_array *tr,
498                                       struct ring_buffer *buffer,
499                                       unsigned long flags,
500                                       int skip, int pc, struct pt_regs *regs)
501 {
502 }
503
504 #endif
505
506 static void tracer_tracing_on(struct trace_array *tr)
507 {
508         if (tr->trace_buffer.buffer)
509                 ring_buffer_record_on(tr->trace_buffer.buffer);
510         /*
511          * This flag is looked at when buffers haven't been allocated
512          * yet, or by some tracers (like irqsoff), that just want to
513          * know if the ring buffer has been disabled, but it can handle
514          * races of where it gets disabled but we still do a record.
515          * As the check is in the fast path of the tracers, it is more
516          * important to be fast than accurate.
517          */
518         tr->buffer_disabled = 0;
519         /* Make the flag seen by readers */
520         smp_wmb();
521 }
522
523 /**
524  * tracing_on - enable tracing buffers
525  *
526  * This function enables tracing buffers that may have been
527  * disabled with tracing_off.
528  */
529 void tracing_on(void)
530 {
531         tracer_tracing_on(&global_trace);
532 }
533 EXPORT_SYMBOL_GPL(tracing_on);
534
535 /**
536  * __trace_puts - write a constant string into the trace buffer.
537  * @ip:    The address of the caller
538  * @str:   The constant string to write
539  * @size:  The size of the string.
540  */
541 int __trace_puts(unsigned long ip, const char *str, int size)
542 {
543         struct ring_buffer_event *event;
544         struct ring_buffer *buffer;
545         struct print_entry *entry;
546         unsigned long irq_flags;
547         int alloc;
548         int pc;
549
550         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
551                 return 0;
552
553         pc = preempt_count();
554
555         if (unlikely(tracing_selftest_running || tracing_disabled))
556                 return 0;
557
558         alloc = sizeof(*entry) + size + 2; /* possible \n added */
559
560         local_save_flags(irq_flags);
561         buffer = global_trace.trace_buffer.buffer;
562         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
563                                           irq_flags, pc);
564         if (!event)
565                 return 0;
566
567         entry = ring_buffer_event_data(event);
568         entry->ip = ip;
569
570         memcpy(&entry->buf, str, size);
571
572         /* Add a newline if necessary */
573         if (entry->buf[size - 1] != '\n') {
574                 entry->buf[size] = '\n';
575                 entry->buf[size + 1] = '\0';
576         } else
577                 entry->buf[size] = '\0';
578
579         __buffer_unlock_commit(buffer, event);
580         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
581
582         return size;
583 }
584 EXPORT_SYMBOL_GPL(__trace_puts);
585
586 /**
587  * __trace_bputs - write the pointer to a constant string into trace buffer
588  * @ip:    The address of the caller
589  * @str:   The constant string to write to the buffer to
590  */
591 int __trace_bputs(unsigned long ip, const char *str)
592 {
593         struct ring_buffer_event *event;
594         struct ring_buffer *buffer;
595         struct bputs_entry *entry;
596         unsigned long irq_flags;
597         int size = sizeof(struct bputs_entry);
598         int pc;
599
600         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
601                 return 0;
602
603         pc = preempt_count();
604
605         if (unlikely(tracing_selftest_running || tracing_disabled))
606                 return 0;
607
608         local_save_flags(irq_flags);
609         buffer = global_trace.trace_buffer.buffer;
610         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
611                                           irq_flags, pc);
612         if (!event)
613                 return 0;
614
615         entry = ring_buffer_event_data(event);
616         entry->ip                       = ip;
617         entry->str                      = str;
618
619         __buffer_unlock_commit(buffer, event);
620         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
621
622         return 1;
623 }
624 EXPORT_SYMBOL_GPL(__trace_bputs);
625
626 #ifdef CONFIG_TRACER_SNAPSHOT
627 /**
628  * trace_snapshot - take a snapshot of the current buffer.
629  *
630  * This causes a swap between the snapshot buffer and the current live
631  * tracing buffer. You can use this to take snapshots of the live
632  * trace when some condition is triggered, but continue to trace.
633  *
634  * Note, make sure to allocate the snapshot with either
635  * a tracing_snapshot_alloc(), or by doing it manually
636  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
637  *
638  * If the snapshot buffer is not allocated, it will stop tracing.
639  * Basically making a permanent snapshot.
640  */
641 void tracing_snapshot(void)
642 {
643         struct trace_array *tr = &global_trace;
644         struct tracer *tracer = tr->current_trace;
645         unsigned long flags;
646
647         if (in_nmi()) {
648                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
649                 internal_trace_puts("*** snapshot is being ignored        ***\n");
650                 return;
651         }
652
653         if (!tr->allocated_snapshot) {
654                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
655                 internal_trace_puts("*** stopping trace here!   ***\n");
656                 tracing_off();
657                 return;
658         }
659
660         /* Note, snapshot can not be used when the tracer uses it */
661         if (tracer->use_max_tr) {
662                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
663                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
664                 return;
665         }
666
667         local_irq_save(flags);
668         update_max_tr(tr, current, smp_processor_id());
669         local_irq_restore(flags);
670 }
671 EXPORT_SYMBOL_GPL(tracing_snapshot);
672
673 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
674                                         struct trace_buffer *size_buf, int cpu_id);
675 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
676
677 static int alloc_snapshot(struct trace_array *tr)
678 {
679         int ret;
680
681         if (!tr->allocated_snapshot) {
682
683                 /* allocate spare buffer */
684                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
685                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
686                 if (ret < 0)
687                         return ret;
688
689                 tr->allocated_snapshot = true;
690         }
691
692         return 0;
693 }
694
695 static void free_snapshot(struct trace_array *tr)
696 {
697         /*
698          * We don't free the ring buffer. instead, resize it because
699          * The max_tr ring buffer has some state (e.g. ring->clock) and
700          * we want preserve it.
701          */
702         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
703         set_buffer_entries(&tr->max_buffer, 1);
704         tracing_reset_online_cpus(&tr->max_buffer);
705         tr->allocated_snapshot = false;
706 }
707
708 /**
709  * tracing_alloc_snapshot - allocate snapshot buffer.
710  *
711  * This only allocates the snapshot buffer if it isn't already
712  * allocated - it doesn't also take a snapshot.
713  *
714  * This is meant to be used in cases where the snapshot buffer needs
715  * to be set up for events that can't sleep but need to be able to
716  * trigger a snapshot.
717  */
718 int tracing_alloc_snapshot(void)
719 {
720         struct trace_array *tr = &global_trace;
721         int ret;
722
723         ret = alloc_snapshot(tr);
724         WARN_ON(ret < 0);
725
726         return ret;
727 }
728 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
729
730 /**
731  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
732  *
733  * This is similar to trace_snapshot(), but it will allocate the
734  * snapshot buffer if it isn't already allocated. Use this only
735  * where it is safe to sleep, as the allocation may sleep.
736  *
737  * This causes a swap between the snapshot buffer and the current live
738  * tracing buffer. You can use this to take snapshots of the live
739  * trace when some condition is triggered, but continue to trace.
740  */
741 void tracing_snapshot_alloc(void)
742 {
743         int ret;
744
745         ret = tracing_alloc_snapshot();
746         if (ret < 0)
747                 return;
748
749         tracing_snapshot();
750 }
751 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
752 #else
753 void tracing_snapshot(void)
754 {
755         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
756 }
757 EXPORT_SYMBOL_GPL(tracing_snapshot);
758 int tracing_alloc_snapshot(void)
759 {
760         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
761         return -ENODEV;
762 }
763 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
764 void tracing_snapshot_alloc(void)
765 {
766         /* Give warning */
767         tracing_snapshot();
768 }
769 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
770 #endif /* CONFIG_TRACER_SNAPSHOT */
771
772 static void tracer_tracing_off(struct trace_array *tr)
773 {
774         if (tr->trace_buffer.buffer)
775                 ring_buffer_record_off(tr->trace_buffer.buffer);
776         /*
777          * This flag is looked at when buffers haven't been allocated
778          * yet, or by some tracers (like irqsoff), that just want to
779          * know if the ring buffer has been disabled, but it can handle
780          * races of where it gets disabled but we still do a record.
781          * As the check is in the fast path of the tracers, it is more
782          * important to be fast than accurate.
783          */
784         tr->buffer_disabled = 1;
785         /* Make the flag seen by readers */
786         smp_wmb();
787 }
788
789 /**
790  * tracing_off - turn off tracing buffers
791  *
792  * This function stops the tracing buffers from recording data.
793  * It does not disable any overhead the tracers themselves may
794  * be causing. This function simply causes all recording to
795  * the ring buffers to fail.
796  */
797 void tracing_off(void)
798 {
799         tracer_tracing_off(&global_trace);
800 }
801 EXPORT_SYMBOL_GPL(tracing_off);
802
803 void disable_trace_on_warning(void)
804 {
805         if (__disable_trace_on_warning)
806                 tracing_off();
807 }
808
809 /**
810  * tracer_tracing_is_on - show real state of ring buffer enabled
811  * @tr : the trace array to know if ring buffer is enabled
812  *
813  * Shows real state of the ring buffer if it is enabled or not.
814  */
815 static int tracer_tracing_is_on(struct trace_array *tr)
816 {
817         if (tr->trace_buffer.buffer)
818                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
819         return !tr->buffer_disabled;
820 }
821
822 /**
823  * tracing_is_on - show state of ring buffers enabled
824  */
825 int tracing_is_on(void)
826 {
827         return tracer_tracing_is_on(&global_trace);
828 }
829 EXPORT_SYMBOL_GPL(tracing_is_on);
830
831 static int __init set_buf_size(char *str)
832 {
833         unsigned long buf_size;
834
835         if (!str)
836                 return 0;
837         buf_size = memparse(str, &str);
838         /* nr_entries can not be zero */
839         if (buf_size == 0)
840                 return 0;
841         trace_buf_size = buf_size;
842         return 1;
843 }
844 __setup("trace_buf_size=", set_buf_size);
845
846 static int __init set_tracing_thresh(char *str)
847 {
848         unsigned long threshold;
849         int ret;
850
851         if (!str)
852                 return 0;
853         ret = kstrtoul(str, 0, &threshold);
854         if (ret < 0)
855                 return 0;
856         tracing_thresh = threshold * 1000;
857         return 1;
858 }
859 __setup("tracing_thresh=", set_tracing_thresh);
860
861 unsigned long nsecs_to_usecs(unsigned long nsecs)
862 {
863         return nsecs / 1000;
864 }
865
866 /*
867  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
868  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
869  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
870  * of strings in the order that the enums were defined.
871  */
872 #undef C
873 #define C(a, b) b
874
875 /* These must match the bit postions in trace_iterator_flags */
876 static const char *trace_options[] = {
877         TRACE_FLAGS
878         NULL
879 };
880
881 static struct {
882         u64 (*func)(void);
883         const char *name;
884         int in_ns;              /* is this clock in nanoseconds? */
885 } trace_clocks[] = {
886         { trace_clock_local,            "local",        1 },
887         { trace_clock_global,           "global",       1 },
888         { trace_clock_counter,          "counter",      0 },
889         { trace_clock_jiffies,          "uptime",       0 },
890         { trace_clock,                  "perf",         1 },
891         { ktime_get_mono_fast_ns,       "mono",         1 },
892         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
893         ARCH_TRACE_CLOCKS
894 };
895
896 /*
897  * trace_parser_get_init - gets the buffer for trace parser
898  */
899 int trace_parser_get_init(struct trace_parser *parser, int size)
900 {
901         memset(parser, 0, sizeof(*parser));
902
903         parser->buffer = kmalloc(size, GFP_KERNEL);
904         if (!parser->buffer)
905                 return 1;
906
907         parser->size = size;
908         return 0;
909 }
910
911 /*
912  * trace_parser_put - frees the buffer for trace parser
913  */
914 void trace_parser_put(struct trace_parser *parser)
915 {
916         kfree(parser->buffer);
917 }
918
919 /*
920  * trace_get_user - reads the user input string separated by  space
921  * (matched by isspace(ch))
922  *
923  * For each string found the 'struct trace_parser' is updated,
924  * and the function returns.
925  *
926  * Returns number of bytes read.
927  *
928  * See kernel/trace/trace.h for 'struct trace_parser' details.
929  */
930 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
931         size_t cnt, loff_t *ppos)
932 {
933         char ch;
934         size_t read = 0;
935         ssize_t ret;
936
937         if (!*ppos)
938                 trace_parser_clear(parser);
939
940         ret = get_user(ch, ubuf++);
941         if (ret)
942                 goto out;
943
944         read++;
945         cnt--;
946
947         /*
948          * The parser is not finished with the last write,
949          * continue reading the user input without skipping spaces.
950          */
951         if (!parser->cont) {
952                 /* skip white space */
953                 while (cnt && isspace(ch)) {
954                         ret = get_user(ch, ubuf++);
955                         if (ret)
956                                 goto out;
957                         read++;
958                         cnt--;
959                 }
960
961                 /* only spaces were written */
962                 if (isspace(ch)) {
963                         *ppos += read;
964                         ret = read;
965                         goto out;
966                 }
967
968                 parser->idx = 0;
969         }
970
971         /* read the non-space input */
972         while (cnt && !isspace(ch)) {
973                 if (parser->idx < parser->size - 1)
974                         parser->buffer[parser->idx++] = ch;
975                 else {
976                         ret = -EINVAL;
977                         goto out;
978                 }
979                 ret = get_user(ch, ubuf++);
980                 if (ret)
981                         goto out;
982                 read++;
983                 cnt--;
984         }
985
986         /* We either got finished input or we have to wait for another call. */
987         if (isspace(ch)) {
988                 parser->buffer[parser->idx] = 0;
989                 parser->cont = false;
990         } else if (parser->idx < parser->size - 1) {
991                 parser->cont = true;
992                 parser->buffer[parser->idx++] = ch;
993         } else {
994                 ret = -EINVAL;
995                 goto out;
996         }
997
998         *ppos += read;
999         ret = read;
1000
1001 out:
1002         return ret;
1003 }
1004
1005 /* TODO add a seq_buf_to_buffer() */
1006 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1007 {
1008         int len;
1009
1010         if (trace_seq_used(s) <= s->seq.readpos)
1011                 return -EBUSY;
1012
1013         len = trace_seq_used(s) - s->seq.readpos;
1014         if (cnt > len)
1015                 cnt = len;
1016         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1017
1018         s->seq.readpos += cnt;
1019         return cnt;
1020 }
1021
1022 unsigned long __read_mostly     tracing_thresh;
1023
1024 #ifdef CONFIG_TRACER_MAX_TRACE
1025 /*
1026  * Copy the new maximum trace into the separate maximum-trace
1027  * structure. (this way the maximum trace is permanently saved,
1028  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1029  */
1030 static void
1031 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1032 {
1033         struct trace_buffer *trace_buf = &tr->trace_buffer;
1034         struct trace_buffer *max_buf = &tr->max_buffer;
1035         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1036         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1037
1038         max_buf->cpu = cpu;
1039         max_buf->time_start = data->preempt_timestamp;
1040
1041         max_data->saved_latency = tr->max_latency;
1042         max_data->critical_start = data->critical_start;
1043         max_data->critical_end = data->critical_end;
1044
1045         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1046         max_data->pid = tsk->pid;
1047         /*
1048          * If tsk == current, then use current_uid(), as that does not use
1049          * RCU. The irq tracer can be called out of RCU scope.
1050          */
1051         if (tsk == current)
1052                 max_data->uid = current_uid();
1053         else
1054                 max_data->uid = task_uid(tsk);
1055
1056         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1057         max_data->policy = tsk->policy;
1058         max_data->rt_priority = tsk->rt_priority;
1059
1060         /* record this tasks comm */
1061         tracing_record_cmdline(tsk);
1062 }
1063
1064 /**
1065  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1066  * @tr: tracer
1067  * @tsk: the task with the latency
1068  * @cpu: The cpu that initiated the trace.
1069  *
1070  * Flip the buffers between the @tr and the max_tr and record information
1071  * about which task was the cause of this latency.
1072  */
1073 void
1074 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1075 {
1076         struct ring_buffer *buf;
1077
1078         if (tr->stop_count)
1079                 return;
1080
1081         WARN_ON_ONCE(!irqs_disabled());
1082
1083         if (!tr->allocated_snapshot) {
1084                 /* Only the nop tracer should hit this when disabling */
1085                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1086                 return;
1087         }
1088
1089         arch_spin_lock(&tr->max_lock);
1090
1091         buf = tr->trace_buffer.buffer;
1092         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1093         tr->max_buffer.buffer = buf;
1094
1095         __update_max_tr(tr, tsk, cpu);
1096         arch_spin_unlock(&tr->max_lock);
1097 }
1098
1099 /**
1100  * update_max_tr_single - only copy one trace over, and reset the rest
1101  * @tr - tracer
1102  * @tsk - task with the latency
1103  * @cpu - the cpu of the buffer to copy.
1104  *
1105  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1106  */
1107 void
1108 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1109 {
1110         int ret;
1111
1112         if (tr->stop_count)
1113                 return;
1114
1115         WARN_ON_ONCE(!irqs_disabled());
1116         if (!tr->allocated_snapshot) {
1117                 /* Only the nop tracer should hit this when disabling */
1118                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1119                 return;
1120         }
1121
1122         arch_spin_lock(&tr->max_lock);
1123
1124         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1125
1126         if (ret == -EBUSY) {
1127                 /*
1128                  * We failed to swap the buffer due to a commit taking
1129                  * place on this CPU. We fail to record, but we reset
1130                  * the max trace buffer (no one writes directly to it)
1131                  * and flag that it failed.
1132                  */
1133                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1134                         "Failed to swap buffers due to commit in progress\n");
1135         }
1136
1137         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1138
1139         __update_max_tr(tr, tsk, cpu);
1140         arch_spin_unlock(&tr->max_lock);
1141 }
1142 #endif /* CONFIG_TRACER_MAX_TRACE */
1143
1144 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1145 {
1146         /* Iterators are static, they should be filled or empty */
1147         if (trace_buffer_iter(iter, iter->cpu_file))
1148                 return 0;
1149
1150         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1151                                 full);
1152 }
1153
1154 #ifdef CONFIG_FTRACE_STARTUP_TEST
1155 static int run_tracer_selftest(struct tracer *type)
1156 {
1157         struct trace_array *tr = &global_trace;
1158         struct tracer *saved_tracer = tr->current_trace;
1159         int ret;
1160
1161         if (!type->selftest || tracing_selftest_disabled)
1162                 return 0;
1163
1164         /*
1165          * Run a selftest on this tracer.
1166          * Here we reset the trace buffer, and set the current
1167          * tracer to be this tracer. The tracer can then run some
1168          * internal tracing to verify that everything is in order.
1169          * If we fail, we do not register this tracer.
1170          */
1171         tracing_reset_online_cpus(&tr->trace_buffer);
1172
1173         tr->current_trace = type;
1174
1175 #ifdef CONFIG_TRACER_MAX_TRACE
1176         if (type->use_max_tr) {
1177                 /* If we expanded the buffers, make sure the max is expanded too */
1178                 if (ring_buffer_expanded)
1179                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1180                                            RING_BUFFER_ALL_CPUS);
1181                 tr->allocated_snapshot = true;
1182         }
1183 #endif
1184
1185         /* the test is responsible for initializing and enabling */
1186         pr_info("Testing tracer %s: ", type->name);
1187         ret = type->selftest(type, tr);
1188         /* the test is responsible for resetting too */
1189         tr->current_trace = saved_tracer;
1190         if (ret) {
1191                 printk(KERN_CONT "FAILED!\n");
1192                 /* Add the warning after printing 'FAILED' */
1193                 WARN_ON(1);
1194                 return -1;
1195         }
1196         /* Only reset on passing, to avoid touching corrupted buffers */
1197         tracing_reset_online_cpus(&tr->trace_buffer);
1198
1199 #ifdef CONFIG_TRACER_MAX_TRACE
1200         if (type->use_max_tr) {
1201                 tr->allocated_snapshot = false;
1202
1203                 /* Shrink the max buffer again */
1204                 if (ring_buffer_expanded)
1205                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1206                                            RING_BUFFER_ALL_CPUS);
1207         }
1208 #endif
1209
1210         printk(KERN_CONT "PASSED\n");
1211         return 0;
1212 }
1213 #else
1214 static inline int run_tracer_selftest(struct tracer *type)
1215 {
1216         return 0;
1217 }
1218 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1219
1220 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1221
1222 static void __init apply_trace_boot_options(void);
1223
1224 /**
1225  * register_tracer - register a tracer with the ftrace system.
1226  * @type - the plugin for the tracer
1227  *
1228  * Register a new plugin tracer.
1229  */
1230 int __init register_tracer(struct tracer *type)
1231 {
1232         struct tracer *t;
1233         int ret = 0;
1234
1235         if (!type->name) {
1236                 pr_info("Tracer must have a name\n");
1237                 return -1;
1238         }
1239
1240         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1241                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1242                 return -1;
1243         }
1244
1245         mutex_lock(&trace_types_lock);
1246
1247         tracing_selftest_running = true;
1248
1249         for (t = trace_types; t; t = t->next) {
1250                 if (strcmp(type->name, t->name) == 0) {
1251                         /* already found */
1252                         pr_info("Tracer %s already registered\n",
1253                                 type->name);
1254                         ret = -1;
1255                         goto out;
1256                 }
1257         }
1258
1259         if (!type->set_flag)
1260                 type->set_flag = &dummy_set_flag;
1261         if (!type->flags)
1262                 type->flags = &dummy_tracer_flags;
1263         else
1264                 if (!type->flags->opts)
1265                         type->flags->opts = dummy_tracer_opt;
1266
1267         ret = run_tracer_selftest(type);
1268         if (ret < 0)
1269                 goto out;
1270
1271         type->next = trace_types;
1272         trace_types = type;
1273         add_tracer_options(&global_trace, type);
1274
1275  out:
1276         tracing_selftest_running = false;
1277         mutex_unlock(&trace_types_lock);
1278
1279         if (ret || !default_bootup_tracer)
1280                 goto out_unlock;
1281
1282         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1283                 goto out_unlock;
1284
1285         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1286         /* Do we want this tracer to start on bootup? */
1287         tracing_set_tracer(&global_trace, type->name);
1288         default_bootup_tracer = NULL;
1289
1290         apply_trace_boot_options();
1291
1292         /* disable other selftests, since this will break it. */
1293         tracing_selftest_disabled = true;
1294 #ifdef CONFIG_FTRACE_STARTUP_TEST
1295         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1296                type->name);
1297 #endif
1298
1299  out_unlock:
1300         return ret;
1301 }
1302
1303 void tracing_reset(struct trace_buffer *buf, int cpu)
1304 {
1305         struct ring_buffer *buffer = buf->buffer;
1306
1307         if (!buffer)
1308                 return;
1309
1310         ring_buffer_record_disable(buffer);
1311
1312         /* Make sure all commits have finished */
1313         synchronize_sched();
1314         ring_buffer_reset_cpu(buffer, cpu);
1315
1316         ring_buffer_record_enable(buffer);
1317 }
1318
1319 void tracing_reset_online_cpus(struct trace_buffer *buf)
1320 {
1321         struct ring_buffer *buffer = buf->buffer;
1322         int cpu;
1323
1324         if (!buffer)
1325                 return;
1326
1327         ring_buffer_record_disable(buffer);
1328
1329         /* Make sure all commits have finished */
1330         synchronize_sched();
1331
1332         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1333
1334         for_each_online_cpu(cpu)
1335                 ring_buffer_reset_cpu(buffer, cpu);
1336
1337         ring_buffer_record_enable(buffer);
1338 }
1339
1340 /* Must have trace_types_lock held */
1341 void tracing_reset_all_online_cpus(void)
1342 {
1343         struct trace_array *tr;
1344
1345         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1346                 tracing_reset_online_cpus(&tr->trace_buffer);
1347 #ifdef CONFIG_TRACER_MAX_TRACE
1348                 tracing_reset_online_cpus(&tr->max_buffer);
1349 #endif
1350         }
1351 }
1352
1353 #define SAVED_CMDLINES_DEFAULT 128
1354 #define NO_CMDLINE_MAP UINT_MAX
1355 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1356 struct saved_cmdlines_buffer {
1357         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1358         unsigned *map_cmdline_to_pid;
1359         unsigned cmdline_num;
1360         int cmdline_idx;
1361         char *saved_cmdlines;
1362 };
1363 static struct saved_cmdlines_buffer *savedcmd;
1364
1365 /* temporary disable recording */
1366 static atomic_t trace_record_cmdline_disabled __read_mostly;
1367
1368 static inline char *get_saved_cmdlines(int idx)
1369 {
1370         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1371 }
1372
1373 static inline void set_cmdline(int idx, const char *cmdline)
1374 {
1375         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1376 }
1377
1378 static int allocate_cmdlines_buffer(unsigned int val,
1379                                     struct saved_cmdlines_buffer *s)
1380 {
1381         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1382                                         GFP_KERNEL);
1383         if (!s->map_cmdline_to_pid)
1384                 return -ENOMEM;
1385
1386         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1387         if (!s->saved_cmdlines) {
1388                 kfree(s->map_cmdline_to_pid);
1389                 return -ENOMEM;
1390         }
1391
1392         s->cmdline_idx = 0;
1393         s->cmdline_num = val;
1394         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1395                sizeof(s->map_pid_to_cmdline));
1396         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1397                val * sizeof(*s->map_cmdline_to_pid));
1398
1399         return 0;
1400 }
1401
1402 static int trace_create_savedcmd(void)
1403 {
1404         int ret;
1405
1406         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1407         if (!savedcmd)
1408                 return -ENOMEM;
1409
1410         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1411         if (ret < 0) {
1412                 kfree(savedcmd);
1413                 savedcmd = NULL;
1414                 return -ENOMEM;
1415         }
1416
1417         return 0;
1418 }
1419
1420 int is_tracing_stopped(void)
1421 {
1422         return global_trace.stop_count;
1423 }
1424
1425 /**
1426  * tracing_start - quick start of the tracer
1427  *
1428  * If tracing is enabled but was stopped by tracing_stop,
1429  * this will start the tracer back up.
1430  */
1431 void tracing_start(void)
1432 {
1433         struct ring_buffer *buffer;
1434         unsigned long flags;
1435
1436         if (tracing_disabled)
1437                 return;
1438
1439         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1440         if (--global_trace.stop_count) {
1441                 if (global_trace.stop_count < 0) {
1442                         /* Someone screwed up their debugging */
1443                         WARN_ON_ONCE(1);
1444                         global_trace.stop_count = 0;
1445                 }
1446                 goto out;
1447         }
1448
1449         /* Prevent the buffers from switching */
1450         arch_spin_lock(&global_trace.max_lock);
1451
1452         buffer = global_trace.trace_buffer.buffer;
1453         if (buffer)
1454                 ring_buffer_record_enable(buffer);
1455
1456 #ifdef CONFIG_TRACER_MAX_TRACE
1457         buffer = global_trace.max_buffer.buffer;
1458         if (buffer)
1459                 ring_buffer_record_enable(buffer);
1460 #endif
1461
1462         arch_spin_unlock(&global_trace.max_lock);
1463
1464  out:
1465         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1466 }
1467
1468 static void tracing_start_tr(struct trace_array *tr)
1469 {
1470         struct ring_buffer *buffer;
1471         unsigned long flags;
1472
1473         if (tracing_disabled)
1474                 return;
1475
1476         /* If global, we need to also start the max tracer */
1477         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1478                 return tracing_start();
1479
1480         raw_spin_lock_irqsave(&tr->start_lock, flags);
1481
1482         if (--tr->stop_count) {
1483                 if (tr->stop_count < 0) {
1484                         /* Someone screwed up their debugging */
1485                         WARN_ON_ONCE(1);
1486                         tr->stop_count = 0;
1487                 }
1488                 goto out;
1489         }
1490
1491         buffer = tr->trace_buffer.buffer;
1492         if (buffer)
1493                 ring_buffer_record_enable(buffer);
1494
1495  out:
1496         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1497 }
1498
1499 /**
1500  * tracing_stop - quick stop of the tracer
1501  *
1502  * Light weight way to stop tracing. Use in conjunction with
1503  * tracing_start.
1504  */
1505 void tracing_stop(void)
1506 {
1507         struct ring_buffer *buffer;
1508         unsigned long flags;
1509
1510         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1511         if (global_trace.stop_count++)
1512                 goto out;
1513
1514         /* Prevent the buffers from switching */
1515         arch_spin_lock(&global_trace.max_lock);
1516
1517         buffer = global_trace.trace_buffer.buffer;
1518         if (buffer)
1519                 ring_buffer_record_disable(buffer);
1520
1521 #ifdef CONFIG_TRACER_MAX_TRACE
1522         buffer = global_trace.max_buffer.buffer;
1523         if (buffer)
1524                 ring_buffer_record_disable(buffer);
1525 #endif
1526
1527         arch_spin_unlock(&global_trace.max_lock);
1528
1529  out:
1530         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1531 }
1532
1533 static void tracing_stop_tr(struct trace_array *tr)
1534 {
1535         struct ring_buffer *buffer;
1536         unsigned long flags;
1537
1538         /* If global, we need to also stop the max tracer */
1539         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1540                 return tracing_stop();
1541
1542         raw_spin_lock_irqsave(&tr->start_lock, flags);
1543         if (tr->stop_count++)
1544                 goto out;
1545
1546         buffer = tr->trace_buffer.buffer;
1547         if (buffer)
1548                 ring_buffer_record_disable(buffer);
1549
1550  out:
1551         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1552 }
1553
1554 void trace_stop_cmdline_recording(void);
1555
1556 static int trace_save_cmdline(struct task_struct *tsk)
1557 {
1558         unsigned pid, idx;
1559
1560         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1561                 return 0;
1562
1563         /*
1564          * It's not the end of the world if we don't get
1565          * the lock, but we also don't want to spin
1566          * nor do we want to disable interrupts,
1567          * so if we miss here, then better luck next time.
1568          */
1569         if (!arch_spin_trylock(&trace_cmdline_lock))
1570                 return 0;
1571
1572         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1573         if (idx == NO_CMDLINE_MAP) {
1574                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1575
1576                 /*
1577                  * Check whether the cmdline buffer at idx has a pid
1578                  * mapped. We are going to overwrite that entry so we
1579                  * need to clear the map_pid_to_cmdline. Otherwise we
1580                  * would read the new comm for the old pid.
1581                  */
1582                 pid = savedcmd->map_cmdline_to_pid[idx];
1583                 if (pid != NO_CMDLINE_MAP)
1584                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1585
1586                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1587                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1588
1589                 savedcmd->cmdline_idx = idx;
1590         }
1591
1592         set_cmdline(idx, tsk->comm);
1593
1594         arch_spin_unlock(&trace_cmdline_lock);
1595
1596         return 1;
1597 }
1598
1599 static void __trace_find_cmdline(int pid, char comm[])
1600 {
1601         unsigned map;
1602
1603         if (!pid) {
1604                 strcpy(comm, "<idle>");
1605                 return;
1606         }
1607
1608         if (WARN_ON_ONCE(pid < 0)) {
1609                 strcpy(comm, "<XXX>");
1610                 return;
1611         }
1612
1613         if (pid > PID_MAX_DEFAULT) {
1614                 strcpy(comm, "<...>");
1615                 return;
1616         }
1617
1618         map = savedcmd->map_pid_to_cmdline[pid];
1619         if (map != NO_CMDLINE_MAP)
1620                 strcpy(comm, get_saved_cmdlines(map));
1621         else
1622                 strcpy(comm, "<...>");
1623 }
1624
1625 void trace_find_cmdline(int pid, char comm[])
1626 {
1627         preempt_disable();
1628         arch_spin_lock(&trace_cmdline_lock);
1629
1630         __trace_find_cmdline(pid, comm);
1631
1632         arch_spin_unlock(&trace_cmdline_lock);
1633         preempt_enable();
1634 }
1635
1636 void tracing_record_cmdline(struct task_struct *tsk)
1637 {
1638         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1639                 return;
1640
1641         if (!__this_cpu_read(trace_cmdline_save))
1642                 return;
1643
1644         if (trace_save_cmdline(tsk))
1645                 __this_cpu_write(trace_cmdline_save, false);
1646 }
1647
1648 void
1649 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1650                              int pc)
1651 {
1652         struct task_struct *tsk = current;
1653
1654         entry->preempt_count            = pc & 0xff;
1655         entry->pid                      = (tsk) ? tsk->pid : 0;
1656         entry->flags =
1657 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1658                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1659 #else
1660                 TRACE_FLAG_IRQS_NOSUPPORT |
1661 #endif
1662                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1663                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1664                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1665                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1666 }
1667 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1668
1669 struct ring_buffer_event *
1670 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1671                           int type,
1672                           unsigned long len,
1673                           unsigned long flags, int pc)
1674 {
1675         struct ring_buffer_event *event;
1676
1677         event = ring_buffer_lock_reserve(buffer, len);
1678         if (event != NULL) {
1679                 struct trace_entry *ent = ring_buffer_event_data(event);
1680
1681                 tracing_generic_entry_update(ent, flags, pc);
1682                 ent->type = type;
1683         }
1684
1685         return event;
1686 }
1687
1688 void
1689 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1690 {
1691         __this_cpu_write(trace_cmdline_save, true);
1692         ring_buffer_unlock_commit(buffer, event);
1693 }
1694
1695 void trace_buffer_unlock_commit(struct trace_array *tr,
1696                                 struct ring_buffer *buffer,
1697                                 struct ring_buffer_event *event,
1698                                 unsigned long flags, int pc)
1699 {
1700         __buffer_unlock_commit(buffer, event);
1701
1702         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1703         ftrace_trace_userstack(buffer, flags, pc);
1704 }
1705 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1706
1707 static struct ring_buffer *temp_buffer;
1708
1709 struct ring_buffer_event *
1710 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1711                           struct trace_event_file *trace_file,
1712                           int type, unsigned long len,
1713                           unsigned long flags, int pc)
1714 {
1715         struct ring_buffer_event *entry;
1716
1717         *current_rb = trace_file->tr->trace_buffer.buffer;
1718         entry = trace_buffer_lock_reserve(*current_rb,
1719                                          type, len, flags, pc);
1720         /*
1721          * If tracing is off, but we have triggers enabled
1722          * we still need to look at the event data. Use the temp_buffer
1723          * to store the trace event for the tigger to use. It's recusive
1724          * safe and will not be recorded anywhere.
1725          */
1726         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1727                 *current_rb = temp_buffer;
1728                 entry = trace_buffer_lock_reserve(*current_rb,
1729                                                   type, len, flags, pc);
1730         }
1731         return entry;
1732 }
1733 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1734
1735 struct ring_buffer_event *
1736 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1737                                   int type, unsigned long len,
1738                                   unsigned long flags, int pc)
1739 {
1740         *current_rb = global_trace.trace_buffer.buffer;
1741         return trace_buffer_lock_reserve(*current_rb,
1742                                          type, len, flags, pc);
1743 }
1744 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1745
1746 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1747                                      struct ring_buffer *buffer,
1748                                      struct ring_buffer_event *event,
1749                                      unsigned long flags, int pc,
1750                                      struct pt_regs *regs)
1751 {
1752         __buffer_unlock_commit(buffer, event);
1753
1754         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1755         ftrace_trace_userstack(buffer, flags, pc);
1756 }
1757 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1758
1759 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1760                                          struct ring_buffer_event *event)
1761 {
1762         ring_buffer_discard_commit(buffer, event);
1763 }
1764 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1765
1766 void
1767 trace_function(struct trace_array *tr,
1768                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1769                int pc)
1770 {
1771         struct trace_event_call *call = &event_function;
1772         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1773         struct ring_buffer_event *event;
1774         struct ftrace_entry *entry;
1775
1776         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1777                                           flags, pc);
1778         if (!event)
1779                 return;
1780         entry   = ring_buffer_event_data(event);
1781         entry->ip                       = ip;
1782         entry->parent_ip                = parent_ip;
1783
1784         if (!call_filter_check_discard(call, entry, buffer, event))
1785                 __buffer_unlock_commit(buffer, event);
1786 }
1787
1788 #ifdef CONFIG_STACKTRACE
1789
1790 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1791 struct ftrace_stack {
1792         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1793 };
1794
1795 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1796 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1797
1798 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1799                                  unsigned long flags,
1800                                  int skip, int pc, struct pt_regs *regs)
1801 {
1802         struct trace_event_call *call = &event_kernel_stack;
1803         struct ring_buffer_event *event;
1804         struct stack_entry *entry;
1805         struct stack_trace trace;
1806         int use_stack;
1807         int size = FTRACE_STACK_ENTRIES;
1808
1809         trace.nr_entries        = 0;
1810         trace.skip              = skip;
1811
1812         /*
1813          * Since events can happen in NMIs there's no safe way to
1814          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1815          * or NMI comes in, it will just have to use the default
1816          * FTRACE_STACK_SIZE.
1817          */
1818         preempt_disable_notrace();
1819
1820         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1821         /*
1822          * We don't need any atomic variables, just a barrier.
1823          * If an interrupt comes in, we don't care, because it would
1824          * have exited and put the counter back to what we want.
1825          * We just need a barrier to keep gcc from moving things
1826          * around.
1827          */
1828         barrier();
1829         if (use_stack == 1) {
1830                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1831                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1832
1833                 if (regs)
1834                         save_stack_trace_regs(regs, &trace);
1835                 else
1836                         save_stack_trace(&trace);
1837
1838                 if (trace.nr_entries > size)
1839                         size = trace.nr_entries;
1840         } else
1841                 /* From now on, use_stack is a boolean */
1842                 use_stack = 0;
1843
1844         size *= sizeof(unsigned long);
1845
1846         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1847                                           sizeof(*entry) + size, flags, pc);
1848         if (!event)
1849                 goto out;
1850         entry = ring_buffer_event_data(event);
1851
1852         memset(&entry->caller, 0, size);
1853
1854         if (use_stack)
1855                 memcpy(&entry->caller, trace.entries,
1856                        trace.nr_entries * sizeof(unsigned long));
1857         else {
1858                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1859                 trace.entries           = entry->caller;
1860                 if (regs)
1861                         save_stack_trace_regs(regs, &trace);
1862                 else
1863                         save_stack_trace(&trace);
1864         }
1865
1866         entry->size = trace.nr_entries;
1867
1868         if (!call_filter_check_discard(call, entry, buffer, event))
1869                 __buffer_unlock_commit(buffer, event);
1870
1871  out:
1872         /* Again, don't let gcc optimize things here */
1873         barrier();
1874         __this_cpu_dec(ftrace_stack_reserve);
1875         preempt_enable_notrace();
1876
1877 }
1878
1879 static inline void ftrace_trace_stack(struct trace_array *tr,
1880                                       struct ring_buffer *buffer,
1881                                       unsigned long flags,
1882                                       int skip, int pc, struct pt_regs *regs)
1883 {
1884         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1885                 return;
1886
1887         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1888 }
1889
1890 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1891                    int pc)
1892 {
1893         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1894 }
1895
1896 /**
1897  * trace_dump_stack - record a stack back trace in the trace buffer
1898  * @skip: Number of functions to skip (helper handlers)
1899  */
1900 void trace_dump_stack(int skip)
1901 {
1902         unsigned long flags;
1903
1904         if (tracing_disabled || tracing_selftest_running)
1905                 return;
1906
1907         local_save_flags(flags);
1908
1909         /*
1910          * Skip 3 more, seems to get us at the caller of
1911          * this function.
1912          */
1913         skip += 3;
1914         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1915                              flags, skip, preempt_count(), NULL);
1916 }
1917
1918 static DEFINE_PER_CPU(int, user_stack_count);
1919
1920 void
1921 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1922 {
1923         struct trace_event_call *call = &event_user_stack;
1924         struct ring_buffer_event *event;
1925         struct userstack_entry *entry;
1926         struct stack_trace trace;
1927
1928         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1929                 return;
1930
1931         /*
1932          * NMIs can not handle page faults, even with fix ups.
1933          * The save user stack can (and often does) fault.
1934          */
1935         if (unlikely(in_nmi()))
1936                 return;
1937
1938         /*
1939          * prevent recursion, since the user stack tracing may
1940          * trigger other kernel events.
1941          */
1942         preempt_disable();
1943         if (__this_cpu_read(user_stack_count))
1944                 goto out;
1945
1946         __this_cpu_inc(user_stack_count);
1947
1948         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1949                                           sizeof(*entry), flags, pc);
1950         if (!event)
1951                 goto out_drop_count;
1952         entry   = ring_buffer_event_data(event);
1953
1954         entry->tgid             = current->tgid;
1955         memset(&entry->caller, 0, sizeof(entry->caller));
1956
1957         trace.nr_entries        = 0;
1958         trace.max_entries       = FTRACE_STACK_ENTRIES;
1959         trace.skip              = 0;
1960         trace.entries           = entry->caller;
1961
1962         save_stack_trace_user(&trace);
1963         if (!call_filter_check_discard(call, entry, buffer, event))
1964                 __buffer_unlock_commit(buffer, event);
1965
1966  out_drop_count:
1967         __this_cpu_dec(user_stack_count);
1968  out:
1969         preempt_enable();
1970 }
1971
1972 #ifdef UNUSED
1973 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1974 {
1975         ftrace_trace_userstack(tr, flags, preempt_count());
1976 }
1977 #endif /* UNUSED */
1978
1979 #endif /* CONFIG_STACKTRACE */
1980
1981 /* created for use with alloc_percpu */
1982 struct trace_buffer_struct {
1983         char buffer[TRACE_BUF_SIZE];
1984 };
1985
1986 static struct trace_buffer_struct *trace_percpu_buffer;
1987 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1988 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1989 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1990
1991 /*
1992  * The buffer used is dependent on the context. There is a per cpu
1993  * buffer for normal context, softirq contex, hard irq context and
1994  * for NMI context. Thise allows for lockless recording.
1995  *
1996  * Note, if the buffers failed to be allocated, then this returns NULL
1997  */
1998 static char *get_trace_buf(void)
1999 {
2000         struct trace_buffer_struct *percpu_buffer;
2001
2002         /*
2003          * If we have allocated per cpu buffers, then we do not
2004          * need to do any locking.
2005          */
2006         if (in_nmi())
2007                 percpu_buffer = trace_percpu_nmi_buffer;
2008         else if (in_irq())
2009                 percpu_buffer = trace_percpu_irq_buffer;
2010         else if (in_softirq())
2011                 percpu_buffer = trace_percpu_sirq_buffer;
2012         else
2013                 percpu_buffer = trace_percpu_buffer;
2014
2015         if (!percpu_buffer)
2016                 return NULL;
2017
2018         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2019 }
2020
2021 static int alloc_percpu_trace_buffer(void)
2022 {
2023         struct trace_buffer_struct *buffers;
2024         struct trace_buffer_struct *sirq_buffers;
2025         struct trace_buffer_struct *irq_buffers;
2026         struct trace_buffer_struct *nmi_buffers;
2027
2028         buffers = alloc_percpu(struct trace_buffer_struct);
2029         if (!buffers)
2030                 goto err_warn;
2031
2032         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2033         if (!sirq_buffers)
2034                 goto err_sirq;
2035
2036         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2037         if (!irq_buffers)
2038                 goto err_irq;
2039
2040         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2041         if (!nmi_buffers)
2042                 goto err_nmi;
2043
2044         trace_percpu_buffer = buffers;
2045         trace_percpu_sirq_buffer = sirq_buffers;
2046         trace_percpu_irq_buffer = irq_buffers;
2047         trace_percpu_nmi_buffer = nmi_buffers;
2048
2049         return 0;
2050
2051  err_nmi:
2052         free_percpu(irq_buffers);
2053  err_irq:
2054         free_percpu(sirq_buffers);
2055  err_sirq:
2056         free_percpu(buffers);
2057  err_warn:
2058         WARN(1, "Could not allocate percpu trace_printk buffer");
2059         return -ENOMEM;
2060 }
2061
2062 static int buffers_allocated;
2063
2064 void trace_printk_init_buffers(void)
2065 {
2066         if (buffers_allocated)
2067                 return;
2068
2069         if (alloc_percpu_trace_buffer())
2070                 return;
2071
2072         /* trace_printk() is for debug use only. Don't use it in production. */
2073
2074         pr_warning("\n");
2075         pr_warning("**********************************************************\n");
2076         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2077         pr_warning("**                                                      **\n");
2078         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2079         pr_warning("**                                                      **\n");
2080         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2081         pr_warning("** unsafe for production use.                           **\n");
2082         pr_warning("**                                                      **\n");
2083         pr_warning("** If you see this message and you are not debugging    **\n");
2084         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2085         pr_warning("**                                                      **\n");
2086         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2087         pr_warning("**********************************************************\n");
2088
2089         /* Expand the buffers to set size */
2090         tracing_update_buffers();
2091
2092         buffers_allocated = 1;
2093
2094         /*
2095          * trace_printk_init_buffers() can be called by modules.
2096          * If that happens, then we need to start cmdline recording
2097          * directly here. If the global_trace.buffer is already
2098          * allocated here, then this was called by module code.
2099          */
2100         if (global_trace.trace_buffer.buffer)
2101                 tracing_start_cmdline_record();
2102 }
2103
2104 void trace_printk_start_comm(void)
2105 {
2106         /* Start tracing comms if trace printk is set */
2107         if (!buffers_allocated)
2108                 return;
2109         tracing_start_cmdline_record();
2110 }
2111
2112 static void trace_printk_start_stop_comm(int enabled)
2113 {
2114         if (!buffers_allocated)
2115                 return;
2116
2117         if (enabled)
2118                 tracing_start_cmdline_record();
2119         else
2120                 tracing_stop_cmdline_record();
2121 }
2122
2123 /**
2124  * trace_vbprintk - write binary msg to tracing buffer
2125  *
2126  */
2127 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2128 {
2129         struct trace_event_call *call = &event_bprint;
2130         struct ring_buffer_event *event;
2131         struct ring_buffer *buffer;
2132         struct trace_array *tr = &global_trace;
2133         struct bprint_entry *entry;
2134         unsigned long flags;
2135         char *tbuffer;
2136         int len = 0, size, pc;
2137
2138         if (unlikely(tracing_selftest_running || tracing_disabled))
2139                 return 0;
2140
2141         /* Don't pollute graph traces with trace_vprintk internals */
2142         pause_graph_tracing();
2143
2144         pc = preempt_count();
2145         preempt_disable_notrace();
2146
2147         tbuffer = get_trace_buf();
2148         if (!tbuffer) {
2149                 len = 0;
2150                 goto out;
2151         }
2152
2153         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2154
2155         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2156                 goto out;
2157
2158         local_save_flags(flags);
2159         size = sizeof(*entry) + sizeof(u32) * len;
2160         buffer = tr->trace_buffer.buffer;
2161         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2162                                           flags, pc);
2163         if (!event)
2164                 goto out;
2165         entry = ring_buffer_event_data(event);
2166         entry->ip                       = ip;
2167         entry->fmt                      = fmt;
2168
2169         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2170         if (!call_filter_check_discard(call, entry, buffer, event)) {
2171                 __buffer_unlock_commit(buffer, event);
2172                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2173         }
2174
2175 out:
2176         preempt_enable_notrace();
2177         unpause_graph_tracing();
2178
2179         return len;
2180 }
2181 EXPORT_SYMBOL_GPL(trace_vbprintk);
2182
2183 static int
2184 __trace_array_vprintk(struct ring_buffer *buffer,
2185                       unsigned long ip, const char *fmt, va_list args)
2186 {
2187         struct trace_event_call *call = &event_print;
2188         struct ring_buffer_event *event;
2189         int len = 0, size, pc;
2190         struct print_entry *entry;
2191         unsigned long flags;
2192         char *tbuffer;
2193
2194         if (tracing_disabled || tracing_selftest_running)
2195                 return 0;
2196
2197         /* Don't pollute graph traces with trace_vprintk internals */
2198         pause_graph_tracing();
2199
2200         pc = preempt_count();
2201         preempt_disable_notrace();
2202
2203
2204         tbuffer = get_trace_buf();
2205         if (!tbuffer) {
2206                 len = 0;
2207                 goto out;
2208         }
2209
2210         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2211
2212         local_save_flags(flags);
2213         size = sizeof(*entry) + len + 1;
2214         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2215                                           flags, pc);
2216         if (!event)
2217                 goto out;
2218         entry = ring_buffer_event_data(event);
2219         entry->ip = ip;
2220
2221         memcpy(&entry->buf, tbuffer, len + 1);
2222         if (!call_filter_check_discard(call, entry, buffer, event)) {
2223                 __buffer_unlock_commit(buffer, event);
2224                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2225         }
2226  out:
2227         preempt_enable_notrace();
2228         unpause_graph_tracing();
2229
2230         return len;
2231 }
2232
2233 int trace_array_vprintk(struct trace_array *tr,
2234                         unsigned long ip, const char *fmt, va_list args)
2235 {
2236         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2237 }
2238
2239 int trace_array_printk(struct trace_array *tr,
2240                        unsigned long ip, const char *fmt, ...)
2241 {
2242         int ret;
2243         va_list ap;
2244
2245         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2246                 return 0;
2247
2248         va_start(ap, fmt);
2249         ret = trace_array_vprintk(tr, ip, fmt, ap);
2250         va_end(ap);
2251         return ret;
2252 }
2253
2254 int trace_array_printk_buf(struct ring_buffer *buffer,
2255                            unsigned long ip, const char *fmt, ...)
2256 {
2257         int ret;
2258         va_list ap;
2259
2260         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2261                 return 0;
2262
2263         va_start(ap, fmt);
2264         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2265         va_end(ap);
2266         return ret;
2267 }
2268
2269 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2270 {
2271         return trace_array_vprintk(&global_trace, ip, fmt, args);
2272 }
2273 EXPORT_SYMBOL_GPL(trace_vprintk);
2274
2275 static void trace_iterator_increment(struct trace_iterator *iter)
2276 {
2277         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2278
2279         iter->idx++;
2280         if (buf_iter)
2281                 ring_buffer_read(buf_iter, NULL);
2282 }
2283
2284 static struct trace_entry *
2285 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2286                 unsigned long *lost_events)
2287 {
2288         struct ring_buffer_event *event;
2289         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2290
2291         if (buf_iter)
2292                 event = ring_buffer_iter_peek(buf_iter, ts);
2293         else
2294                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2295                                          lost_events);
2296
2297         if (event) {
2298                 iter->ent_size = ring_buffer_event_length(event);
2299                 return ring_buffer_event_data(event);
2300         }
2301         iter->ent_size = 0;
2302         return NULL;
2303 }
2304
2305 static struct trace_entry *
2306 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2307                   unsigned long *missing_events, u64 *ent_ts)
2308 {
2309         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2310         struct trace_entry *ent, *next = NULL;
2311         unsigned long lost_events = 0, next_lost = 0;
2312         int cpu_file = iter->cpu_file;
2313         u64 next_ts = 0, ts;
2314         int next_cpu = -1;
2315         int next_size = 0;
2316         int cpu;
2317
2318         /*
2319          * If we are in a per_cpu trace file, don't bother by iterating over
2320          * all cpu and peek directly.
2321          */
2322         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2323                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2324                         return NULL;
2325                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2326                 if (ent_cpu)
2327                         *ent_cpu = cpu_file;
2328
2329                 return ent;
2330         }
2331
2332         for_each_tracing_cpu(cpu) {
2333
2334                 if (ring_buffer_empty_cpu(buffer, cpu))
2335                         continue;
2336
2337                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2338
2339                 /*
2340                  * Pick the entry with the smallest timestamp:
2341                  */
2342                 if (ent && (!next || ts < next_ts)) {
2343                         next = ent;
2344                         next_cpu = cpu;
2345                         next_ts = ts;
2346                         next_lost = lost_events;
2347                         next_size = iter->ent_size;
2348                 }
2349         }
2350
2351         iter->ent_size = next_size;
2352
2353         if (ent_cpu)
2354                 *ent_cpu = next_cpu;
2355
2356         if (ent_ts)
2357                 *ent_ts = next_ts;
2358
2359         if (missing_events)
2360                 *missing_events = next_lost;
2361
2362         return next;
2363 }
2364
2365 /* Find the next real entry, without updating the iterator itself */
2366 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2367                                           int *ent_cpu, u64 *ent_ts)
2368 {
2369         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2370 }
2371
2372 /* Find the next real entry, and increment the iterator to the next entry */
2373 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2374 {
2375         iter->ent = __find_next_entry(iter, &iter->cpu,
2376                                       &iter->lost_events, &iter->ts);
2377
2378         if (iter->ent)
2379                 trace_iterator_increment(iter);
2380
2381         return iter->ent ? iter : NULL;
2382 }
2383
2384 static void trace_consume(struct trace_iterator *iter)
2385 {
2386         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2387                             &iter->lost_events);
2388 }
2389
2390 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2391 {
2392         struct trace_iterator *iter = m->private;
2393         int i = (int)*pos;
2394         void *ent;
2395
2396         WARN_ON_ONCE(iter->leftover);
2397
2398         (*pos)++;
2399
2400         /* can't go backwards */
2401         if (iter->idx > i)
2402                 return NULL;
2403
2404         if (iter->idx < 0)
2405                 ent = trace_find_next_entry_inc(iter);
2406         else
2407                 ent = iter;
2408
2409         while (ent && iter->idx < i)
2410                 ent = trace_find_next_entry_inc(iter);
2411
2412         iter->pos = *pos;
2413
2414         return ent;
2415 }
2416
2417 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2418 {
2419         struct ring_buffer_event *event;
2420         struct ring_buffer_iter *buf_iter;
2421         unsigned long entries = 0;
2422         u64 ts;
2423
2424         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2425
2426         buf_iter = trace_buffer_iter(iter, cpu);
2427         if (!buf_iter)
2428                 return;
2429
2430         ring_buffer_iter_reset(buf_iter);
2431
2432         /*
2433          * We could have the case with the max latency tracers
2434          * that a reset never took place on a cpu. This is evident
2435          * by the timestamp being before the start of the buffer.
2436          */
2437         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2438                 if (ts >= iter->trace_buffer->time_start)
2439                         break;
2440                 entries++;
2441                 ring_buffer_read(buf_iter, NULL);
2442         }
2443
2444         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2445 }
2446
2447 /*
2448  * The current tracer is copied to avoid a global locking
2449  * all around.
2450  */
2451 static void *s_start(struct seq_file *m, loff_t *pos)
2452 {
2453         struct trace_iterator *iter = m->private;
2454         struct trace_array *tr = iter->tr;
2455         int cpu_file = iter->cpu_file;
2456         void *p = NULL;
2457         loff_t l = 0;
2458         int cpu;
2459
2460         /*
2461          * copy the tracer to avoid using a global lock all around.
2462          * iter->trace is a copy of current_trace, the pointer to the
2463          * name may be used instead of a strcmp(), as iter->trace->name
2464          * will point to the same string as current_trace->name.
2465          */
2466         mutex_lock(&trace_types_lock);
2467         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2468                 *iter->trace = *tr->current_trace;
2469         mutex_unlock(&trace_types_lock);
2470
2471 #ifdef CONFIG_TRACER_MAX_TRACE
2472         if (iter->snapshot && iter->trace->use_max_tr)
2473                 return ERR_PTR(-EBUSY);
2474 #endif
2475
2476         if (!iter->snapshot)
2477                 atomic_inc(&trace_record_cmdline_disabled);
2478
2479         if (*pos != iter->pos) {
2480                 iter->ent = NULL;
2481                 iter->cpu = 0;
2482                 iter->idx = -1;
2483
2484                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2485                         for_each_tracing_cpu(cpu)
2486                                 tracing_iter_reset(iter, cpu);
2487                 } else
2488                         tracing_iter_reset(iter, cpu_file);
2489
2490                 iter->leftover = 0;
2491                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2492                         ;
2493
2494         } else {
2495                 /*
2496                  * If we overflowed the seq_file before, then we want
2497                  * to just reuse the trace_seq buffer again.
2498                  */
2499                 if (iter->leftover)
2500                         p = iter;
2501                 else {
2502                         l = *pos - 1;
2503                         p = s_next(m, p, &l);
2504                 }
2505         }
2506
2507         trace_event_read_lock();
2508         trace_access_lock(cpu_file);
2509         return p;
2510 }
2511
2512 static void s_stop(struct seq_file *m, void *p)
2513 {
2514         struct trace_iterator *iter = m->private;
2515
2516 #ifdef CONFIG_TRACER_MAX_TRACE
2517         if (iter->snapshot && iter->trace->use_max_tr)
2518                 return;
2519 #endif
2520
2521         if (!iter->snapshot)
2522                 atomic_dec(&trace_record_cmdline_disabled);
2523
2524         trace_access_unlock(iter->cpu_file);
2525         trace_event_read_unlock();
2526 }
2527
2528 static void
2529 get_total_entries(struct trace_buffer *buf,
2530                   unsigned long *total, unsigned long *entries)
2531 {
2532         unsigned long count;
2533         int cpu;
2534
2535         *total = 0;
2536         *entries = 0;
2537
2538         for_each_tracing_cpu(cpu) {
2539                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2540                 /*
2541                  * If this buffer has skipped entries, then we hold all
2542                  * entries for the trace and we need to ignore the
2543                  * ones before the time stamp.
2544                  */
2545                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2546                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2547                         /* total is the same as the entries */
2548                         *total += count;
2549                 } else
2550                         *total += count +
2551                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2552                 *entries += count;
2553         }
2554 }
2555
2556 static void print_lat_help_header(struct seq_file *m)
2557 {
2558         seq_puts(m, "#                  _------=> CPU#            \n"
2559                     "#                 / _-----=> irqs-off        \n"
2560                     "#                | / _----=> need-resched    \n"
2561                     "#                || / _---=> hardirq/softirq \n"
2562                     "#                ||| / _--=> preempt-depth   \n"
2563                     "#                |||| /     delay            \n"
2564                     "#  cmd     pid   ||||| time  |   caller      \n"
2565                     "#     \\   /      |||||  \\    |   /         \n");
2566 }
2567
2568 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2569 {
2570         unsigned long total;
2571         unsigned long entries;
2572
2573         get_total_entries(buf, &total, &entries);
2574         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2575                    entries, total, num_online_cpus());
2576         seq_puts(m, "#\n");
2577 }
2578
2579 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2580 {
2581         print_event_info(buf, m);
2582         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2583                     "#              | |       |          |         |\n");
2584 }
2585
2586 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2587 {
2588         print_event_info(buf, m);
2589         seq_puts(m, "#                              _-----=> irqs-off\n"
2590                     "#                             / _----=> need-resched\n"
2591                     "#                            | / _---=> hardirq/softirq\n"
2592                     "#                            || / _--=> preempt-depth\n"
2593                     "#                            ||| /     delay\n"
2594                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2595                     "#              | |       |   ||||       |         |\n");
2596 }
2597
2598 void
2599 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2600 {
2601         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2602         struct trace_buffer *buf = iter->trace_buffer;
2603         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2604         struct tracer *type = iter->trace;
2605         unsigned long entries;
2606         unsigned long total;
2607         const char *name = "preemption";
2608
2609         name = type->name;
2610
2611         get_total_entries(buf, &total, &entries);
2612
2613         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2614                    name, UTS_RELEASE);
2615         seq_puts(m, "# -----------------------------------"
2616                  "---------------------------------\n");
2617         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2618                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2619                    nsecs_to_usecs(data->saved_latency),
2620                    entries,
2621                    total,
2622                    buf->cpu,
2623 #if defined(CONFIG_PREEMPT_NONE)
2624                    "server",
2625 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2626                    "desktop",
2627 #elif defined(CONFIG_PREEMPT)
2628                    "preempt",
2629 #else
2630                    "unknown",
2631 #endif
2632                    /* These are reserved for later use */
2633                    0, 0, 0, 0);
2634 #ifdef CONFIG_SMP
2635         seq_printf(m, " #P:%d)\n", num_online_cpus());
2636 #else
2637         seq_puts(m, ")\n");
2638 #endif
2639         seq_puts(m, "#    -----------------\n");
2640         seq_printf(m, "#    | task: %.16s-%d "
2641                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2642                    data->comm, data->pid,
2643                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2644                    data->policy, data->rt_priority);
2645         seq_puts(m, "#    -----------------\n");
2646
2647         if (data->critical_start) {
2648                 seq_puts(m, "#  => started at: ");
2649                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2650                 trace_print_seq(m, &iter->seq);
2651                 seq_puts(m, "\n#  => ended at:   ");
2652                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2653                 trace_print_seq(m, &iter->seq);
2654                 seq_puts(m, "\n#\n");
2655         }
2656
2657         seq_puts(m, "#\n");
2658 }
2659
2660 static void test_cpu_buff_start(struct trace_iterator *iter)
2661 {
2662         struct trace_seq *s = &iter->seq;
2663         struct trace_array *tr = iter->tr;
2664
2665         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2666                 return;
2667
2668         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2669                 return;
2670
2671         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2672                 return;
2673
2674         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2675                 return;
2676
2677         if (iter->started)
2678                 cpumask_set_cpu(iter->cpu, iter->started);
2679
2680         /* Don't print started cpu buffer for the first entry of the trace */
2681         if (iter->idx > 1)
2682                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2683                                 iter->cpu);
2684 }
2685
2686 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2687 {
2688         struct trace_array *tr = iter->tr;
2689         struct trace_seq *s = &iter->seq;
2690         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2691         struct trace_entry *entry;
2692         struct trace_event *event;
2693
2694         entry = iter->ent;
2695
2696         test_cpu_buff_start(iter);
2697
2698         event = ftrace_find_event(entry->type);
2699
2700         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2701                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2702                         trace_print_lat_context(iter);
2703                 else
2704                         trace_print_context(iter);
2705         }
2706
2707         if (trace_seq_has_overflowed(s))
2708                 return TRACE_TYPE_PARTIAL_LINE;
2709
2710         if (event)
2711                 return event->funcs->trace(iter, sym_flags, event);
2712
2713         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2714
2715         return trace_handle_return(s);
2716 }
2717
2718 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2719 {
2720         struct trace_array *tr = iter->tr;
2721         struct trace_seq *s = &iter->seq;
2722         struct trace_entry *entry;
2723         struct trace_event *event;
2724
2725         entry = iter->ent;
2726
2727         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2728                 trace_seq_printf(s, "%d %d %llu ",
2729                                  entry->pid, iter->cpu, iter->ts);
2730
2731         if (trace_seq_has_overflowed(s))
2732                 return TRACE_TYPE_PARTIAL_LINE;
2733
2734         event = ftrace_find_event(entry->type);
2735         if (event)
2736                 return event->funcs->raw(iter, 0, event);
2737
2738         trace_seq_printf(s, "%d ?\n", entry->type);
2739
2740         return trace_handle_return(s);
2741 }
2742
2743 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2744 {
2745         struct trace_array *tr = iter->tr;
2746         struct trace_seq *s = &iter->seq;
2747         unsigned char newline = '\n';
2748         struct trace_entry *entry;
2749         struct trace_event *event;
2750
2751         entry = iter->ent;
2752
2753         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2754                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2755                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2756                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2757                 if (trace_seq_has_overflowed(s))
2758                         return TRACE_TYPE_PARTIAL_LINE;
2759         }
2760
2761         event = ftrace_find_event(entry->type);
2762         if (event) {
2763                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2764                 if (ret != TRACE_TYPE_HANDLED)
2765                         return ret;
2766         }
2767
2768         SEQ_PUT_FIELD(s, newline);
2769
2770         return trace_handle_return(s);
2771 }
2772
2773 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2774 {
2775         struct trace_array *tr = iter->tr;
2776         struct trace_seq *s = &iter->seq;
2777         struct trace_entry *entry;
2778         struct trace_event *event;
2779
2780         entry = iter->ent;
2781
2782         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2783                 SEQ_PUT_FIELD(s, entry->pid);
2784                 SEQ_PUT_FIELD(s, iter->cpu);
2785                 SEQ_PUT_FIELD(s, iter->ts);
2786                 if (trace_seq_has_overflowed(s))
2787                         return TRACE_TYPE_PARTIAL_LINE;
2788         }
2789
2790         event = ftrace_find_event(entry->type);
2791         return event ? event->funcs->binary(iter, 0, event) :
2792                 TRACE_TYPE_HANDLED;
2793 }
2794
2795 int trace_empty(struct trace_iterator *iter)
2796 {
2797         struct ring_buffer_iter *buf_iter;
2798         int cpu;
2799
2800         /* If we are looking at one CPU buffer, only check that one */
2801         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2802                 cpu = iter->cpu_file;
2803                 buf_iter = trace_buffer_iter(iter, cpu);
2804                 if (buf_iter) {
2805                         if (!ring_buffer_iter_empty(buf_iter))
2806                                 return 0;
2807                 } else {
2808                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2809                                 return 0;
2810                 }
2811                 return 1;
2812         }
2813
2814         for_each_tracing_cpu(cpu) {
2815                 buf_iter = trace_buffer_iter(iter, cpu);
2816                 if (buf_iter) {
2817                         if (!ring_buffer_iter_empty(buf_iter))
2818                                 return 0;
2819                 } else {
2820                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2821                                 return 0;
2822                 }
2823         }
2824
2825         return 1;
2826 }
2827
2828 /*  Called with trace_event_read_lock() held. */
2829 enum print_line_t print_trace_line(struct trace_iterator *iter)
2830 {
2831         struct trace_array *tr = iter->tr;
2832         unsigned long trace_flags = tr->trace_flags;
2833         enum print_line_t ret;
2834
2835         if (iter->lost_events) {
2836                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2837                                  iter->cpu, iter->lost_events);
2838                 if (trace_seq_has_overflowed(&iter->seq))
2839                         return TRACE_TYPE_PARTIAL_LINE;
2840         }
2841
2842         if (iter->trace && iter->trace->print_line) {
2843                 ret = iter->trace->print_line(iter);
2844                 if (ret != TRACE_TYPE_UNHANDLED)
2845                         return ret;
2846         }
2847
2848         if (iter->ent->type == TRACE_BPUTS &&
2849                         trace_flags & TRACE_ITER_PRINTK &&
2850                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2851                 return trace_print_bputs_msg_only(iter);
2852
2853         if (iter->ent->type == TRACE_BPRINT &&
2854                         trace_flags & TRACE_ITER_PRINTK &&
2855                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2856                 return trace_print_bprintk_msg_only(iter);
2857
2858         if (iter->ent->type == TRACE_PRINT &&
2859                         trace_flags & TRACE_ITER_PRINTK &&
2860                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2861                 return trace_print_printk_msg_only(iter);
2862
2863         if (trace_flags & TRACE_ITER_BIN)
2864                 return print_bin_fmt(iter);
2865
2866         if (trace_flags & TRACE_ITER_HEX)
2867                 return print_hex_fmt(iter);
2868
2869         if (trace_flags & TRACE_ITER_RAW)
2870                 return print_raw_fmt(iter);
2871
2872         return print_trace_fmt(iter);
2873 }
2874
2875 void trace_latency_header(struct seq_file *m)
2876 {
2877         struct trace_iterator *iter = m->private;
2878         struct trace_array *tr = iter->tr;
2879
2880         /* print nothing if the buffers are empty */
2881         if (trace_empty(iter))
2882                 return;
2883
2884         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2885                 print_trace_header(m, iter);
2886
2887         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2888                 print_lat_help_header(m);
2889 }
2890
2891 void trace_default_header(struct seq_file *m)
2892 {
2893         struct trace_iterator *iter = m->private;
2894         struct trace_array *tr = iter->tr;
2895         unsigned long trace_flags = tr->trace_flags;
2896
2897         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2898                 return;
2899
2900         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2901                 /* print nothing if the buffers are empty */
2902                 if (trace_empty(iter))
2903                         return;
2904                 print_trace_header(m, iter);
2905                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2906                         print_lat_help_header(m);
2907         } else {
2908                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2909                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2910                                 print_func_help_header_irq(iter->trace_buffer, m);
2911                         else
2912                                 print_func_help_header(iter->trace_buffer, m);
2913                 }
2914         }
2915 }
2916
2917 static void test_ftrace_alive(struct seq_file *m)
2918 {
2919         if (!ftrace_is_dead())
2920                 return;
2921         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2922                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2923 }
2924
2925 #ifdef CONFIG_TRACER_MAX_TRACE
2926 static void show_snapshot_main_help(struct seq_file *m)
2927 {
2928         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2929                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2930                     "#                      Takes a snapshot of the main buffer.\n"
2931                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2932                     "#                      (Doesn't have to be '2' works with any number that\n"
2933                     "#                       is not a '0' or '1')\n");
2934 }
2935
2936 static void show_snapshot_percpu_help(struct seq_file *m)
2937 {
2938         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2939 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2940         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2941                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2942 #else
2943         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2944                     "#                     Must use main snapshot file to allocate.\n");
2945 #endif
2946         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2947                     "#                      (Doesn't have to be '2' works with any number that\n"
2948                     "#                       is not a '0' or '1')\n");
2949 }
2950
2951 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2952 {
2953         if (iter->tr->allocated_snapshot)
2954                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2955         else
2956                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2957
2958         seq_puts(m, "# Snapshot commands:\n");
2959         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2960                 show_snapshot_main_help(m);
2961         else
2962                 show_snapshot_percpu_help(m);
2963 }
2964 #else
2965 /* Should never be called */
2966 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2967 #endif
2968
2969 static int s_show(struct seq_file *m, void *v)
2970 {
2971         struct trace_iterator *iter = v;
2972         int ret;
2973
2974         if (iter->ent == NULL) {
2975                 if (iter->tr) {
2976                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2977                         seq_puts(m, "#\n");
2978                         test_ftrace_alive(m);
2979                 }
2980                 if (iter->snapshot && trace_empty(iter))
2981                         print_snapshot_help(m, iter);
2982                 else if (iter->trace && iter->trace->print_header)
2983                         iter->trace->print_header(m);
2984                 else
2985                         trace_default_header(m);
2986
2987         } else if (iter->leftover) {
2988                 /*
2989                  * If we filled the seq_file buffer earlier, we
2990                  * want to just show it now.
2991                  */
2992                 ret = trace_print_seq(m, &iter->seq);
2993
2994                 /* ret should this time be zero, but you never know */
2995                 iter->leftover = ret;
2996
2997         } else {
2998                 print_trace_line(iter);
2999                 ret = trace_print_seq(m, &iter->seq);
3000                 /*
3001                  * If we overflow the seq_file buffer, then it will
3002                  * ask us for this data again at start up.
3003                  * Use that instead.
3004                  *  ret is 0 if seq_file write succeeded.
3005                  *        -1 otherwise.
3006                  */
3007                 iter->leftover = ret;
3008         }
3009
3010         return 0;
3011 }
3012
3013 /*
3014  * Should be used after trace_array_get(), trace_types_lock
3015  * ensures that i_cdev was already initialized.
3016  */
3017 static inline int tracing_get_cpu(struct inode *inode)
3018 {
3019         if (inode->i_cdev) /* See trace_create_cpu_file() */
3020                 return (long)inode->i_cdev - 1;
3021         return RING_BUFFER_ALL_CPUS;
3022 }
3023
3024 static const struct seq_operations tracer_seq_ops = {
3025         .start          = s_start,
3026         .next           = s_next,
3027         .stop           = s_stop,
3028         .show           = s_show,
3029 };
3030
3031 static struct trace_iterator *
3032 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3033 {
3034         struct trace_array *tr = inode->i_private;
3035         struct trace_iterator *iter;
3036         int cpu;
3037
3038         if (tracing_disabled)
3039                 return ERR_PTR(-ENODEV);
3040
3041         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3042         if (!iter)
3043                 return ERR_PTR(-ENOMEM);
3044
3045         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3046                                     GFP_KERNEL);
3047         if (!iter->buffer_iter)
3048                 goto release;
3049
3050         /*
3051          * We make a copy of the current tracer to avoid concurrent
3052          * changes on it while we are reading.
3053          */
3054         mutex_lock(&trace_types_lock);
3055         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3056         if (!iter->trace)
3057                 goto fail;
3058
3059         *iter->trace = *tr->current_trace;
3060
3061         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3062                 goto fail;
3063
3064         iter->tr = tr;
3065
3066 #ifdef CONFIG_TRACER_MAX_TRACE
3067         /* Currently only the top directory has a snapshot */
3068         if (tr->current_trace->print_max || snapshot)
3069                 iter->trace_buffer = &tr->max_buffer;
3070         else
3071 #endif
3072                 iter->trace_buffer = &tr->trace_buffer;
3073         iter->snapshot = snapshot;
3074         iter->pos = -1;
3075         iter->cpu_file = tracing_get_cpu(inode);
3076         mutex_init(&iter->mutex);
3077
3078         /* Notify the tracer early; before we stop tracing. */
3079         if (iter->trace && iter->trace->open)
3080                 iter->trace->open(iter);
3081
3082         /* Annotate start of buffers if we had overruns */
3083         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3084                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3085
3086         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3087         if (trace_clocks[tr->clock_id].in_ns)
3088                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3089
3090         /* stop the trace while dumping if we are not opening "snapshot" */
3091         if (!iter->snapshot)
3092                 tracing_stop_tr(tr);
3093
3094         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3095                 for_each_tracing_cpu(cpu) {
3096                         iter->buffer_iter[cpu] =
3097                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3098                 }
3099                 ring_buffer_read_prepare_sync();
3100                 for_each_tracing_cpu(cpu) {
3101                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3102                         tracing_iter_reset(iter, cpu);
3103                 }
3104         } else {
3105                 cpu = iter->cpu_file;
3106                 iter->buffer_iter[cpu] =
3107                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3108                 ring_buffer_read_prepare_sync();
3109                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3110                 tracing_iter_reset(iter, cpu);
3111         }
3112
3113         mutex_unlock(&trace_types_lock);
3114
3115         return iter;
3116
3117  fail:
3118         mutex_unlock(&trace_types_lock);
3119         kfree(iter->trace);
3120         kfree(iter->buffer_iter);
3121 release:
3122         seq_release_private(inode, file);
3123         return ERR_PTR(-ENOMEM);
3124 }
3125
3126 int tracing_open_generic(struct inode *inode, struct file *filp)
3127 {
3128         if (tracing_disabled)
3129                 return -ENODEV;
3130
3131         filp->private_data = inode->i_private;
3132         return 0;
3133 }
3134
3135 bool tracing_is_disabled(void)
3136 {
3137         return (tracing_disabled) ? true: false;
3138 }
3139
3140 /*
3141  * Open and update trace_array ref count.
3142  * Must have the current trace_array passed to it.
3143  */
3144 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3145 {
3146         struct trace_array *tr = inode->i_private;
3147
3148         if (tracing_disabled)
3149                 return -ENODEV;
3150
3151         if (trace_array_get(tr) < 0)
3152                 return -ENODEV;
3153
3154         filp->private_data = inode->i_private;
3155
3156         return 0;
3157 }
3158
3159 static int tracing_release(struct inode *inode, struct file *file)
3160 {
3161         struct trace_array *tr = inode->i_private;
3162         struct seq_file *m = file->private_data;
3163         struct trace_iterator *iter;
3164         int cpu;
3165
3166         if (!(file->f_mode & FMODE_READ)) {
3167                 trace_array_put(tr);
3168                 return 0;
3169         }
3170
3171         /* Writes do not use seq_file */
3172         iter = m->private;
3173         mutex_lock(&trace_types_lock);
3174
3175         for_each_tracing_cpu(cpu) {
3176                 if (iter->buffer_iter[cpu])
3177                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3178         }
3179
3180         if (iter->trace && iter->trace->close)
3181                 iter->trace->close(iter);
3182
3183         if (!iter->snapshot)
3184                 /* reenable tracing if it was previously enabled */
3185                 tracing_start_tr(tr);
3186
3187         __trace_array_put(tr);
3188
3189         mutex_unlock(&trace_types_lock);
3190
3191         mutex_destroy(&iter->mutex);
3192         free_cpumask_var(iter->started);
3193         kfree(iter->trace);
3194         kfree(iter->buffer_iter);
3195         seq_release_private(inode, file);
3196
3197         return 0;
3198 }
3199
3200 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3201 {
3202         struct trace_array *tr = inode->i_private;
3203
3204         trace_array_put(tr);
3205         return 0;
3206 }
3207
3208 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3209 {
3210         struct trace_array *tr = inode->i_private;
3211
3212         trace_array_put(tr);
3213
3214         return single_release(inode, file);
3215 }
3216
3217 static int tracing_open(struct inode *inode, struct file *file)
3218 {
3219         struct trace_array *tr = inode->i_private;
3220         struct trace_iterator *iter;
3221         int ret = 0;
3222
3223         if (trace_array_get(tr) < 0)
3224                 return -ENODEV;
3225
3226         /* If this file was open for write, then erase contents */
3227         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3228                 int cpu = tracing_get_cpu(inode);
3229
3230                 if (cpu == RING_BUFFER_ALL_CPUS)
3231                         tracing_reset_online_cpus(&tr->trace_buffer);
3232                 else
3233                         tracing_reset(&tr->trace_buffer, cpu);
3234         }
3235
3236         if (file->f_mode & FMODE_READ) {
3237                 iter = __tracing_open(inode, file, false);
3238                 if (IS_ERR(iter))
3239                         ret = PTR_ERR(iter);
3240                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3241                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3242         }
3243
3244         if (ret < 0)
3245                 trace_array_put(tr);
3246
3247         return ret;
3248 }
3249
3250 /*
3251  * Some tracers are not suitable for instance buffers.
3252  * A tracer is always available for the global array (toplevel)
3253  * or if it explicitly states that it is.
3254  */
3255 static bool
3256 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3257 {
3258         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3259 }
3260
3261 /* Find the next tracer that this trace array may use */
3262 static struct tracer *
3263 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3264 {
3265         while (t && !trace_ok_for_array(t, tr))
3266                 t = t->next;
3267
3268         return t;
3269 }
3270
3271 static void *
3272 t_next(struct seq_file *m, void *v, loff_t *pos)
3273 {
3274         struct trace_array *tr = m->private;
3275         struct tracer *t = v;
3276
3277         (*pos)++;
3278
3279         if (t)
3280                 t = get_tracer_for_array(tr, t->next);
3281
3282         return t;
3283 }
3284
3285 static void *t_start(struct seq_file *m, loff_t *pos)
3286 {
3287         struct trace_array *tr = m->private;
3288         struct tracer *t;
3289         loff_t l = 0;
3290
3291         mutex_lock(&trace_types_lock);
3292
3293         t = get_tracer_for_array(tr, trace_types);
3294         for (; t && l < *pos; t = t_next(m, t, &l))
3295                         ;
3296
3297         return t;
3298 }
3299
3300 static void t_stop(struct seq_file *m, void *p)
3301 {
3302         mutex_unlock(&trace_types_lock);
3303 }
3304
3305 static int t_show(struct seq_file *m, void *v)
3306 {
3307         struct tracer *t = v;
3308
3309         if (!t)
3310                 return 0;
3311
3312         seq_puts(m, t->name);
3313         if (t->next)
3314                 seq_putc(m, ' ');
3315         else
3316                 seq_putc(m, '\n');
3317
3318         return 0;
3319 }
3320
3321 static const struct seq_operations show_traces_seq_ops = {
3322         .start          = t_start,
3323         .next           = t_next,
3324         .stop           = t_stop,
3325         .show           = t_show,
3326 };
3327
3328 static int show_traces_open(struct inode *inode, struct file *file)
3329 {
3330         struct trace_array *tr = inode->i_private;
3331         struct seq_file *m;
3332         int ret;
3333
3334         if (tracing_disabled)
3335                 return -ENODEV;
3336
3337         ret = seq_open(file, &show_traces_seq_ops);
3338         if (ret)
3339                 return ret;
3340
3341         m = file->private_data;
3342         m->private = tr;
3343
3344         return 0;
3345 }
3346
3347 static ssize_t
3348 tracing_write_stub(struct file *filp, const char __user *ubuf,
3349                    size_t count, loff_t *ppos)
3350 {
3351         return count;
3352 }
3353
3354 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3355 {
3356         int ret;
3357
3358         if (file->f_mode & FMODE_READ)
3359                 ret = seq_lseek(file, offset, whence);
3360         else
3361                 file->f_pos = ret = 0;
3362
3363         return ret;
3364 }
3365
3366 static const struct file_operations tracing_fops = {
3367         .open           = tracing_open,
3368         .read           = seq_read,
3369         .write          = tracing_write_stub,
3370         .llseek         = tracing_lseek,
3371         .release        = tracing_release,
3372 };
3373
3374 static const struct file_operations show_traces_fops = {
3375         .open           = show_traces_open,
3376         .read           = seq_read,
3377         .release        = seq_release,
3378         .llseek         = seq_lseek,
3379 };
3380
3381 /*
3382  * The tracer itself will not take this lock, but still we want
3383  * to provide a consistent cpumask to user-space:
3384  */
3385 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3386
3387 /*
3388  * Temporary storage for the character representation of the
3389  * CPU bitmask (and one more byte for the newline):
3390  */
3391 static char mask_str[NR_CPUS + 1];
3392
3393 static ssize_t
3394 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3395                      size_t count, loff_t *ppos)
3396 {
3397         struct trace_array *tr = file_inode(filp)->i_private;
3398         int len;
3399
3400         mutex_lock(&tracing_cpumask_update_lock);
3401
3402         len = snprintf(mask_str, count, "%*pb\n",
3403                        cpumask_pr_args(tr->tracing_cpumask));
3404         if (len >= count) {
3405                 count = -EINVAL;
3406                 goto out_err;
3407         }
3408         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3409
3410 out_err:
3411         mutex_unlock(&tracing_cpumask_update_lock);
3412
3413         return count;
3414 }
3415
3416 static ssize_t
3417 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3418                       size_t count, loff_t *ppos)
3419 {
3420         struct trace_array *tr = file_inode(filp)->i_private;
3421         cpumask_var_t tracing_cpumask_new;
3422         int err, cpu;
3423
3424         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3425                 return -ENOMEM;
3426
3427         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3428         if (err)
3429                 goto err_unlock;
3430
3431         mutex_lock(&tracing_cpumask_update_lock);
3432
3433         local_irq_disable();
3434         arch_spin_lock(&tr->max_lock);
3435         for_each_tracing_cpu(cpu) {
3436                 /*
3437                  * Increase/decrease the disabled counter if we are
3438                  * about to flip a bit in the cpumask:
3439                  */
3440                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3441                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3442                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3443                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3444                 }
3445                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3446                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3447                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3448                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3449                 }
3450         }
3451         arch_spin_unlock(&tr->max_lock);
3452         local_irq_enable();
3453
3454         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3455
3456         mutex_unlock(&tracing_cpumask_update_lock);
3457         free_cpumask_var(tracing_cpumask_new);
3458
3459         return count;
3460
3461 err_unlock:
3462         free_cpumask_var(tracing_cpumask_new);
3463
3464         return err;
3465 }
3466
3467 static const struct file_operations tracing_cpumask_fops = {
3468         .open           = tracing_open_generic_tr,
3469         .read           = tracing_cpumask_read,
3470         .write          = tracing_cpumask_write,
3471         .release        = tracing_release_generic_tr,
3472         .llseek         = generic_file_llseek,
3473 };
3474
3475 static int tracing_trace_options_show(struct seq_file *m, void *v)
3476 {
3477         struct tracer_opt *trace_opts;
3478         struct trace_array *tr = m->private;
3479         u32 tracer_flags;
3480         int i;
3481
3482         mutex_lock(&trace_types_lock);
3483         tracer_flags = tr->current_trace->flags->val;
3484         trace_opts = tr->current_trace->flags->opts;
3485
3486         for (i = 0; trace_options[i]; i++) {
3487                 if (tr->trace_flags & (1 << i))
3488                         seq_printf(m, "%s\n", trace_options[i]);
3489                 else
3490                         seq_printf(m, "no%s\n", trace_options[i]);
3491         }
3492
3493         for (i = 0; trace_opts[i].name; i++) {
3494                 if (tracer_flags & trace_opts[i].bit)
3495                         seq_printf(m, "%s\n", trace_opts[i].name);
3496                 else
3497                         seq_printf(m, "no%s\n", trace_opts[i].name);
3498         }
3499         mutex_unlock(&trace_types_lock);
3500
3501         return 0;
3502 }
3503
3504 static int __set_tracer_option(struct trace_array *tr,
3505                                struct tracer_flags *tracer_flags,
3506                                struct tracer_opt *opts, int neg)
3507 {
3508         struct tracer *trace = tr->current_trace;
3509         int ret;
3510
3511         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3512         if (ret)
3513                 return ret;
3514
3515         if (neg)
3516                 tracer_flags->val &= ~opts->bit;
3517         else
3518                 tracer_flags->val |= opts->bit;
3519         return 0;
3520 }
3521
3522 /* Try to assign a tracer specific option */
3523 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3524 {
3525         struct tracer *trace = tr->current_trace;
3526         struct tracer_flags *tracer_flags = trace->flags;
3527         struct tracer_opt *opts = NULL;
3528         int i;
3529
3530         for (i = 0; tracer_flags->opts[i].name; i++) {
3531                 opts = &tracer_flags->opts[i];
3532
3533                 if (strcmp(cmp, opts->name) == 0)
3534                         return __set_tracer_option(tr, trace->flags, opts, neg);
3535         }
3536
3537         return -EINVAL;
3538 }
3539
3540 /* Some tracers require overwrite to stay enabled */
3541 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3542 {
3543         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3544                 return -1;
3545
3546         return 0;
3547 }
3548
3549 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3550 {
3551         /* do nothing if flag is already set */
3552         if (!!(tr->trace_flags & mask) == !!enabled)
3553                 return 0;
3554
3555         /* Give the tracer a chance to approve the change */
3556         if (tr->current_trace->flag_changed)
3557                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3558                         return -EINVAL;
3559
3560         if (enabled)
3561                 tr->trace_flags |= mask;
3562         else
3563                 tr->trace_flags &= ~mask;
3564
3565         if (mask == TRACE_ITER_RECORD_CMD)
3566                 trace_event_enable_cmd_record(enabled);
3567
3568         if (mask == TRACE_ITER_OVERWRITE) {
3569                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3570 #ifdef CONFIG_TRACER_MAX_TRACE
3571                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3572 #endif
3573         }
3574
3575         if (mask == TRACE_ITER_PRINTK) {
3576                 trace_printk_start_stop_comm(enabled);
3577                 trace_printk_control(enabled);
3578         }
3579
3580         return 0;
3581 }
3582
3583 static int trace_set_options(struct trace_array *tr, char *option)
3584 {
3585         char *cmp;
3586         int neg = 0;
3587         int ret = -ENODEV;
3588         int i;
3589         size_t orig_len = strlen(option);
3590
3591         cmp = strstrip(option);
3592
3593         if (strncmp(cmp, "no", 2) == 0) {
3594                 neg = 1;
3595                 cmp += 2;
3596         }
3597
3598         mutex_lock(&trace_types_lock);
3599
3600         for (i = 0; trace_options[i]; i++) {
3601                 if (strcmp(cmp, trace_options[i]) == 0) {
3602                         ret = set_tracer_flag(tr, 1 << i, !neg);
3603                         break;
3604                 }
3605         }
3606
3607         /* If no option could be set, test the specific tracer options */
3608         if (!trace_options[i])
3609                 ret = set_tracer_option(tr, cmp, neg);
3610
3611         mutex_unlock(&trace_types_lock);
3612
3613         /*
3614          * If the first trailing whitespace is replaced with '\0' by strstrip,
3615          * turn it back into a space.
3616          */
3617         if (orig_len > strlen(option))
3618                 option[strlen(option)] = ' ';
3619
3620         return ret;
3621 }
3622
3623 static void __init apply_trace_boot_options(void)
3624 {
3625         char *buf = trace_boot_options_buf;
3626         char *option;
3627
3628         while (true) {
3629                 option = strsep(&buf, ",");
3630
3631                 if (!option)
3632                         break;
3633
3634                 if (*option)
3635                         trace_set_options(&global_trace, option);
3636
3637                 /* Put back the comma to allow this to be called again */
3638                 if (buf)
3639                         *(buf - 1) = ',';
3640         }
3641 }
3642
3643 static ssize_t
3644 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3645                         size_t cnt, loff_t *ppos)
3646 {
3647         struct seq_file *m = filp->private_data;
3648         struct trace_array *tr = m->private;
3649         char buf[64];
3650         int ret;
3651
3652         if (cnt >= sizeof(buf))
3653                 return -EINVAL;
3654
3655         if (copy_from_user(&buf, ubuf, cnt))
3656                 return -EFAULT;
3657
3658         buf[cnt] = 0;
3659
3660         ret = trace_set_options(tr, buf);
3661         if (ret < 0)
3662                 return ret;
3663
3664         *ppos += cnt;
3665
3666         return cnt;
3667 }
3668
3669 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3670 {
3671         struct trace_array *tr = inode->i_private;
3672         int ret;
3673
3674         if (tracing_disabled)
3675                 return -ENODEV;
3676
3677         if (trace_array_get(tr) < 0)
3678                 return -ENODEV;
3679
3680         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3681         if (ret < 0)
3682                 trace_array_put(tr);
3683
3684         return ret;
3685 }
3686
3687 static const struct file_operations tracing_iter_fops = {
3688         .open           = tracing_trace_options_open,
3689         .read           = seq_read,
3690         .llseek         = seq_lseek,
3691         .release        = tracing_single_release_tr,
3692         .write          = tracing_trace_options_write,
3693 };
3694
3695 static const char readme_msg[] =
3696         "tracing mini-HOWTO:\n\n"
3697         "# echo 0 > tracing_on : quick way to disable tracing\n"
3698         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3699         " Important files:\n"
3700         "  trace\t\t\t- The static contents of the buffer\n"
3701         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3702         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3703         "  current_tracer\t- function and latency tracers\n"
3704         "  available_tracers\t- list of configured tracers for current_tracer\n"
3705         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3706         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3707         "  trace_clock\t\t-change the clock used to order events\n"
3708         "       local:   Per cpu clock but may not be synced across CPUs\n"
3709         "      global:   Synced across CPUs but slows tracing down.\n"
3710         "     counter:   Not a clock, but just an increment\n"
3711         "      uptime:   Jiffy counter from time of boot\n"
3712         "        perf:   Same clock that perf events use\n"
3713 #ifdef CONFIG_X86_64
3714         "     x86-tsc:   TSC cycle counter\n"
3715 #endif
3716         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3717         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3718         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3719         "\t\t\t  Remove sub-buffer with rmdir\n"
3720         "  trace_options\t\t- Set format or modify how tracing happens\n"
3721         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3722         "\t\t\t  option name\n"
3723         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3724 #ifdef CONFIG_DYNAMIC_FTRACE
3725         "\n  available_filter_functions - list of functions that can be filtered on\n"
3726         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3727         "\t\t\t  functions\n"
3728         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3729         "\t     modules: Can select a group via module\n"
3730         "\t      Format: :mod:<module-name>\n"
3731         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3732         "\t    triggers: a command to perform when function is hit\n"
3733         "\t      Format: <function>:<trigger>[:count]\n"
3734         "\t     trigger: traceon, traceoff\n"
3735         "\t\t      enable_event:<system>:<event>\n"
3736         "\t\t      disable_event:<system>:<event>\n"
3737 #ifdef CONFIG_STACKTRACE
3738         "\t\t      stacktrace\n"
3739 #endif
3740 #ifdef CONFIG_TRACER_SNAPSHOT
3741         "\t\t      snapshot\n"
3742 #endif
3743         "\t\t      dump\n"
3744         "\t\t      cpudump\n"
3745         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3746         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3747         "\t     The first one will disable tracing every time do_fault is hit\n"
3748         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3749         "\t       The first time do trap is hit and it disables tracing, the\n"
3750         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3751         "\t       the counter will not decrement. It only decrements when the\n"
3752         "\t       trigger did work\n"
3753         "\t     To remove trigger without count:\n"
3754         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3755         "\t     To remove trigger with a count:\n"
3756         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3757         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3758         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3759         "\t    modules: Can select a group via module command :mod:\n"
3760         "\t    Does not accept triggers\n"
3761 #endif /* CONFIG_DYNAMIC_FTRACE */
3762 #ifdef CONFIG_FUNCTION_TRACER
3763         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3764         "\t\t    (function)\n"
3765 #endif
3766 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3767         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3768         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3769         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3770 #endif
3771 #ifdef CONFIG_TRACER_SNAPSHOT
3772         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3773         "\t\t\t  snapshot buffer. Read the contents for more\n"
3774         "\t\t\t  information\n"
3775 #endif
3776 #ifdef CONFIG_STACK_TRACER
3777         "  stack_trace\t\t- Shows the max stack trace when active\n"
3778         "  stack_max_size\t- Shows current max stack size that was traced\n"
3779         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3780         "\t\t\t  new trace)\n"
3781 #ifdef CONFIG_DYNAMIC_FTRACE
3782         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3783         "\t\t\t  traces\n"
3784 #endif
3785 #endif /* CONFIG_STACK_TRACER */
3786         "  events/\t\t- Directory containing all trace event subsystems:\n"
3787         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3788         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3789         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3790         "\t\t\t  events\n"
3791         "      filter\t\t- If set, only events passing filter are traced\n"
3792         "  events/<system>/<event>/\t- Directory containing control files for\n"
3793         "\t\t\t  <event>:\n"
3794         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3795         "      filter\t\t- If set, only events passing filter are traced\n"
3796         "      trigger\t\t- If set, a command to perform when event is hit\n"
3797         "\t    Format: <trigger>[:count][if <filter>]\n"
3798         "\t   trigger: traceon, traceoff\n"
3799         "\t            enable_event:<system>:<event>\n"
3800         "\t            disable_event:<system>:<event>\n"
3801 #ifdef CONFIG_STACKTRACE
3802         "\t\t    stacktrace\n"
3803 #endif
3804 #ifdef CONFIG_TRACER_SNAPSHOT
3805         "\t\t    snapshot\n"
3806 #endif
3807         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3808         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3809         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3810         "\t                  events/block/block_unplug/trigger\n"
3811         "\t   The first disables tracing every time block_unplug is hit.\n"
3812         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3813         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3814         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3815         "\t   Like function triggers, the counter is only decremented if it\n"
3816         "\t    enabled or disabled tracing.\n"
3817         "\t   To remove a trigger without a count:\n"
3818         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3819         "\t   To remove a trigger with a count:\n"
3820         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3821         "\t   Filters can be ignored when removing a trigger.\n"
3822 ;
3823
3824 static ssize_t
3825 tracing_readme_read(struct file *filp, char __user *ubuf,
3826                        size_t cnt, loff_t *ppos)
3827 {
3828         return simple_read_from_buffer(ubuf, cnt, ppos,
3829                                         readme_msg, strlen(readme_msg));
3830 }
3831
3832 static const struct file_operations tracing_readme_fops = {
3833         .open           = tracing_open_generic,
3834         .read           = tracing_readme_read,
3835         .llseek         = generic_file_llseek,
3836 };
3837
3838 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3839 {
3840         unsigned int *ptr = v;
3841
3842         if (*pos || m->count)
3843                 ptr++;
3844
3845         (*pos)++;
3846
3847         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3848              ptr++) {
3849                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3850                         continue;
3851
3852                 return ptr;
3853         }
3854
3855         return NULL;
3856 }
3857
3858 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3859 {
3860         void *v;
3861         loff_t l = 0;
3862
3863         preempt_disable();
3864         arch_spin_lock(&trace_cmdline_lock);
3865
3866         v = &savedcmd->map_cmdline_to_pid[0];
3867         while (l <= *pos) {
3868                 v = saved_cmdlines_next(m, v, &l);
3869                 if (!v)
3870                         return NULL;
3871         }
3872
3873         return v;
3874 }
3875
3876 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3877 {
3878         arch_spin_unlock(&trace_cmdline_lock);
3879         preempt_enable();
3880 }
3881
3882 static int saved_cmdlines_show(struct seq_file *m, void *v)
3883 {
3884         char buf[TASK_COMM_LEN];
3885         unsigned int *pid = v;
3886
3887         __trace_find_cmdline(*pid, buf);
3888         seq_printf(m, "%d %s\n", *pid, buf);
3889         return 0;
3890 }
3891
3892 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3893         .start          = saved_cmdlines_start,
3894         .next           = saved_cmdlines_next,
3895         .stop           = saved_cmdlines_stop,
3896         .show           = saved_cmdlines_show,
3897 };
3898
3899 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3900 {
3901         if (tracing_disabled)
3902                 return -ENODEV;
3903
3904         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3905 }
3906
3907 static const struct file_operations tracing_saved_cmdlines_fops = {
3908         .open           = tracing_saved_cmdlines_open,
3909         .read           = seq_read,
3910         .llseek         = seq_lseek,
3911         .release        = seq_release,
3912 };
3913
3914 static ssize_t
3915 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3916                                  size_t cnt, loff_t *ppos)
3917 {
3918         char buf[64];
3919         int r;
3920
3921         arch_spin_lock(&trace_cmdline_lock);
3922         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3923         arch_spin_unlock(&trace_cmdline_lock);
3924
3925         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3926 }
3927
3928 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3929 {
3930         kfree(s->saved_cmdlines);
3931         kfree(s->map_cmdline_to_pid);
3932         kfree(s);
3933 }
3934
3935 static int tracing_resize_saved_cmdlines(unsigned int val)
3936 {
3937         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3938
3939         s = kmalloc(sizeof(*s), GFP_KERNEL);
3940         if (!s)
3941                 return -ENOMEM;
3942
3943         if (allocate_cmdlines_buffer(val, s) < 0) {
3944                 kfree(s);
3945                 return -ENOMEM;
3946         }
3947
3948         arch_spin_lock(&trace_cmdline_lock);
3949         savedcmd_temp = savedcmd;
3950         savedcmd = s;
3951         arch_spin_unlock(&trace_cmdline_lock);
3952         free_saved_cmdlines_buffer(savedcmd_temp);
3953
3954         return 0;
3955 }
3956
3957 static ssize_t
3958 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3959                                   size_t cnt, loff_t *ppos)
3960 {
3961         unsigned long val;
3962         int ret;
3963
3964         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3965         if (ret)
3966                 return ret;
3967
3968         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
3969         if (!val || val > PID_MAX_DEFAULT)
3970                 return -EINVAL;
3971
3972         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3973         if (ret < 0)
3974                 return ret;
3975
3976         *ppos += cnt;
3977
3978         return cnt;
3979 }
3980
3981 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3982         .open           = tracing_open_generic,
3983         .read           = tracing_saved_cmdlines_size_read,
3984         .write          = tracing_saved_cmdlines_size_write,
3985 };
3986
3987 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
3988 static union trace_enum_map_item *
3989 update_enum_map(union trace_enum_map_item *ptr)
3990 {
3991         if (!ptr->map.enum_string) {
3992                 if (ptr->tail.next) {
3993                         ptr = ptr->tail.next;
3994                         /* Set ptr to the next real item (skip head) */
3995                         ptr++;
3996                 } else
3997                         return NULL;
3998         }
3999         return ptr;
4000 }
4001
4002 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4003 {
4004         union trace_enum_map_item *ptr = v;
4005
4006         /*
4007          * Paranoid! If ptr points to end, we don't want to increment past it.
4008          * This really should never happen.
4009          */
4010         ptr = update_enum_map(ptr);
4011         if (WARN_ON_ONCE(!ptr))
4012                 return NULL;
4013
4014         ptr++;
4015
4016         (*pos)++;
4017
4018         ptr = update_enum_map(ptr);
4019
4020         return ptr;
4021 }
4022
4023 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4024 {
4025         union trace_enum_map_item *v;
4026         loff_t l = 0;
4027
4028         mutex_lock(&trace_enum_mutex);
4029
4030         v = trace_enum_maps;
4031         if (v)
4032                 v++;
4033
4034         while (v && l < *pos) {
4035                 v = enum_map_next(m, v, &l);
4036         }
4037
4038         return v;
4039 }
4040
4041 static void enum_map_stop(struct seq_file *m, void *v)
4042 {
4043         mutex_unlock(&trace_enum_mutex);
4044 }
4045
4046 static int enum_map_show(struct seq_file *m, void *v)
4047 {
4048         union trace_enum_map_item *ptr = v;
4049
4050         seq_printf(m, "%s %ld (%s)\n",
4051                    ptr->map.enum_string, ptr->map.enum_value,
4052                    ptr->map.system);
4053
4054         return 0;
4055 }
4056
4057 static const struct seq_operations tracing_enum_map_seq_ops = {
4058         .start          = enum_map_start,
4059         .next           = enum_map_next,
4060         .stop           = enum_map_stop,
4061         .show           = enum_map_show,
4062 };
4063
4064 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4065 {
4066         if (tracing_disabled)
4067                 return -ENODEV;
4068
4069         return seq_open(filp, &tracing_enum_map_seq_ops);
4070 }
4071
4072 static const struct file_operations tracing_enum_map_fops = {
4073         .open           = tracing_enum_map_open,
4074         .read           = seq_read,
4075         .llseek         = seq_lseek,
4076         .release        = seq_release,
4077 };
4078
4079 static inline union trace_enum_map_item *
4080 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4081 {
4082         /* Return tail of array given the head */
4083         return ptr + ptr->head.length + 1;
4084 }
4085
4086 static void
4087 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4088                            int len)
4089 {
4090         struct trace_enum_map **stop;
4091         struct trace_enum_map **map;
4092         union trace_enum_map_item *map_array;
4093         union trace_enum_map_item *ptr;
4094
4095         stop = start + len;
4096
4097         /*
4098          * The trace_enum_maps contains the map plus a head and tail item,
4099          * where the head holds the module and length of array, and the
4100          * tail holds a pointer to the next list.
4101          */
4102         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4103         if (!map_array) {
4104                 pr_warning("Unable to allocate trace enum mapping\n");
4105                 return;
4106         }
4107
4108         mutex_lock(&trace_enum_mutex);
4109
4110         if (!trace_enum_maps)
4111                 trace_enum_maps = map_array;
4112         else {
4113                 ptr = trace_enum_maps;
4114                 for (;;) {
4115                         ptr = trace_enum_jmp_to_tail(ptr);
4116                         if (!ptr->tail.next)
4117                                 break;
4118                         ptr = ptr->tail.next;
4119
4120                 }
4121                 ptr->tail.next = map_array;
4122         }
4123         map_array->head.mod = mod;
4124         map_array->head.length = len;
4125         map_array++;
4126
4127         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4128                 map_array->map = **map;
4129                 map_array++;
4130         }
4131         memset(map_array, 0, sizeof(*map_array));
4132
4133         mutex_unlock(&trace_enum_mutex);
4134 }
4135
4136 static void trace_create_enum_file(struct dentry *d_tracer)
4137 {
4138         trace_create_file("enum_map", 0444, d_tracer,
4139                           NULL, &tracing_enum_map_fops);
4140 }
4141
4142 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4143 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4144 static inline void trace_insert_enum_map_file(struct module *mod,
4145                               struct trace_enum_map **start, int len) { }
4146 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4147
4148 static void trace_insert_enum_map(struct module *mod,
4149                                   struct trace_enum_map **start, int len)
4150 {
4151         struct trace_enum_map **map;
4152
4153         if (len <= 0)
4154                 return;
4155
4156         map = start;
4157
4158         trace_event_enum_update(map, len);
4159
4160         trace_insert_enum_map_file(mod, start, len);
4161 }
4162
4163 static ssize_t
4164 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4165                        size_t cnt, loff_t *ppos)
4166 {
4167         struct trace_array *tr = filp->private_data;
4168         char buf[MAX_TRACER_SIZE+2];
4169         int r;
4170
4171         mutex_lock(&trace_types_lock);
4172         r = sprintf(buf, "%s\n", tr->current_trace->name);
4173         mutex_unlock(&trace_types_lock);
4174
4175         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4176 }
4177
4178 int tracer_init(struct tracer *t, struct trace_array *tr)
4179 {
4180         tracing_reset_online_cpus(&tr->trace_buffer);
4181         return t->init(tr);
4182 }
4183
4184 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4185 {
4186         int cpu;
4187
4188         for_each_tracing_cpu(cpu)
4189                 per_cpu_ptr(buf->data, cpu)->entries = val;
4190 }
4191
4192 #ifdef CONFIG_TRACER_MAX_TRACE
4193 /* resize @tr's buffer to the size of @size_tr's entries */
4194 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4195                                         struct trace_buffer *size_buf, int cpu_id)
4196 {
4197         int cpu, ret = 0;
4198
4199         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4200                 for_each_tracing_cpu(cpu) {
4201                         ret = ring_buffer_resize(trace_buf->buffer,
4202                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4203                         if (ret < 0)
4204                                 break;
4205                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4206                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4207                 }
4208         } else {
4209                 ret = ring_buffer_resize(trace_buf->buffer,
4210                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4211                 if (ret == 0)
4212                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4213                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4214         }
4215
4216         return ret;
4217 }
4218 #endif /* CONFIG_TRACER_MAX_TRACE */
4219
4220 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4221                                         unsigned long size, int cpu)
4222 {
4223         int ret;
4224
4225         /*
4226          * If kernel or user changes the size of the ring buffer
4227          * we use the size that was given, and we can forget about
4228          * expanding it later.
4229          */
4230         ring_buffer_expanded = true;
4231
4232         /* May be called before buffers are initialized */
4233         if (!tr->trace_buffer.buffer)
4234                 return 0;
4235
4236         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4237         if (ret < 0)
4238                 return ret;
4239
4240 #ifdef CONFIG_TRACER_MAX_TRACE
4241         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4242             !tr->current_trace->use_max_tr)
4243                 goto out;
4244
4245         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4246         if (ret < 0) {
4247                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4248                                                      &tr->trace_buffer, cpu);
4249                 if (r < 0) {
4250                         /*
4251                          * AARGH! We are left with different
4252                          * size max buffer!!!!
4253                          * The max buffer is our "snapshot" buffer.
4254                          * When a tracer needs a snapshot (one of the
4255                          * latency tracers), it swaps the max buffer
4256                          * with the saved snap shot. We succeeded to
4257                          * update the size of the main buffer, but failed to
4258                          * update the size of the max buffer. But when we tried
4259                          * to reset the main buffer to the original size, we
4260                          * failed there too. This is very unlikely to
4261                          * happen, but if it does, warn and kill all
4262                          * tracing.
4263                          */
4264                         WARN_ON(1);
4265                         tracing_disabled = 1;
4266                 }
4267                 return ret;
4268         }
4269
4270         if (cpu == RING_BUFFER_ALL_CPUS)
4271                 set_buffer_entries(&tr->max_buffer, size);
4272         else
4273                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4274
4275  out:
4276 #endif /* CONFIG_TRACER_MAX_TRACE */
4277
4278         if (cpu == RING_BUFFER_ALL_CPUS)
4279                 set_buffer_entries(&tr->trace_buffer, size);
4280         else
4281                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4282
4283         return ret;
4284 }
4285
4286 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4287                                           unsigned long size, int cpu_id)
4288 {
4289         int ret = size;
4290
4291         mutex_lock(&trace_types_lock);
4292
4293         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4294                 /* make sure, this cpu is enabled in the mask */
4295                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4296                         ret = -EINVAL;
4297                         goto out;
4298                 }
4299         }
4300
4301         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4302         if (ret < 0)
4303                 ret = -ENOMEM;
4304
4305 out:
4306         mutex_unlock(&trace_types_lock);
4307
4308         return ret;
4309 }
4310
4311
4312 /**
4313  * tracing_update_buffers - used by tracing facility to expand ring buffers
4314  *
4315  * To save on memory when the tracing is never used on a system with it
4316  * configured in. The ring buffers are set to a minimum size. But once
4317  * a user starts to use the tracing facility, then they need to grow
4318  * to their default size.
4319  *
4320  * This function is to be called when a tracer is about to be used.
4321  */
4322 int tracing_update_buffers(void)
4323 {
4324         int ret = 0;
4325
4326         mutex_lock(&trace_types_lock);
4327         if (!ring_buffer_expanded)
4328                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4329                                                 RING_BUFFER_ALL_CPUS);
4330         mutex_unlock(&trace_types_lock);
4331
4332         return ret;
4333 }
4334
4335 struct trace_option_dentry;
4336
4337 static void
4338 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4339
4340 /*
4341  * Used to clear out the tracer before deletion of an instance.
4342  * Must have trace_types_lock held.
4343  */
4344 static void tracing_set_nop(struct trace_array *tr)
4345 {
4346         if (tr->current_trace == &nop_trace)
4347                 return;
4348         
4349         tr->current_trace->enabled--;
4350
4351         if (tr->current_trace->reset)
4352                 tr->current_trace->reset(tr);
4353
4354         tr->current_trace = &nop_trace;
4355 }
4356
4357 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4358 {
4359         /* Only enable if the directory has been created already. */
4360         if (!tr->dir)
4361                 return;
4362
4363         create_trace_option_files(tr, t);
4364 }
4365
4366 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4367 {
4368         struct tracer *t;
4369 #ifdef CONFIG_TRACER_MAX_TRACE
4370         bool had_max_tr;
4371 #endif
4372         int ret = 0;
4373
4374         mutex_lock(&trace_types_lock);
4375
4376         if (!ring_buffer_expanded) {
4377                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4378                                                 RING_BUFFER_ALL_CPUS);
4379                 if (ret < 0)
4380                         goto out;
4381                 ret = 0;
4382         }
4383
4384         for (t = trace_types; t; t = t->next) {
4385                 if (strcmp(t->name, buf) == 0)
4386                         break;
4387         }
4388         if (!t) {
4389                 ret = -EINVAL;
4390                 goto out;
4391         }
4392         if (t == tr->current_trace)
4393                 goto out;
4394
4395         /* Some tracers are only allowed for the top level buffer */
4396         if (!trace_ok_for_array(t, tr)) {
4397                 ret = -EINVAL;
4398                 goto out;
4399         }
4400
4401         /* If trace pipe files are being read, we can't change the tracer */
4402         if (tr->current_trace->ref) {
4403                 ret = -EBUSY;
4404                 goto out;
4405         }
4406
4407         trace_branch_disable();
4408
4409         tr->current_trace->enabled--;
4410
4411         if (tr->current_trace->reset)
4412                 tr->current_trace->reset(tr);
4413
4414         /* Current trace needs to be nop_trace before synchronize_sched */
4415         tr->current_trace = &nop_trace;
4416
4417 #ifdef CONFIG_TRACER_MAX_TRACE
4418         had_max_tr = tr->allocated_snapshot;
4419
4420         if (had_max_tr && !t->use_max_tr) {
4421                 /*
4422                  * We need to make sure that the update_max_tr sees that
4423                  * current_trace changed to nop_trace to keep it from
4424                  * swapping the buffers after we resize it.
4425                  * The update_max_tr is called from interrupts disabled
4426                  * so a synchronized_sched() is sufficient.
4427                  */
4428                 synchronize_sched();
4429                 free_snapshot(tr);
4430         }
4431 #endif
4432
4433 #ifdef CONFIG_TRACER_MAX_TRACE
4434         if (t->use_max_tr && !had_max_tr) {
4435                 ret = alloc_snapshot(tr);
4436                 if (ret < 0)
4437                         goto out;
4438         }
4439 #endif
4440
4441         if (t->init) {
4442                 ret = tracer_init(t, tr);
4443                 if (ret)
4444                         goto out;
4445         }
4446
4447         tr->current_trace = t;
4448         tr->current_trace->enabled++;
4449         trace_branch_enable(tr);
4450  out:
4451         mutex_unlock(&trace_types_lock);
4452
4453         return ret;
4454 }
4455
4456 static ssize_t
4457 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4458                         size_t cnt, loff_t *ppos)
4459 {
4460         struct trace_array *tr = filp->private_data;
4461         char buf[MAX_TRACER_SIZE+1];
4462         int i;
4463         size_t ret;
4464         int err;
4465
4466         ret = cnt;
4467
4468         if (cnt > MAX_TRACER_SIZE)
4469                 cnt = MAX_TRACER_SIZE;
4470
4471         if (copy_from_user(&buf, ubuf, cnt))
4472                 return -EFAULT;
4473
4474         buf[cnt] = 0;
4475
4476         /* strip ending whitespace. */
4477         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4478                 buf[i] = 0;
4479
4480         err = tracing_set_tracer(tr, buf);
4481         if (err)
4482                 return err;
4483
4484         *ppos += ret;
4485
4486         return ret;
4487 }
4488
4489 static ssize_t
4490 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4491                    size_t cnt, loff_t *ppos)
4492 {
4493         char buf[64];
4494         int r;
4495
4496         r = snprintf(buf, sizeof(buf), "%ld\n",
4497                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4498         if (r > sizeof(buf))
4499                 r = sizeof(buf);
4500         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4501 }
4502
4503 static ssize_t
4504 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4505                     size_t cnt, loff_t *ppos)
4506 {
4507         unsigned long val;
4508         int ret;
4509
4510         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4511         if (ret)
4512                 return ret;
4513
4514         *ptr = val * 1000;
4515
4516         return cnt;
4517 }
4518
4519 static ssize_t
4520 tracing_thresh_read(struct file *filp, char __user *ubuf,
4521                     size_t cnt, loff_t *ppos)
4522 {
4523         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4524 }
4525
4526 static ssize_t
4527 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4528                      size_t cnt, loff_t *ppos)
4529 {
4530         struct trace_array *tr = filp->private_data;
4531         int ret;
4532
4533         mutex_lock(&trace_types_lock);
4534         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4535         if (ret < 0)
4536                 goto out;
4537
4538         if (tr->current_trace->update_thresh) {
4539                 ret = tr->current_trace->update_thresh(tr);
4540                 if (ret < 0)
4541                         goto out;
4542         }
4543
4544         ret = cnt;
4545 out:
4546         mutex_unlock(&trace_types_lock);
4547
4548         return ret;
4549 }
4550
4551 #ifdef CONFIG_TRACER_MAX_TRACE
4552
4553 static ssize_t
4554 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4555                      size_t cnt, loff_t *ppos)
4556 {
4557         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4558 }
4559
4560 static ssize_t
4561 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4562                       size_t cnt, loff_t *ppos)
4563 {
4564         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4565 }
4566
4567 #endif
4568
4569 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4570 {
4571         struct trace_array *tr = inode->i_private;
4572         struct trace_iterator *iter;
4573         int ret = 0;
4574
4575         if (tracing_disabled)
4576                 return -ENODEV;
4577
4578         if (trace_array_get(tr) < 0)
4579                 return -ENODEV;
4580
4581         mutex_lock(&trace_types_lock);
4582
4583         /* create a buffer to store the information to pass to userspace */
4584         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4585         if (!iter) {
4586                 ret = -ENOMEM;
4587                 __trace_array_put(tr);
4588                 goto out;
4589         }
4590
4591         trace_seq_init(&iter->seq);
4592         iter->trace = tr->current_trace;
4593
4594         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4595                 ret = -ENOMEM;
4596                 goto fail;
4597         }
4598
4599         /* trace pipe does not show start of buffer */
4600         cpumask_setall(iter->started);
4601
4602         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4603                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4604
4605         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4606         if (trace_clocks[tr->clock_id].in_ns)
4607                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4608
4609         iter->tr = tr;
4610         iter->trace_buffer = &tr->trace_buffer;
4611         iter->cpu_file = tracing_get_cpu(inode);
4612         mutex_init(&iter->mutex);
4613         filp->private_data = iter;
4614
4615         if (iter->trace->pipe_open)
4616                 iter->trace->pipe_open(iter);
4617
4618         nonseekable_open(inode, filp);
4619
4620         tr->current_trace->ref++;
4621 out:
4622         mutex_unlock(&trace_types_lock);
4623         return ret;
4624
4625 fail:
4626         kfree(iter->trace);
4627         kfree(iter);
4628         __trace_array_put(tr);
4629         mutex_unlock(&trace_types_lock);
4630         return ret;
4631 }
4632
4633 static int tracing_release_pipe(struct inode *inode, struct file *file)
4634 {
4635         struct trace_iterator *iter = file->private_data;
4636         struct trace_array *tr = inode->i_private;
4637
4638         mutex_lock(&trace_types_lock);
4639
4640         tr->current_trace->ref--;
4641
4642         if (iter->trace->pipe_close)
4643                 iter->trace->pipe_close(iter);
4644
4645         mutex_unlock(&trace_types_lock);
4646
4647         free_cpumask_var(iter->started);
4648         mutex_destroy(&iter->mutex);
4649         kfree(iter);
4650
4651         trace_array_put(tr);
4652
4653         return 0;
4654 }
4655
4656 static unsigned int
4657 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4658 {
4659         struct trace_array *tr = iter->tr;
4660
4661         /* Iterators are static, they should be filled or empty */
4662         if (trace_buffer_iter(iter, iter->cpu_file))
4663                 return POLLIN | POLLRDNORM;
4664
4665         if (tr->trace_flags & TRACE_ITER_BLOCK)
4666                 /*
4667                  * Always select as readable when in blocking mode
4668                  */
4669                 return POLLIN | POLLRDNORM;
4670         else
4671                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4672                                              filp, poll_table);
4673 }
4674
4675 static unsigned int
4676 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4677 {
4678         struct trace_iterator *iter = filp->private_data;
4679
4680         return trace_poll(iter, filp, poll_table);
4681 }
4682
4683 /* Must be called with iter->mutex held. */
4684 static int tracing_wait_pipe(struct file *filp)
4685 {
4686         struct trace_iterator *iter = filp->private_data;
4687         int ret;
4688
4689         while (trace_empty(iter)) {
4690
4691                 if ((filp->f_flags & O_NONBLOCK)) {
4692                         return -EAGAIN;
4693                 }
4694
4695                 /*
4696                  * We block until we read something and tracing is disabled.
4697                  * We still block if tracing is disabled, but we have never
4698                  * read anything. This allows a user to cat this file, and
4699                  * then enable tracing. But after we have read something,
4700                  * we give an EOF when tracing is again disabled.
4701                  *
4702                  * iter->pos will be 0 if we haven't read anything.
4703                  */
4704                 if (!tracing_is_on() && iter->pos)
4705                         break;
4706
4707                 mutex_unlock(&iter->mutex);
4708
4709                 ret = wait_on_pipe(iter, false);
4710
4711                 mutex_lock(&iter->mutex);
4712
4713                 if (ret)
4714                         return ret;
4715         }
4716
4717         return 1;
4718 }
4719
4720 /*
4721  * Consumer reader.
4722  */
4723 static ssize_t
4724 tracing_read_pipe(struct file *filp, char __user *ubuf,
4725                   size_t cnt, loff_t *ppos)
4726 {
4727         struct trace_iterator *iter = filp->private_data;
4728         ssize_t sret;
4729
4730         /* return any leftover data */
4731         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4732         if (sret != -EBUSY)
4733                 return sret;
4734
4735         trace_seq_init(&iter->seq);
4736
4737         /*
4738          * Avoid more than one consumer on a single file descriptor
4739          * This is just a matter of traces coherency, the ring buffer itself
4740          * is protected.
4741          */
4742         mutex_lock(&iter->mutex);
4743         if (iter->trace->read) {
4744                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4745                 if (sret)
4746                         goto out;
4747         }
4748
4749 waitagain:
4750         sret = tracing_wait_pipe(filp);
4751         if (sret <= 0)
4752                 goto out;
4753
4754         /* stop when tracing is finished */
4755         if (trace_empty(iter)) {
4756                 sret = 0;
4757                 goto out;
4758         }
4759
4760         if (cnt >= PAGE_SIZE)
4761                 cnt = PAGE_SIZE - 1;
4762
4763         /* reset all but tr, trace, and overruns */
4764         memset(&iter->seq, 0,
4765                sizeof(struct trace_iterator) -
4766                offsetof(struct trace_iterator, seq));
4767         cpumask_clear(iter->started);
4768         iter->pos = -1;
4769
4770         trace_event_read_lock();
4771         trace_access_lock(iter->cpu_file);
4772         while (trace_find_next_entry_inc(iter) != NULL) {
4773                 enum print_line_t ret;
4774                 int save_len = iter->seq.seq.len;
4775
4776                 ret = print_trace_line(iter);
4777                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4778                         /* don't print partial lines */
4779                         iter->seq.seq.len = save_len;
4780                         break;
4781                 }
4782                 if (ret != TRACE_TYPE_NO_CONSUME)
4783                         trace_consume(iter);
4784
4785                 if (trace_seq_used(&iter->seq) >= cnt)
4786                         break;
4787
4788                 /*
4789                  * Setting the full flag means we reached the trace_seq buffer
4790                  * size and we should leave by partial output condition above.
4791                  * One of the trace_seq_* functions is not used properly.
4792                  */
4793                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4794                           iter->ent->type);
4795         }
4796         trace_access_unlock(iter->cpu_file);
4797         trace_event_read_unlock();
4798
4799         /* Now copy what we have to the user */
4800         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4801         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4802                 trace_seq_init(&iter->seq);
4803
4804         /*
4805          * If there was nothing to send to user, in spite of consuming trace
4806          * entries, go back to wait for more entries.
4807          */
4808         if (sret == -EBUSY)
4809                 goto waitagain;
4810
4811 out:
4812         mutex_unlock(&iter->mutex);
4813
4814         return sret;
4815 }
4816
4817 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4818                                      unsigned int idx)
4819 {
4820         __free_page(spd->pages[idx]);
4821 }
4822
4823 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4824         .can_merge              = 0,
4825         .confirm                = generic_pipe_buf_confirm,
4826         .release                = generic_pipe_buf_release,
4827         .steal                  = generic_pipe_buf_steal,
4828         .get                    = generic_pipe_buf_get,
4829 };
4830
4831 static size_t
4832 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4833 {
4834         size_t count;
4835         int save_len;
4836         int ret;
4837
4838         /* Seq buffer is page-sized, exactly what we need. */
4839         for (;;) {
4840                 save_len = iter->seq.seq.len;
4841                 ret = print_trace_line(iter);
4842
4843                 if (trace_seq_has_overflowed(&iter->seq)) {
4844                         iter->seq.seq.len = save_len;
4845                         break;
4846                 }
4847
4848                 /*
4849                  * This should not be hit, because it should only
4850                  * be set if the iter->seq overflowed. But check it
4851                  * anyway to be safe.
4852                  */
4853                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4854                         iter->seq.seq.len = save_len;
4855                         break;
4856                 }
4857
4858                 count = trace_seq_used(&iter->seq) - save_len;
4859                 if (rem < count) {
4860                         rem = 0;
4861                         iter->seq.seq.len = save_len;
4862                         break;
4863                 }
4864
4865                 if (ret != TRACE_TYPE_NO_CONSUME)
4866                         trace_consume(iter);
4867                 rem -= count;
4868                 if (!trace_find_next_entry_inc(iter))   {
4869                         rem = 0;
4870                         iter->ent = NULL;
4871                         break;
4872                 }
4873         }
4874
4875         return rem;
4876 }
4877
4878 static ssize_t tracing_splice_read_pipe(struct file *filp,
4879                                         loff_t *ppos,
4880                                         struct pipe_inode_info *pipe,
4881                                         size_t len,
4882                                         unsigned int flags)
4883 {
4884         struct page *pages_def[PIPE_DEF_BUFFERS];
4885         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4886         struct trace_iterator *iter = filp->private_data;
4887         struct splice_pipe_desc spd = {
4888                 .pages          = pages_def,
4889                 .partial        = partial_def,
4890                 .nr_pages       = 0, /* This gets updated below. */
4891                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4892                 .flags          = flags,
4893                 .ops            = &tracing_pipe_buf_ops,
4894                 .spd_release    = tracing_spd_release_pipe,
4895         };
4896         ssize_t ret;
4897         size_t rem;
4898         unsigned int i;
4899
4900         if (splice_grow_spd(pipe, &spd))
4901                 return -ENOMEM;
4902
4903         mutex_lock(&iter->mutex);
4904
4905         if (iter->trace->splice_read) {
4906                 ret = iter->trace->splice_read(iter, filp,
4907                                                ppos, pipe, len, flags);
4908                 if (ret)
4909                         goto out_err;
4910         }
4911
4912         ret = tracing_wait_pipe(filp);
4913         if (ret <= 0)
4914                 goto out_err;
4915
4916         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4917                 ret = -EFAULT;
4918                 goto out_err;
4919         }
4920
4921         trace_event_read_lock();
4922         trace_access_lock(iter->cpu_file);
4923
4924         /* Fill as many pages as possible. */
4925         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4926                 spd.pages[i] = alloc_page(GFP_KERNEL);
4927                 if (!spd.pages[i])
4928                         break;
4929
4930                 rem = tracing_fill_pipe_page(rem, iter);
4931
4932                 /* Copy the data into the page, so we can start over. */
4933                 ret = trace_seq_to_buffer(&iter->seq,
4934                                           page_address(spd.pages[i]),
4935                                           trace_seq_used(&iter->seq));
4936                 if (ret < 0) {
4937                         __free_page(spd.pages[i]);
4938                         break;
4939                 }
4940                 spd.partial[i].offset = 0;
4941                 spd.partial[i].len = trace_seq_used(&iter->seq);
4942
4943                 trace_seq_init(&iter->seq);
4944         }
4945
4946         trace_access_unlock(iter->cpu_file);
4947         trace_event_read_unlock();
4948         mutex_unlock(&iter->mutex);
4949
4950         spd.nr_pages = i;
4951
4952         if (i)
4953                 ret = splice_to_pipe(pipe, &spd);
4954         else
4955                 ret = 0;
4956 out:
4957         splice_shrink_spd(&spd);
4958         return ret;
4959
4960 out_err:
4961         mutex_unlock(&iter->mutex);
4962         goto out;
4963 }
4964
4965 static ssize_t
4966 tracing_entries_read(struct file *filp, char __user *ubuf,
4967                      size_t cnt, loff_t *ppos)
4968 {
4969         struct inode *inode = file_inode(filp);
4970         struct trace_array *tr = inode->i_private;
4971         int cpu = tracing_get_cpu(inode);
4972         char buf[64];
4973         int r = 0;
4974         ssize_t ret;
4975
4976         mutex_lock(&trace_types_lock);
4977
4978         if (cpu == RING_BUFFER_ALL_CPUS) {
4979                 int cpu, buf_size_same;
4980                 unsigned long size;
4981
4982                 size = 0;
4983                 buf_size_same = 1;
4984                 /* check if all cpu sizes are same */
4985                 for_each_tracing_cpu(cpu) {
4986                         /* fill in the size from first enabled cpu */
4987                         if (size == 0)
4988                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4989                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4990                                 buf_size_same = 0;
4991                                 break;
4992                         }
4993                 }
4994
4995                 if (buf_size_same) {
4996                         if (!ring_buffer_expanded)
4997                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4998                                             size >> 10,
4999                                             trace_buf_size >> 10);
5000                         else
5001                                 r = sprintf(buf, "%lu\n", size >> 10);
5002                 } else
5003                         r = sprintf(buf, "X\n");
5004         } else
5005                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5006
5007         mutex_unlock(&trace_types_lock);
5008
5009         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5010         return ret;
5011 }
5012
5013 static ssize_t
5014 tracing_entries_write(struct file *filp, const char __user *ubuf,
5015                       size_t cnt, loff_t *ppos)
5016 {
5017         struct inode *inode = file_inode(filp);
5018         struct trace_array *tr = inode->i_private;
5019         unsigned long val;
5020         int ret;
5021
5022         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5023         if (ret)
5024                 return ret;
5025
5026         /* must have at least 1 entry */
5027         if (!val)
5028                 return -EINVAL;
5029
5030         /* value is in KB */
5031         val <<= 10;
5032         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5033         if (ret < 0)
5034                 return ret;
5035
5036         *ppos += cnt;
5037
5038         return cnt;
5039 }
5040
5041 static ssize_t
5042 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5043                                 size_t cnt, loff_t *ppos)
5044 {
5045         struct trace_array *tr = filp->private_data;
5046         char buf[64];
5047         int r, cpu;
5048         unsigned long size = 0, expanded_size = 0;
5049
5050         mutex_lock(&trace_types_lock);
5051         for_each_tracing_cpu(cpu) {
5052                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5053                 if (!ring_buffer_expanded)
5054                         expanded_size += trace_buf_size >> 10;
5055         }
5056         if (ring_buffer_expanded)
5057                 r = sprintf(buf, "%lu\n", size);
5058         else
5059                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5060         mutex_unlock(&trace_types_lock);
5061
5062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5063 }
5064
5065 static ssize_t
5066 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5067                           size_t cnt, loff_t *ppos)
5068 {
5069         /*
5070          * There is no need to read what the user has written, this function
5071          * is just to make sure that there is no error when "echo" is used
5072          */
5073
5074         *ppos += cnt;
5075
5076         return cnt;
5077 }
5078
5079 static int
5080 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5081 {
5082         struct trace_array *tr = inode->i_private;
5083
5084         /* disable tracing ? */
5085         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5086                 tracer_tracing_off(tr);
5087         /* resize the ring buffer to 0 */
5088         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5089
5090         trace_array_put(tr);
5091
5092         return 0;
5093 }
5094
5095 static ssize_t
5096 tracing_mark_write(struct file *filp, const char __user *ubuf,
5097                                         size_t cnt, loff_t *fpos)
5098 {
5099         unsigned long addr = (unsigned long)ubuf;
5100         struct trace_array *tr = filp->private_data;
5101         struct ring_buffer_event *event;
5102         struct ring_buffer *buffer;
5103         struct print_entry *entry;
5104         unsigned long irq_flags;
5105         struct page *pages[2];
5106         void *map_page[2];
5107         int nr_pages = 1;
5108         ssize_t written;
5109         int offset;
5110         int size;
5111         int len;
5112         int ret;
5113         int i;
5114
5115         if (tracing_disabled)
5116                 return -EINVAL;
5117
5118         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5119                 return -EINVAL;
5120
5121         if (cnt > TRACE_BUF_SIZE)
5122                 cnt = TRACE_BUF_SIZE;
5123
5124         /*
5125          * Userspace is injecting traces into the kernel trace buffer.
5126          * We want to be as non intrusive as possible.
5127          * To do so, we do not want to allocate any special buffers
5128          * or take any locks, but instead write the userspace data
5129          * straight into the ring buffer.
5130          *
5131          * First we need to pin the userspace buffer into memory,
5132          * which, most likely it is, because it just referenced it.
5133          * But there's no guarantee that it is. By using get_user_pages_fast()
5134          * and kmap_atomic/kunmap_atomic() we can get access to the
5135          * pages directly. We then write the data directly into the
5136          * ring buffer.
5137          */
5138         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5139
5140         /* check if we cross pages */
5141         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5142                 nr_pages = 2;
5143
5144         offset = addr & (PAGE_SIZE - 1);
5145         addr &= PAGE_MASK;
5146
5147         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5148         if (ret < nr_pages) {
5149                 while (--ret >= 0)
5150                         put_page(pages[ret]);
5151                 written = -EFAULT;
5152                 goto out;
5153         }
5154
5155         for (i = 0; i < nr_pages; i++)
5156                 map_page[i] = kmap_atomic(pages[i]);
5157
5158         local_save_flags(irq_flags);
5159         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5160         buffer = tr->trace_buffer.buffer;
5161         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5162                                           irq_flags, preempt_count());
5163         if (!event) {
5164                 /* Ring buffer disabled, return as if not open for write */
5165                 written = -EBADF;
5166                 goto out_unlock;
5167         }
5168
5169         entry = ring_buffer_event_data(event);
5170         entry->ip = _THIS_IP_;
5171
5172         if (nr_pages == 2) {
5173                 len = PAGE_SIZE - offset;
5174                 memcpy(&entry->buf, map_page[0] + offset, len);
5175                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5176         } else
5177                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5178
5179         if (entry->buf[cnt - 1] != '\n') {
5180                 entry->buf[cnt] = '\n';
5181                 entry->buf[cnt + 1] = '\0';
5182         } else
5183                 entry->buf[cnt] = '\0';
5184
5185         __buffer_unlock_commit(buffer, event);
5186
5187         written = cnt;
5188
5189         *fpos += written;
5190
5191  out_unlock:
5192         for (i = nr_pages - 1; i >= 0; i--) {
5193                 kunmap_atomic(map_page[i]);
5194                 put_page(pages[i]);
5195         }
5196  out:
5197         return written;
5198 }
5199
5200 static int tracing_clock_show(struct seq_file *m, void *v)
5201 {
5202         struct trace_array *tr = m->private;
5203         int i;
5204
5205         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5206                 seq_printf(m,
5207                         "%s%s%s%s", i ? " " : "",
5208                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5209                         i == tr->clock_id ? "]" : "");
5210         seq_putc(m, '\n');
5211
5212         return 0;
5213 }
5214
5215 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5216 {
5217         int i;
5218
5219         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5220                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5221                         break;
5222         }
5223         if (i == ARRAY_SIZE(trace_clocks))
5224                 return -EINVAL;
5225
5226         mutex_lock(&trace_types_lock);
5227
5228         tr->clock_id = i;
5229
5230         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5231
5232         /*
5233          * New clock may not be consistent with the previous clock.
5234          * Reset the buffer so that it doesn't have incomparable timestamps.
5235          */
5236         tracing_reset_online_cpus(&tr->trace_buffer);
5237
5238 #ifdef CONFIG_TRACER_MAX_TRACE
5239         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5240                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5241         tracing_reset_online_cpus(&tr->max_buffer);
5242 #endif
5243
5244         mutex_unlock(&trace_types_lock);
5245
5246         return 0;
5247 }
5248
5249 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5250                                    size_t cnt, loff_t *fpos)
5251 {
5252         struct seq_file *m = filp->private_data;
5253         struct trace_array *tr = m->private;
5254         char buf[64];
5255         const char *clockstr;
5256         int ret;
5257
5258         if (cnt >= sizeof(buf))
5259                 return -EINVAL;
5260
5261         if (copy_from_user(&buf, ubuf, cnt))
5262                 return -EFAULT;
5263
5264         buf[cnt] = 0;
5265
5266         clockstr = strstrip(buf);
5267
5268         ret = tracing_set_clock(tr, clockstr);
5269         if (ret)
5270                 return ret;
5271
5272         *fpos += cnt;
5273
5274         return cnt;
5275 }
5276
5277 static int tracing_clock_open(struct inode *inode, struct file *file)
5278 {
5279         struct trace_array *tr = inode->i_private;
5280         int ret;
5281
5282         if (tracing_disabled)
5283                 return -ENODEV;
5284
5285         if (trace_array_get(tr))
5286                 return -ENODEV;
5287
5288         ret = single_open(file, tracing_clock_show, inode->i_private);
5289         if (ret < 0)
5290                 trace_array_put(tr);
5291
5292         return ret;
5293 }
5294
5295 struct ftrace_buffer_info {
5296         struct trace_iterator   iter;
5297         void                    *spare;
5298         unsigned int            read;
5299 };
5300
5301 #ifdef CONFIG_TRACER_SNAPSHOT
5302 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5303 {
5304         struct trace_array *tr = inode->i_private;
5305         struct trace_iterator *iter;
5306         struct seq_file *m;
5307         int ret = 0;
5308
5309         if (trace_array_get(tr) < 0)
5310                 return -ENODEV;
5311
5312         if (file->f_mode & FMODE_READ) {
5313                 iter = __tracing_open(inode, file, true);
5314                 if (IS_ERR(iter))
5315                         ret = PTR_ERR(iter);
5316         } else {
5317                 /* Writes still need the seq_file to hold the private data */
5318                 ret = -ENOMEM;
5319                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5320                 if (!m)
5321                         goto out;
5322                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5323                 if (!iter) {
5324                         kfree(m);
5325                         goto out;
5326                 }
5327                 ret = 0;
5328
5329                 iter->tr = tr;
5330                 iter->trace_buffer = &tr->max_buffer;
5331                 iter->cpu_file = tracing_get_cpu(inode);
5332                 m->private = iter;
5333                 file->private_data = m;
5334         }
5335 out:
5336         if (ret < 0)
5337                 trace_array_put(tr);
5338
5339         return ret;
5340 }
5341
5342 static ssize_t
5343 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5344                        loff_t *ppos)
5345 {
5346         struct seq_file *m = filp->private_data;
5347         struct trace_iterator *iter = m->private;
5348         struct trace_array *tr = iter->tr;
5349         unsigned long val;
5350         int ret;
5351
5352         ret = tracing_update_buffers();
5353         if (ret < 0)
5354                 return ret;
5355
5356         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5357         if (ret)
5358                 return ret;
5359
5360         mutex_lock(&trace_types_lock);
5361
5362         if (tr->current_trace->use_max_tr) {
5363                 ret = -EBUSY;
5364                 goto out;
5365         }
5366
5367         switch (val) {
5368         case 0:
5369                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5370                         ret = -EINVAL;
5371                         break;
5372                 }
5373                 if (tr->allocated_snapshot)
5374                         free_snapshot(tr);
5375                 break;
5376         case 1:
5377 /* Only allow per-cpu swap if the ring buffer supports it */
5378 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5379                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5380                         ret = -EINVAL;
5381                         break;
5382                 }
5383 #endif
5384                 if (!tr->allocated_snapshot) {
5385                         ret = alloc_snapshot(tr);
5386                         if (ret < 0)
5387                                 break;
5388                 }
5389                 local_irq_disable();
5390                 /* Now, we're going to swap */
5391                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5392                         update_max_tr(tr, current, smp_processor_id());
5393                 else
5394                         update_max_tr_single(tr, current, iter->cpu_file);
5395                 local_irq_enable();
5396                 break;
5397         default:
5398                 if (tr->allocated_snapshot) {
5399                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5400                                 tracing_reset_online_cpus(&tr->max_buffer);
5401                         else
5402                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5403                 }
5404                 break;
5405         }
5406
5407         if (ret >= 0) {
5408                 *ppos += cnt;
5409                 ret = cnt;
5410         }
5411 out:
5412         mutex_unlock(&trace_types_lock);
5413         return ret;
5414 }
5415
5416 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5417 {
5418         struct seq_file *m = file->private_data;
5419         int ret;
5420
5421         ret = tracing_release(inode, file);
5422
5423         if (file->f_mode & FMODE_READ)
5424                 return ret;
5425
5426         /* If write only, the seq_file is just a stub */
5427         if (m)
5428                 kfree(m->private);
5429         kfree(m);
5430
5431         return 0;
5432 }
5433
5434 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5435 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5436                                     size_t count, loff_t *ppos);
5437 static int tracing_buffers_release(struct inode *inode, struct file *file);
5438 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5439                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5440
5441 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5442 {
5443         struct ftrace_buffer_info *info;
5444         int ret;
5445
5446         ret = tracing_buffers_open(inode, filp);
5447         if (ret < 0)
5448                 return ret;
5449
5450         info = filp->private_data;
5451
5452         if (info->iter.trace->use_max_tr) {
5453                 tracing_buffers_release(inode, filp);
5454                 return -EBUSY;
5455         }
5456
5457         info->iter.snapshot = true;
5458         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5459
5460         return ret;
5461 }
5462
5463 #endif /* CONFIG_TRACER_SNAPSHOT */
5464
5465
5466 static const struct file_operations tracing_thresh_fops = {
5467         .open           = tracing_open_generic,
5468         .read           = tracing_thresh_read,
5469         .write          = tracing_thresh_write,
5470         .llseek         = generic_file_llseek,
5471 };
5472
5473 #ifdef CONFIG_TRACER_MAX_TRACE
5474 static const struct file_operations tracing_max_lat_fops = {
5475         .open           = tracing_open_generic,
5476         .read           = tracing_max_lat_read,
5477         .write          = tracing_max_lat_write,
5478         .llseek         = generic_file_llseek,
5479 };
5480 #endif
5481
5482 static const struct file_operations set_tracer_fops = {
5483         .open           = tracing_open_generic,
5484         .read           = tracing_set_trace_read,
5485         .write          = tracing_set_trace_write,
5486         .llseek         = generic_file_llseek,
5487 };
5488
5489 static const struct file_operations tracing_pipe_fops = {
5490         .open           = tracing_open_pipe,
5491         .poll           = tracing_poll_pipe,
5492         .read           = tracing_read_pipe,
5493         .splice_read    = tracing_splice_read_pipe,
5494         .release        = tracing_release_pipe,
5495         .llseek         = no_llseek,
5496 };
5497
5498 static const struct file_operations tracing_entries_fops = {
5499         .open           = tracing_open_generic_tr,
5500         .read           = tracing_entries_read,
5501         .write          = tracing_entries_write,
5502         .llseek         = generic_file_llseek,
5503         .release        = tracing_release_generic_tr,
5504 };
5505
5506 static const struct file_operations tracing_total_entries_fops = {
5507         .open           = tracing_open_generic_tr,
5508         .read           = tracing_total_entries_read,
5509         .llseek         = generic_file_llseek,
5510         .release        = tracing_release_generic_tr,
5511 };
5512
5513 static const struct file_operations tracing_free_buffer_fops = {
5514         .open           = tracing_open_generic_tr,
5515         .write          = tracing_free_buffer_write,
5516         .release        = tracing_free_buffer_release,
5517 };
5518
5519 static const struct file_operations tracing_mark_fops = {
5520         .open           = tracing_open_generic_tr,
5521         .write          = tracing_mark_write,
5522         .llseek         = generic_file_llseek,
5523         .release        = tracing_release_generic_tr,
5524 };
5525
5526 static const struct file_operations trace_clock_fops = {
5527         .open           = tracing_clock_open,
5528         .read           = seq_read,
5529         .llseek         = seq_lseek,
5530         .release        = tracing_single_release_tr,
5531         .write          = tracing_clock_write,
5532 };
5533
5534 #ifdef CONFIG_TRACER_SNAPSHOT
5535 static const struct file_operations snapshot_fops = {
5536         .open           = tracing_snapshot_open,
5537         .read           = seq_read,
5538         .write          = tracing_snapshot_write,
5539         .llseek         = tracing_lseek,
5540         .release        = tracing_snapshot_release,
5541 };
5542
5543 static const struct file_operations snapshot_raw_fops = {
5544         .open           = snapshot_raw_open,
5545         .read           = tracing_buffers_read,
5546         .release        = tracing_buffers_release,
5547         .splice_read    = tracing_buffers_splice_read,
5548         .llseek         = no_llseek,
5549 };
5550
5551 #endif /* CONFIG_TRACER_SNAPSHOT */
5552
5553 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5554 {
5555         struct trace_array *tr = inode->i_private;
5556         struct ftrace_buffer_info *info;
5557         int ret;
5558
5559         if (tracing_disabled)
5560                 return -ENODEV;
5561
5562         if (trace_array_get(tr) < 0)
5563                 return -ENODEV;
5564
5565         info = kzalloc(sizeof(*info), GFP_KERNEL);
5566         if (!info) {
5567                 trace_array_put(tr);
5568                 return -ENOMEM;
5569         }
5570
5571         mutex_lock(&trace_types_lock);
5572
5573         info->iter.tr           = tr;
5574         info->iter.cpu_file     = tracing_get_cpu(inode);
5575         info->iter.trace        = tr->current_trace;
5576         info->iter.trace_buffer = &tr->trace_buffer;
5577         info->spare             = NULL;
5578         /* Force reading ring buffer for first read */
5579         info->read              = (unsigned int)-1;
5580
5581         filp->private_data = info;
5582
5583         tr->current_trace->ref++;
5584
5585         mutex_unlock(&trace_types_lock);
5586
5587         ret = nonseekable_open(inode, filp);
5588         if (ret < 0)
5589                 trace_array_put(tr);
5590
5591         return ret;
5592 }
5593
5594 static unsigned int
5595 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5596 {
5597         struct ftrace_buffer_info *info = filp->private_data;
5598         struct trace_iterator *iter = &info->iter;
5599
5600         return trace_poll(iter, filp, poll_table);
5601 }
5602
5603 static ssize_t
5604 tracing_buffers_read(struct file *filp, char __user *ubuf,
5605                      size_t count, loff_t *ppos)
5606 {
5607         struct ftrace_buffer_info *info = filp->private_data;
5608         struct trace_iterator *iter = &info->iter;
5609         ssize_t ret;
5610         ssize_t size;
5611
5612         if (!count)
5613                 return 0;
5614
5615 #ifdef CONFIG_TRACER_MAX_TRACE
5616         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5617                 return -EBUSY;
5618 #endif
5619
5620         if (!info->spare)
5621                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5622                                                           iter->cpu_file);
5623         if (!info->spare)
5624                 return -ENOMEM;
5625
5626         /* Do we have previous read data to read? */
5627         if (info->read < PAGE_SIZE)
5628                 goto read;
5629
5630  again:
5631         trace_access_lock(iter->cpu_file);
5632         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5633                                     &info->spare,
5634                                     count,
5635                                     iter->cpu_file, 0);
5636         trace_access_unlock(iter->cpu_file);
5637
5638         if (ret < 0) {
5639                 if (trace_empty(iter)) {
5640                         if ((filp->f_flags & O_NONBLOCK))
5641                                 return -EAGAIN;
5642
5643                         ret = wait_on_pipe(iter, false);
5644                         if (ret)
5645                                 return ret;
5646
5647                         goto again;
5648                 }
5649                 return 0;
5650         }
5651
5652         info->read = 0;
5653  read:
5654         size = PAGE_SIZE - info->read;
5655         if (size > count)
5656                 size = count;
5657
5658         ret = copy_to_user(ubuf, info->spare + info->read, size);
5659         if (ret == size)
5660                 return -EFAULT;
5661
5662         size -= ret;
5663
5664         *ppos += size;
5665         info->read += size;
5666
5667         return size;
5668 }
5669
5670 static int tracing_buffers_release(struct inode *inode, struct file *file)
5671 {
5672         struct ftrace_buffer_info *info = file->private_data;
5673         struct trace_iterator *iter = &info->iter;
5674
5675         mutex_lock(&trace_types_lock);
5676
5677         iter->tr->current_trace->ref--;
5678
5679         __trace_array_put(iter->tr);
5680
5681         if (info->spare)
5682                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5683         kfree(info);
5684
5685         mutex_unlock(&trace_types_lock);
5686
5687         return 0;
5688 }
5689
5690 struct buffer_ref {
5691         struct ring_buffer      *buffer;
5692         void                    *page;
5693         int                     ref;
5694 };
5695
5696 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5697                                     struct pipe_buffer *buf)
5698 {
5699         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5700
5701         if (--ref->ref)
5702                 return;
5703
5704         ring_buffer_free_read_page(ref->buffer, ref->page);
5705         kfree(ref);
5706         buf->private = 0;
5707 }
5708
5709 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5710                                 struct pipe_buffer *buf)
5711 {
5712         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5713
5714         ref->ref++;
5715 }
5716
5717 /* Pipe buffer operations for a buffer. */
5718 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5719         .can_merge              = 0,
5720         .confirm                = generic_pipe_buf_confirm,
5721         .release                = buffer_pipe_buf_release,
5722         .steal                  = generic_pipe_buf_steal,
5723         .get                    = buffer_pipe_buf_get,
5724 };
5725
5726 /*
5727  * Callback from splice_to_pipe(), if we need to release some pages
5728  * at the end of the spd in case we error'ed out in filling the pipe.
5729  */
5730 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5731 {
5732         struct buffer_ref *ref =
5733                 (struct buffer_ref *)spd->partial[i].private;
5734
5735         if (--ref->ref)
5736                 return;
5737
5738         ring_buffer_free_read_page(ref->buffer, ref->page);
5739         kfree(ref);
5740         spd->partial[i].private = 0;
5741 }
5742
5743 static ssize_t
5744 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5745                             struct pipe_inode_info *pipe, size_t len,
5746                             unsigned int flags)
5747 {
5748         struct ftrace_buffer_info *info = file->private_data;
5749         struct trace_iterator *iter = &info->iter;
5750         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5751         struct page *pages_def[PIPE_DEF_BUFFERS];
5752         struct splice_pipe_desc spd = {
5753                 .pages          = pages_def,
5754                 .partial        = partial_def,
5755                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5756                 .flags          = flags,
5757                 .ops            = &buffer_pipe_buf_ops,
5758                 .spd_release    = buffer_spd_release,
5759         };
5760         struct buffer_ref *ref;
5761         int entries, size, i;
5762         ssize_t ret = 0;
5763
5764 #ifdef CONFIG_TRACER_MAX_TRACE
5765         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5766                 return -EBUSY;
5767 #endif
5768
5769         if (splice_grow_spd(pipe, &spd))
5770                 return -ENOMEM;
5771
5772         if (*ppos & (PAGE_SIZE - 1))
5773                 return -EINVAL;
5774
5775         if (len & (PAGE_SIZE - 1)) {
5776                 if (len < PAGE_SIZE)
5777                         return -EINVAL;
5778                 len &= PAGE_MASK;
5779         }
5780
5781  again:
5782         trace_access_lock(iter->cpu_file);
5783         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5784
5785         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5786                 struct page *page;
5787                 int r;
5788
5789                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5790                 if (!ref) {
5791                         ret = -ENOMEM;
5792                         break;
5793                 }
5794
5795                 ref->ref = 1;
5796                 ref->buffer = iter->trace_buffer->buffer;
5797                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5798                 if (!ref->page) {
5799                         ret = -ENOMEM;
5800                         kfree(ref);
5801                         break;
5802                 }
5803
5804                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5805                                           len, iter->cpu_file, 1);
5806                 if (r < 0) {
5807                         ring_buffer_free_read_page(ref->buffer, ref->page);
5808                         kfree(ref);
5809                         break;
5810                 }
5811
5812                 /*
5813                  * zero out any left over data, this is going to
5814                  * user land.
5815                  */
5816                 size = ring_buffer_page_len(ref->page);
5817                 if (size < PAGE_SIZE)
5818                         memset(ref->page + size, 0, PAGE_SIZE - size);
5819
5820                 page = virt_to_page(ref->page);
5821
5822                 spd.pages[i] = page;
5823                 spd.partial[i].len = PAGE_SIZE;
5824                 spd.partial[i].offset = 0;
5825                 spd.partial[i].private = (unsigned long)ref;
5826                 spd.nr_pages++;
5827                 *ppos += PAGE_SIZE;
5828
5829                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5830         }
5831
5832         trace_access_unlock(iter->cpu_file);
5833         spd.nr_pages = i;
5834
5835         /* did we read anything? */
5836         if (!spd.nr_pages) {
5837                 if (ret)
5838                         return ret;
5839
5840                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5841                         return -EAGAIN;
5842
5843                 ret = wait_on_pipe(iter, true);
5844                 if (ret)
5845                         return ret;
5846
5847                 goto again;
5848         }
5849
5850         ret = splice_to_pipe(pipe, &spd);
5851         splice_shrink_spd(&spd);
5852
5853         return ret;
5854 }
5855
5856 static const struct file_operations tracing_buffers_fops = {
5857         .open           = tracing_buffers_open,
5858         .read           = tracing_buffers_read,
5859         .poll           = tracing_buffers_poll,
5860         .release        = tracing_buffers_release,
5861         .splice_read    = tracing_buffers_splice_read,
5862         .llseek         = no_llseek,
5863 };
5864
5865 static ssize_t
5866 tracing_stats_read(struct file *filp, char __user *ubuf,
5867                    size_t count, loff_t *ppos)
5868 {
5869         struct inode *inode = file_inode(filp);
5870         struct trace_array *tr = inode->i_private;
5871         struct trace_buffer *trace_buf = &tr->trace_buffer;
5872         int cpu = tracing_get_cpu(inode);
5873         struct trace_seq *s;
5874         unsigned long cnt;
5875         unsigned long long t;
5876         unsigned long usec_rem;
5877
5878         s = kmalloc(sizeof(*s), GFP_KERNEL);
5879         if (!s)
5880                 return -ENOMEM;
5881
5882         trace_seq_init(s);
5883
5884         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5885         trace_seq_printf(s, "entries: %ld\n", cnt);
5886
5887         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5888         trace_seq_printf(s, "overrun: %ld\n", cnt);
5889
5890         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5891         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5892
5893         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5894         trace_seq_printf(s, "bytes: %ld\n", cnt);
5895
5896         if (trace_clocks[tr->clock_id].in_ns) {
5897                 /* local or global for trace_clock */
5898                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5899                 usec_rem = do_div(t, USEC_PER_SEC);
5900                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5901                                                                 t, usec_rem);
5902
5903                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5904                 usec_rem = do_div(t, USEC_PER_SEC);
5905                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5906         } else {
5907                 /* counter or tsc mode for trace_clock */
5908                 trace_seq_printf(s, "oldest event ts: %llu\n",
5909                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5910
5911                 trace_seq_printf(s, "now ts: %llu\n",
5912                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5913         }
5914
5915         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5916         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5917
5918         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5919         trace_seq_printf(s, "read events: %ld\n", cnt);
5920
5921         count = simple_read_from_buffer(ubuf, count, ppos,
5922                                         s->buffer, trace_seq_used(s));
5923
5924         kfree(s);
5925
5926         return count;
5927 }
5928
5929 static const struct file_operations tracing_stats_fops = {
5930         .open           = tracing_open_generic_tr,
5931         .read           = tracing_stats_read,
5932         .llseek         = generic_file_llseek,
5933         .release        = tracing_release_generic_tr,
5934 };
5935
5936 #ifdef CONFIG_DYNAMIC_FTRACE
5937
5938 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5939 {
5940         return 0;
5941 }
5942
5943 static ssize_t
5944 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5945                   size_t cnt, loff_t *ppos)
5946 {
5947         static char ftrace_dyn_info_buffer[1024];
5948         static DEFINE_MUTEX(dyn_info_mutex);
5949         unsigned long *p = filp->private_data;
5950         char *buf = ftrace_dyn_info_buffer;
5951         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5952         int r;
5953
5954         mutex_lock(&dyn_info_mutex);
5955         r = sprintf(buf, "%ld ", *p);
5956
5957         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5958         buf[r++] = '\n';
5959
5960         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5961
5962         mutex_unlock(&dyn_info_mutex);
5963
5964         return r;
5965 }
5966
5967 static const struct file_operations tracing_dyn_info_fops = {
5968         .open           = tracing_open_generic,
5969         .read           = tracing_read_dyn_info,
5970         .llseek         = generic_file_llseek,
5971 };
5972 #endif /* CONFIG_DYNAMIC_FTRACE */
5973
5974 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5975 static void
5976 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5977 {
5978         tracing_snapshot();
5979 }
5980
5981 static void
5982 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5983 {
5984         unsigned long *count = (long *)data;
5985
5986         if (!*count)
5987                 return;
5988
5989         if (*count != -1)
5990                 (*count)--;
5991
5992         tracing_snapshot();
5993 }
5994
5995 static int
5996 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5997                       struct ftrace_probe_ops *ops, void *data)
5998 {
5999         long count = (long)data;
6000
6001         seq_printf(m, "%ps:", (void *)ip);
6002
6003         seq_puts(m, "snapshot");
6004
6005         if (count == -1)
6006                 seq_puts(m, ":unlimited\n");
6007         else
6008                 seq_printf(m, ":count=%ld\n", count);
6009
6010         return 0;
6011 }
6012
6013 static struct ftrace_probe_ops snapshot_probe_ops = {
6014         .func                   = ftrace_snapshot,
6015         .print                  = ftrace_snapshot_print,
6016 };
6017
6018 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6019         .func                   = ftrace_count_snapshot,
6020         .print                  = ftrace_snapshot_print,
6021 };
6022
6023 static int
6024 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6025                                char *glob, char *cmd, char *param, int enable)
6026 {
6027         struct ftrace_probe_ops *ops;
6028         void *count = (void *)-1;
6029         char *number;
6030         int ret;
6031
6032         /* hash funcs only work with set_ftrace_filter */
6033         if (!enable)
6034                 return -EINVAL;
6035
6036         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6037
6038         if (glob[0] == '!') {
6039                 unregister_ftrace_function_probe_func(glob+1, ops);
6040                 return 0;
6041         }
6042
6043         if (!param)
6044                 goto out_reg;
6045
6046         number = strsep(&param, ":");
6047
6048         if (!strlen(number))
6049                 goto out_reg;
6050
6051         /*
6052          * We use the callback data field (which is a pointer)
6053          * as our counter.
6054          */
6055         ret = kstrtoul(number, 0, (unsigned long *)&count);
6056         if (ret)
6057                 return ret;
6058
6059  out_reg:
6060         ret = register_ftrace_function_probe(glob, ops, count);
6061
6062         if (ret >= 0)
6063                 alloc_snapshot(&global_trace);
6064
6065         return ret < 0 ? ret : 0;
6066 }
6067
6068 static struct ftrace_func_command ftrace_snapshot_cmd = {
6069         .name                   = "snapshot",
6070         .func                   = ftrace_trace_snapshot_callback,
6071 };
6072
6073 static __init int register_snapshot_cmd(void)
6074 {
6075         return register_ftrace_command(&ftrace_snapshot_cmd);
6076 }
6077 #else
6078 static inline __init int register_snapshot_cmd(void) { return 0; }
6079 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6080
6081 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6082 {
6083         if (WARN_ON(!tr->dir))
6084                 return ERR_PTR(-ENODEV);
6085
6086         /* Top directory uses NULL as the parent */
6087         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6088                 return NULL;
6089
6090         /* All sub buffers have a descriptor */
6091         return tr->dir;
6092 }
6093
6094 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6095 {
6096         struct dentry *d_tracer;
6097
6098         if (tr->percpu_dir)
6099                 return tr->percpu_dir;
6100
6101         d_tracer = tracing_get_dentry(tr);
6102         if (IS_ERR(d_tracer))
6103                 return NULL;
6104
6105         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6106
6107         WARN_ONCE(!tr->percpu_dir,
6108                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6109
6110         return tr->percpu_dir;
6111 }
6112
6113 static struct dentry *
6114 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6115                       void *data, long cpu, const struct file_operations *fops)
6116 {
6117         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6118
6119         if (ret) /* See tracing_get_cpu() */
6120                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6121         return ret;
6122 }
6123
6124 static void
6125 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6126 {
6127         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6128         struct dentry *d_cpu;
6129         char cpu_dir[30]; /* 30 characters should be more than enough */
6130
6131         if (!d_percpu)
6132                 return;
6133
6134         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6135         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6136         if (!d_cpu) {
6137                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6138                 return;
6139         }
6140
6141         /* per cpu trace_pipe */
6142         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6143                                 tr, cpu, &tracing_pipe_fops);
6144
6145         /* per cpu trace */
6146         trace_create_cpu_file("trace", 0644, d_cpu,
6147                                 tr, cpu, &tracing_fops);
6148
6149         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6150                                 tr, cpu, &tracing_buffers_fops);
6151
6152         trace_create_cpu_file("stats", 0444, d_cpu,
6153                                 tr, cpu, &tracing_stats_fops);
6154
6155         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6156                                 tr, cpu, &tracing_entries_fops);
6157
6158 #ifdef CONFIG_TRACER_SNAPSHOT
6159         trace_create_cpu_file("snapshot", 0644, d_cpu,
6160                                 tr, cpu, &snapshot_fops);
6161
6162         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6163                                 tr, cpu, &snapshot_raw_fops);
6164 #endif
6165 }
6166
6167 #ifdef CONFIG_FTRACE_SELFTEST
6168 /* Let selftest have access to static functions in this file */
6169 #include "trace_selftest.c"
6170 #endif
6171
6172 static ssize_t
6173 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6174                         loff_t *ppos)
6175 {
6176         struct trace_option_dentry *topt = filp->private_data;
6177         char *buf;
6178
6179         if (topt->flags->val & topt->opt->bit)
6180                 buf = "1\n";
6181         else
6182                 buf = "0\n";
6183
6184         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6185 }
6186
6187 static ssize_t
6188 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6189                          loff_t *ppos)
6190 {
6191         struct trace_option_dentry *topt = filp->private_data;
6192         unsigned long val;
6193         int ret;
6194
6195         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6196         if (ret)
6197                 return ret;
6198
6199         if (val != 0 && val != 1)
6200                 return -EINVAL;
6201
6202         if (!!(topt->flags->val & topt->opt->bit) != val) {
6203                 mutex_lock(&trace_types_lock);
6204                 ret = __set_tracer_option(topt->tr, topt->flags,
6205                                           topt->opt, !val);
6206                 mutex_unlock(&trace_types_lock);
6207                 if (ret)
6208                         return ret;
6209         }
6210
6211         *ppos += cnt;
6212
6213         return cnt;
6214 }
6215
6216
6217 static const struct file_operations trace_options_fops = {
6218         .open = tracing_open_generic,
6219         .read = trace_options_read,
6220         .write = trace_options_write,
6221         .llseek = generic_file_llseek,
6222 };
6223
6224 /*
6225  * In order to pass in both the trace_array descriptor as well as the index
6226  * to the flag that the trace option file represents, the trace_array
6227  * has a character array of trace_flags_index[], which holds the index
6228  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6229  * The address of this character array is passed to the flag option file
6230  * read/write callbacks.
6231  *
6232  * In order to extract both the index and the trace_array descriptor,
6233  * get_tr_index() uses the following algorithm.
6234  *
6235  *   idx = *ptr;
6236  *
6237  * As the pointer itself contains the address of the index (remember
6238  * index[1] == 1).
6239  *
6240  * Then to get the trace_array descriptor, by subtracting that index
6241  * from the ptr, we get to the start of the index itself.
6242  *
6243  *   ptr - idx == &index[0]
6244  *
6245  * Then a simple container_of() from that pointer gets us to the
6246  * trace_array descriptor.
6247  */
6248 static void get_tr_index(void *data, struct trace_array **ptr,
6249                          unsigned int *pindex)
6250 {
6251         *pindex = *(unsigned char *)data;
6252
6253         *ptr = container_of(data - *pindex, struct trace_array,
6254                             trace_flags_index);
6255 }
6256
6257 static ssize_t
6258 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6259                         loff_t *ppos)
6260 {
6261         void *tr_index = filp->private_data;
6262         struct trace_array *tr;
6263         unsigned int index;
6264         char *buf;
6265
6266         get_tr_index(tr_index, &tr, &index);
6267
6268         if (tr->trace_flags & (1 << index))
6269                 buf = "1\n";
6270         else
6271                 buf = "0\n";
6272
6273         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6274 }
6275
6276 static ssize_t
6277 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6278                          loff_t *ppos)
6279 {
6280         void *tr_index = filp->private_data;
6281         struct trace_array *tr;
6282         unsigned int index;
6283         unsigned long val;
6284         int ret;
6285
6286         get_tr_index(tr_index, &tr, &index);
6287
6288         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6289         if (ret)
6290                 return ret;
6291
6292         if (val != 0 && val != 1)
6293                 return -EINVAL;
6294
6295         mutex_lock(&trace_types_lock);
6296         ret = set_tracer_flag(tr, 1 << index, val);
6297         mutex_unlock(&trace_types_lock);
6298
6299         if (ret < 0)
6300                 return ret;
6301
6302         *ppos += cnt;
6303
6304         return cnt;
6305 }
6306
6307 static const struct file_operations trace_options_core_fops = {
6308         .open = tracing_open_generic,
6309         .read = trace_options_core_read,
6310         .write = trace_options_core_write,
6311         .llseek = generic_file_llseek,
6312 };
6313
6314 struct dentry *trace_create_file(const char *name,
6315                                  umode_t mode,
6316                                  struct dentry *parent,
6317                                  void *data,
6318                                  const struct file_operations *fops)
6319 {
6320         struct dentry *ret;
6321
6322         ret = tracefs_create_file(name, mode, parent, data, fops);
6323         if (!ret)
6324                 pr_warning("Could not create tracefs '%s' entry\n", name);
6325
6326         return ret;
6327 }
6328
6329
6330 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6331 {
6332         struct dentry *d_tracer;
6333
6334         if (tr->options)
6335                 return tr->options;
6336
6337         d_tracer = tracing_get_dentry(tr);
6338         if (IS_ERR(d_tracer))
6339                 return NULL;
6340
6341         tr->options = tracefs_create_dir("options", d_tracer);
6342         if (!tr->options) {
6343                 pr_warning("Could not create tracefs directory 'options'\n");
6344                 return NULL;
6345         }
6346
6347         return tr->options;
6348 }
6349
6350 static void
6351 create_trace_option_file(struct trace_array *tr,
6352                          struct trace_option_dentry *topt,
6353                          struct tracer_flags *flags,
6354                          struct tracer_opt *opt)
6355 {
6356         struct dentry *t_options;
6357
6358         t_options = trace_options_init_dentry(tr);
6359         if (!t_options)
6360                 return;
6361
6362         topt->flags = flags;
6363         topt->opt = opt;
6364         topt->tr = tr;
6365
6366         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6367                                     &trace_options_fops);
6368
6369 }
6370
6371 static void
6372 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6373 {
6374         struct trace_option_dentry *topts;
6375         struct trace_options *tr_topts;
6376         struct tracer_flags *flags;
6377         struct tracer_opt *opts;
6378         int cnt;
6379         int i;
6380
6381         if (!tracer)
6382                 return;
6383
6384         flags = tracer->flags;
6385
6386         if (!flags || !flags->opts)
6387                 return;
6388
6389         /*
6390          * If this is an instance, only create flags for tracers
6391          * the instance may have.
6392          */
6393         if (!trace_ok_for_array(tracer, tr))
6394                 return;
6395
6396         for (i = 0; i < tr->nr_topts; i++) {
6397                 /*
6398                  * Check if these flags have already been added.
6399                  * Some tracers share flags.
6400                  */
6401                 if (tr->topts[i].tracer->flags == tracer->flags)
6402                         return;
6403         }
6404
6405         opts = flags->opts;
6406
6407         for (cnt = 0; opts[cnt].name; cnt++)
6408                 ;
6409
6410         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6411         if (!topts)
6412                 return;
6413
6414         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6415                             GFP_KERNEL);
6416         if (!tr_topts) {
6417                 kfree(topts);
6418                 return;
6419         }
6420
6421         tr->topts = tr_topts;
6422         tr->topts[tr->nr_topts].tracer = tracer;
6423         tr->topts[tr->nr_topts].topts = topts;
6424         tr->nr_topts++;
6425
6426         for (cnt = 0; opts[cnt].name; cnt++) {
6427                 create_trace_option_file(tr, &topts[cnt], flags,
6428                                          &opts[cnt]);
6429                 WARN_ONCE(topts[cnt].entry == NULL,
6430                           "Failed to create trace option: %s",
6431                           opts[cnt].name);
6432         }
6433 }
6434
6435 static struct dentry *
6436 create_trace_option_core_file(struct trace_array *tr,
6437                               const char *option, long index)
6438 {
6439         struct dentry *t_options;
6440
6441         t_options = trace_options_init_dentry(tr);
6442         if (!t_options)
6443                 return NULL;
6444
6445         return trace_create_file(option, 0644, t_options,
6446                                  (void *)&tr->trace_flags_index[index],
6447                                  &trace_options_core_fops);
6448 }
6449
6450 static void create_trace_options_dir(struct trace_array *tr)
6451 {
6452         struct dentry *t_options;
6453         bool top_level = tr == &global_trace;
6454         int i;
6455
6456         t_options = trace_options_init_dentry(tr);
6457         if (!t_options)
6458                 return;
6459
6460         for (i = 0; trace_options[i]; i++) {
6461                 if (top_level ||
6462                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6463                         create_trace_option_core_file(tr, trace_options[i], i);
6464         }
6465 }
6466
6467 static ssize_t
6468 rb_simple_read(struct file *filp, char __user *ubuf,
6469                size_t cnt, loff_t *ppos)
6470 {
6471         struct trace_array *tr = filp->private_data;
6472         char buf[64];
6473         int r;
6474
6475         r = tracer_tracing_is_on(tr);
6476         r = sprintf(buf, "%d\n", r);
6477
6478         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6479 }
6480
6481 static ssize_t
6482 rb_simple_write(struct file *filp, const char __user *ubuf,
6483                 size_t cnt, loff_t *ppos)
6484 {
6485         struct trace_array *tr = filp->private_data;
6486         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6487         unsigned long val;
6488         int ret;
6489
6490         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6491         if (ret)
6492                 return ret;
6493
6494         if (buffer) {
6495                 mutex_lock(&trace_types_lock);
6496                 if (val) {
6497                         tracer_tracing_on(tr);
6498                         if (tr->current_trace->start)
6499                                 tr->current_trace->start(tr);
6500                 } else {
6501                         tracer_tracing_off(tr);
6502                         if (tr->current_trace->stop)
6503                                 tr->current_trace->stop(tr);
6504                 }
6505                 mutex_unlock(&trace_types_lock);
6506         }
6507
6508         (*ppos)++;
6509
6510         return cnt;
6511 }
6512
6513 static const struct file_operations rb_simple_fops = {
6514         .open           = tracing_open_generic_tr,
6515         .read           = rb_simple_read,
6516         .write          = rb_simple_write,
6517         .release        = tracing_release_generic_tr,
6518         .llseek         = default_llseek,
6519 };
6520
6521 struct dentry *trace_instance_dir;
6522
6523 static void
6524 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6525
6526 static int
6527 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6528 {
6529         enum ring_buffer_flags rb_flags;
6530
6531         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6532
6533         buf->tr = tr;
6534
6535         buf->buffer = ring_buffer_alloc(size, rb_flags);
6536         if (!buf->buffer)
6537                 return -ENOMEM;
6538
6539         buf->data = alloc_percpu(struct trace_array_cpu);
6540         if (!buf->data) {
6541                 ring_buffer_free(buf->buffer);
6542                 return -ENOMEM;
6543         }
6544
6545         /* Allocate the first page for all buffers */
6546         set_buffer_entries(&tr->trace_buffer,
6547                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6548
6549         return 0;
6550 }
6551
6552 static int allocate_trace_buffers(struct trace_array *tr, int size)
6553 {
6554         int ret;
6555
6556         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6557         if (ret)
6558                 return ret;
6559
6560 #ifdef CONFIG_TRACER_MAX_TRACE
6561         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6562                                     allocate_snapshot ? size : 1);
6563         if (WARN_ON(ret)) {
6564                 ring_buffer_free(tr->trace_buffer.buffer);
6565                 free_percpu(tr->trace_buffer.data);
6566                 return -ENOMEM;
6567         }
6568         tr->allocated_snapshot = allocate_snapshot;
6569
6570         /*
6571          * Only the top level trace array gets its snapshot allocated
6572          * from the kernel command line.
6573          */
6574         allocate_snapshot = false;
6575 #endif
6576         return 0;
6577 }
6578
6579 static void free_trace_buffer(struct trace_buffer *buf)
6580 {
6581         if (buf->buffer) {
6582                 ring_buffer_free(buf->buffer);
6583                 buf->buffer = NULL;
6584                 free_percpu(buf->data);
6585                 buf->data = NULL;
6586         }
6587 }
6588
6589 static void free_trace_buffers(struct trace_array *tr)
6590 {
6591         if (!tr)
6592                 return;
6593
6594         free_trace_buffer(&tr->trace_buffer);
6595
6596 #ifdef CONFIG_TRACER_MAX_TRACE
6597         free_trace_buffer(&tr->max_buffer);
6598 #endif
6599 }
6600
6601 static void init_trace_flags_index(struct trace_array *tr)
6602 {
6603         int i;
6604
6605         /* Used by the trace options files */
6606         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6607                 tr->trace_flags_index[i] = i;
6608 }
6609
6610 static void __update_tracer_options(struct trace_array *tr)
6611 {
6612         struct tracer *t;
6613
6614         for (t = trace_types; t; t = t->next)
6615                 add_tracer_options(tr, t);
6616 }
6617
6618 static void update_tracer_options(struct trace_array *tr)
6619 {
6620         mutex_lock(&trace_types_lock);
6621         __update_tracer_options(tr);
6622         mutex_unlock(&trace_types_lock);
6623 }
6624
6625 static int instance_mkdir(const char *name)
6626 {
6627         struct trace_array *tr;
6628         int ret;
6629
6630         mutex_lock(&trace_types_lock);
6631
6632         ret = -EEXIST;
6633         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6634                 if (tr->name && strcmp(tr->name, name) == 0)
6635                         goto out_unlock;
6636         }
6637
6638         ret = -ENOMEM;
6639         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6640         if (!tr)
6641                 goto out_unlock;
6642
6643         tr->name = kstrdup(name, GFP_KERNEL);
6644         if (!tr->name)
6645                 goto out_free_tr;
6646
6647         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6648                 goto out_free_tr;
6649
6650         tr->trace_flags = global_trace.trace_flags;
6651
6652         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6653
6654         raw_spin_lock_init(&tr->start_lock);
6655
6656         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6657
6658         tr->current_trace = &nop_trace;
6659
6660         INIT_LIST_HEAD(&tr->systems);
6661         INIT_LIST_HEAD(&tr->events);
6662
6663         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6664                 goto out_free_tr;
6665
6666         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6667         if (!tr->dir)
6668                 goto out_free_tr;
6669
6670         ret = event_trace_add_tracer(tr->dir, tr);
6671         if (ret) {
6672                 tracefs_remove_recursive(tr->dir);
6673                 goto out_free_tr;
6674         }
6675
6676         init_tracer_tracefs(tr, tr->dir);
6677         init_trace_flags_index(tr);
6678         __update_tracer_options(tr);
6679
6680         list_add(&tr->list, &ftrace_trace_arrays);
6681
6682         mutex_unlock(&trace_types_lock);
6683
6684         return 0;
6685
6686  out_free_tr:
6687         free_trace_buffers(tr);
6688         free_cpumask_var(tr->tracing_cpumask);
6689         kfree(tr->name);
6690         kfree(tr);
6691
6692  out_unlock:
6693         mutex_unlock(&trace_types_lock);
6694
6695         return ret;
6696
6697 }
6698
6699 static int instance_rmdir(const char *name)
6700 {
6701         struct trace_array *tr;
6702         int found = 0;
6703         int ret;
6704         int i;
6705
6706         mutex_lock(&trace_types_lock);
6707
6708         ret = -ENODEV;
6709         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6710                 if (tr->name && strcmp(tr->name, name) == 0) {
6711                         found = 1;
6712                         break;
6713                 }
6714         }
6715         if (!found)
6716                 goto out_unlock;
6717
6718         ret = -EBUSY;
6719         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6720                 goto out_unlock;
6721
6722         list_del(&tr->list);
6723
6724         tracing_set_nop(tr);
6725         event_trace_del_tracer(tr);
6726         ftrace_destroy_function_files(tr);
6727         tracefs_remove_recursive(tr->dir);
6728         free_trace_buffers(tr);
6729
6730         for (i = 0; i < tr->nr_topts; i++) {
6731                 kfree(tr->topts[i].topts);
6732         }
6733         kfree(tr->topts);
6734
6735         kfree(tr->name);
6736         kfree(tr);
6737
6738         ret = 0;
6739
6740  out_unlock:
6741         mutex_unlock(&trace_types_lock);
6742
6743         return ret;
6744 }
6745
6746 static __init void create_trace_instances(struct dentry *d_tracer)
6747 {
6748         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6749                                                          instance_mkdir,
6750                                                          instance_rmdir);
6751         if (WARN_ON(!trace_instance_dir))
6752                 return;
6753 }
6754
6755 static void
6756 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6757 {
6758         int cpu;
6759
6760         trace_create_file("available_tracers", 0444, d_tracer,
6761                         tr, &show_traces_fops);
6762
6763         trace_create_file("current_tracer", 0644, d_tracer,
6764                         tr, &set_tracer_fops);
6765
6766         trace_create_file("tracing_cpumask", 0644, d_tracer,
6767                           tr, &tracing_cpumask_fops);
6768
6769         trace_create_file("trace_options", 0644, d_tracer,
6770                           tr, &tracing_iter_fops);
6771
6772         trace_create_file("trace", 0644, d_tracer,
6773                           tr, &tracing_fops);
6774
6775         trace_create_file("trace_pipe", 0444, d_tracer,
6776                           tr, &tracing_pipe_fops);
6777
6778         trace_create_file("buffer_size_kb", 0644, d_tracer,
6779                           tr, &tracing_entries_fops);
6780
6781         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6782                           tr, &tracing_total_entries_fops);
6783
6784         trace_create_file("free_buffer", 0200, d_tracer,
6785                           tr, &tracing_free_buffer_fops);
6786
6787         trace_create_file("trace_marker", 0220, d_tracer,
6788                           tr, &tracing_mark_fops);
6789
6790         trace_create_file("trace_clock", 0644, d_tracer, tr,
6791                           &trace_clock_fops);
6792
6793         trace_create_file("tracing_on", 0644, d_tracer,
6794                           tr, &rb_simple_fops);
6795
6796         create_trace_options_dir(tr);
6797
6798 #ifdef CONFIG_TRACER_MAX_TRACE
6799         trace_create_file("tracing_max_latency", 0644, d_tracer,
6800                         &tr->max_latency, &tracing_max_lat_fops);
6801 #endif
6802
6803         if (ftrace_create_function_files(tr, d_tracer))
6804                 WARN(1, "Could not allocate function filter files");
6805
6806 #ifdef CONFIG_TRACER_SNAPSHOT
6807         trace_create_file("snapshot", 0644, d_tracer,
6808                           tr, &snapshot_fops);
6809 #endif
6810
6811         for_each_tracing_cpu(cpu)
6812                 tracing_init_tracefs_percpu(tr, cpu);
6813
6814 }
6815
6816 static struct vfsmount *trace_automount(void *ingore)
6817 {
6818         struct vfsmount *mnt;
6819         struct file_system_type *type;
6820
6821         /*
6822          * To maintain backward compatibility for tools that mount
6823          * debugfs to get to the tracing facility, tracefs is automatically
6824          * mounted to the debugfs/tracing directory.
6825          */
6826         type = get_fs_type("tracefs");
6827         if (!type)
6828                 return NULL;
6829         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6830         put_filesystem(type);
6831         if (IS_ERR(mnt))
6832                 return NULL;
6833         mntget(mnt);
6834
6835         return mnt;
6836 }
6837
6838 /**
6839  * tracing_init_dentry - initialize top level trace array
6840  *
6841  * This is called when creating files or directories in the tracing
6842  * directory. It is called via fs_initcall() by any of the boot up code
6843  * and expects to return the dentry of the top level tracing directory.
6844  */
6845 struct dentry *tracing_init_dentry(void)
6846 {
6847         struct trace_array *tr = &global_trace;
6848
6849         /* The top level trace array uses  NULL as parent */
6850         if (tr->dir)
6851                 return NULL;
6852
6853         if (WARN_ON(!tracefs_initialized()) ||
6854                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6855                  WARN_ON(!debugfs_initialized())))
6856                 return ERR_PTR(-ENODEV);
6857
6858         /*
6859          * As there may still be users that expect the tracing
6860          * files to exist in debugfs/tracing, we must automount
6861          * the tracefs file system there, so older tools still
6862          * work with the newer kerenl.
6863          */
6864         tr->dir = debugfs_create_automount("tracing", NULL,
6865                                            trace_automount, NULL);
6866         if (!tr->dir) {
6867                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6868                 return ERR_PTR(-ENOMEM);
6869         }
6870
6871         return NULL;
6872 }
6873
6874 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6875 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6876
6877 static void __init trace_enum_init(void)
6878 {
6879         int len;
6880
6881         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6882         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6883 }
6884
6885 #ifdef CONFIG_MODULES
6886 static void trace_module_add_enums(struct module *mod)
6887 {
6888         if (!mod->num_trace_enums)
6889                 return;
6890
6891         /*
6892          * Modules with bad taint do not have events created, do
6893          * not bother with enums either.
6894          */
6895         if (trace_module_has_bad_taint(mod))
6896                 return;
6897
6898         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6899 }
6900
6901 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6902 static void trace_module_remove_enums(struct module *mod)
6903 {
6904         union trace_enum_map_item *map;
6905         union trace_enum_map_item **last = &trace_enum_maps;
6906
6907         if (!mod->num_trace_enums)
6908                 return;
6909
6910         mutex_lock(&trace_enum_mutex);
6911
6912         map = trace_enum_maps;
6913
6914         while (map) {
6915                 if (map->head.mod == mod)
6916                         break;
6917                 map = trace_enum_jmp_to_tail(map);
6918                 last = &map->tail.next;
6919                 map = map->tail.next;
6920         }
6921         if (!map)
6922                 goto out;
6923
6924         *last = trace_enum_jmp_to_tail(map)->tail.next;
6925         kfree(map);
6926  out:
6927         mutex_unlock(&trace_enum_mutex);
6928 }
6929 #else
6930 static inline void trace_module_remove_enums(struct module *mod) { }
6931 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6932
6933 static int trace_module_notify(struct notifier_block *self,
6934                                unsigned long val, void *data)
6935 {
6936         struct module *mod = data;
6937
6938         switch (val) {
6939         case MODULE_STATE_COMING:
6940                 trace_module_add_enums(mod);
6941                 break;
6942         case MODULE_STATE_GOING:
6943                 trace_module_remove_enums(mod);
6944                 break;
6945         }
6946
6947         return 0;
6948 }
6949
6950 static struct notifier_block trace_module_nb = {
6951         .notifier_call = trace_module_notify,
6952         .priority = 0,
6953 };
6954 #endif /* CONFIG_MODULES */
6955
6956 static __init int tracer_init_tracefs(void)
6957 {
6958         struct dentry *d_tracer;
6959
6960         trace_access_lock_init();
6961
6962         d_tracer = tracing_init_dentry();
6963         if (IS_ERR(d_tracer))
6964                 return 0;
6965
6966         init_tracer_tracefs(&global_trace, d_tracer);
6967
6968         trace_create_file("tracing_thresh", 0644, d_tracer,
6969                         &global_trace, &tracing_thresh_fops);
6970
6971         trace_create_file("README", 0444, d_tracer,
6972                         NULL, &tracing_readme_fops);
6973
6974         trace_create_file("saved_cmdlines", 0444, d_tracer,
6975                         NULL, &tracing_saved_cmdlines_fops);
6976
6977         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6978                           NULL, &tracing_saved_cmdlines_size_fops);
6979
6980         trace_enum_init();
6981
6982         trace_create_enum_file(d_tracer);
6983
6984 #ifdef CONFIG_MODULES
6985         register_module_notifier(&trace_module_nb);
6986 #endif
6987
6988 #ifdef CONFIG_DYNAMIC_FTRACE
6989         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6990                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6991 #endif
6992
6993         create_trace_instances(d_tracer);
6994
6995         update_tracer_options(&global_trace);
6996
6997         return 0;
6998 }
6999
7000 static int trace_panic_handler(struct notifier_block *this,
7001                                unsigned long event, void *unused)
7002 {
7003         if (ftrace_dump_on_oops)
7004                 ftrace_dump(ftrace_dump_on_oops);
7005         return NOTIFY_OK;
7006 }
7007
7008 static struct notifier_block trace_panic_notifier = {
7009         .notifier_call  = trace_panic_handler,
7010         .next           = NULL,
7011         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7012 };
7013
7014 static int trace_die_handler(struct notifier_block *self,
7015                              unsigned long val,
7016                              void *data)
7017 {
7018         switch (val) {
7019         case DIE_OOPS:
7020                 if (ftrace_dump_on_oops)
7021                         ftrace_dump(ftrace_dump_on_oops);
7022                 break;
7023         default:
7024                 break;
7025         }
7026         return NOTIFY_OK;
7027 }
7028
7029 static struct notifier_block trace_die_notifier = {
7030         .notifier_call = trace_die_handler,
7031         .priority = 200
7032 };
7033
7034 /*
7035  * printk is set to max of 1024, we really don't need it that big.
7036  * Nothing should be printing 1000 characters anyway.
7037  */
7038 #define TRACE_MAX_PRINT         1000
7039
7040 /*
7041  * Define here KERN_TRACE so that we have one place to modify
7042  * it if we decide to change what log level the ftrace dump
7043  * should be at.
7044  */
7045 #define KERN_TRACE              KERN_EMERG
7046
7047 void
7048 trace_printk_seq(struct trace_seq *s)
7049 {
7050         /* Probably should print a warning here. */
7051         if (s->seq.len >= TRACE_MAX_PRINT)
7052                 s->seq.len = TRACE_MAX_PRINT;
7053
7054         /*
7055          * More paranoid code. Although the buffer size is set to
7056          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7057          * an extra layer of protection.
7058          */
7059         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7060                 s->seq.len = s->seq.size - 1;
7061
7062         /* should be zero ended, but we are paranoid. */
7063         s->buffer[s->seq.len] = 0;
7064
7065         printk(KERN_TRACE "%s", s->buffer);
7066
7067         trace_seq_init(s);
7068 }
7069
7070 void trace_init_global_iter(struct trace_iterator *iter)
7071 {
7072         iter->tr = &global_trace;
7073         iter->trace = iter->tr->current_trace;
7074         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7075         iter->trace_buffer = &global_trace.trace_buffer;
7076
7077         if (iter->trace && iter->trace->open)
7078                 iter->trace->open(iter);
7079
7080         /* Annotate start of buffers if we had overruns */
7081         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7082                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7083
7084         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7085         if (trace_clocks[iter->tr->clock_id].in_ns)
7086                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7087 }
7088
7089 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7090 {
7091         /* use static because iter can be a bit big for the stack */
7092         static struct trace_iterator iter;
7093         static atomic_t dump_running;
7094         struct trace_array *tr = &global_trace;
7095         unsigned int old_userobj;
7096         unsigned long flags;
7097         int cnt = 0, cpu;
7098
7099         /* Only allow one dump user at a time. */
7100         if (atomic_inc_return(&dump_running) != 1) {
7101                 atomic_dec(&dump_running);
7102                 return;
7103         }
7104
7105         /*
7106          * Always turn off tracing when we dump.
7107          * We don't need to show trace output of what happens
7108          * between multiple crashes.
7109          *
7110          * If the user does a sysrq-z, then they can re-enable
7111          * tracing with echo 1 > tracing_on.
7112          */
7113         tracing_off();
7114
7115         local_irq_save(flags);
7116
7117         /* Simulate the iterator */
7118         trace_init_global_iter(&iter);
7119
7120         for_each_tracing_cpu(cpu) {
7121                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7122         }
7123
7124         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7125
7126         /* don't look at user memory in panic mode */
7127         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7128
7129         switch (oops_dump_mode) {
7130         case DUMP_ALL:
7131                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7132                 break;
7133         case DUMP_ORIG:
7134                 iter.cpu_file = raw_smp_processor_id();
7135                 break;
7136         case DUMP_NONE:
7137                 goto out_enable;
7138         default:
7139                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7140                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7141         }
7142
7143         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7144
7145         /* Did function tracer already get disabled? */
7146         if (ftrace_is_dead()) {
7147                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7148                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7149         }
7150
7151         /*
7152          * We need to stop all tracing on all CPUS to read the
7153          * the next buffer. This is a bit expensive, but is
7154          * not done often. We fill all what we can read,
7155          * and then release the locks again.
7156          */
7157
7158         while (!trace_empty(&iter)) {
7159
7160                 if (!cnt)
7161                         printk(KERN_TRACE "---------------------------------\n");
7162
7163                 cnt++;
7164
7165                 /* reset all but tr, trace, and overruns */
7166                 memset(&iter.seq, 0,
7167                        sizeof(struct trace_iterator) -
7168                        offsetof(struct trace_iterator, seq));
7169                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7170                 iter.pos = -1;
7171
7172                 if (trace_find_next_entry_inc(&iter) != NULL) {
7173                         int ret;
7174
7175                         ret = print_trace_line(&iter);
7176                         if (ret != TRACE_TYPE_NO_CONSUME)
7177                                 trace_consume(&iter);
7178                 }
7179                 touch_nmi_watchdog();
7180
7181                 trace_printk_seq(&iter.seq);
7182         }
7183
7184         if (!cnt)
7185                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7186         else
7187                 printk(KERN_TRACE "---------------------------------\n");
7188
7189  out_enable:
7190         tr->trace_flags |= old_userobj;
7191
7192         for_each_tracing_cpu(cpu) {
7193                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7194         }
7195         atomic_dec(&dump_running);
7196         local_irq_restore(flags);
7197 }
7198 EXPORT_SYMBOL_GPL(ftrace_dump);
7199
7200 __init static int tracer_alloc_buffers(void)
7201 {
7202         int ring_buf_size;
7203         int ret = -ENOMEM;
7204
7205         /*
7206          * Make sure we don't accidently add more trace options
7207          * than we have bits for.
7208          */
7209         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7210
7211         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7212                 goto out;
7213
7214         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7215                 goto out_free_buffer_mask;
7216
7217         /* Only allocate trace_printk buffers if a trace_printk exists */
7218         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7219                 /* Must be called before global_trace.buffer is allocated */
7220                 trace_printk_init_buffers();
7221
7222         /* To save memory, keep the ring buffer size to its minimum */
7223         if (ring_buffer_expanded)
7224                 ring_buf_size = trace_buf_size;
7225         else
7226                 ring_buf_size = 1;
7227
7228         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7229         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7230
7231         raw_spin_lock_init(&global_trace.start_lock);
7232
7233         /* Used for event triggers */
7234         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7235         if (!temp_buffer)
7236                 goto out_free_cpumask;
7237
7238         if (trace_create_savedcmd() < 0)
7239                 goto out_free_temp_buffer;
7240
7241         /* TODO: make the number of buffers hot pluggable with CPUS */
7242         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7243                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7244                 WARN_ON(1);
7245                 goto out_free_savedcmd;
7246         }
7247
7248         if (global_trace.buffer_disabled)
7249                 tracing_off();
7250
7251         if (trace_boot_clock) {
7252                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7253                 if (ret < 0)
7254                         pr_warning("Trace clock %s not defined, going back to default\n",
7255                                    trace_boot_clock);
7256         }
7257
7258         /*
7259          * register_tracer() might reference current_trace, so it
7260          * needs to be set before we register anything. This is
7261          * just a bootstrap of current_trace anyway.
7262          */
7263         global_trace.current_trace = &nop_trace;
7264
7265         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7266
7267         ftrace_init_global_array_ops(&global_trace);
7268
7269         init_trace_flags_index(&global_trace);
7270
7271         register_tracer(&nop_trace);
7272
7273         /* All seems OK, enable tracing */
7274         tracing_disabled = 0;
7275
7276         atomic_notifier_chain_register(&panic_notifier_list,
7277                                        &trace_panic_notifier);
7278
7279         register_die_notifier(&trace_die_notifier);
7280
7281         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7282
7283         INIT_LIST_HEAD(&global_trace.systems);
7284         INIT_LIST_HEAD(&global_trace.events);
7285         list_add(&global_trace.list, &ftrace_trace_arrays);
7286
7287         apply_trace_boot_options();
7288
7289         register_snapshot_cmd();
7290
7291         return 0;
7292
7293 out_free_savedcmd:
7294         free_saved_cmdlines_buffer(savedcmd);
7295 out_free_temp_buffer:
7296         ring_buffer_free(temp_buffer);
7297 out_free_cpumask:
7298         free_cpumask_var(global_trace.tracing_cpumask);
7299 out_free_buffer_mask:
7300         free_cpumask_var(tracing_buffer_mask);
7301 out:
7302         return ret;
7303 }
7304
7305 void __init trace_init(void)
7306 {
7307         if (tracepoint_printk) {
7308                 tracepoint_print_iter =
7309                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7310                 if (WARN_ON(!tracepoint_print_iter))
7311                         tracepoint_printk = 0;
7312         }
7313         tracer_alloc_buffers();
7314         trace_event_init();
7315 }
7316
7317 __init static int clear_boot_tracer(void)
7318 {
7319         /*
7320          * The default tracer at boot buffer is an init section.
7321          * This function is called in lateinit. If we did not
7322          * find the boot tracer, then clear it out, to prevent
7323          * later registration from accessing the buffer that is
7324          * about to be freed.
7325          */
7326         if (!default_bootup_tracer)
7327                 return 0;
7328
7329         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7330                default_bootup_tracer);
7331         default_bootup_tracer = NULL;
7332
7333         return 0;
7334 }
7335
7336 fs_initcall(tracer_init_tracefs);
7337 late_initcall(clear_boot_tracer);