kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 and is set back to zero only when the
92  * initialization of the tracer is successful; that is the only
93  * place that ever clears it.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs,
114  * or to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptors of those pages are used to hold the
208  * linked list, by linking their lru items to each of the
209  * pages in the per-CPU buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which is meant for fast paths such as
302  * the irqsoff tracer, but may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low value of 16384.
323  * If a dump on oops happens, it is much appreciated
324  * not to have to wait for all that output. In any case, this is
325  * configurable at both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a link list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * Serialize access to the ring buffer.
341  *
342  * The ring buffer serializes readers, but that is only low-level protection.
343  * The validity of the events (as returned by ring_buffer_peek() etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow another process to
347  * consume these events concurrently:
348  *   A) the page holding the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and will then be rewritten
350  *      by the event producer.
351  *   B) the page holding the consumed events may become a page for
352  *      splice_read, and will then be returned to the system.
353  *
354  * These primitives allow multiple processes to access different per-CPU
355  * ring buffers concurrently.
356  *
357  * These primitives don't distinguish read-only from read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
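
/*
 * A minimal sketch of the expected reader-side use of the primitives
 * above (the real readers in this file typically also hold
 * trace_types_lock or the iterator mutex around this):
 *
 *	trace_access_lock(cpu_file);
 *	... peek at or consume events of that cpu buffer ...
 *	trace_access_unlock(cpu_file);
 *
 * Passing RING_BUFFER_ALL_CPUS as cpu_file locks out concurrent
 * access to every per-cpu buffer at once.
 */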
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435  * races where it gets disabled while we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469         int pc;
470
471         if (!(trace_flags & TRACE_ITER_PRINTK))
472                 return 0;
473
474         pc = preempt_count();
475
476         if (unlikely(tracing_selftest_running || tracing_disabled))
477                 return 0;
478
479         alloc = sizeof(*entry) + size + 2; /* possible \n added */
480
481         local_save_flags(irq_flags);
482         buffer = global_trace.trace_buffer.buffer;
483         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
484                                           irq_flags, pc);
485         if (!event)
486                 return 0;
487
488         entry = ring_buffer_event_data(event);
489         entry->ip = ip;
490
491         memcpy(&entry->buf, str, size);
492
493         /* Add a newline if necessary */
494         if (entry->buf[size - 1] != '\n') {
495                 entry->buf[size] = '\n';
496                 entry->buf[size + 1] = '\0';
497         } else
498                 entry->buf[size] = '\0';
499
500         __buffer_unlock_commit(buffer, event);
501         ftrace_trace_stack(buffer, irq_flags, 4, pc);
502
503         return size;
504 }
505 EXPORT_SYMBOL_GPL(__trace_puts);
506
507 /**
508  * __trace_bputs - write the pointer to a constant string into trace buffer
509  * @ip:    The address of the caller
510  * @str:   The constant string to write to the buffer to
511  */
512 int __trace_bputs(unsigned long ip, const char *str)
513 {
514         struct ring_buffer_event *event;
515         struct ring_buffer *buffer;
516         struct bputs_entry *entry;
517         unsigned long irq_flags;
518         int size = sizeof(struct bputs_entry);
519         int pc;
520
521         if (!(trace_flags & TRACE_ITER_PRINTK))
522                 return 0;
523
524         pc = preempt_count();
525
526         if (unlikely(tracing_selftest_running || tracing_disabled))
527                 return 0;
528
529         local_save_flags(irq_flags);
530         buffer = global_trace.trace_buffer.buffer;
531         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
532                                           irq_flags, pc);
533         if (!event)
534                 return 0;
535
536         entry = ring_buffer_event_data(event);
537         entry->ip                       = ip;
538         entry->str                      = str;
539
540         __buffer_unlock_commit(buffer, event);
541         ftrace_trace_stack(buffer, irq_flags, 4, pc);
542
543         return 1;
544 }
545 EXPORT_SYMBOL_GPL(__trace_bputs);
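
/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly;
 * the trace_puts() macro picks between them, using __trace_bputs()
 * when the string is a build-time constant, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 */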
546
547 #ifdef CONFIG_TRACER_SNAPSHOT
548 /**
549  * tracing_snapshot - take a snapshot of the current buffer.
550  *
551  * This causes a swap between the snapshot buffer and the current live
552  * tracing buffer. You can use this to take snapshots of the live
553  * trace when some condition is triggered, but continue to trace.
554  *
555  * Note, make sure to allocate the snapshot either with
556  * tracing_snapshot_alloc(), or by doing it manually
557  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
558  *
559  * If the snapshot buffer is not allocated, this will stop tracing,
560  * basically making a permanent snapshot.
561  */
562 void tracing_snapshot(void)
563 {
564         struct trace_array *tr = &global_trace;
565         struct tracer *tracer = tr->current_trace;
566         unsigned long flags;
567
568         if (in_nmi()) {
569                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
570                 internal_trace_puts("*** snapshot is being ignored        ***\n");
571                 return;
572         }
573
574         if (!tr->allocated_snapshot) {
575                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
576                 internal_trace_puts("*** stopping trace here!   ***\n");
577                 tracing_off();
578                 return;
579         }
580
581         /* Note, snapshot can not be used when the tracer uses it */
582         if (tracer->use_max_tr) {
583                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
584                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
585                 return;
586         }
587
588         local_irq_save(flags);
589         update_max_tr(tr, current, smp_processor_id());
590         local_irq_restore(flags);
591 }
592 EXPORT_SYMBOL_GPL(tracing_snapshot);
593
594 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
595                                         struct trace_buffer *size_buf, int cpu_id);
596 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
597
598 static int alloc_snapshot(struct trace_array *tr)
599 {
600         int ret;
601
602         if (!tr->allocated_snapshot) {
603
604                 /* allocate spare buffer */
605                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
606                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
607                 if (ret < 0)
608                         return ret;
609
610                 tr->allocated_snapshot = true;
611         }
612
613         return 0;
614 }
615
616 static void free_snapshot(struct trace_array *tr)
617 {
618         /*
619          * We don't free the ring buffer; instead, we resize it because
620          * the max_tr ring buffer has some state (e.g. ring->clock) that
621          * we want to preserve.
622          */
623         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
624         set_buffer_entries(&tr->max_buffer, 1);
625         tracing_reset_online_cpus(&tr->max_buffer);
626         tr->allocated_snapshot = false;
627 }
628
629 /**
630  * tracing_alloc_snapshot - allocate snapshot buffer.
631  *
632  * This only allocates the snapshot buffer if it isn't already
633  * allocated - it doesn't also take a snapshot.
634  *
635  * This is meant to be used in cases where the snapshot buffer needs
636  * to be set up for events that can't sleep but need to be able to
637  * trigger a snapshot.
638  */
639 int tracing_alloc_snapshot(void)
640 {
641         struct trace_array *tr = &global_trace;
642         int ret;
643
644         ret = alloc_snapshot(tr);
645         WARN_ON(ret < 0);
646
647         return ret;
648 }
649 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
650
651 /**
652  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
653  *
654  * This is similar to tracing_snapshot(), but it will allocate the
655  * snapshot buffer if it isn't already allocated. Use this only
656  * where it is safe to sleep, as the allocation may sleep.
657  *
658  * This causes a swap between the snapshot buffer and the current live
659  * tracing buffer. You can use this to take snapshots of the live
660  * trace when some condition is triggered, but continue to trace.
661  */
662 void tracing_snapshot_alloc(void)
663 {
664         int ret;
665
666         ret = tracing_alloc_snapshot();
667         if (ret < 0)
668                 return;
669
670         tracing_snapshot();
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
673 #else
674 void tracing_snapshot(void)
675 {
676         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
677 }
678 EXPORT_SYMBOL_GPL(tracing_snapshot);
679 int tracing_alloc_snapshot(void)
680 {
681         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
682         return -ENODEV;
683 }
684 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
685 void tracing_snapshot_alloc(void)
686 {
687         /* Give warning */
688         tracing_snapshot();
689 }
690 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
691 #endif /* CONFIG_TRACER_SNAPSHOT */
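
/*
 * A sketch of conditional snapshot use from kernel code; the condition
 * and call site below are purely illustrative:
 *
 *	tracing_alloc_snapshot();	(during setup, where sleeping is fine)
 *	...
 *	if (unlikely(suspect_condition))
 *		tracing_snapshot();
 *
 * The captured data can then be read from the "snapshot" file in the
 * tracing directory.
 */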
692
693 static void tracer_tracing_off(struct trace_array *tr)
694 {
695         if (tr->trace_buffer.buffer)
696                 ring_buffer_record_off(tr->trace_buffer.buffer);
697         /*
698          * This flag is looked at when buffers haven't been allocated
699          * yet, or by some tracers (like irqsoff), that just want to
700          * know if the ring buffer has been disabled, but it can handle
701          * races of where it gets disabled but we still do a record.
702          * As the check is in the fast path of the tracers, it is more
703          * important to be fast than accurate.
704          */
705         tr->buffer_disabled = 1;
706         /* Make the flag seen by readers */
707         smp_wmb();
708 }
709
710 /**
711  * tracing_off - turn off tracing buffers
712  *
713  * This function stops the tracing buffers from recording data.
714  * It does not disable any overhead the tracers themselves may
715  * be causing. This function simply causes all recording to
716  * the ring buffers to fail.
717  */
718 void tracing_off(void)
719 {
720         tracer_tracing_off(&global_trace);
721 }
722 EXPORT_SYMBOL_GPL(tracing_off);
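
/*
 * tracing_on()/tracing_off() are handy for narrowing recording down to
 * a window of interest, e.g. (illustrative only):
 *
 *	tracing_on();
 *	do_something_suspicious();
 *	tracing_off();
 *
 * The ring buffer then holds only what happened inside that window,
 * readable later from the "trace" file.
 */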
723
724 void disable_trace_on_warning(void)
725 {
726         if (__disable_trace_on_warning)
727                 tracing_off();
728 }
729
730 /**
731  * tracer_tracing_is_on - show the real state of the ring buffer
732  * @tr: the trace array to check
733  *
734  * Shows whether the ring buffer of @tr is actually enabled or not.
735  */
736 static int tracer_tracing_is_on(struct trace_array *tr)
737 {
738         if (tr->trace_buffer.buffer)
739                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
740         return !tr->buffer_disabled;
741 }
742
743 /**
744  * tracing_is_on - show whether the global ring buffers are enabled
745  */
746 int tracing_is_on(void)
747 {
748         return tracer_tracing_is_on(&global_trace);
749 }
750 EXPORT_SYMBOL_GPL(tracing_is_on);
751
752 static int __init set_buf_size(char *str)
753 {
754         unsigned long buf_size;
755
756         if (!str)
757                 return 0;
758         buf_size = memparse(str, &str);
759         /* nr_entries can not be zero */
760         if (buf_size == 0)
761                 return 0;
762         trace_buf_size = buf_size;
763         return 1;
764 }
765 __setup("trace_buf_size=", set_buf_size);
766
767 static int __init set_tracing_thresh(char *str)
768 {
769         unsigned long threshold;
770         int ret;
771
772         if (!str)
773                 return 0;
774         ret = kstrtoul(str, 0, &threshold);
775         if (ret < 0)
776                 return 0;
777         tracing_thresh = threshold * 1000;
778         return 1;
779 }
780 __setup("tracing_thresh=", set_tracing_thresh);
781
782 unsigned long nsecs_to_usecs(unsigned long nsecs)
783 {
784         return nsecs / 1000;
785 }
786
787 /* These must match the bit positions in trace_iterator_flags */
788 static const char *trace_options[] = {
789         "print-parent",
790         "sym-offset",
791         "sym-addr",
792         "verbose",
793         "raw",
794         "hex",
795         "bin",
796         "block",
797         "stacktrace",
798         "trace_printk",
799         "ftrace_preempt",
800         "branch",
801         "annotate",
802         "userstacktrace",
803         "sym-userobj",
804         "printk-msg-only",
805         "context-info",
806         "latency-format",
807         "sleep-time",
808         "graph-time",
809         "record-cmd",
810         "overwrite",
811         "disable_on_free",
812         "irq-info",
813         "markers",
814         "function-trace",
815         NULL
816 };
817
818 static struct {
819         u64 (*func)(void);
820         const char *name;
821         int in_ns;              /* is this clock in nanoseconds? */
822 } trace_clocks[] = {
823         { trace_clock_local,    "local",        1 },
824         { trace_clock_global,   "global",       1 },
825         { trace_clock_counter,  "counter",      0 },
826         { trace_clock_jiffies,  "uptime",       1 },
827         { trace_clock,          "perf",         1 },
828         ARCH_TRACE_CLOCKS
829 };
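
/*
 * The "name" strings above are what the trace_clock file in the
 * tracing directory and the trace_clock= boot parameter accept,
 * e.g. "trace_clock=global" on the kernel command line.
 */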
830
831 /*
832  * trace_parser_get_init - gets the buffer for trace parser
833  */
834 int trace_parser_get_init(struct trace_parser *parser, int size)
835 {
836         memset(parser, 0, sizeof(*parser));
837
838         parser->buffer = kmalloc(size, GFP_KERNEL);
839         if (!parser->buffer)
840                 return 1;
841
842         parser->size = size;
843         return 0;
844 }
845
846 /*
847  * trace_parser_put - frees the buffer for trace parser
848  */
849 void trace_parser_put(struct trace_parser *parser)
850 {
851         kfree(parser->buffer);
852 }
853
854 /*
855  * trace_get_user - reads the user input string separated by space
856  * (matched by isspace(ch))
857  *
858  * For each string found the 'struct trace_parser' is updated,
859  * and the function returns.
860  *
861  * Returns number of bytes read.
862  *
863  * See kernel/trace/trace.h for 'struct trace_parser' details.
864  */
865 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
866         size_t cnt, loff_t *ppos)
867 {
868         char ch;
869         size_t read = 0;
870         ssize_t ret;
871
872         if (!*ppos)
873                 trace_parser_clear(parser);
874
875         ret = get_user(ch, ubuf++);
876         if (ret)
877                 goto out;
878
879         read++;
880         cnt--;
881
882         /*
883          * The parser is not finished with the last write,
884          * continue reading the user input without skipping spaces.
885          */
886         if (!parser->cont) {
887                 /* skip white space */
888                 while (cnt && isspace(ch)) {
889                         ret = get_user(ch, ubuf++);
890                         if (ret)
891                                 goto out;
892                         read++;
893                         cnt--;
894                 }
895
896                 /* only spaces were written */
897                 if (isspace(ch)) {
898                         *ppos += read;
899                         ret = read;
900                         goto out;
901                 }
902
903                 parser->idx = 0;
904         }
905
906         /* read the non-space input */
907         while (cnt && !isspace(ch)) {
908                 if (parser->idx < parser->size - 1)
909                         parser->buffer[parser->idx++] = ch;
910                 else {
911                         ret = -EINVAL;
912                         goto out;
913                 }
914                 ret = get_user(ch, ubuf++);
915                 if (ret)
916                         goto out;
917                 read++;
918                 cnt--;
919         }
920
921         /* We either got finished input or we have to wait for another call. */
922         if (isspace(ch)) {
923                 parser->buffer[parser->idx] = 0;
924                 parser->cont = false;
925         } else if (parser->idx < parser->size - 1) {
926                 parser->cont = true;
927                 parser->buffer[parser->idx++] = ch;
928         } else {
929                 ret = -EINVAL;
930                 goto out;
931         }
932
933         *ppos += read;
934         ret = read;
935
936 out:
937         return ret;
938 }
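
/*
 * A sketch of how a write() handler typically drives the parser
 * (error handling omitted; see the ftrace filter write paths for
 * the real pattern):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	trace_parser_get_init(&parser, PAGE_SIZE);
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		... act on the word in parser.buffer ...
 *	trace_parser_put(&parser);
 */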
939
940 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
941 {
942         int len;
943         int ret;
944
945         if (!cnt)
946                 return 0;
947
948         if (s->len <= s->readpos)
949                 return -EBUSY;
950
951         len = s->len - s->readpos;
952         if (cnt > len)
953                 cnt = len;
954         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
955         if (ret == cnt)
956                 return -EFAULT;
957
958         cnt -= ret;
959
960         s->readpos += cnt;
961         return cnt;
962 }
963
964 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
965 {
966         int len;
967
968         if (s->len <= s->readpos)
969                 return -EBUSY;
970
971         len = s->len - s->readpos;
972         if (cnt > len)
973                 cnt = len;
974         memcpy(buf, s->buffer + s->readpos, cnt);
975
976         s->readpos += cnt;
977         return cnt;
978 }
979
980 unsigned long __read_mostly     tracing_thresh;
981
982 #ifdef CONFIG_TRACER_MAX_TRACE
983 /*
984  * Copy the new maximum trace into the separate maximum-trace
985  * structure. (this way the maximum trace is permanently saved,
986  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
987  */
988 static void
989 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
990 {
991         struct trace_buffer *trace_buf = &tr->trace_buffer;
992         struct trace_buffer *max_buf = &tr->max_buffer;
993         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
994         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
995
996         max_buf->cpu = cpu;
997         max_buf->time_start = data->preempt_timestamp;
998
999         max_data->saved_latency = tr->max_latency;
1000         max_data->critical_start = data->critical_start;
1001         max_data->critical_end = data->critical_end;
1002
1003         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1004         max_data->pid = tsk->pid;
1005         /*
1006          * If tsk == current, then use current_uid(), as that does not use
1007          * RCU. The irq tracer can be called out of RCU scope.
1008          */
1009         if (tsk == current)
1010                 max_data->uid = current_uid();
1011         else
1012                 max_data->uid = task_uid(tsk);
1013
1014         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1015         max_data->policy = tsk->policy;
1016         max_data->rt_priority = tsk->rt_priority;
1017
1018         /* record this task's comm */
1019         tracing_record_cmdline(tsk);
1020 }
1021
1022 /**
1023  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1024  * @tr: tracer
1025  * @tsk: the task with the latency
1026  * @cpu: The cpu that initiated the trace.
1027  *
1028  * Flip the buffers between the @tr and the max_tr and record information
1029  * about which task was the cause of this latency.
1030  */
1031 void
1032 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1033 {
1034         struct ring_buffer *buf;
1035
1036         if (tr->stop_count)
1037                 return;
1038
1039         WARN_ON_ONCE(!irqs_disabled());
1040
1041         if (!tr->allocated_snapshot) {
1042                 /* Only the nop tracer should hit this when disabling */
1043                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1044                 return;
1045         }
1046
1047         arch_spin_lock(&tr->max_lock);
1048
1049         buf = tr->trace_buffer.buffer;
1050         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1051         tr->max_buffer.buffer = buf;
1052
1053         __update_max_tr(tr, tsk, cpu);
1054         arch_spin_unlock(&tr->max_lock);
1055 }
1056
1057 /**
1058  * update_max_tr_single - only copy one trace over, and reset the rest
1059  * @tr: tracer
1060  * @tsk: task with the latency
1061  * @cpu: the cpu of the buffer to copy.
1062  *
1063  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1064  */
1065 void
1066 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1067 {
1068         int ret;
1069
1070         if (tr->stop_count)
1071                 return;
1072
1073         WARN_ON_ONCE(!irqs_disabled());
1074         if (!tr->allocated_snapshot) {
1075                 /* Only the nop tracer should hit this when disabling */
1076                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1077                 return;
1078         }
1079
1080         arch_spin_lock(&tr->max_lock);
1081
1082         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1083
1084         if (ret == -EBUSY) {
1085                 /*
1086                  * We failed to swap the buffer due to a commit taking
1087                  * place on this CPU. We fail to record, but we reset
1088                  * the max trace buffer (no one writes directly to it)
1089                  * and flag that it failed.
1090                  */
1091                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1092                         "Failed to swap buffers due to commit in progress\n");
1093         }
1094
1095         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1096
1097         __update_max_tr(tr, tsk, cpu);
1098         arch_spin_unlock(&tr->max_lock);
1099 }
1100 #endif /* CONFIG_TRACER_MAX_TRACE */
1101
1102 static int wait_on_pipe(struct trace_iterator *iter)
1103 {
1104         /* Iterators are static, they should be filled or empty */
1105         if (trace_buffer_iter(iter, iter->cpu_file))
1106                 return 0;
1107
1108         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1109 }
1110
1111 #ifdef CONFIG_FTRACE_STARTUP_TEST
1112 static int run_tracer_selftest(struct tracer *type)
1113 {
1114         struct trace_array *tr = &global_trace;
1115         struct tracer *saved_tracer = tr->current_trace;
1116         int ret;
1117
1118         if (!type->selftest || tracing_selftest_disabled)
1119                 return 0;
1120
1121         /*
1122          * Run a selftest on this tracer.
1123          * Here we reset the trace buffer, and set the current
1124          * tracer to be this tracer. The tracer can then run some
1125          * internal tracing to verify that everything is in order.
1126          * If we fail, we do not register this tracer.
1127          */
1128         tracing_reset_online_cpus(&tr->trace_buffer);
1129
1130         tr->current_trace = type;
1131
1132 #ifdef CONFIG_TRACER_MAX_TRACE
1133         if (type->use_max_tr) {
1134                 /* If we expanded the buffers, make sure the max is expanded too */
1135                 if (ring_buffer_expanded)
1136                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1137                                            RING_BUFFER_ALL_CPUS);
1138                 tr->allocated_snapshot = true;
1139         }
1140 #endif
1141
1142         /* the test is responsible for initializing and enabling */
1143         pr_info("Testing tracer %s: ", type->name);
1144         ret = type->selftest(type, tr);
1145         /* the test is responsible for resetting too */
1146         tr->current_trace = saved_tracer;
1147         if (ret) {
1148                 printk(KERN_CONT "FAILED!\n");
1149                 /* Add the warning after printing 'FAILED' */
1150                 WARN_ON(1);
1151                 return -1;
1152         }
1153         /* Only reset on passing, to avoid touching corrupted buffers */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156 #ifdef CONFIG_TRACER_MAX_TRACE
1157         if (type->use_max_tr) {
1158                 tr->allocated_snapshot = false;
1159
1160                 /* Shrink the max buffer again */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1163                                            RING_BUFFER_ALL_CPUS);
1164         }
1165 #endif
1166
1167         printk(KERN_CONT "PASSED\n");
1168         return 0;
1169 }
1170 #else
1171 static inline int run_tracer_selftest(struct tracer *type)
1172 {
1173         return 0;
1174 }
1175 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1176
1177 /**
1178  * register_tracer - register a tracer with the ftrace system.
1179  * @type - the plugin for the tracer
1180  *
1181  * Register a new plugin tracer.
1182  */
1183 int register_tracer(struct tracer *type)
1184 {
1185         struct tracer *t;
1186         int ret = 0;
1187
1188         if (!type->name) {
1189                 pr_info("Tracer must have a name\n");
1190                 return -1;
1191         }
1192
1193         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1194                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1195                 return -1;
1196         }
1197
1198         mutex_lock(&trace_types_lock);
1199
1200         tracing_selftest_running = true;
1201
1202         for (t = trace_types; t; t = t->next) {
1203                 if (strcmp(type->name, t->name) == 0) {
1204                         /* already found */
1205                         pr_info("Tracer %s already registered\n",
1206                                 type->name);
1207                         ret = -1;
1208                         goto out;
1209                 }
1210         }
1211
1212         if (!type->set_flag)
1213                 type->set_flag = &dummy_set_flag;
1214         if (!type->flags)
1215                 type->flags = &dummy_tracer_flags;
1216         else
1217                 if (!type->flags->opts)
1218                         type->flags->opts = dummy_tracer_opt;
1219
1220         ret = run_tracer_selftest(type);
1221         if (ret < 0)
1222                 goto out;
1223
1224         type->next = trace_types;
1225         trace_types = type;
1226
1227  out:
1228         tracing_selftest_running = false;
1229         mutex_unlock(&trace_types_lock);
1230
1231         if (ret || !default_bootup_tracer)
1232                 goto out_unlock;
1233
1234         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1235                 goto out_unlock;
1236
1237         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1238         /* Do we want this tracer to start on bootup? */
1239         tracing_set_tracer(&global_trace, type->name);
1240         default_bootup_tracer = NULL;
1241         /* disable other selftests, since this will break them. */
1242         tracing_selftest_disabled = true;
1243 #ifdef CONFIG_FTRACE_STARTUP_TEST
1244         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1245                type->name);
1246 #endif
1247
1248  out_unlock:
1249         return ret;
1250 }
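
/*
 * A minimal registration sketch; all names below are illustrative:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */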
1251
1252 void tracing_reset(struct trace_buffer *buf, int cpu)
1253 {
1254         struct ring_buffer *buffer = buf->buffer;
1255
1256         if (!buffer)
1257                 return;
1258
1259         ring_buffer_record_disable(buffer);
1260
1261         /* Make sure all commits have finished */
1262         synchronize_sched();
1263         ring_buffer_reset_cpu(buffer, cpu);
1264
1265         ring_buffer_record_enable(buffer);
1266 }
1267
1268 void tracing_reset_online_cpus(struct trace_buffer *buf)
1269 {
1270         struct ring_buffer *buffer = buf->buffer;
1271         int cpu;
1272
1273         if (!buffer)
1274                 return;
1275
1276         ring_buffer_record_disable(buffer);
1277
1278         /* Make sure all commits have finished */
1279         synchronize_sched();
1280
1281         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1282
1283         for_each_online_cpu(cpu)
1284                 ring_buffer_reset_cpu(buffer, cpu);
1285
1286         ring_buffer_record_enable(buffer);
1287 }
1288
1289 /* Must have trace_types_lock held */
1290 void tracing_reset_all_online_cpus(void)
1291 {
1292         struct trace_array *tr;
1293
1294         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1295                 tracing_reset_online_cpus(&tr->trace_buffer);
1296 #ifdef CONFIG_TRACER_MAX_TRACE
1297                 tracing_reset_online_cpus(&tr->max_buffer);
1298 #endif
1299         }
1300 }
1301
1302 #define SAVED_CMDLINES_DEFAULT 128
1303 #define NO_CMDLINE_MAP UINT_MAX
1304 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1305 struct saved_cmdlines_buffer {
1306         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1307         unsigned *map_cmdline_to_pid;
1308         unsigned cmdline_num;
1309         int cmdline_idx;
1310         char *saved_cmdlines;
1311 };
1312 static struct saved_cmdlines_buffer *savedcmd;
1313
1314 /* temporarily disable recording */
1315 static atomic_t trace_record_cmdline_disabled __read_mostly;
1316
1317 static inline char *get_saved_cmdlines(int idx)
1318 {
1319         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1320 }
1321
1322 static inline void set_cmdline(int idx, const char *cmdline)
1323 {
1324         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1325 }
1326
1327 static int allocate_cmdlines_buffer(unsigned int val,
1328                                     struct saved_cmdlines_buffer *s)
1329 {
1330         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1331                                         GFP_KERNEL);
1332         if (!s->map_cmdline_to_pid)
1333                 return -ENOMEM;
1334
1335         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1336         if (!s->saved_cmdlines) {
1337                 kfree(s->map_cmdline_to_pid);
1338                 return -ENOMEM;
1339         }
1340
1341         s->cmdline_idx = 0;
1342         s->cmdline_num = val;
1343         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1344                sizeof(s->map_pid_to_cmdline));
1345         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1346                val * sizeof(*s->map_cmdline_to_pid));
1347
1348         return 0;
1349 }
1350
1351 static int trace_create_savedcmd(void)
1352 {
1353         int ret;
1354
1355         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1356         if (!savedcmd)
1357                 return -ENOMEM;
1358
1359         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1360         if (ret < 0) {
1361                 kfree(savedcmd);
1362                 savedcmd = NULL;
1363                 return -ENOMEM;
1364         }
1365
1366         return 0;
1367 }
1368
1369 int is_tracing_stopped(void)
1370 {
1371         return global_trace.stop_count;
1372 }
1373
1374 /**
1375  * tracing_start - quick start of the tracer
1376  *
1377  * If tracing is enabled but was stopped by tracing_stop,
1378  * this will start the tracer back up.
1379  */
1380 void tracing_start(void)
1381 {
1382         struct ring_buffer *buffer;
1383         unsigned long flags;
1384
1385         if (tracing_disabled)
1386                 return;
1387
1388         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1389         if (--global_trace.stop_count) {
1390                 if (global_trace.stop_count < 0) {
1391                         /* Someone screwed up their debugging */
1392                         WARN_ON_ONCE(1);
1393                         global_trace.stop_count = 0;
1394                 }
1395                 goto out;
1396         }
1397
1398         /* Prevent the buffers from switching */
1399         arch_spin_lock(&global_trace.max_lock);
1400
1401         buffer = global_trace.trace_buffer.buffer;
1402         if (buffer)
1403                 ring_buffer_record_enable(buffer);
1404
1405 #ifdef CONFIG_TRACER_MAX_TRACE
1406         buffer = global_trace.max_buffer.buffer;
1407         if (buffer)
1408                 ring_buffer_record_enable(buffer);
1409 #endif
1410
1411         arch_spin_unlock(&global_trace.max_lock);
1412
1413  out:
1414         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1415 }
1416
1417 static void tracing_start_tr(struct trace_array *tr)
1418 {
1419         struct ring_buffer *buffer;
1420         unsigned long flags;
1421
1422         if (tracing_disabled)
1423                 return;
1424
1425         /* If global, we need to also start the max tracer */
1426         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1427                 return tracing_start();
1428
1429         raw_spin_lock_irqsave(&tr->start_lock, flags);
1430
1431         if (--tr->stop_count) {
1432                 if (tr->stop_count < 0) {
1433                         /* Someone screwed up their debugging */
1434                         WARN_ON_ONCE(1);
1435                         tr->stop_count = 0;
1436                 }
1437                 goto out;
1438         }
1439
1440         buffer = tr->trace_buffer.buffer;
1441         if (buffer)
1442                 ring_buffer_record_enable(buffer);
1443
1444  out:
1445         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1446 }
1447
1448 /**
1449  * tracing_stop - quick stop of the tracer
1450  *
1451  * Light weight way to stop tracing. Use in conjunction with
1452  * tracing_start.
1453  */
1454 void tracing_stop(void)
1455 {
1456         struct ring_buffer *buffer;
1457         unsigned long flags;
1458
1459         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1460         if (global_trace.stop_count++)
1461                 goto out;
1462
1463         /* Prevent the buffers from switching */
1464         arch_spin_lock(&global_trace.max_lock);
1465
1466         buffer = global_trace.trace_buffer.buffer;
1467         if (buffer)
1468                 ring_buffer_record_disable(buffer);
1469
1470 #ifdef CONFIG_TRACER_MAX_TRACE
1471         buffer = global_trace.max_buffer.buffer;
1472         if (buffer)
1473                 ring_buffer_record_disable(buffer);
1474 #endif
1475
1476         arch_spin_unlock(&global_trace.max_lock);
1477
1478  out:
1479         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1480 }
1481
1482 static void tracing_stop_tr(struct trace_array *tr)
1483 {
1484         struct ring_buffer *buffer;
1485         unsigned long flags;
1486
1487         /* If global, we need to also stop the max tracer */
1488         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1489                 return tracing_stop();
1490
1491         raw_spin_lock_irqsave(&tr->start_lock, flags);
1492         if (tr->stop_count++)
1493                 goto out;
1494
1495         buffer = tr->trace_buffer.buffer;
1496         if (buffer)
1497                 ring_buffer_record_disable(buffer);
1498
1499  out:
1500         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1501 }
1502
1503 void trace_stop_cmdline_recording(void);
1504
1505 static int trace_save_cmdline(struct task_struct *tsk)
1506 {
1507         unsigned pid, idx;
1508
1509         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1510                 return 0;
1511
1512         /*
1513          * It's not the end of the world if we don't get
1514          * the lock, but we also don't want to spin
1515          * nor do we want to disable interrupts,
1516          * so if we miss here, then better luck next time.
1517          */
1518         if (!arch_spin_trylock(&trace_cmdline_lock))
1519                 return 0;
1520
1521         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1522         if (idx == NO_CMDLINE_MAP) {
1523                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1524
1525                 /*
1526                  * Check whether the cmdline buffer at idx has a pid
1527                  * mapped. We are going to overwrite that entry so we
1528                  * need to clear the map_pid_to_cmdline. Otherwise we
1529                  * would read the new comm for the old pid.
1530                  */
1531                 pid = savedcmd->map_cmdline_to_pid[idx];
1532                 if (pid != NO_CMDLINE_MAP)
1533                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1534
1535                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1536                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1537
1538                 savedcmd->cmdline_idx = idx;
1539         }
1540
1541         set_cmdline(idx, tsk->comm);
1542
1543         arch_spin_unlock(&trace_cmdline_lock);
1544
1545         return 1;
1546 }
1547
1548 static void __trace_find_cmdline(int pid, char comm[])
1549 {
1550         unsigned map;
1551
1552         if (!pid) {
1553                 strcpy(comm, "<idle>");
1554                 return;
1555         }
1556
1557         if (WARN_ON_ONCE(pid < 0)) {
1558                 strcpy(comm, "<XXX>");
1559                 return;
1560         }
1561
1562         if (pid > PID_MAX_DEFAULT) {
1563                 strcpy(comm, "<...>");
1564                 return;
1565         }
1566
1567         map = savedcmd->map_pid_to_cmdline[pid];
1568         if (map != NO_CMDLINE_MAP)
1569                 strcpy(comm, get_saved_cmdlines(map));
1570         else
1571                 strcpy(comm, "<...>");
1572 }
1573
1574 void trace_find_cmdline(int pid, char comm[])
1575 {
1576         preempt_disable();
1577         arch_spin_lock(&trace_cmdline_lock);
1578
1579         __trace_find_cmdline(pid, comm);
1580
1581         arch_spin_unlock(&trace_cmdline_lock);
1582         preempt_enable();
1583 }
1584
1585 void tracing_record_cmdline(struct task_struct *tsk)
1586 {
1587         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1588                 return;
1589
1590         if (!__this_cpu_read(trace_cmdline_save))
1591                 return;
1592
1593         if (trace_save_cmdline(tsk))
1594                 __this_cpu_write(trace_cmdline_save, false);
1595 }
1596
1597 void
1598 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1599                              int pc)
1600 {
1601         struct task_struct *tsk = current;
1602
1603         entry->preempt_count            = pc & 0xff;
1604         entry->pid                      = (tsk) ? tsk->pid : 0;
1605         entry->flags =
1606 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1607                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1608 #else
1609                 TRACE_FLAG_IRQS_NOSUPPORT |
1610 #endif
1611                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1612                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1613                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1614                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1615 }
1616 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1617
1618 struct ring_buffer_event *
1619 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1620                           int type,
1621                           unsigned long len,
1622                           unsigned long flags, int pc)
1623 {
1624         struct ring_buffer_event *event;
1625
1626         event = ring_buffer_lock_reserve(buffer, len);
1627         if (event != NULL) {
1628                 struct trace_entry *ent = ring_buffer_event_data(event);
1629
1630                 tracing_generic_entry_update(ent, flags, pc);
1631                 ent->type = type;
1632         }
1633
1634         return event;
1635 }
1636
1637 void
1638 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1639 {
1640         __this_cpu_write(trace_cmdline_save, true);
1641         ring_buffer_unlock_commit(buffer, event);
1642 }
1643
1644 static inline void
1645 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1646                              struct ring_buffer_event *event,
1647                              unsigned long flags, int pc)
1648 {
1649         __buffer_unlock_commit(buffer, event);
1650
1651         ftrace_trace_stack(buffer, flags, 6, pc);
1652         ftrace_trace_userstack(buffer, flags, pc);
1653 }
1654
1655 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1656                                 struct ring_buffer_event *event,
1657                                 unsigned long flags, int pc)
1658 {
1659         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1660 }
1661 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1662
1663 static struct ring_buffer *temp_buffer;
1664
1665 struct ring_buffer_event *
1666 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1667                           struct ftrace_event_file *ftrace_file,
1668                           int type, unsigned long len,
1669                           unsigned long flags, int pc)
1670 {
1671         struct ring_buffer_event *entry;
1672
1673         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1674         entry = trace_buffer_lock_reserve(*current_rb,
1675                                          type, len, flags, pc);
1676         /*
1677          * If tracing is off, but we have triggers enabled
1678          * we still need to look at the event data. Use the temp_buffer
1679          * to store the trace event for the trigger to use. It is recursion
1680          * safe and will not be recorded anywhere.
1681          */
1682         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1683                 *current_rb = temp_buffer;
1684                 entry = trace_buffer_lock_reserve(*current_rb,
1685                                                   type, len, flags, pc);
1686         }
1687         return entry;
1688 }
1689 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1690
1691 struct ring_buffer_event *
1692 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1693                                   int type, unsigned long len,
1694                                   unsigned long flags, int pc)
1695 {
1696         *current_rb = global_trace.trace_buffer.buffer;
1697         return trace_buffer_lock_reserve(*current_rb,
1698                                          type, len, flags, pc);
1699 }
1700 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1701
1702 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1703                                         struct ring_buffer_event *event,
1704                                         unsigned long flags, int pc)
1705 {
1706         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1707 }
1708 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1709
1710 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1711                                      struct ring_buffer_event *event,
1712                                      unsigned long flags, int pc,
1713                                      struct pt_regs *regs)
1714 {
1715         __buffer_unlock_commit(buffer, event);
1716
1717         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1718         ftrace_trace_userstack(buffer, flags, pc);
1719 }
1720 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1721
1722 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1723                                          struct ring_buffer_event *event)
1724 {
1725         ring_buffer_discard_commit(buffer, event);
1726 }
1727 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1728
1729 void
1730 trace_function(struct trace_array *tr,
1731                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1732                int pc)
1733 {
1734         struct ftrace_event_call *call = &event_function;
1735         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1736         struct ring_buffer_event *event;
1737         struct ftrace_entry *entry;
1738
1739         /* If we are reading the ring buffer, don't trace */
1740         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1741                 return;
1742
1743         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1744                                           flags, pc);
1745         if (!event)
1746                 return;
1747         entry   = ring_buffer_event_data(event);
1748         entry->ip                       = ip;
1749         entry->parent_ip                = parent_ip;
1750
1751         if (!call_filter_check_discard(call, entry, buffer, event))
1752                 __buffer_unlock_commit(buffer, event);
1753 }
1754
1755 #ifdef CONFIG_STACKTRACE
1756
1757 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1758 struct ftrace_stack {
1759         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1760 };
1761
1762 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1763 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1764
1765 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1766                                  unsigned long flags,
1767                                  int skip, int pc, struct pt_regs *regs)
1768 {
1769         struct ftrace_event_call *call = &event_kernel_stack;
1770         struct ring_buffer_event *event;
1771         struct stack_entry *entry;
1772         struct stack_trace trace;
1773         int use_stack;
1774         int size = FTRACE_STACK_ENTRIES;
1775
1776         trace.nr_entries        = 0;
1777         trace.skip              = skip;
1778
1779         /*
1780          * Since events can happen in NMIs, there's no safe way to
1781          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1782          * or NMI comes in, it will just have to use the default
1783          * FTRACE_STACK_SIZE.
1784          */
1785         preempt_disable_notrace();
1786
1787         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1788         /*
1789          * We don't need any atomic variables, just a barrier.
1790          * If an interrupt comes in, we don't care, because it would
1791          * have exited and put the counter back to what we want.
1792          * We just need a barrier to keep gcc from moving things
1793          * around.
1794          */
1795         barrier();
1796         if (use_stack == 1) {
1797                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1798                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1799
1800                 if (regs)
1801                         save_stack_trace_regs(regs, &trace);
1802                 else
1803                         save_stack_trace(&trace);
1804
1805                 if (trace.nr_entries > size)
1806                         size = trace.nr_entries;
1807         } else
1808                 /* From now on, use_stack is a boolean */
1809                 use_stack = 0;
1810
1811         size *= sizeof(unsigned long);
1812
1813         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1814                                           sizeof(*entry) + size, flags, pc);
1815         if (!event)
1816                 goto out;
1817         entry = ring_buffer_event_data(event);
1818
1819         memset(&entry->caller, 0, size);
1820
1821         if (use_stack)
1822                 memcpy(&entry->caller, trace.entries,
1823                        trace.nr_entries * sizeof(unsigned long));
1824         else {
1825                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1826                 trace.entries           = entry->caller;
1827                 if (regs)
1828                         save_stack_trace_regs(regs, &trace);
1829                 else
1830                         save_stack_trace(&trace);
1831         }
1832
1833         entry->size = trace.nr_entries;
1834
1835         if (!call_filter_check_discard(call, entry, buffer, event))
1836                 __buffer_unlock_commit(buffer, event);
1837
1838  out:
1839         /* Again, don't let gcc optimize things here */
1840         barrier();
1841         __this_cpu_dec(ftrace_stack_reserve);
1842         preempt_enable_notrace();
1843
1844 }
1845
1846 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1847                              int skip, int pc, struct pt_regs *regs)
1848 {
1849         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1850                 return;
1851
1852         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1853 }
1854
1855 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1856                         int skip, int pc)
1857 {
1858         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1859                 return;
1860
1861         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1862 }
1863
1864 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1865                    int pc)
1866 {
1867         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1868 }
1869
1870 /**
1871  * trace_dump_stack - record a stack back trace in the trace buffer
1872  * @skip: Number of functions to skip (helper handlers)
1873  */
1874 void trace_dump_stack(int skip)
1875 {
1876         unsigned long flags;
1877
1878         if (tracing_disabled || tracing_selftest_running)
1879                 return;
1880
1881         local_save_flags(flags);
1882
1883         /*
1884          * Skip 3 more, which seems to get us to the caller of
1885          * this function.
1886          */
1887         skip += 3;
1888         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1889                              flags, skip, preempt_count(), NULL);
1890 }
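/*
 * Usage sketch (illustrative): a debugging site elsewhere in the kernel
 * can record its own backtrace into the trace buffer with
 *
 *	trace_dump_stack(0);
 *
 * where a non-zero argument skips that many additional callers.
 */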
1891
1892 static DEFINE_PER_CPU(int, user_stack_count);
1893
1894 void
1895 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1896 {
1897         struct ftrace_event_call *call = &event_user_stack;
1898         struct ring_buffer_event *event;
1899         struct userstack_entry *entry;
1900         struct stack_trace trace;
1901
1902         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1903                 return;
1904
1905         /*
1906          * NMIs can not handle page faults, even with fixups.
1907          * Saving the user stack can (and often does) fault.
1908          */
1909         if (unlikely(in_nmi()))
1910                 return;
1911
1912         /*
1913          * prevent recursion, since the user stack tracing may
1914          * trigger other kernel events.
1915          */
1916         preempt_disable();
1917         if (__this_cpu_read(user_stack_count))
1918                 goto out;
1919
1920         __this_cpu_inc(user_stack_count);
1921
1922         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1923                                           sizeof(*entry), flags, pc);
1924         if (!event)
1925                 goto out_drop_count;
1926         entry   = ring_buffer_event_data(event);
1927
1928         entry->tgid             = current->tgid;
1929         memset(&entry->caller, 0, sizeof(entry->caller));
1930
1931         trace.nr_entries        = 0;
1932         trace.max_entries       = FTRACE_STACK_ENTRIES;
1933         trace.skip              = 0;
1934         trace.entries           = entry->caller;
1935
1936         save_stack_trace_user(&trace);
1937         if (!call_filter_check_discard(call, entry, buffer, event))
1938                 __buffer_unlock_commit(buffer, event);
1939
1940  out_drop_count:
1941         __this_cpu_dec(user_stack_count);
1942  out:
1943         preempt_enable();
1944 }
1945
1946 #ifdef UNUSED
1947 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1948 {
1949         ftrace_trace_userstack(tr, flags, preempt_count());
1950 }
1951 #endif /* UNUSED */
1952
1953 #endif /* CONFIG_STACKTRACE */
1954
1955 /* created for use with alloc_percpu */
1956 struct trace_buffer_struct {
1957         char buffer[TRACE_BUF_SIZE];
1958 };
1959
1960 static struct trace_buffer_struct *trace_percpu_buffer;
1961 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1962 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1963 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1964
1965 /*
1966  * The buffer used depends on the context. There is a per cpu
1967  * buffer for normal context, softirq context, hard irq context and
1968  * for NMI context. This allows for lockless recording.
1969  *
1970  * Note, if the buffers failed to be allocated, then this returns NULL
1971  */
1972 static char *get_trace_buf(void)
1973 {
1974         struct trace_buffer_struct *percpu_buffer;
1975
1976         /*
1977          * If we have allocated per cpu buffers, then we do not
1978          * need to do any locking.
1979          */
1980         if (in_nmi())
1981                 percpu_buffer = trace_percpu_nmi_buffer;
1982         else if (in_irq())
1983                 percpu_buffer = trace_percpu_irq_buffer;
1984         else if (in_softirq())
1985                 percpu_buffer = trace_percpu_sirq_buffer;
1986         else
1987                 percpu_buffer = trace_percpu_buffer;
1988
1989         if (!percpu_buffer)
1990                 return NULL;
1991
1992         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1993 }
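/*
 * Example of why the split matters: a trace_printk() issued from an NMI
 * handler formats into trace_percpu_nmi_buffer, so it cannot scribble
 * over a trace_printk() that the same cpu was in the middle of
 * formatting in process, softirq or irq context.
 */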
1994
1995 static int alloc_percpu_trace_buffer(void)
1996 {
1997         struct trace_buffer_struct *buffers;
1998         struct trace_buffer_struct *sirq_buffers;
1999         struct trace_buffer_struct *irq_buffers;
2000         struct trace_buffer_struct *nmi_buffers;
2001
2002         buffers = alloc_percpu(struct trace_buffer_struct);
2003         if (!buffers)
2004                 goto err_warn;
2005
2006         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2007         if (!sirq_buffers)
2008                 goto err_sirq;
2009
2010         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2011         if (!irq_buffers)
2012                 goto err_irq;
2013
2014         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2015         if (!nmi_buffers)
2016                 goto err_nmi;
2017
2018         trace_percpu_buffer = buffers;
2019         trace_percpu_sirq_buffer = sirq_buffers;
2020         trace_percpu_irq_buffer = irq_buffers;
2021         trace_percpu_nmi_buffer = nmi_buffers;
2022
2023         return 0;
2024
2025  err_nmi:
2026         free_percpu(irq_buffers);
2027  err_irq:
2028         free_percpu(sirq_buffers);
2029  err_sirq:
2030         free_percpu(buffers);
2031  err_warn:
2032         WARN(1, "Could not allocate percpu trace_printk buffer");
2033         return -ENOMEM;
2034 }
2035
2036 static int buffers_allocated;
2037
2038 void trace_printk_init_buffers(void)
2039 {
2040         if (buffers_allocated)
2041                 return;
2042
2043         if (alloc_percpu_trace_buffer())
2044                 return;
2045
2046         /* trace_printk() is for debug use only. Don't use it in production. */
2047
2048         pr_warning("\n**********************************************************\n");
2049         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2050         pr_warning("**                                                      **\n");
2051         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2052         pr_warning("**                                                      **\n");
2053         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2054         pr_warning("** unsafe for produciton use.                           **\n");
2055         pr_warning("**                                                      **\n");
2056         pr_warning("** If you see this message and you are not debugging    **\n");
2057         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2058         pr_warning("**                                                      **\n");
2059         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2060         pr_warning("**********************************************************\n");
2061
2062         /* Expand the buffers to set size */
2063         tracing_update_buffers();
2064
2065         buffers_allocated = 1;
2066
2067         /*
2068          * trace_printk_init_buffers() can be called by modules.
2069          * If that happens, then we need to start cmdline recording
2070          * directly here. If the global_trace.buffer is already
2071          * allocated here, then this was called by module code.
2072          */
2073         if (global_trace.trace_buffer.buffer)
2074                 tracing_start_cmdline_record();
2075 }
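/*
 * Usage sketch (debug only, illustrative): a developer typically drops
 * something like
 *
 *	trace_printk("got obj=%p state=%d\n", obj, state);
 *
 * into the code under test; the message goes into the ring buffer
 * rather than the printk log and is read back through the "trace" or
 * "trace_pipe" files.  The variable names above are hypothetical.
 */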
2076
2077 void trace_printk_start_comm(void)
2078 {
2079         /* Start tracing comms if trace printk is set */
2080         if (!buffers_allocated)
2081                 return;
2082         tracing_start_cmdline_record();
2083 }
2084
2085 static void trace_printk_start_stop_comm(int enabled)
2086 {
2087         if (!buffers_allocated)
2088                 return;
2089
2090         if (enabled)
2091                 tracing_start_cmdline_record();
2092         else
2093                 tracing_stop_cmdline_record();
2094 }
2095
2096 /**
2097  * trace_vbprintk - write binary msg to tracing buffer
2098  * @ip: caller address, @fmt: format string, @args: arguments for @fmt
2099  */
2100 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2101 {
2102         struct ftrace_event_call *call = &event_bprint;
2103         struct ring_buffer_event *event;
2104         struct ring_buffer *buffer;
2105         struct trace_array *tr = &global_trace;
2106         struct bprint_entry *entry;
2107         unsigned long flags;
2108         char *tbuffer;
2109         int len = 0, size, pc;
2110
2111         if (unlikely(tracing_selftest_running || tracing_disabled))
2112                 return 0;
2113
2114         /* Don't pollute graph traces with trace_vprintk internals */
2115         pause_graph_tracing();
2116
2117         pc = preempt_count();
2118         preempt_disable_notrace();
2119
2120         tbuffer = get_trace_buf();
2121         if (!tbuffer) {
2122                 len = 0;
2123                 goto out;
2124         }
2125
2126         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2127
2128         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2129                 goto out;
2130
2131         local_save_flags(flags);
2132         size = sizeof(*entry) + sizeof(u32) * len;
2133         buffer = tr->trace_buffer.buffer;
2134         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2135                                           flags, pc);
2136         if (!event)
2137                 goto out;
2138         entry = ring_buffer_event_data(event);
2139         entry->ip                       = ip;
2140         entry->fmt                      = fmt;
2141
2142         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2143         if (!call_filter_check_discard(call, entry, buffer, event)) {
2144                 __buffer_unlock_commit(buffer, event);
2145                 ftrace_trace_stack(buffer, flags, 6, pc);
2146         }
2147
2148 out:
2149         preempt_enable_notrace();
2150         unpause_graph_tracing();
2151
2152         return len;
2153 }
2154 EXPORT_SYMBOL_GPL(trace_vbprintk);
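/*
 * Note on the format: trace_vbprintk() does not expand the message.  It
 * stores the format pointer plus the raw arguments as u32 words (via
 * vbin_printf()), and the string is only rendered when the buffer is
 * read.  That keeps this fast path cheap compared to the plain
 * trace_vprintk() path below, which formats the string at record time.
 */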
2155
2156 static int
2157 __trace_array_vprintk(struct ring_buffer *buffer,
2158                       unsigned long ip, const char *fmt, va_list args)
2159 {
2160         struct ftrace_event_call *call = &event_print;
2161         struct ring_buffer_event *event;
2162         int len = 0, size, pc;
2163         struct print_entry *entry;
2164         unsigned long flags;
2165         char *tbuffer;
2166
2167         if (tracing_disabled || tracing_selftest_running)
2168                 return 0;
2169
2170         /* Don't pollute graph traces with trace_vprintk internals */
2171         pause_graph_tracing();
2172
2173         pc = preempt_count();
2174         preempt_disable_notrace();
2175
2176
2177         tbuffer = get_trace_buf();
2178         if (!tbuffer) {
2179                 len = 0;
2180                 goto out;
2181         }
2182
2183         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2184         if (len > TRACE_BUF_SIZE)
2185                 goto out;
2186
2187         local_save_flags(flags);
2188         size = sizeof(*entry) + len + 1;
2189         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2190                                           flags, pc);
2191         if (!event)
2192                 goto out;
2193         entry = ring_buffer_event_data(event);
2194         entry->ip = ip;
2195
2196         memcpy(&entry->buf, tbuffer, len);
2197         entry->buf[len] = '\0';
2198         if (!call_filter_check_discard(call, entry, buffer, event)) {
2199                 __buffer_unlock_commit(buffer, event);
2200                 ftrace_trace_stack(buffer, flags, 6, pc);
2201         }
2202  out:
2203         preempt_enable_notrace();
2204         unpause_graph_tracing();
2205
2206         return len;
2207 }
2208
2209 int trace_array_vprintk(struct trace_array *tr,
2210                         unsigned long ip, const char *fmt, va_list args)
2211 {
2212         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2213 }
2214
2215 int trace_array_printk(struct trace_array *tr,
2216                        unsigned long ip, const char *fmt, ...)
2217 {
2218         int ret;
2219         va_list ap;
2220
2221         if (!(trace_flags & TRACE_ITER_PRINTK))
2222                 return 0;
2223
2224         va_start(ap, fmt);
2225         ret = trace_array_vprintk(tr, ip, fmt, ap);
2226         va_end(ap);
2227         return ret;
2228 }
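/*
 * Usage sketch (illustrative): code holding a trace_array for an
 * instance can write into that instance's buffer directly, e.g.
 *
 *	trace_array_printk(tr, _THIS_IP_, "reset took %lu us\n", delta);
 *
 * "tr" and "delta" are hypothetical here; _THIS_IP_ supplies the
 * caller's instruction pointer for the ip field.
 */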
2229
2230 int trace_array_printk_buf(struct ring_buffer *buffer,
2231                            unsigned long ip, const char *fmt, ...)
2232 {
2233         int ret;
2234         va_list ap;
2235
2236         if (!(trace_flags & TRACE_ITER_PRINTK))
2237                 return 0;
2238
2239         va_start(ap, fmt);
2240         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2241         va_end(ap);
2242         return ret;
2243 }
2244
2245 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2246 {
2247         return trace_array_vprintk(&global_trace, ip, fmt, args);
2248 }
2249 EXPORT_SYMBOL_GPL(trace_vprintk);
2250
2251 static void trace_iterator_increment(struct trace_iterator *iter)
2252 {
2253         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2254
2255         iter->idx++;
2256         if (buf_iter)
2257                 ring_buffer_read(buf_iter, NULL);
2258 }
2259
2260 static struct trace_entry *
2261 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2262                 unsigned long *lost_events)
2263 {
2264         struct ring_buffer_event *event;
2265         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2266
2267         if (buf_iter)
2268                 event = ring_buffer_iter_peek(buf_iter, ts);
2269         else
2270                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2271                                          lost_events);
2272
2273         if (event) {
2274                 iter->ent_size = ring_buffer_event_length(event);
2275                 return ring_buffer_event_data(event);
2276         }
2277         iter->ent_size = 0;
2278         return NULL;
2279 }
2280
2281 static struct trace_entry *
2282 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2283                   unsigned long *missing_events, u64 *ent_ts)
2284 {
2285         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2286         struct trace_entry *ent, *next = NULL;
2287         unsigned long lost_events = 0, next_lost = 0;
2288         int cpu_file = iter->cpu_file;
2289         u64 next_ts = 0, ts;
2290         int next_cpu = -1;
2291         int next_size = 0;
2292         int cpu;
2293
2294         /*
2295          * If we are in a per_cpu trace file, don't bother iterating over
2296          * all cpus; just peek at that cpu directly.
2297          */
2298         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2299                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2300                         return NULL;
2301                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2302                 if (ent_cpu)
2303                         *ent_cpu = cpu_file;
2304
2305                 return ent;
2306         }
2307
2308         for_each_tracing_cpu(cpu) {
2309
2310                 if (ring_buffer_empty_cpu(buffer, cpu))
2311                         continue;
2312
2313                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2314
2315                 /*
2316                  * Pick the entry with the smallest timestamp:
2317                  */
2318                 if (ent && (!next || ts < next_ts)) {
2319                         next = ent;
2320                         next_cpu = cpu;
2321                         next_ts = ts;
2322                         next_lost = lost_events;
2323                         next_size = iter->ent_size;
2324                 }
2325         }
2326
2327         iter->ent_size = next_size;
2328
2329         if (ent_cpu)
2330                 *ent_cpu = next_cpu;
2331
2332         if (ent_ts)
2333                 *ent_ts = next_ts;
2334
2335         if (missing_events)
2336                 *missing_events = next_lost;
2337
2338         return next;
2339 }
2340
2341 /* Find the next real entry, without updating the iterator itself */
2342 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2343                                           int *ent_cpu, u64 *ent_ts)
2344 {
2345         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2346 }
2347
2348 /* Find the next real entry, and increment the iterator to the next entry */
2349 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2350 {
2351         iter->ent = __find_next_entry(iter, &iter->cpu,
2352                                       &iter->lost_events, &iter->ts);
2353
2354         if (iter->ent)
2355                 trace_iterator_increment(iter);
2356
2357         return iter->ent ? iter : NULL;
2358 }
2359
2360 static void trace_consume(struct trace_iterator *iter)
2361 {
2362         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2363                             &iter->lost_events);
2364 }
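/*
 * Illustrative sketch: a consuming reader (in the style of trace_pipe)
 * walks entries with the helpers above, printing each one and then
 * removing it from the buffer:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 *
 * The non-consuming "trace" file uses the seq_file iterator below
 * instead and leaves the entries in place.
 */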
2365
2366 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2367 {
2368         struct trace_iterator *iter = m->private;
2369         int i = (int)*pos;
2370         void *ent;
2371
2372         WARN_ON_ONCE(iter->leftover);
2373
2374         (*pos)++;
2375
2376         /* can't go backwards */
2377         if (iter->idx > i)
2378                 return NULL;
2379
2380         if (iter->idx < 0)
2381                 ent = trace_find_next_entry_inc(iter);
2382         else
2383                 ent = iter;
2384
2385         while (ent && iter->idx < i)
2386                 ent = trace_find_next_entry_inc(iter);
2387
2388         iter->pos = *pos;
2389
2390         return ent;
2391 }
2392
2393 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2394 {
2395         struct ring_buffer_event *event;
2396         struct ring_buffer_iter *buf_iter;
2397         unsigned long entries = 0;
2398         u64 ts;
2399
2400         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2401
2402         buf_iter = trace_buffer_iter(iter, cpu);
2403         if (!buf_iter)
2404                 return;
2405
2406         ring_buffer_iter_reset(buf_iter);
2407
2408         /*
2409          * With the max latency tracers it is possible that a reset
2410          * never took place on a cpu. This shows up as a timestamp
2411          * before the start of the buffer.
2412          */
2413         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2414                 if (ts >= iter->trace_buffer->time_start)
2415                         break;
2416                 entries++;
2417                 ring_buffer_read(buf_iter, NULL);
2418         }
2419
2420         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2421 }
2422
2423 /*
2424  * The current tracer is copied to avoid using a global lock
2425  * all around.
2426  */
2427 static void *s_start(struct seq_file *m, loff_t *pos)
2428 {
2429         struct trace_iterator *iter = m->private;
2430         struct trace_array *tr = iter->tr;
2431         int cpu_file = iter->cpu_file;
2432         void *p = NULL;
2433         loff_t l = 0;
2434         int cpu;
2435
2436         /*
2437          * copy the tracer to avoid using a global lock all around.
2438          * iter->trace is a copy of current_trace, the pointer to the
2439          * name may be used instead of a strcmp(), as iter->trace->name
2440          * will point to the same string as current_trace->name.
2441          */
2442         mutex_lock(&trace_types_lock);
2443         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2444                 *iter->trace = *tr->current_trace;
2445         mutex_unlock(&trace_types_lock);
2446
2447 #ifdef CONFIG_TRACER_MAX_TRACE
2448         if (iter->snapshot && iter->trace->use_max_tr)
2449                 return ERR_PTR(-EBUSY);
2450 #endif
2451
2452         if (!iter->snapshot)
2453                 atomic_inc(&trace_record_cmdline_disabled);
2454
2455         if (*pos != iter->pos) {
2456                 iter->ent = NULL;
2457                 iter->cpu = 0;
2458                 iter->idx = -1;
2459
2460                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2461                         for_each_tracing_cpu(cpu)
2462                                 tracing_iter_reset(iter, cpu);
2463                 } else
2464                         tracing_iter_reset(iter, cpu_file);
2465
2466                 iter->leftover = 0;
2467                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2468                         ;
2469
2470         } else {
2471                 /*
2472                  * If we overflowed the seq_file before, then we want
2473                  * to just reuse the trace_seq buffer again.
2474                  */
2475                 if (iter->leftover)
2476                         p = iter;
2477                 else {
2478                         l = *pos - 1;
2479                         p = s_next(m, p, &l);
2480                 }
2481         }
2482
2483         trace_event_read_lock();
2484         trace_access_lock(cpu_file);
2485         return p;
2486 }
2487
2488 static void s_stop(struct seq_file *m, void *p)
2489 {
2490         struct trace_iterator *iter = m->private;
2491
2492 #ifdef CONFIG_TRACER_MAX_TRACE
2493         if (iter->snapshot && iter->trace->use_max_tr)
2494                 return;
2495 #endif
2496
2497         if (!iter->snapshot)
2498                 atomic_dec(&trace_record_cmdline_disabled);
2499
2500         trace_access_unlock(iter->cpu_file);
2501         trace_event_read_unlock();
2502 }
2503
2504 static void
2505 get_total_entries(struct trace_buffer *buf,
2506                   unsigned long *total, unsigned long *entries)
2507 {
2508         unsigned long count;
2509         int cpu;
2510
2511         *total = 0;
2512         *entries = 0;
2513
2514         for_each_tracing_cpu(cpu) {
2515                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2516                 /*
2517                  * If this buffer has skipped entries, then we hold all
2518                  * entries for the trace and we need to ignore the
2519                  * ones before the time stamp.
2520                  */
2521                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2522                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2523                         /* total is the same as the entries */
2524                         *total += count;
2525                 } else
2526                         *total += count +
2527                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2528                 *entries += count;
2529         }
2530 }
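/*
 * Worked example (illustrative) for get_total_entries() above: if a cpu
 * buffer currently holds 100 entries and 20 older ones were overwritten,
 * this adds 100 to *entries and 120 to *total, so "entries-written"
 * also counts what was lost to overruns.  For a buffer with
 * skipped_entries set, the skipped ones are subtracted from both.
 */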
2531
2532 static void print_lat_help_header(struct seq_file *m)
2533 {
2534         seq_puts(m, "#                  _------=> CPU#            \n");
2535         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2536         seq_puts(m, "#                | / _----=> need-resched    \n");
2537         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2538         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2539         seq_puts(m, "#                |||| /     delay             \n");
2540         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2541         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2542 }
2543
2544 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2545 {
2546         unsigned long total;
2547         unsigned long entries;
2548
2549         get_total_entries(buf, &total, &entries);
2550         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2551                    entries, total, num_online_cpus());
2552         seq_puts(m, "#\n");
2553 }
2554
2555 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2556 {
2557         print_event_info(buf, m);
2558         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2559         seq_puts(m, "#              | |       |          |         |\n");
2560 }
2561
2562 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2563 {
2564         print_event_info(buf, m);
2565         seq_puts(m, "#                              _-----=> irqs-off\n");
2566         seq_puts(m, "#                             / _----=> need-resched\n");
2567         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2568         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2569         seq_puts(m, "#                            ||| /     delay\n");
2570         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2571         seq_puts(m, "#              | |       |   ||||       |         |\n");
2572 }
2573
2574 void
2575 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2576 {
2577         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2578         struct trace_buffer *buf = iter->trace_buffer;
2579         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2580         struct tracer *type = iter->trace;
2581         unsigned long entries;
2582         unsigned long total;
2583         const char *name = "preemption";
2584
2585         name = type->name;
2586
2587         get_total_entries(buf, &total, &entries);
2588
2589         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2590                    name, UTS_RELEASE);
2591         seq_puts(m, "# -----------------------------------"
2592                  "---------------------------------\n");
2593         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2594                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2595                    nsecs_to_usecs(data->saved_latency),
2596                    entries,
2597                    total,
2598                    buf->cpu,
2599 #if defined(CONFIG_PREEMPT_NONE)
2600                    "server",
2601 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2602                    "desktop",
2603 #elif defined(CONFIG_PREEMPT)
2604                    "preempt",
2605 #else
2606                    "unknown",
2607 #endif
2608                    /* These are reserved for later use */
2609                    0, 0, 0, 0);
2610 #ifdef CONFIG_SMP
2611         seq_printf(m, " #P:%d)\n", num_online_cpus());
2612 #else
2613         seq_puts(m, ")\n");
2614 #endif
2615         seq_puts(m, "#    -----------------\n");
2616         seq_printf(m, "#    | task: %.16s-%d "
2617                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2618                    data->comm, data->pid,
2619                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2620                    data->policy, data->rt_priority);
2621         seq_puts(m, "#    -----------------\n");
2622
2623         if (data->critical_start) {
2624                 seq_puts(m, "#  => started at: ");
2625                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2626                 trace_print_seq(m, &iter->seq);
2627                 seq_puts(m, "\n#  => ended at:   ");
2628                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2629                 trace_print_seq(m, &iter->seq);
2630                 seq_puts(m, "\n#\n");
2631         }
2632
2633         seq_puts(m, "#\n");
2634 }
2635
2636 static void test_cpu_buff_start(struct trace_iterator *iter)
2637 {
2638         struct trace_seq *s = &iter->seq;
2639
2640         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2641                 return;
2642
2643         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2644                 return;
2645
2646         if (cpumask_test_cpu(iter->cpu, iter->started))
2647                 return;
2648
2649         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2650                 return;
2651
2652         cpumask_set_cpu(iter->cpu, iter->started);
2653
2654         /* Don't print started cpu buffer for the first entry of the trace */
2655         if (iter->idx > 1)
2656                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2657                                 iter->cpu);
2658 }
2659
2660 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2661 {
2662         struct trace_seq *s = &iter->seq;
2663         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2664         struct trace_entry *entry;
2665         struct trace_event *event;
2666
2667         entry = iter->ent;
2668
2669         test_cpu_buff_start(iter);
2670
2671         event = ftrace_find_event(entry->type);
2672
2673         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2674                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2675                         if (!trace_print_lat_context(iter))
2676                                 goto partial;
2677                 } else {
2678                         if (!trace_print_context(iter))
2679                                 goto partial;
2680                 }
2681         }
2682
2683         if (event)
2684                 return event->funcs->trace(iter, sym_flags, event);
2685
2686         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2687                 goto partial;
2688
2689         return TRACE_TYPE_HANDLED;
2690 partial:
2691         return TRACE_TYPE_PARTIAL_LINE;
2692 }
2693
2694 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2695 {
2696         struct trace_seq *s = &iter->seq;
2697         struct trace_entry *entry;
2698         struct trace_event *event;
2699
2700         entry = iter->ent;
2701
2702         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2703                 if (!trace_seq_printf(s, "%d %d %llu ",
2704                                       entry->pid, iter->cpu, iter->ts))
2705                         goto partial;
2706         }
2707
2708         event = ftrace_find_event(entry->type);
2709         if (event)
2710                 return event->funcs->raw(iter, 0, event);
2711
2712         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2713                 goto partial;
2714
2715         return TRACE_TYPE_HANDLED;
2716 partial:
2717         return TRACE_TYPE_PARTIAL_LINE;
2718 }
2719
2720 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2721 {
2722         struct trace_seq *s = &iter->seq;
2723         unsigned char newline = '\n';
2724         struct trace_entry *entry;
2725         struct trace_event *event;
2726
2727         entry = iter->ent;
2728
2729         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2730                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2731                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2732                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2733         }
2734
2735         event = ftrace_find_event(entry->type);
2736         if (event) {
2737                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2738                 if (ret != TRACE_TYPE_HANDLED)
2739                         return ret;
2740         }
2741
2742         SEQ_PUT_FIELD_RET(s, newline);
2743
2744         return TRACE_TYPE_HANDLED;
2745 }
2746
2747 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2748 {
2749         struct trace_seq *s = &iter->seq;
2750         struct trace_entry *entry;
2751         struct trace_event *event;
2752
2753         entry = iter->ent;
2754
2755         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2756                 SEQ_PUT_FIELD_RET(s, entry->pid);
2757                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2758                 SEQ_PUT_FIELD_RET(s, iter->ts);
2759         }
2760
2761         event = ftrace_find_event(entry->type);
2762         return event ? event->funcs->binary(iter, 0, event) :
2763                 TRACE_TYPE_HANDLED;
2764 }
2765
2766 int trace_empty(struct trace_iterator *iter)
2767 {
2768         struct ring_buffer_iter *buf_iter;
2769         int cpu;
2770
2771         /* If we are looking at one CPU buffer, only check that one */
2772         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2773                 cpu = iter->cpu_file;
2774                 buf_iter = trace_buffer_iter(iter, cpu);
2775                 if (buf_iter) {
2776                         if (!ring_buffer_iter_empty(buf_iter))
2777                                 return 0;
2778                 } else {
2779                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2780                                 return 0;
2781                 }
2782                 return 1;
2783         }
2784
2785         for_each_tracing_cpu(cpu) {
2786                 buf_iter = trace_buffer_iter(iter, cpu);
2787                 if (buf_iter) {
2788                         if (!ring_buffer_iter_empty(buf_iter))
2789                                 return 0;
2790                 } else {
2791                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2792                                 return 0;
2793                 }
2794         }
2795
2796         return 1;
2797 }
2798
2799 /*  Called with trace_event_read_lock() held. */
2800 enum print_line_t print_trace_line(struct trace_iterator *iter)
2801 {
2802         enum print_line_t ret;
2803
2804         if (iter->lost_events &&
2805             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2806                                  iter->cpu, iter->lost_events))
2807                 return TRACE_TYPE_PARTIAL_LINE;
2808
2809         if (iter->trace && iter->trace->print_line) {
2810                 ret = iter->trace->print_line(iter);
2811                 if (ret != TRACE_TYPE_UNHANDLED)
2812                         return ret;
2813         }
2814
2815         if (iter->ent->type == TRACE_BPUTS &&
2816                         trace_flags & TRACE_ITER_PRINTK &&
2817                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2818                 return trace_print_bputs_msg_only(iter);
2819
2820         if (iter->ent->type == TRACE_BPRINT &&
2821                         trace_flags & TRACE_ITER_PRINTK &&
2822                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2823                 return trace_print_bprintk_msg_only(iter);
2824
2825         if (iter->ent->type == TRACE_PRINT &&
2826                         trace_flags & TRACE_ITER_PRINTK &&
2827                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2828                 return trace_print_printk_msg_only(iter);
2829
2830         if (trace_flags & TRACE_ITER_BIN)
2831                 return print_bin_fmt(iter);
2832
2833         if (trace_flags & TRACE_ITER_HEX)
2834                 return print_hex_fmt(iter);
2835
2836         if (trace_flags & TRACE_ITER_RAW)
2837                 return print_raw_fmt(iter);
2838
2839         return print_trace_fmt(iter);
2840 }
2841
2842 void trace_latency_header(struct seq_file *m)
2843 {
2844         struct trace_iterator *iter = m->private;
2845
2846         /* print nothing if the buffers are empty */
2847         if (trace_empty(iter))
2848                 return;
2849
2850         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2851                 print_trace_header(m, iter);
2852
2853         if (!(trace_flags & TRACE_ITER_VERBOSE))
2854                 print_lat_help_header(m);
2855 }
2856
2857 void trace_default_header(struct seq_file *m)
2858 {
2859         struct trace_iterator *iter = m->private;
2860
2861         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2862                 return;
2863
2864         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2865                 /* print nothing if the buffers are empty */
2866                 if (trace_empty(iter))
2867                         return;
2868                 print_trace_header(m, iter);
2869                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2870                         print_lat_help_header(m);
2871         } else {
2872                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2873                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2874                                 print_func_help_header_irq(iter->trace_buffer, m);
2875                         else
2876                                 print_func_help_header(iter->trace_buffer, m);
2877                 }
2878         }
2879 }
2880
2881 static void test_ftrace_alive(struct seq_file *m)
2882 {
2883         if (!ftrace_is_dead())
2884                 return;
2885         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2886         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2887 }
2888
2889 #ifdef CONFIG_TRACER_MAX_TRACE
2890 static void show_snapshot_main_help(struct seq_file *m)
2891 {
2892         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2893         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2894         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2895         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2896         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2897         seq_printf(m, "#                       is not a '0' or '1')\n");
2898 }
2899
2900 static void show_snapshot_percpu_help(struct seq_file *m)
2901 {
2902         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2903 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2904         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2905         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2906 #else
2907         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2908         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2909 #endif
2910         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2911         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2912         seq_printf(m, "#                       is not a '0' or '1')\n");
2913 }
2914
2915 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2916 {
2917         if (iter->tr->allocated_snapshot)
2918                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2919         else
2920                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2921
2922         seq_printf(m, "# Snapshot commands:\n");
2923         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2924                 show_snapshot_main_help(m);
2925         else
2926                 show_snapshot_percpu_help(m);
2927 }
2928 #else
2929 /* Should never be called */
2930 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2931 #endif
2932
2933 static int s_show(struct seq_file *m, void *v)
2934 {
2935         struct trace_iterator *iter = v;
2936         int ret;
2937
2938         if (iter->ent == NULL) {
2939                 if (iter->tr) {
2940                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2941                         seq_puts(m, "#\n");
2942                         test_ftrace_alive(m);
2943                 }
2944                 if (iter->snapshot && trace_empty(iter))
2945                         print_snapshot_help(m, iter);
2946                 else if (iter->trace && iter->trace->print_header)
2947                         iter->trace->print_header(m);
2948                 else
2949                         trace_default_header(m);
2950
2951         } else if (iter->leftover) {
2952                 /*
2953                  * If we filled the seq_file buffer earlier, we
2954                  * want to just show it now.
2955                  */
2956                 ret = trace_print_seq(m, &iter->seq);
2957
2958                 /* ret should this time be zero, but you never know */
2959                 iter->leftover = ret;
2960
2961         } else {
2962                 print_trace_line(iter);
2963                 ret = trace_print_seq(m, &iter->seq);
2964                 /*
2965                  * If we overflow the seq_file buffer, then it will
2966                  * ask us for this data again at start up.
2967                  * Use that instead.
2968                  *  ret is 0 if seq_file write succeeded.
2969                  *        -1 otherwise.
2970                  */
2971                 iter->leftover = ret;
2972         }
2973
2974         return 0;
2975 }
2976
2977 /*
2978  * Should be used after trace_array_get(), trace_types_lock
2979  * ensures that i_cdev was already initialized.
2980  */
2981 static inline int tracing_get_cpu(struct inode *inode)
2982 {
2983         if (inode->i_cdev) /* See trace_create_cpu_file() */
2984                 return (long)inode->i_cdev - 1;
2985         return RING_BUFFER_ALL_CPUS;
2986 }
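/*
 * For reference (sketch of the counterpart): trace_create_cpu_file()
 * stores "cpu + 1" in i_cdev when creating the per-cpu files, roughly
 *
 *	inode->i_cdev = (void *)(cpu + 1);
 *
 * so a NULL i_cdev here means "all cpus" and anything else decodes back
 * to the cpu number via the -1 above.
 */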
2987
2988 static const struct seq_operations tracer_seq_ops = {
2989         .start          = s_start,
2990         .next           = s_next,
2991         .stop           = s_stop,
2992         .show           = s_show,
2993 };
2994
2995 static struct trace_iterator *
2996 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2997 {
2998         struct trace_array *tr = inode->i_private;
2999         struct trace_iterator *iter;
3000         int cpu;
3001
3002         if (tracing_disabled)
3003                 return ERR_PTR(-ENODEV);
3004
3005         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3006         if (!iter)
3007                 return ERR_PTR(-ENOMEM);
3008
3009         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
3010                                     GFP_KERNEL);
3011         if (!iter->buffer_iter)
3012                 goto release;
3013
3014         /*
3015          * We make a copy of the current tracer to avoid concurrent
3016          * changes on it while we are reading.
3017          */
3018         mutex_lock(&trace_types_lock);
3019         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3020         if (!iter->trace)
3021                 goto fail;
3022
3023         *iter->trace = *tr->current_trace;
3024
3025         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3026                 goto fail;
3027
3028         iter->tr = tr;
3029
3030 #ifdef CONFIG_TRACER_MAX_TRACE
3031         /* Currently only the top directory has a snapshot */
3032         if (tr->current_trace->print_max || snapshot)
3033                 iter->trace_buffer = &tr->max_buffer;
3034         else
3035 #endif
3036                 iter->trace_buffer = &tr->trace_buffer;
3037         iter->snapshot = snapshot;
3038         iter->pos = -1;
3039         iter->cpu_file = tracing_get_cpu(inode);
3040         mutex_init(&iter->mutex);
3041
3042         /* Notify the tracer early; before we stop tracing. */
3043         if (iter->trace && iter->trace->open)
3044                 iter->trace->open(iter);
3045
3046         /* Annotate start of buffers if we had overruns */
3047         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3048                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3049
3050         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3051         if (trace_clocks[tr->clock_id].in_ns)
3052                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3053
3054         /* stop the trace while dumping if we are not opening "snapshot" */
3055         if (!iter->snapshot)
3056                 tracing_stop_tr(tr);
3057
3058         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3059                 for_each_tracing_cpu(cpu) {
3060                         iter->buffer_iter[cpu] =
3061                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3062                 }
3063                 ring_buffer_read_prepare_sync();
3064                 for_each_tracing_cpu(cpu) {
3065                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3066                         tracing_iter_reset(iter, cpu);
3067                 }
3068         } else {
3069                 cpu = iter->cpu_file;
3070                 iter->buffer_iter[cpu] =
3071                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3072                 ring_buffer_read_prepare_sync();
3073                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3074                 tracing_iter_reset(iter, cpu);
3075         }
3076
3077         mutex_unlock(&trace_types_lock);
3078
3079         return iter;
3080
3081  fail:
3082         mutex_unlock(&trace_types_lock);
3083         kfree(iter->trace);
3084         kfree(iter->buffer_iter);
3085 release:
3086         seq_release_private(inode, file);
3087         return ERR_PTR(-ENOMEM);
3088 }
3089
3090 int tracing_open_generic(struct inode *inode, struct file *filp)
3091 {
3092         if (tracing_disabled)
3093                 return -ENODEV;
3094
3095         filp->private_data = inode->i_private;
3096         return 0;
3097 }
3098
3099 bool tracing_is_disabled(void)
3100 {
3101         return (tracing_disabled) ? true : false;
3102 }
3103
3104 /*
3105  * Open and update trace_array ref count.
3106  * Must have the current trace_array passed to it.
3107  */
3108 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3109 {
3110         struct trace_array *tr = inode->i_private;
3111
3112         if (tracing_disabled)
3113                 return -ENODEV;
3114
3115         if (trace_array_get(tr) < 0)
3116                 return -ENODEV;
3117
3118         filp->private_data = inode->i_private;
3119
3120         return 0;
3121 }
3122
3123 static int tracing_release(struct inode *inode, struct file *file)
3124 {
3125         struct trace_array *tr = inode->i_private;
3126         struct seq_file *m = file->private_data;
3127         struct trace_iterator *iter;
3128         int cpu;
3129
3130         if (!(file->f_mode & FMODE_READ)) {
3131                 trace_array_put(tr);
3132                 return 0;
3133         }
3134
3135         /* Writes do not use seq_file */
3136         iter = m->private;
3137         mutex_lock(&trace_types_lock);
3138
3139         for_each_tracing_cpu(cpu) {
3140                 if (iter->buffer_iter[cpu])
3141                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3142         }
3143
3144         if (iter->trace && iter->trace->close)
3145                 iter->trace->close(iter);
3146
3147         if (!iter->snapshot)
3148                 /* reenable tracing if it was previously enabled */
3149                 tracing_start_tr(tr);
3150
3151         __trace_array_put(tr);
3152
3153         mutex_unlock(&trace_types_lock);
3154
3155         mutex_destroy(&iter->mutex);
3156         free_cpumask_var(iter->started);
3157         kfree(iter->trace);
3158         kfree(iter->buffer_iter);
3159         seq_release_private(inode, file);
3160
3161         return 0;
3162 }
3163
3164 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3165 {
3166         struct trace_array *tr = inode->i_private;
3167
3168         trace_array_put(tr);
3169         return 0;
3170 }
3171
3172 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3173 {
3174         struct trace_array *tr = inode->i_private;
3175
3176         trace_array_put(tr);
3177
3178         return single_release(inode, file);
3179 }
3180
3181 static int tracing_open(struct inode *inode, struct file *file)
3182 {
3183         struct trace_array *tr = inode->i_private;
3184         struct trace_iterator *iter;
3185         int ret = 0;
3186
3187         if (trace_array_get(tr) < 0)
3188                 return -ENODEV;
3189
3190         /* If this file was open for write, then erase contents */
3191         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3192                 int cpu = tracing_get_cpu(inode);
3193
3194                 if (cpu == RING_BUFFER_ALL_CPUS)
3195                         tracing_reset_online_cpus(&tr->trace_buffer);
3196                 else
3197                         tracing_reset(&tr->trace_buffer, cpu);
3198         }
3199
3200         if (file->f_mode & FMODE_READ) {
3201                 iter = __tracing_open(inode, file, false);
3202                 if (IS_ERR(iter))
3203                         ret = PTR_ERR(iter);
3204                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3205                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3206         }
3207
3208         if (ret < 0)
3209                 trace_array_put(tr);
3210
3211         return ret;
3212 }
3213
3214 /*
3215  * Some tracers are not suitable for instance buffers.
3216  * A tracer is always available for the global array (toplevel)
3217  * or if it explicitly states that it is.
3218  */
3219 static bool
3220 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3221 {
3222         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3223 }
3224
3225 /* Find the next tracer that this trace array may use */
3226 static struct tracer *
3227 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3228 {
3229         while (t && !trace_ok_for_array(t, tr))
3230                 t = t->next;
3231
3232         return t;
3233 }
3234
3235 static void *
3236 t_next(struct seq_file *m, void *v, loff_t *pos)
3237 {
3238         struct trace_array *tr = m->private;
3239         struct tracer *t = v;
3240
3241         (*pos)++;
3242
3243         if (t)
3244                 t = get_tracer_for_array(tr, t->next);
3245
3246         return t;
3247 }
3248
3249 static void *t_start(struct seq_file *m, loff_t *pos)
3250 {
3251         struct trace_array *tr = m->private;
3252         struct tracer *t;
3253         loff_t l = 0;
3254
3255         mutex_lock(&trace_types_lock);
3256
3257         t = get_tracer_for_array(tr, trace_types);
3258         for (; t && l < *pos; t = t_next(m, t, &l))
3259                 ;
3260
3261         return t;
3262 }
3263
3264 static void t_stop(struct seq_file *m, void *p)
3265 {
3266         mutex_unlock(&trace_types_lock);
3267 }
3268
3269 static int t_show(struct seq_file *m, void *v)
3270 {
3271         struct tracer *t = v;
3272
3273         if (!t)
3274                 return 0;
3275
3276         seq_printf(m, "%s", t->name);
3277         if (t->next)
3278                 seq_putc(m, ' ');
3279         else
3280                 seq_putc(m, '\n');
3281
3282         return 0;
3283 }
3284
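
/*
 * seq_file operations backing the "available_tracers" file.  The walk
 * over trace_types is done under trace_types_lock and filtered through
 * trace_ok_for_array(), so instance directories only list tracers they
 * are allowed to use.  The output is a single space-separated line,
 * for example (which names appear depends on the kernel configuration):
 *
 *   # cat available_tracers
 *   function_graph function nop
 */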
3285 static const struct seq_operations show_traces_seq_ops = {
3286         .start          = t_start,
3287         .next           = t_next,
3288         .stop           = t_stop,
3289         .show           = t_show,
3290 };
3291
3292 static int show_traces_open(struct inode *inode, struct file *file)
3293 {
3294         struct trace_array *tr = inode->i_private;
3295         struct seq_file *m;
3296         int ret;
3297
3298         if (tracing_disabled)
3299                 return -ENODEV;
3300
3301         ret = seq_open(file, &show_traces_seq_ops);
3302         if (ret)
3303                 return ret;
3304
3305         m = file->private_data;
3306         m->private = tr;
3307
3308         return 0;
3309 }
3310
3311 static ssize_t
3312 tracing_write_stub(struct file *filp, const char __user *ubuf,
3313                    size_t count, loff_t *ppos)
3314 {
3315         return count;
3316 }
3317
3318 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3319 {
3320         int ret;
3321
3322         if (file->f_mode & FMODE_READ)
3323                 ret = seq_lseek(file, offset, whence);
3324         else
3325                 file->f_pos = ret = 0;
3326
3327         return ret;
3328 }
3329
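/*
 * File operations for the "trace" file.  A rough usage sketch (paths
 * assume the usual tracing directory, e.g. /sys/kernel/debug/tracing;
 * the mount point is an assumption):
 *
 *   # cat trace       - dump the static contents of the ring buffer
 *   # echo > trace    - clear the buffer; the open with O_TRUNC resets it
 *
 * Reads go through seq_file via tracing_open(); writes are accepted but
 * ignored by tracing_write_stub().
 */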
3330 static const struct file_operations tracing_fops = {
3331         .open           = tracing_open,
3332         .read           = seq_read,
3333         .write          = tracing_write_stub,
3334         .llseek         = tracing_lseek,
3335         .release        = tracing_release,
3336 };
3337
3338 static const struct file_operations show_traces_fops = {
3339         .open           = show_traces_open,
3340         .read           = seq_read,
3341         .release        = seq_release,
3342         .llseek         = seq_lseek,
3343 };
3344
3345 /*
3346  * The tracer itself will not take this lock, but still we want
3347  * to provide a consistent cpumask to user-space:
3348  */
3349 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3350
3351 /*
3352  * Temporary storage for the character representation of the
3353  * CPU bitmask (and one more byte for the newline):
3354  */
3355 static char mask_str[NR_CPUS + 1];
3356
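/*
 * Handlers for the "tracing_cpumask" file, which limits the CPUs that
 * are traced.  A rough usage sketch (the mask is read and written as a
 * hex cpumask):
 *
 *   # cat tracing_cpumask         - e.g. "f" on a 4-CPU box
 *   # echo 3 > tracing_cpumask    - trace only CPUs 0 and 1
 *
 * The write path below flips per-cpu recording on or off while holding
 * tr->max_lock with interrupts disabled.
 */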
3357 static ssize_t
3358 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3359                      size_t count, loff_t *ppos)
3360 {
3361         struct trace_array *tr = file_inode(filp)->i_private;
3362         int len;
3363
3364         mutex_lock(&tracing_cpumask_update_lock);
3365
3366         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3367         if (count - len < 2) {
3368                 count = -EINVAL;
3369                 goto out_err;
3370         }
3371         len += sprintf(mask_str + len, "\n");
3372         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3373
3374 out_err:
3375         mutex_unlock(&tracing_cpumask_update_lock);
3376
3377         return count;
3378 }
3379
3380 static ssize_t
3381 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3382                       size_t count, loff_t *ppos)
3383 {
3384         struct trace_array *tr = file_inode(filp)->i_private;
3385         cpumask_var_t tracing_cpumask_new;
3386         int err, cpu;
3387
3388         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3389                 return -ENOMEM;
3390
3391         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3392         if (err)
3393                 goto err_unlock;
3394
3395         mutex_lock(&tracing_cpumask_update_lock);
3396
3397         local_irq_disable();
3398         arch_spin_lock(&tr->max_lock);
3399         for_each_tracing_cpu(cpu) {
3400                 /*
3401                  * Increase/decrease the disabled counter if we are
3402                  * about to flip a bit in the cpumask:
3403                  */
3404                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3405                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3406                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3407                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3408                 }
3409                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3410                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3411                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3412                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3413                 }
3414         }
3415         arch_spin_unlock(&tr->max_lock);
3416         local_irq_enable();
3417
3418         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3419
3420         mutex_unlock(&tracing_cpumask_update_lock);
3421         free_cpumask_var(tracing_cpumask_new);
3422
3423         return count;
3424
3425 err_unlock:
3426         free_cpumask_var(tracing_cpumask_new);
3427
3428         return err;
3429 }
3430
3431 static const struct file_operations tracing_cpumask_fops = {
3432         .open           = tracing_open_generic_tr,
3433         .read           = tracing_cpumask_read,
3434         .write          = tracing_cpumask_write,
3435         .release        = tracing_release_generic_tr,
3436         .llseek         = generic_file_llseek,
3437 };
3438
3439 static int tracing_trace_options_show(struct seq_file *m, void *v)
3440 {
3441         struct tracer_opt *trace_opts;
3442         struct trace_array *tr = m->private;
3443         u32 tracer_flags;
3444         int i;
3445
3446         mutex_lock(&trace_types_lock);
3447         tracer_flags = tr->current_trace->flags->val;
3448         trace_opts = tr->current_trace->flags->opts;
3449
3450         for (i = 0; trace_options[i]; i++) {
3451                 if (trace_flags & (1 << i))
3452                         seq_printf(m, "%s\n", trace_options[i]);
3453                 else
3454                         seq_printf(m, "no%s\n", trace_options[i]);
3455         }
3456
3457         for (i = 0; trace_opts[i].name; i++) {
3458                 if (tracer_flags & trace_opts[i].bit)
3459                         seq_printf(m, "%s\n", trace_opts[i].name);
3460                 else
3461                         seq_printf(m, "no%s\n", trace_opts[i].name);
3462         }
3463         mutex_unlock(&trace_types_lock);
3464
3465         return 0;
3466 }
3467
3468 static int __set_tracer_option(struct trace_array *tr,
3469                                struct tracer_flags *tracer_flags,
3470                                struct tracer_opt *opts, int neg)
3471 {
3472         struct tracer *trace = tr->current_trace;
3473         int ret;
3474
3475         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3476         if (ret)
3477                 return ret;
3478
3479         if (neg)
3480                 tracer_flags->val &= ~opts->bit;
3481         else
3482                 tracer_flags->val |= opts->bit;
3483         return 0;
3484 }
3485
3486 /* Try to assign a tracer specific option */
3487 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3488 {
3489         struct tracer *trace = tr->current_trace;
3490         struct tracer_flags *tracer_flags = trace->flags;
3491         struct tracer_opt *opts = NULL;
3492         int i;
3493
3494         for (i = 0; tracer_flags->opts[i].name; i++) {
3495                 opts = &tracer_flags->opts[i];
3496
3497                 if (strcmp(cmp, opts->name) == 0)
3498                         return __set_tracer_option(tr, trace->flags, opts, neg);
3499         }
3500
3501         return -EINVAL;
3502 }
3503
3504 /* Some tracers require overwrite to stay enabled */
3505 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3506 {
3507         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3508                 return -1;
3509
3510         return 0;
3511 }
3512
3513 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3514 {
3515         /* do nothing if flag is already set */
3516         if (!!(trace_flags & mask) == !!enabled)
3517                 return 0;
3518
3519         /* Give the tracer a chance to approve the change */
3520         if (tr->current_trace->flag_changed)
3521                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3522                         return -EINVAL;
3523
3524         if (enabled)
3525                 trace_flags |= mask;
3526         else
3527                 trace_flags &= ~mask;
3528
3529         if (mask == TRACE_ITER_RECORD_CMD)
3530                 trace_event_enable_cmd_record(enabled);
3531
3532         if (mask == TRACE_ITER_OVERWRITE) {
3533                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3534 #ifdef CONFIG_TRACER_MAX_TRACE
3535                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3536 #endif
3537         }
3538
3539         if (mask == TRACE_ITER_PRINTK)
3540                 trace_printk_start_stop_comm(enabled);
3541
3542         return 0;
3543 }
3544
3545 static int trace_set_options(struct trace_array *tr, char *option)
3546 {
3547         char *cmp;
3548         int neg = 0;
3549         int ret = -ENODEV;
3550         int i;
3551
3552         cmp = strstrip(option);
3553
3554         if (strncmp(cmp, "no", 2) == 0) {
3555                 neg = 1;
3556                 cmp += 2;
3557         }
3558
3559         mutex_lock(&trace_types_lock);
3560
3561         for (i = 0; trace_options[i]; i++) {
3562                 if (strcmp(cmp, trace_options[i]) == 0) {
3563                         ret = set_tracer_flag(tr, 1 << i, !neg);
3564                         break;
3565                 }
3566         }
3567
3568         /* If no option could be set, test the specific tracer options */
3569         if (!trace_options[i])
3570                 ret = set_tracer_option(tr, cmp, neg);
3571
3572         mutex_unlock(&trace_types_lock);
3573
3574         return ret;
3575 }
3576
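/*
 * Write handler for the "trace_options" file.  Expected input is a
 * single option name, optionally prefixed with "no" to clear it
 * (option names here are only illustrative; the real list comes from
 * trace_options[] and the current tracer's flag table):
 *
 *   # echo print-parent > trace_options
 *   # echo noprint-parent > trace_options
 *
 * trace_set_options() above tries the global flags first and falls
 * back to the tracer-specific options.
 */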
3577 static ssize_t
3578 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3579                         size_t cnt, loff_t *ppos)
3580 {
3581         struct seq_file *m = filp->private_data;
3582         struct trace_array *tr = m->private;
3583         char buf[64];
3584         int ret;
3585
3586         if (cnt >= sizeof(buf))
3587                 return -EINVAL;
3588
3589         if (copy_from_user(&buf, ubuf, cnt))
3590                 return -EFAULT;
3591
3592         buf[cnt] = 0;
3593
3594         ret = trace_set_options(tr, buf);
3595         if (ret < 0)
3596                 return ret;
3597
3598         *ppos += cnt;
3599
3600         return cnt;
3601 }
3602
3603 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3604 {
3605         struct trace_array *tr = inode->i_private;
3606         int ret;
3607
3608         if (tracing_disabled)
3609                 return -ENODEV;
3610
3611         if (trace_array_get(tr) < 0)
3612                 return -ENODEV;
3613
3614         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3615         if (ret < 0)
3616                 trace_array_put(tr);
3617
3618         return ret;
3619 }
3620
3621 static const struct file_operations tracing_iter_fops = {
3622         .open           = tracing_trace_options_open,
3623         .read           = seq_read,
3624         .llseek         = seq_lseek,
3625         .release        = tracing_single_release_tr,
3626         .write          = tracing_trace_options_write,
3627 };
3628
3629 static const char readme_msg[] =
3630         "tracing mini-HOWTO:\n\n"
3631         "# echo 0 > tracing_on : quick way to disable tracing\n"
3632         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3633         " Important files:\n"
3634         "  trace\t\t\t- The static contents of the buffer\n"
3635         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3636         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3637         "  current_tracer\t- function and latency tracers\n"
3638         "  available_tracers\t- list of configured tracers for current_tracer\n"
3639         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3640         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3641         "  trace_clock\t\t- change the clock used to order events\n"
3642         "       local:   Per cpu clock but may not be synced across CPUs\n"
3643         "      global:   Synced across CPUs but slows tracing down.\n"
3644         "     counter:   Not a clock, but just an increment\n"
3645         "      uptime:   Jiffy counter from time of boot\n"
3646         "        perf:   Same clock that perf events use\n"
3647 #ifdef CONFIG_X86_64
3648         "     x86-tsc:   TSC cycle counter\n"
3649 #endif
3650         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3651         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3652         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3653         "\t\t\t  Remove sub-buffer with rmdir\n"
3654         "  trace_options\t\t- Set format or modify how tracing happens\n"
3655         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3656         "\t\t\t  option name\n"
3657         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3658 #ifdef CONFIG_DYNAMIC_FTRACE
3659         "\n  available_filter_functions - list of functions that can be filtered on\n"
3660         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3661         "\t\t\t  functions\n"
3662         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3663         "\t     modules: Can select a group via module\n"
3664         "\t      Format: :mod:<module-name>\n"
3665         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3666         "\t    triggers: a command to perform when function is hit\n"
3667         "\t      Format: <function>:<trigger>[:count]\n"
3668         "\t     trigger: traceon, traceoff\n"
3669         "\t\t      enable_event:<system>:<event>\n"
3670         "\t\t      disable_event:<system>:<event>\n"
3671 #ifdef CONFIG_STACKTRACE
3672         "\t\t      stacktrace\n"
3673 #endif
3674 #ifdef CONFIG_TRACER_SNAPSHOT
3675         "\t\t      snapshot\n"
3676 #endif
3677         "\t\t      dump\n"
3678         "\t\t      cpudump\n"
3679         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3680         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3681         "\t     The first one will disable tracing every time do_fault is hit\n"
3682         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3683         "\t       The first time do_trap is hit and it disables tracing, the\n"
3684         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3685         "\t       the counter will not decrement. It only decrements when the\n"
3686         "\t       trigger did work\n"
3687         "\t     To remove trigger without count:\n"
3688         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3689         "\t     To remove trigger with a count:\n"
3690         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3691         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3692         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3693         "\t    modules: Can select a group via module command :mod:\n"
3694         "\t    Does not accept triggers\n"
3695 #endif /* CONFIG_DYNAMIC_FTRACE */
3696 #ifdef CONFIG_FUNCTION_TRACER
3697         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3698         "\t\t    (function)\n"
3699 #endif
3700 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3701         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3702         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3703 #endif
3704 #ifdef CONFIG_TRACER_SNAPSHOT
3705         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3706         "\t\t\t  snapshot buffer. Read the contents for more\n"
3707         "\t\t\t  information\n"
3708 #endif
3709 #ifdef CONFIG_STACK_TRACER
3710         "  stack_trace\t\t- Shows the max stack trace when active\n"
3711         "  stack_max_size\t- Shows current max stack size that was traced\n"
3712         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3713         "\t\t\t  new trace)\n"
3714 #ifdef CONFIG_DYNAMIC_FTRACE
3715         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3716         "\t\t\t  traces\n"
3717 #endif
3718 #endif /* CONFIG_STACK_TRACER */
3719         "  events/\t\t- Directory containing all trace event subsystems:\n"
3720         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3721         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3722         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3723         "\t\t\t  events\n"
3724         "      filter\t\t- If set, only events passing filter are traced\n"
3725         "  events/<system>/<event>/\t- Directory containing control files for\n"
3726         "\t\t\t  <event>:\n"
3727         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3728         "      filter\t\t- If set, only events passing filter are traced\n"
3729         "      trigger\t\t- If set, a command to perform when event is hit\n"
3730         "\t    Format: <trigger>[:count][if <filter>]\n"
3731         "\t   trigger: traceon, traceoff\n"
3732         "\t            enable_event:<system>:<event>\n"
3733         "\t            disable_event:<system>:<event>\n"
3734 #ifdef CONFIG_STACKTRACE
3735         "\t\t    stacktrace\n"
3736 #endif
3737 #ifdef CONFIG_TRACER_SNAPSHOT
3738         "\t\t    snapshot\n"
3739 #endif
3740         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3741         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3742         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3743         "\t                  events/block/block_unplug/trigger\n"
3744         "\t   The first disables tracing every time block_unplug is hit.\n"
3745         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3746         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3747         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3748         "\t   Like function triggers, the counter is only decremented if it\n"
3749         "\t    enabled or disabled tracing.\n"
3750         "\t   To remove a trigger without a count:\n"
3751         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3752         "\t   To remove a trigger with a count:\n"
3753         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3754         "\t   Filters can be ignored when removing a trigger.\n"
3755 ;
3756
3757 static ssize_t
3758 tracing_readme_read(struct file *filp, char __user *ubuf,
3759                        size_t cnt, loff_t *ppos)
3760 {
3761         return simple_read_from_buffer(ubuf, cnt, ppos,
3762                                         readme_msg, strlen(readme_msg));
3763 }
3764
3765 static const struct file_operations tracing_readme_fops = {
3766         .open           = tracing_open_generic,
3767         .read           = tracing_readme_read,
3768         .llseek         = generic_file_llseek,
3769 };
3770
3771 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3772 {
3773         unsigned int *ptr = v;
3774
3775         if (*pos || m->count)
3776                 ptr++;
3777
3778         (*pos)++;
3779
3780         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3781              ptr++) {
3782                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3783                         continue;
3784
3785                 return ptr;
3786         }
3787
3788         return NULL;
3789 }
3790
3791 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3792 {
3793         void *v;
3794         loff_t l = 0;
3795
3796         preempt_disable();
3797         arch_spin_lock(&trace_cmdline_lock);
3798
3799         v = &savedcmd->map_cmdline_to_pid[0];
3800         while (l <= *pos) {
3801                 v = saved_cmdlines_next(m, v, &l);
3802                 if (!v)
3803                         return NULL;
3804         }
3805
3806         return v;
3807 }
3808
3809 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3810 {
3811         arch_spin_unlock(&trace_cmdline_lock);
3812         preempt_enable();
3813 }
3814
3815 static int saved_cmdlines_show(struct seq_file *m, void *v)
3816 {
3817         char buf[TASK_COMM_LEN];
3818         unsigned int *pid = v;
3819
3820         __trace_find_cmdline(*pid, buf);
3821         seq_printf(m, "%d %s\n", *pid, buf);
3822         return 0;
3823 }
3824
3825 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3826         .start          = saved_cmdlines_start,
3827         .next           = saved_cmdlines_next,
3828         .stop           = saved_cmdlines_stop,
3829         .show           = saved_cmdlines_show,
3830 };
3831
3832 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3833 {
3834         if (tracing_disabled)
3835                 return -ENODEV;
3836
3837         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3838 }
3839
3840 static const struct file_operations tracing_saved_cmdlines_fops = {
3841         .open           = tracing_saved_cmdlines_open,
3842         .read           = seq_read,
3843         .llseek         = seq_lseek,
3844         .release        = seq_release,
3845 };
3846
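/*
 * "saved_cmdlines_size" controls how many pid->comm mappings are kept
 * for resolving task names in the trace output.  Sketch:
 *
 *   # cat saved_cmdlines_size
 *   # echo 1024 > saved_cmdlines_size
 *
 * Writes must be between 1 and PID_MAX_DEFAULT and reallocate the
 * savedcmd buffer via tracing_resize_saved_cmdlines() below.
 */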
3847 static ssize_t
3848 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3849                                  size_t cnt, loff_t *ppos)
3850 {
3851         char buf[64];
3852         int r;
3853
3854         arch_spin_lock(&trace_cmdline_lock);
3855         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3856         arch_spin_unlock(&trace_cmdline_lock);
3857
3858         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3859 }
3860
3861 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3862 {
3863         kfree(s->saved_cmdlines);
3864         kfree(s->map_cmdline_to_pid);
3865         kfree(s);
3866 }
3867
3868 static int tracing_resize_saved_cmdlines(unsigned int val)
3869 {
3870         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3871
3872         s = kmalloc(sizeof(*s), GFP_KERNEL);
3873         if (!s)
3874                 return -ENOMEM;
3875
3876         if (allocate_cmdlines_buffer(val, s) < 0) {
3877                 kfree(s);
3878                 return -ENOMEM;
3879         }
3880
3881         arch_spin_lock(&trace_cmdline_lock);
3882         savedcmd_temp = savedcmd;
3883         savedcmd = s;
3884         arch_spin_unlock(&trace_cmdline_lock);
3885         free_saved_cmdlines_buffer(savedcmd_temp);
3886
3887         return 0;
3888 }
3889
3890 static ssize_t
3891 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3892                                   size_t cnt, loff_t *ppos)
3893 {
3894         unsigned long val;
3895         int ret;
3896
3897         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3898         if (ret)
3899                 return ret;
3900
3901         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3902         if (!val || val > PID_MAX_DEFAULT)
3903                 return -EINVAL;
3904
3905         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3906         if (ret < 0)
3907                 return ret;
3908
3909         *ppos += cnt;
3910
3911         return cnt;
3912 }
3913
3914 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3915         .open           = tracing_open_generic,
3916         .read           = tracing_saved_cmdlines_size_read,
3917         .write          = tracing_saved_cmdlines_size_write,
3918 };
3919
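/*
 * Handlers for the "current_tracer" file.  A hedged example (which
 * tracers exist depends on the kernel configuration):
 *
 *   # cat current_tracer        - e.g. "nop"
 *   # echo function > current_tracer
 *
 * The actual switch is performed by tracing_set_tracer() further down.
 */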
3920 static ssize_t
3921 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3922                        size_t cnt, loff_t *ppos)
3923 {
3924         struct trace_array *tr = filp->private_data;
3925         char buf[MAX_TRACER_SIZE+2];
3926         int r;
3927
3928         mutex_lock(&trace_types_lock);
3929         r = sprintf(buf, "%s\n", tr->current_trace->name);
3930         mutex_unlock(&trace_types_lock);
3931
3932         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3933 }
3934
3935 int tracer_init(struct tracer *t, struct trace_array *tr)
3936 {
3937         tracing_reset_online_cpus(&tr->trace_buffer);
3938         return t->init(tr);
3939 }
3940
3941 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3942 {
3943         int cpu;
3944
3945         for_each_tracing_cpu(cpu)
3946                 per_cpu_ptr(buf->data, cpu)->entries = val;
3947 }
3948
3949 #ifdef CONFIG_TRACER_MAX_TRACE
3950 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3951 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3952                                         struct trace_buffer *size_buf, int cpu_id)
3953 {
3954         int cpu, ret = 0;
3955
3956         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3957                 for_each_tracing_cpu(cpu) {
3958                         ret = ring_buffer_resize(trace_buf->buffer,
3959                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3960                         if (ret < 0)
3961                                 break;
3962                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3963                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3964                 }
3965         } else {
3966                 ret = ring_buffer_resize(trace_buf->buffer,
3967                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3968                 if (ret == 0)
3969                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3970                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3971         }
3972
3973         return ret;
3974 }
3975 #endif /* CONFIG_TRACER_MAX_TRACE */
3976
3977 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3978                                         unsigned long size, int cpu)
3979 {
3980         int ret;
3981
3982         /*
3983          * If kernel or user changes the size of the ring buffer
3984          * we use the size that was given, and we can forget about
3985          * expanding it later.
3986          */
3987         ring_buffer_expanded = true;
3988
3989         /* May be called before buffers are initialized */
3990         if (!tr->trace_buffer.buffer)
3991                 return 0;
3992
3993         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3994         if (ret < 0)
3995                 return ret;
3996
3997 #ifdef CONFIG_TRACER_MAX_TRACE
3998         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3999             !tr->current_trace->use_max_tr)
4000                 goto out;
4001
4002         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4003         if (ret < 0) {
4004                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4005                                                      &tr->trace_buffer, cpu);
4006                 if (r < 0) {
4007                         /*
4008                          * AARGH! We are left with different
4009                          * size max buffer!!!!
4010                          * The max buffer is our "snapshot" buffer.
4011                          * When a tracer needs a snapshot (one of the
4012                          * latency tracers), it swaps the max buffer
4013                          * with the saved snapshot. We succeeded in
4014                          * updating the size of the main buffer, but failed to
4015                          * update the size of the max buffer. But when we tried
4016                          * to reset the main buffer to the original size, we
4017                          * failed there too. This is very unlikely to
4018                          * happen, but if it does, warn and kill all
4019                          * tracing.
4020                          */
4021                         WARN_ON(1);
4022                         tracing_disabled = 1;
4023                 }
4024                 return ret;
4025         }
4026
4027         if (cpu == RING_BUFFER_ALL_CPUS)
4028                 set_buffer_entries(&tr->max_buffer, size);
4029         else
4030                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4031
4032  out:
4033 #endif /* CONFIG_TRACER_MAX_TRACE */
4034
4035         if (cpu == RING_BUFFER_ALL_CPUS)
4036                 set_buffer_entries(&tr->trace_buffer, size);
4037         else
4038                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4039
4040         return ret;
4041 }
4042
4043 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4044                                           unsigned long size, int cpu_id)
4045 {
4046         int ret = size;
4047
4048         mutex_lock(&trace_types_lock);
4049
4050         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4051                 /* make sure this cpu is enabled in the mask */
4052                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4053                         ret = -EINVAL;
4054                         goto out;
4055                 }
4056         }
4057
4058         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4059         if (ret < 0)
4060                 ret = -ENOMEM;
4061
4062 out:
4063         mutex_unlock(&trace_types_lock);
4064
4065         return ret;
4066 }
4067
4068
4069 /**
4070  * tracing_update_buffers - used by tracing facility to expand ring buffers
4071  *
4072  * To save memory when tracing is configured in but never used, the
4073  * ring buffers are set to a minimum size. But once a user starts to
4074  * use the tracing facility, they need to grow to their default
4075  * size.
4076  *
4077  * This function is to be called when a tracer is about to be used.
4078  */
4079 int tracing_update_buffers(void)
4080 {
4081         int ret = 0;
4082
4083         mutex_lock(&trace_types_lock);
4084         if (!ring_buffer_expanded)
4085                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4086                                                 RING_BUFFER_ALL_CPUS);
4087         mutex_unlock(&trace_types_lock);
4088
4089         return ret;
4090 }
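/*
 * An illustrative call-site pattern, as suggested by the comment above
 * (not a verbatim copy of any particular caller):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */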
4091
4092 struct trace_option_dentry;
4093
4094 static struct trace_option_dentry *
4095 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4096
4097 static void
4098 destroy_trace_option_files(struct trace_option_dentry *topts);
4099
4100 /*
4101  * Used to clear out the tracer before deletion of an instance.
4102  * Must have trace_types_lock held.
4103  */
4104 static void tracing_set_nop(struct trace_array *tr)
4105 {
4106         if (tr->current_trace == &nop_trace)
4107                 return;
4108
4109         tr->current_trace->enabled--;
4110
4111         if (tr->current_trace->reset)
4112                 tr->current_trace->reset(tr);
4113
4114         tr->current_trace = &nop_trace;
4115 }
4116
4117 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4118 {
4119         static struct trace_option_dentry *topts;
4120         struct tracer *t;
4121 #ifdef CONFIG_TRACER_MAX_TRACE
4122         bool had_max_tr;
4123 #endif
4124         int ret = 0;
4125
4126         mutex_lock(&trace_types_lock);
4127
4128         if (!ring_buffer_expanded) {
4129                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4130                                                 RING_BUFFER_ALL_CPUS);
4131                 if (ret < 0)
4132                         goto out;
4133                 ret = 0;
4134         }
4135
4136         for (t = trace_types; t; t = t->next) {
4137                 if (strcmp(t->name, buf) == 0)
4138                         break;
4139         }
4140         if (!t) {
4141                 ret = -EINVAL;
4142                 goto out;
4143         }
4144         if (t == tr->current_trace)
4145                 goto out;
4146
4147         /* Some tracers are only allowed for the top level buffer */
4148         if (!trace_ok_for_array(t, tr)) {
4149                 ret = -EINVAL;
4150                 goto out;
4151         }
4152
4153         trace_branch_disable();
4154
4155         tr->current_trace->enabled--;
4156
4157         if (tr->current_trace->reset)
4158                 tr->current_trace->reset(tr);
4159
4160         /* Current trace needs to be nop_trace before synchronize_sched */
4161         tr->current_trace = &nop_trace;
4162
4163 #ifdef CONFIG_TRACER_MAX_TRACE
4164         had_max_tr = tr->allocated_snapshot;
4165
4166         if (had_max_tr && !t->use_max_tr) {
4167                 /*
4168                  * We need to make sure that the update_max_tr sees that
4169                  * current_trace changed to nop_trace to keep it from
4170                  * swapping the buffers after we resize it.
4171                  * The update_max_tr is called with interrupts disabled,
4172                  * so a synchronize_sched() is sufficient.
4173                  */
4174                 synchronize_sched();
4175                 free_snapshot(tr);
4176         }
4177 #endif
4178         /* Currently, only the top instance has options */
4179         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4180                 destroy_trace_option_files(topts);
4181                 topts = create_trace_option_files(tr, t);
4182         }
4183
4184 #ifdef CONFIG_TRACER_MAX_TRACE
4185         if (t->use_max_tr && !had_max_tr) {
4186                 ret = alloc_snapshot(tr);
4187                 if (ret < 0)
4188                         goto out;
4189         }
4190 #endif
4191
4192         if (t->init) {
4193                 ret = tracer_init(t, tr);
4194                 if (ret)
4195                         goto out;
4196         }
4197
4198         tr->current_trace = t;
4199         tr->current_trace->enabled++;
4200         trace_branch_enable(tr);
4201  out:
4202         mutex_unlock(&trace_types_lock);
4203
4204         return ret;
4205 }
4206
4207 static ssize_t
4208 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4209                         size_t cnt, loff_t *ppos)
4210 {
4211         struct trace_array *tr = filp->private_data;
4212         char buf[MAX_TRACER_SIZE+1];
4213         int i;
4214         size_t ret;
4215         int err;
4216
4217         ret = cnt;
4218
4219         if (cnt > MAX_TRACER_SIZE)
4220                 cnt = MAX_TRACER_SIZE;
4221
4222         if (copy_from_user(&buf, ubuf, cnt))
4223                 return -EFAULT;
4224
4225         buf[cnt] = 0;
4226
4227         /* strip ending whitespace. */
4228         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4229                 buf[i] = 0;
4230
4231         err = tracing_set_tracer(tr, buf);
4232         if (err)
4233                 return err;
4234
4235         *ppos += ret;
4236
4237         return ret;
4238 }
4239
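/*
 * Read/write helpers for the max latency value used by the latency
 * tracers.  The stored value is in nanoseconds while user-space works
 * in microseconds, e.g. (file name and values are illustrative):
 *
 *   # cat tracing_max_latency     - "128" means 128 usecs
 *   # echo 0 > tracing_max_latency
 *
 * The write converts back with "*ptr = val * 1000".
 */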
4240 static ssize_t
4241 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4242                      size_t cnt, loff_t *ppos)
4243 {
4244         unsigned long *ptr = filp->private_data;
4245         char buf[64];
4246         int r;
4247
4248         r = snprintf(buf, sizeof(buf), "%ld\n",
4249                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4250         if (r > sizeof(buf))
4251                 r = sizeof(buf);
4252         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4253 }
4254
4255 static ssize_t
4256 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4257                       size_t cnt, loff_t *ppos)
4258 {
4259         unsigned long *ptr = filp->private_data;
4260         unsigned long val;
4261         int ret;
4262
4263         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4264         if (ret)
4265                 return ret;
4266
4267         *ptr = val * 1000;
4268
4269         return cnt;
4270 }
4271
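/*
 * "trace_pipe" is the consuming counterpart of "trace": entries handed
 * to the reader are removed from the ring buffer, and reads block while
 * the buffer is empty unless the file was opened O_NONBLOCK (see
 * tracing_wait_pipe()).  Typical use is simply:
 *
 *   # cat trace_pipe
 *
 * Each open gets its own iterator, allocated below.
 */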
4272 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4273 {
4274         struct trace_array *tr = inode->i_private;
4275         struct trace_iterator *iter;
4276         int ret = 0;
4277
4278         if (tracing_disabled)
4279                 return -ENODEV;
4280
4281         if (trace_array_get(tr) < 0)
4282                 return -ENODEV;
4283
4284         mutex_lock(&trace_types_lock);
4285
4286         /* create a buffer to store the information to pass to userspace */
4287         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4288         if (!iter) {
4289                 ret = -ENOMEM;
4290                 __trace_array_put(tr);
4291                 goto out;
4292         }
4293
4294         /*
4295          * We make a copy of the current tracer to avoid concurrent
4296          * changes on it while we are reading.
4297          */
4298         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4299         if (!iter->trace) {
4300                 ret = -ENOMEM;
4301                 goto fail;
4302         }
4303         *iter->trace = *tr->current_trace;
4304
4305         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4306                 ret = -ENOMEM;
4307                 goto fail;
4308         }
4309
4310         /* trace pipe does not show start of buffer */
4311         cpumask_setall(iter->started);
4312
4313         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4314                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4315
4316         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4317         if (trace_clocks[tr->clock_id].in_ns)
4318                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4319
4320         iter->tr = tr;
4321         iter->trace_buffer = &tr->trace_buffer;
4322         iter->cpu_file = tracing_get_cpu(inode);
4323         mutex_init(&iter->mutex);
4324         filp->private_data = iter;
4325
4326         if (iter->trace->pipe_open)
4327                 iter->trace->pipe_open(iter);
4328
4329         nonseekable_open(inode, filp);
4330 out:
4331         mutex_unlock(&trace_types_lock);
4332         return ret;
4333
4334 fail:
4335         kfree(iter->trace);
4336         kfree(iter);
4337         __trace_array_put(tr);
4338         mutex_unlock(&trace_types_lock);
4339         return ret;
4340 }
4341
4342 static int tracing_release_pipe(struct inode *inode, struct file *file)
4343 {
4344         struct trace_iterator *iter = file->private_data;
4345         struct trace_array *tr = inode->i_private;
4346
4347         mutex_lock(&trace_types_lock);
4348
4349         if (iter->trace->pipe_close)
4350                 iter->trace->pipe_close(iter);
4351
4352         mutex_unlock(&trace_types_lock);
4353
4354         free_cpumask_var(iter->started);
4355         mutex_destroy(&iter->mutex);
4356         kfree(iter->trace);
4357         kfree(iter);
4358
4359         trace_array_put(tr);
4360
4361         return 0;
4362 }
4363
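/*
 * Poll support for the pipe-style files.  A minimal user-space sketch
 * of waiting for data (illustrative only; fd is an open trace_pipe):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	poll(&pfd, 1, -1);
 *	read(fd, buf, sizeof(buf));
 *
 * With TRACE_ITER_BLOCK set the file always reports readable; otherwise
 * the wait is delegated to ring_buffer_poll_wait().
 */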
4364 static unsigned int
4365 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4366 {
4367         /* Iterators are static, they should be filled or empty */
4368         if (trace_buffer_iter(iter, iter->cpu_file))
4369                 return POLLIN | POLLRDNORM;
4370
4371         if (trace_flags & TRACE_ITER_BLOCK)
4372                 /*
4373                  * Always select as readable when in blocking mode
4374                  */
4375                 return POLLIN | POLLRDNORM;
4376         else
4377                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4378                                              filp, poll_table);
4379 }
4380
4381 static unsigned int
4382 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4383 {
4384         struct trace_iterator *iter = filp->private_data;
4385
4386         return trace_poll(iter, filp, poll_table);
4387 }
4388
4389 /* Must be called with trace_types_lock mutex held. */
4390 static int tracing_wait_pipe(struct file *filp)
4391 {
4392         struct trace_iterator *iter = filp->private_data;
4393         int ret;
4394
4395         while (trace_empty(iter)) {
4396
4397                 if ((filp->f_flags & O_NONBLOCK)) {
4398                         return -EAGAIN;
4399                 }
4400
4401                 /*
4402                  * We block until we read something and tracing is disabled.
4403                  * We still block if tracing is disabled, but we have never
4404                  * read anything. This allows a user to cat this file, and
4405                  * then enable tracing. But after we have read something,
4406                  * we give an EOF when tracing is again disabled.
4407                  *
4408                  * iter->pos will be 0 if we haven't read anything.
4409                  */
4410                 if (!tracing_is_on() && iter->pos)
4411                         break;
4412
4413                 mutex_unlock(&iter->mutex);
4414
4415                 ret = wait_on_pipe(iter);
4416
4417                 mutex_lock(&iter->mutex);
4418
4419                 if (ret)
4420                         return ret;
4421
4422                 if (signal_pending(current))
4423                         return -EINTR;
4424         }
4425
4426         return 1;
4427 }
4428
4429 /*
4430  * Consumer reader.
4431  */
4432 static ssize_t
4433 tracing_read_pipe(struct file *filp, char __user *ubuf,
4434                   size_t cnt, loff_t *ppos)
4435 {
4436         struct trace_iterator *iter = filp->private_data;
4437         struct trace_array *tr = iter->tr;
4438         ssize_t sret;
4439
4440         /* return any leftover data */
4441         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4442         if (sret != -EBUSY)
4443                 return sret;
4444
4445         trace_seq_init(&iter->seq);
4446
4447         /* copy the tracer to avoid using a global lock all around */
4448         mutex_lock(&trace_types_lock);
4449         if (unlikely(iter->trace->name != tr->current_trace->name))
4450                 *iter->trace = *tr->current_trace;
4451         mutex_unlock(&trace_types_lock);
4452
4453         /*
4454          * Avoid more than one consumer on a single file descriptor.
4455          * This is just a matter of trace coherency; the ring buffer itself
4456          * is protected.
4457          */
4458         mutex_lock(&iter->mutex);
4459         if (iter->trace->read) {
4460                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4461                 if (sret)
4462                         goto out;
4463         }
4464
4465 waitagain:
4466         sret = tracing_wait_pipe(filp);
4467         if (sret <= 0)
4468                 goto out;
4469
4470         /* stop when tracing is finished */
4471         if (trace_empty(iter)) {
4472                 sret = 0;
4473                 goto out;
4474         }
4475
4476         if (cnt >= PAGE_SIZE)
4477                 cnt = PAGE_SIZE - 1;
4478
4479         /* reset all but tr, trace, and overruns */
4480         memset(&iter->seq, 0,
4481                sizeof(struct trace_iterator) -
4482                offsetof(struct trace_iterator, seq));
4483         cpumask_clear(iter->started);
4484         iter->pos = -1;
4485
4486         trace_event_read_lock();
4487         trace_access_lock(iter->cpu_file);
4488         while (trace_find_next_entry_inc(iter) != NULL) {
4489                 enum print_line_t ret;
4490                 int len = iter->seq.len;
4491
4492                 ret = print_trace_line(iter);
4493                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4494                         /* don't print partial lines */
4495                         iter->seq.len = len;
4496                         break;
4497                 }
4498                 if (ret != TRACE_TYPE_NO_CONSUME)
4499                         trace_consume(iter);
4500
4501                 if (iter->seq.len >= cnt)
4502                         break;
4503
4504                 /*
4505                  * Setting the full flag means we reached the trace_seq buffer
4506                  * size and should have left via the partial-output condition above.
4507                  * One of the trace_seq_* functions is not being used properly.
4508                  */
4509                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4510                           iter->ent->type);
4511         }
4512         trace_access_unlock(iter->cpu_file);
4513         trace_event_read_unlock();
4514
4515         /* Now copy what we have to the user */
4516         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4517         if (iter->seq.readpos >= iter->seq.len)
4518                 trace_seq_init(&iter->seq);
4519
4520         /*
4521          * If there was nothing to send to user, in spite of consuming trace
4522          * entries, go back to wait for more entries.
4523          */
4524         if (sret == -EBUSY)
4525                 goto waitagain;
4526
4527 out:
4528         mutex_unlock(&iter->mutex);
4529
4530         return sret;
4531 }
4532
4533 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4534                                      unsigned int idx)
4535 {
4536         __free_page(spd->pages[idx]);
4537 }
4538
4539 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4540         .can_merge              = 0,
4541         .confirm                = generic_pipe_buf_confirm,
4542         .release                = generic_pipe_buf_release,
4543         .steal                  = generic_pipe_buf_steal,
4544         .get                    = generic_pipe_buf_get,
4545 };
4546
4547 static size_t
4548 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4549 {
4550         size_t count;
4551         int ret;
4552
4553         /* Seq buffer is page-sized, exactly what we need. */
4554         for (;;) {
4555                 count = iter->seq.len;
4556                 ret = print_trace_line(iter);
4557                 count = iter->seq.len - count;
4558                 if (rem < count) {
4559                         rem = 0;
4560                         iter->seq.len -= count;
4561                         break;
4562                 }
4563                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4564                         iter->seq.len -= count;
4565                         break;
4566                 }
4567
4568                 if (ret != TRACE_TYPE_NO_CONSUME)
4569                         trace_consume(iter);
4570                 rem -= count;
4571                 if (!trace_find_next_entry_inc(iter))   {
4572                         rem = 0;
4573                         iter->ent = NULL;
4574                         break;
4575                 }
4576         }
4577
4578         return rem;
4579 }
4580
4581 static ssize_t tracing_splice_read_pipe(struct file *filp,
4582                                         loff_t *ppos,
4583                                         struct pipe_inode_info *pipe,
4584                                         size_t len,
4585                                         unsigned int flags)
4586 {
4587         struct page *pages_def[PIPE_DEF_BUFFERS];
4588         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4589         struct trace_iterator *iter = filp->private_data;
4590         struct splice_pipe_desc spd = {
4591                 .pages          = pages_def,
4592                 .partial        = partial_def,
4593                 .nr_pages       = 0, /* This gets updated below. */
4594                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4595                 .flags          = flags,
4596                 .ops            = &tracing_pipe_buf_ops,
4597                 .spd_release    = tracing_spd_release_pipe,
4598         };
4599         struct trace_array *tr = iter->tr;
4600         ssize_t ret;
4601         size_t rem;
4602         unsigned int i;
4603
4604         if (splice_grow_spd(pipe, &spd))
4605                 return -ENOMEM;
4606
4607         /* copy the tracer to avoid using a global lock all around */
4608         mutex_lock(&trace_types_lock);
4609         if (unlikely(iter->trace->name != tr->current_trace->name))
4610                 *iter->trace = *tr->current_trace;
4611         mutex_unlock(&trace_types_lock);
4612
4613         mutex_lock(&iter->mutex);
4614
4615         if (iter->trace->splice_read) {
4616                 ret = iter->trace->splice_read(iter, filp,
4617                                                ppos, pipe, len, flags);
4618                 if (ret)
4619                         goto out_err;
4620         }
4621
4622         ret = tracing_wait_pipe(filp);
4623         if (ret <= 0)
4624                 goto out_err;
4625
4626         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4627                 ret = -EFAULT;
4628                 goto out_err;
4629         }
4630
4631         trace_event_read_lock();
4632         trace_access_lock(iter->cpu_file);
4633
4634         /* Fill as many pages as possible. */
4635         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4636                 spd.pages[i] = alloc_page(GFP_KERNEL);
4637                 if (!spd.pages[i])
4638                         break;
4639
4640                 rem = tracing_fill_pipe_page(rem, iter);
4641
4642                 /* Copy the data into the page, so we can start over. */
4643                 ret = trace_seq_to_buffer(&iter->seq,
4644                                           page_address(spd.pages[i]),
4645                                           iter->seq.len);
4646                 if (ret < 0) {
4647                         __free_page(spd.pages[i]);
4648                         break;
4649                 }
4650                 spd.partial[i].offset = 0;
4651                 spd.partial[i].len = iter->seq.len;
4652
4653                 trace_seq_init(&iter->seq);
4654         }
4655
4656         trace_access_unlock(iter->cpu_file);
4657         trace_event_read_unlock();
4658         mutex_unlock(&iter->mutex);
4659
4660         spd.nr_pages = i;
4661
4662         ret = splice_to_pipe(pipe, &spd);
4663 out:
4664         splice_shrink_spd(&spd);
4665         return ret;
4666
4667 out_err:
4668         mutex_unlock(&iter->mutex);
4669         goto out;
4670 }
4671
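/*
 * Handlers for the "buffer_size_kb" file(s).  Values are in KB:
 *
 *   # cat buffer_size_kb           - per-cpu size, e.g. "1408"
 *   # echo 4096 > buffer_size_kb   - resize the selected buffer(s) to 4 MB
 *
 * "X" is printed when all CPUs are requested but their sizes differ,
 * and "(expanded: N)" is shown while the buffer is still at its
 * boot-time minimum.  (Example numbers are illustrative.)
 */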
4672 static ssize_t
4673 tracing_entries_read(struct file *filp, char __user *ubuf,
4674                      size_t cnt, loff_t *ppos)
4675 {
4676         struct inode *inode = file_inode(filp);
4677         struct trace_array *tr = inode->i_private;
4678         int cpu = tracing_get_cpu(inode);
4679         char buf[64];
4680         int r = 0;
4681         ssize_t ret;
4682
4683         mutex_lock(&trace_types_lock);
4684
4685         if (cpu == RING_BUFFER_ALL_CPUS) {
4686                 int cpu, buf_size_same;
4687                 unsigned long size;
4688
4689                 size = 0;
4690                 buf_size_same = 1;
4691                 /* check if all cpu sizes are same */
4692                 for_each_tracing_cpu(cpu) {
4693                         /* fill in the size from first enabled cpu */
4694                         if (size == 0)
4695                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4696                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4697                                 buf_size_same = 0;
4698                                 break;
4699                         }
4700                 }
4701
4702                 if (buf_size_same) {
4703                         if (!ring_buffer_expanded)
4704                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4705                                             size >> 10,
4706                                             trace_buf_size >> 10);
4707                         else
4708                                 r = sprintf(buf, "%lu\n", size >> 10);
4709                 } else
4710                         r = sprintf(buf, "X\n");
4711         } else
4712                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4713
4714         mutex_unlock(&trace_types_lock);
4715
4716         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4717         return ret;
4718 }
4719
4720 static ssize_t
4721 tracing_entries_write(struct file *filp, const char __user *ubuf,
4722                       size_t cnt, loff_t *ppos)
4723 {
4724         struct inode *inode = file_inode(filp);
4725         struct trace_array *tr = inode->i_private;
4726         unsigned long val;
4727         int ret;
4728
4729         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4730         if (ret)
4731                 return ret;
4732
4733         /* must have at least 1 entry */
4734         if (!val)
4735                 return -EINVAL;
4736
4737         /* value is in KB */
4738         val <<= 10;
4739         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4740         if (ret < 0)
4741                 return ret;
4742
4743         *ppos += cnt;
4744
4745         return cnt;
4746 }
4747
4748 static ssize_t
4749 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4750                                 size_t cnt, loff_t *ppos)
4751 {
4752         struct trace_array *tr = filp->private_data;
4753         char buf[64];
4754         int r, cpu;
4755         unsigned long size = 0, expanded_size = 0;
4756
4757         mutex_lock(&trace_types_lock);
4758         for_each_tracing_cpu(cpu) {
4759                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4760                 if (!ring_buffer_expanded)
4761                         expanded_size += trace_buf_size >> 10;
4762         }
4763         if (ring_buffer_expanded)
4764                 r = sprintf(buf, "%lu\n", size);
4765         else
4766                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4767         mutex_unlock(&trace_types_lock);
4768
4769         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4770 }
4771
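/*
 * Handlers for the buffer-freeing file (conventionally exposed as
 * "free_buffer"; the name is an assumption based on these handlers).
 * The write is a no-op so that, for example:
 *
 *   # echo > free_buffer
 *
 * succeeds; the actual work, optionally stopping tracing and resizing
 * the ring buffer to zero, happens on release below.
 */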
4772 static ssize_t
4773 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4774                           size_t cnt, loff_t *ppos)
4775 {
4776         /*
4777          * There is no need to read what the user has written; this function
4778          * only exists so that using "echo" on this file does not return an error
4779          */
4780
4781         *ppos += cnt;
4782
4783         return cnt;
4784 }
4785
4786 static int
4787 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4788 {
4789         struct trace_array *tr = inode->i_private;
4790
4791         /* disable tracing ? */
4792         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4793                 tracer_tracing_off(tr);
4794         /* resize the ring buffer to 0 */
4795         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4796
4797         trace_array_put(tr);
4798
4799         return 0;
4800 }
4801
4802 static ssize_t
4803 tracing_mark_write(struct file *filp, const char __user *ubuf,
4804                                         size_t cnt, loff_t *fpos)
4805 {
4806         unsigned long addr = (unsigned long)ubuf;
4807         struct trace_array *tr = filp->private_data;
4808         struct ring_buffer_event *event;
4809         struct ring_buffer *buffer;
4810         struct print_entry *entry;
4811         unsigned long irq_flags;
4812         struct page *pages[2];
4813         void *map_page[2];
4814         int nr_pages = 1;
4815         ssize_t written;
4816         int offset;
4817         int size;
4818         int len;
4819         int ret;
4820         int i;
4821
4822         if (tracing_disabled)
4823                 return -EINVAL;
4824
4825         if (!(trace_flags & TRACE_ITER_MARKERS))
4826                 return -EINVAL;
4827
4828         if (cnt > TRACE_BUF_SIZE)
4829                 cnt = TRACE_BUF_SIZE;
4830
4831         /*
4832          * Userspace is injecting traces into the kernel trace buffer.
4833          * We want to be as non-intrusive as possible.
4834          * To do so, we do not want to allocate any special buffers
4835          * or take any locks, but instead write the userspace data
4836          * straight into the ring buffer.
4837          *
4838          * First we need to pin the userspace buffer into memory.
4839          * It is most likely already resident, since userspace just
4840          * referenced it, but there is no guarantee. By using get_user_pages_fast()
4841          * and kmap_atomic/kunmap_atomic() we can get access to the
4842          * pages directly. We then write the data directly into the
4843          * ring buffer.
4844          */
4845         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4846
4847         /* check if we cross pages */
4848         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4849                 nr_pages = 2;
4850
4851         offset = addr & (PAGE_SIZE - 1);
4852         addr &= PAGE_MASK;
4853
4854         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4855         if (ret < nr_pages) {
4856                 while (--ret >= 0)
4857                         put_page(pages[ret]);
4858                 written = -EFAULT;
4859                 goto out;
4860         }
4861
4862         for (i = 0; i < nr_pages; i++)
4863                 map_page[i] = kmap_atomic(pages[i]);
4864
4865         local_save_flags(irq_flags);
4866         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4867         buffer = tr->trace_buffer.buffer;
4868         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4869                                           irq_flags, preempt_count());
4870         if (!event) {
4871                 /* Ring buffer disabled, return as if not open for write */
4872                 written = -EBADF;
4873                 goto out_unlock;
4874         }
4875
4876         entry = ring_buffer_event_data(event);
4877         entry->ip = _THIS_IP_;
4878
4879         if (nr_pages == 2) {
4880                 len = PAGE_SIZE - offset;
4881                 memcpy(&entry->buf, map_page[0] + offset, len);
4882                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4883         } else
4884                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4885
4886         if (entry->buf[cnt - 1] != '\n') {
4887                 entry->buf[cnt] = '\n';
4888                 entry->buf[cnt + 1] = '\0';
4889         } else
4890                 entry->buf[cnt] = '\0';
4891
4892         __buffer_unlock_commit(buffer, event);
4893
4894         written = cnt;
4895
4896         *fpos += written;
4897
4898  out_unlock:
4899         for (i = 0; i < nr_pages; i++) {
4900                 kunmap_atomic(map_page[i]);
4901                 put_page(pages[i]);
4902         }
4903  out:
4904         return written;
4905 }
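
/*
 * Editorial sketch (not part of trace.c): tracing_mark_write() above copies a
 * userspace string straight into the ring buffer as a TRACE_PRINT event,
 * pinning the user pages with get_user_pages_fast() instead of copying them
 * into a temporary buffer.  From userspace the interface is simply a write()
 * to trace_marker; the path below assumes debugfs is mounted at
 * /sys/kernel/debug.
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void trace_mark(int fd, const char *msg)
{
	/* One write() per annotation; the kernel caps it at TRACE_BUF_SIZE. */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("trace_marker");
}

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	trace_mark(fd, "app: entering critical section");
	/* ... workload being measured ... */
	trace_mark(fd, "app: leaving critical section");
	close(fd);
	return 0;
}
#endif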
4906
4907 static int tracing_clock_show(struct seq_file *m, void *v)
4908 {
4909         struct trace_array *tr = m->private;
4910         int i;
4911
4912         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4913                 seq_printf(m,
4914                         "%s%s%s%s", i ? " " : "",
4915                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4916                         i == tr->clock_id ? "]" : "");
4917         seq_putc(m, '\n');
4918
4919         return 0;
4920 }
4921
4922 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4923 {
4924         int i;
4925
4926         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4927                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4928                         break;
4929         }
4930         if (i == ARRAY_SIZE(trace_clocks))
4931                 return -EINVAL;
4932
4933         mutex_lock(&trace_types_lock);
4934
4935         tr->clock_id = i;
4936
4937         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4938
4939         /*
4940          * New clock may not be consistent with the previous clock.
4941          * Reset the buffer so that it doesn't have incomparable timestamps.
4942          */
4943         tracing_reset_online_cpus(&tr->trace_buffer);
4944
4945 #ifdef CONFIG_TRACER_MAX_TRACE
4946         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4947                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4948         tracing_reset_online_cpus(&tr->max_buffer);
4949 #endif
4950
4951         mutex_unlock(&trace_types_lock);
4952
4953         return 0;
4954 }
4955
4956 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4957                                    size_t cnt, loff_t *fpos)
4958 {
4959         struct seq_file *m = filp->private_data;
4960         struct trace_array *tr = m->private;
4961         char buf[64];
4962         const char *clockstr;
4963         int ret;
4964
4965         if (cnt >= sizeof(buf))
4966                 return -EINVAL;
4967
4968         if (copy_from_user(&buf, ubuf, cnt))
4969                 return -EFAULT;
4970
4971         buf[cnt] = 0;
4972
4973         clockstr = strstrip(buf);
4974
4975         ret = tracing_set_clock(tr, clockstr);
4976         if (ret)
4977                 return ret;
4978
4979         *fpos += cnt;
4980
4981         return cnt;
4982 }
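
/*
 * Editorial sketch (not part of trace.c): tracing_clock_show() above brackets
 * the active clock (e.g. "[local] global counter ...") and tracing_clock_write()
 * switches clocks via tracing_set_clock(), resetting the buffers so timestamps
 * stay comparable.  A userspace sketch, assuming the usual debugfs mount point
 * and that "global" is one of the available clock names:
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/sys/kernel/debug/tracing/trace_clock", O_RDWR);

	if (fd < 0) {
		perror("open trace_clock");
		return 1;
	}

	/* Show the current selection; the active clock is the bracketed one. */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}

	/* Select the "global" clock; note this also resets the ring buffers. */
	if (write(fd, "global\n", 7) < 0)
		perror("write trace_clock");

	close(fd);
	return 0;
}
#endif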
4983
4984 static int tracing_clock_open(struct inode *inode, struct file *file)
4985 {
4986         struct trace_array *tr = inode->i_private;
4987         int ret;
4988
4989         if (tracing_disabled)
4990                 return -ENODEV;
4991
4992         if (trace_array_get(tr))
4993                 return -ENODEV;
4994
4995         ret = single_open(file, tracing_clock_show, inode->i_private);
4996         if (ret < 0)
4997                 trace_array_put(tr);
4998
4999         return ret;
5000 }
5001
5002 struct ftrace_buffer_info {
5003         struct trace_iterator   iter;
5004         void                    *spare;
5005         unsigned int            read;
5006 };
5007
5008 #ifdef CONFIG_TRACER_SNAPSHOT
5009 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5010 {
5011         struct trace_array *tr = inode->i_private;
5012         struct trace_iterator *iter;
5013         struct seq_file *m;
5014         int ret = 0;
5015
5016         if (trace_array_get(tr) < 0)
5017                 return -ENODEV;
5018
5019         if (file->f_mode & FMODE_READ) {
5020                 iter = __tracing_open(inode, file, true);
5021                 if (IS_ERR(iter))
5022                         ret = PTR_ERR(iter);
5023         } else {
5024                 /* Writes still need the seq_file to hold the private data */
5025                 ret = -ENOMEM;
5026                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5027                 if (!m)
5028                         goto out;
5029                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5030                 if (!iter) {
5031                         kfree(m);
5032                         goto out;
5033                 }
5034                 ret = 0;
5035
5036                 iter->tr = tr;
5037                 iter->trace_buffer = &tr->max_buffer;
5038                 iter->cpu_file = tracing_get_cpu(inode);
5039                 m->private = iter;
5040                 file->private_data = m;
5041         }
5042 out:
5043         if (ret < 0)
5044                 trace_array_put(tr);
5045
5046         return ret;
5047 }
5048
5049 static ssize_t
5050 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5051                        loff_t *ppos)
5052 {
5053         struct seq_file *m = filp->private_data;
5054         struct trace_iterator *iter = m->private;
5055         struct trace_array *tr = iter->tr;
5056         unsigned long val;
5057         int ret;
5058
5059         ret = tracing_update_buffers();
5060         if (ret < 0)
5061                 return ret;
5062
5063         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5064         if (ret)
5065                 return ret;
5066
5067         mutex_lock(&trace_types_lock);
5068
5069         if (tr->current_trace->use_max_tr) {
5070                 ret = -EBUSY;
5071                 goto out;
5072         }
5073
5074         switch (val) {
5075         case 0:
5076                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5077                         ret = -EINVAL;
5078                         break;
5079                 }
5080                 if (tr->allocated_snapshot)
5081                         free_snapshot(tr);
5082                 break;
5083         case 1:
5084 /* Only allow per-cpu swap if the ring buffer supports it */
5085 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5086                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5087                         ret = -EINVAL;
5088                         break;
5089                 }
5090 #endif
5091                 if (!tr->allocated_snapshot) {
5092                         ret = alloc_snapshot(tr);
5093                         if (ret < 0)
5094                                 break;
5095                 }
5096                 local_irq_disable();
5097                 /* Now, we're going to swap */
5098                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5099                         update_max_tr(tr, current, smp_processor_id());
5100                 else
5101                         update_max_tr_single(tr, current, iter->cpu_file);
5102                 local_irq_enable();
5103                 break;
5104         default:
5105                 if (tr->allocated_snapshot) {
5106                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5107                                 tracing_reset_online_cpus(&tr->max_buffer);
5108                         else
5109                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5110                 }
5111                 break;
5112         }
5113
5114         if (ret >= 0) {
5115                 *ppos += cnt;
5116                 ret = cnt;
5117         }
5118 out:
5119         mutex_unlock(&trace_types_lock);
5120         return ret;
5121 }
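
/*
 * Editorial sketch (not part of trace.c): the switch statement above gives the
 * "snapshot" file its semantics: writing 0 frees the max buffer, writing 1
 * allocates it if necessary and swaps it with the live buffer, and any larger
 * value just clears the snapshot.  A userspace sketch, assuming the usual
 * debugfs mount point:
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);

	if (fd < 0) {
		perror("open snapshot");
		return 1;
	}

	/* "1": allocate the max buffer if needed, then swap it with the live one. */
	if (write(fd, "1\n", 2) < 0)
		perror("take snapshot");

	/* "2" (or any value > 1): clear the snapshot without freeing it. */
	if (write(fd, "2\n", 2) < 0)
		perror("clear snapshot");

	close(fd);
	return 0;
}
#endif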
5122
5123 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5124 {
5125         struct seq_file *m = file->private_data;
5126         int ret;
5127
5128         ret = tracing_release(inode, file);
5129
5130         if (file->f_mode & FMODE_READ)
5131                 return ret;
5132
5133         /* If write only, the seq_file is just a stub */
5134         if (m)
5135                 kfree(m->private);
5136         kfree(m);
5137
5138         return 0;
5139 }
5140
5141 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5142 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5143                                     size_t count, loff_t *ppos);
5144 static int tracing_buffers_release(struct inode *inode, struct file *file);
5145 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5146                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5147
5148 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5149 {
5150         struct ftrace_buffer_info *info;
5151         int ret;
5152
5153         ret = tracing_buffers_open(inode, filp);
5154         if (ret < 0)
5155                 return ret;
5156
5157         info = filp->private_data;
5158
5159         if (info->iter.trace->use_max_tr) {
5160                 tracing_buffers_release(inode, filp);
5161                 return -EBUSY;
5162         }
5163
5164         info->iter.snapshot = true;
5165         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5166
5167         return ret;
5168 }
5169
5170 #endif /* CONFIG_TRACER_SNAPSHOT */
5171
5172
5173 static const struct file_operations tracing_max_lat_fops = {
5174         .open           = tracing_open_generic,
5175         .read           = tracing_max_lat_read,
5176         .write          = tracing_max_lat_write,
5177         .llseek         = generic_file_llseek,
5178 };
5179
5180 static const struct file_operations set_tracer_fops = {
5181         .open           = tracing_open_generic,
5182         .read           = tracing_set_trace_read,
5183         .write          = tracing_set_trace_write,
5184         .llseek         = generic_file_llseek,
5185 };
5186
5187 static const struct file_operations tracing_pipe_fops = {
5188         .open           = tracing_open_pipe,
5189         .poll           = tracing_poll_pipe,
5190         .read           = tracing_read_pipe,
5191         .splice_read    = tracing_splice_read_pipe,
5192         .release        = tracing_release_pipe,
5193         .llseek         = no_llseek,
5194 };
5195
5196 static const struct file_operations tracing_entries_fops = {
5197         .open           = tracing_open_generic_tr,
5198         .read           = tracing_entries_read,
5199         .write          = tracing_entries_write,
5200         .llseek         = generic_file_llseek,
5201         .release        = tracing_release_generic_tr,
5202 };
5203
5204 static const struct file_operations tracing_total_entries_fops = {
5205         .open           = tracing_open_generic_tr,
5206         .read           = tracing_total_entries_read,
5207         .llseek         = generic_file_llseek,
5208         .release        = tracing_release_generic_tr,
5209 };
5210
5211 static const struct file_operations tracing_free_buffer_fops = {
5212         .open           = tracing_open_generic_tr,
5213         .write          = tracing_free_buffer_write,
5214         .release        = tracing_free_buffer_release,
5215 };
5216
5217 static const struct file_operations tracing_mark_fops = {
5218         .open           = tracing_open_generic_tr,
5219         .write          = tracing_mark_write,
5220         .llseek         = generic_file_llseek,
5221         .release        = tracing_release_generic_tr,
5222 };
5223
5224 static const struct file_operations trace_clock_fops = {
5225         .open           = tracing_clock_open,
5226         .read           = seq_read,
5227         .llseek         = seq_lseek,
5228         .release        = tracing_single_release_tr,
5229         .write          = tracing_clock_write,
5230 };
5231
5232 #ifdef CONFIG_TRACER_SNAPSHOT
5233 static const struct file_operations snapshot_fops = {
5234         .open           = tracing_snapshot_open,
5235         .read           = seq_read,
5236         .write          = tracing_snapshot_write,
5237         .llseek         = tracing_lseek,
5238         .release        = tracing_snapshot_release,
5239 };
5240
5241 static const struct file_operations snapshot_raw_fops = {
5242         .open           = snapshot_raw_open,
5243         .read           = tracing_buffers_read,
5244         .release        = tracing_buffers_release,
5245         .splice_read    = tracing_buffers_splice_read,
5246         .llseek         = no_llseek,
5247 };
5248
5249 #endif /* CONFIG_TRACER_SNAPSHOT */
5250
5251 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5252 {
5253         struct trace_array *tr = inode->i_private;
5254         struct ftrace_buffer_info *info;
5255         int ret;
5256
5257         if (tracing_disabled)
5258                 return -ENODEV;
5259
5260         if (trace_array_get(tr) < 0)
5261                 return -ENODEV;
5262
5263         info = kzalloc(sizeof(*info), GFP_KERNEL);
5264         if (!info) {
5265                 trace_array_put(tr);
5266                 return -ENOMEM;
5267         }
5268
5269         mutex_lock(&trace_types_lock);
5270
5271         info->iter.tr           = tr;
5272         info->iter.cpu_file     = tracing_get_cpu(inode);
5273         info->iter.trace        = tr->current_trace;
5274         info->iter.trace_buffer = &tr->trace_buffer;
5275         info->spare             = NULL;
5276         /* Force reading ring buffer for first read */
5277         info->read              = (unsigned int)-1;
5278
5279         filp->private_data = info;
5280
5281         mutex_unlock(&trace_types_lock);
5282
5283         ret = nonseekable_open(inode, filp);
5284         if (ret < 0)
5285                 trace_array_put(tr);
5286
5287         return ret;
5288 }
5289
5290 static unsigned int
5291 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5292 {
5293         struct ftrace_buffer_info *info = filp->private_data;
5294         struct trace_iterator *iter = &info->iter;
5295
5296         return trace_poll(iter, filp, poll_table);
5297 }
5298
5299 static ssize_t
5300 tracing_buffers_read(struct file *filp, char __user *ubuf,
5301                      size_t count, loff_t *ppos)
5302 {
5303         struct ftrace_buffer_info *info = filp->private_data;
5304         struct trace_iterator *iter = &info->iter;
5305         ssize_t ret;
5306         ssize_t size;
5307
5308         if (!count)
5309                 return 0;
5310
5311         mutex_lock(&trace_types_lock);
5312
5313 #ifdef CONFIG_TRACER_MAX_TRACE
5314         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5315                 size = -EBUSY;
5316                 goto out_unlock;
5317         }
5318 #endif
5319
5320         if (!info->spare)
5321                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5322                                                           iter->cpu_file);
5323         size = -ENOMEM;
5324         if (!info->spare)
5325                 goto out_unlock;
5326
5327         /* Do we have previous read data to read? */
5328         if (info->read < PAGE_SIZE)
5329                 goto read;
5330
5331  again:
5332         trace_access_lock(iter->cpu_file);
5333         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5334                                     &info->spare,
5335                                     count,
5336                                     iter->cpu_file, 0);
5337         trace_access_unlock(iter->cpu_file);
5338
5339         if (ret < 0) {
5340                 if (trace_empty(iter)) {
5341                         if ((filp->f_flags & O_NONBLOCK)) {
5342                                 size = -EAGAIN;
5343                                 goto out_unlock;
5344                         }
5345                         mutex_unlock(&trace_types_lock);
5346                         ret = wait_on_pipe(iter);
5347                         mutex_lock(&trace_types_lock);
5348                         if (ret) {
5349                                 size = ret;
5350                                 goto out_unlock;
5351                         }
5352                         if (signal_pending(current)) {
5353                                 size = -EINTR;
5354                                 goto out_unlock;
5355                         }
5356                         goto again;
5357                 }
5358                 size = 0;
5359                 goto out_unlock;
5360         }
5361
5362         info->read = 0;
5363  read:
5364         size = PAGE_SIZE - info->read;
5365         if (size > count)
5366                 size = count;
5367
5368         ret = copy_to_user(ubuf, info->spare + info->read, size);
5369         if (ret == size) {
5370                 size = -EFAULT;
5371                 goto out_unlock;
5372         }
5373         size -= ret;
5374
5375         *ppos += size;
5376         info->read += size;
5377
5378  out_unlock:
5379         mutex_unlock(&trace_types_lock);
5380
5381         return size;
5382 }
5383
5384 static int tracing_buffers_release(struct inode *inode, struct file *file)
5385 {
5386         struct ftrace_buffer_info *info = file->private_data;
5387         struct trace_iterator *iter = &info->iter;
5388
5389         mutex_lock(&trace_types_lock);
5390
5391         __trace_array_put(iter->tr);
5392
5393         if (info->spare)
5394                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5395         kfree(info);
5396
5397         mutex_unlock(&trace_types_lock);
5398
5399         return 0;
5400 }
5401
5402 struct buffer_ref {
5403         struct ring_buffer      *buffer;
5404         void                    *page;
5405         int                     ref;
5406 };
5407
5408 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5409                                     struct pipe_buffer *buf)
5410 {
5411         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5412
5413         if (--ref->ref)
5414                 return;
5415
5416         ring_buffer_free_read_page(ref->buffer, ref->page);
5417         kfree(ref);
5418         buf->private = 0;
5419 }
5420
5421 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5422                                 struct pipe_buffer *buf)
5423 {
5424         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5425
5426         ref->ref++;
5427 }
5428
5429 /* Pipe buffer operations for a buffer. */
5430 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5431         .can_merge              = 0,
5432         .confirm                = generic_pipe_buf_confirm,
5433         .release                = buffer_pipe_buf_release,
5434         .steal                  = generic_pipe_buf_steal,
5435         .get                    = buffer_pipe_buf_get,
5436 };
5437
5438 /*
5439  * Callback from splice_to_pipe(), if we need to release some pages
5440  * at the end of the spd in case we errored out while filling the pipe.
5441  */
5442 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5443 {
5444         struct buffer_ref *ref =
5445                 (struct buffer_ref *)spd->partial[i].private;
5446
5447         if (--ref->ref)
5448                 return;
5449
5450         ring_buffer_free_read_page(ref->buffer, ref->page);
5451         kfree(ref);
5452         spd->partial[i].private = 0;
5453 }
5454
5455 static ssize_t
5456 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5457                             struct pipe_inode_info *pipe, size_t len,
5458                             unsigned int flags)
5459 {
5460         struct ftrace_buffer_info *info = file->private_data;
5461         struct trace_iterator *iter = &info->iter;
5462         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5463         struct page *pages_def[PIPE_DEF_BUFFERS];
5464         struct splice_pipe_desc spd = {
5465                 .pages          = pages_def,
5466                 .partial        = partial_def,
5467                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5468                 .flags          = flags,
5469                 .ops            = &buffer_pipe_buf_ops,
5470                 .spd_release    = buffer_spd_release,
5471         };
5472         struct buffer_ref *ref;
5473         int entries, size, i;
5474         ssize_t ret;
5475
5476         mutex_lock(&trace_types_lock);
5477
5478 #ifdef CONFIG_TRACER_MAX_TRACE
5479         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5480                 ret = -EBUSY;
5481                 goto out;
5482         }
5483 #endif
5484
5485         if (splice_grow_spd(pipe, &spd)) {
5486                 ret = -ENOMEM;
5487                 goto out;
5488         }
5489
5490         if (*ppos & (PAGE_SIZE - 1)) {
5491                 ret = -EINVAL;
5492                 goto out;
5493         }
5494
5495         if (len & (PAGE_SIZE - 1)) {
5496                 if (len < PAGE_SIZE) {
5497                         ret = -EINVAL;
5498                         goto out;
5499                 }
5500                 len &= PAGE_MASK;
5501         }
5502
5503  again:
5504         trace_access_lock(iter->cpu_file);
5505         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5506
5507         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5508                 struct page *page;
5509                 int r;
5510
5511                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5512                 if (!ref)
5513                         break;
5514
5515                 ref->ref = 1;
5516                 ref->buffer = iter->trace_buffer->buffer;
5517                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5518                 if (!ref->page) {
5519                         kfree(ref);
5520                         break;
5521                 }
5522
5523                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5524                                           len, iter->cpu_file, 1);
5525                 if (r < 0) {
5526                         ring_buffer_free_read_page(ref->buffer, ref->page);
5527                         kfree(ref);
5528                         break;
5529                 }
5530
5531                 /*
5532                  * Zero out any leftover data, since this page is
5533                  * going to user land.
5534                  */
5535                 size = ring_buffer_page_len(ref->page);
5536                 if (size < PAGE_SIZE)
5537                         memset(ref->page + size, 0, PAGE_SIZE - size);
5538
5539                 page = virt_to_page(ref->page);
5540
5541                 spd.pages[i] = page;
5542                 spd.partial[i].len = PAGE_SIZE;
5543                 spd.partial[i].offset = 0;
5544                 spd.partial[i].private = (unsigned long)ref;
5545                 spd.nr_pages++;
5546                 *ppos += PAGE_SIZE;
5547
5548                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5549         }
5550
5551         trace_access_unlock(iter->cpu_file);
5552         spd.nr_pages = i;
5553
5554         /* did we read anything? */
5555         if (!spd.nr_pages) {
5556                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5557                         ret = -EAGAIN;
5558                         goto out;
5559                 }
5560                 mutex_unlock(&trace_types_lock);
5561                 ret = wait_on_pipe(iter);
5562                 mutex_lock(&trace_types_lock);
5563                 if (ret)
5564                         goto out;
5565                 if (signal_pending(current)) {
5566                         ret = -EINTR;
5567                         goto out;
5568                 }
5569                 goto again;
5570         }
5571
5572         ret = splice_to_pipe(pipe, &spd);
5573         splice_shrink_spd(&spd);
5574 out:
5575         mutex_unlock(&trace_types_lock);
5576
5577         return ret;
5578 }
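
/*
 * Editorial sketch (not part of trace.c): tracing_buffers_splice_read() above
 * hands complete ring-buffer pages to a pipe without copying, which is how
 * tools such as trace-cmd pull binary per-CPU data out of trace_pipe_raw.
 * A userspace sketch draining cpu0 through an intermediate pipe; the per-CPU
 * path, output file name and 64-page chunk size are assumptions.
 */
#if 0	/* illustrative standalone example, not kernel code */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
		       O_RDONLY | O_NONBLOCK);
	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int pfd[2];
	ssize_t n;

	if (raw < 0 || out < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	/*
	 * splice() moves whole pages: lengths under a page return -EINVAL,
	 * larger lengths are rounded down to a page multiple.
	 */
	while ((n = splice(raw, NULL, pfd[1], NULL, 64 * 4096,
			   SPLICE_F_NONBLOCK)) > 0) {
		if (splice(pfd[0], NULL, out, NULL, n, 0) < 0) {
			perror("splice to file");
			break;
		}
	}

	close(raw);
	close(out);
	return 0;
}
#endif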
5579
5580 static const struct file_operations tracing_buffers_fops = {
5581         .open           = tracing_buffers_open,
5582         .read           = tracing_buffers_read,
5583         .poll           = tracing_buffers_poll,
5584         .release        = tracing_buffers_release,
5585         .splice_read    = tracing_buffers_splice_read,
5586         .llseek         = no_llseek,
5587 };
5588
5589 static ssize_t
5590 tracing_stats_read(struct file *filp, char __user *ubuf,
5591                    size_t count, loff_t *ppos)
5592 {
5593         struct inode *inode = file_inode(filp);
5594         struct trace_array *tr = inode->i_private;
5595         struct trace_buffer *trace_buf = &tr->trace_buffer;
5596         int cpu = tracing_get_cpu(inode);
5597         struct trace_seq *s;
5598         unsigned long cnt;
5599         unsigned long long t;
5600         unsigned long usec_rem;
5601
5602         s = kmalloc(sizeof(*s), GFP_KERNEL);
5603         if (!s)
5604                 return -ENOMEM;
5605
5606         trace_seq_init(s);
5607
5608         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5609         trace_seq_printf(s, "entries: %ld\n", cnt);
5610
5611         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5612         trace_seq_printf(s, "overrun: %ld\n", cnt);
5613
5614         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5615         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5616
5617         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5618         trace_seq_printf(s, "bytes: %ld\n", cnt);
5619
5620         if (trace_clocks[tr->clock_id].in_ns) {
5621                 /* local or global for trace_clock */
5622                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5623                 usec_rem = do_div(t, USEC_PER_SEC);
5624                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5625                                                                 t, usec_rem);
5626
5627                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5628                 usec_rem = do_div(t, USEC_PER_SEC);
5629                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5630         } else {
5631                 /* counter or tsc mode for trace_clock */
5632                 trace_seq_printf(s, "oldest event ts: %llu\n",
5633                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5634
5635                 trace_seq_printf(s, "now ts: %llu\n",
5636                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5637         }
5638
5639         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5640         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5641
5642         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5643         trace_seq_printf(s, "read events: %ld\n", cnt);
5644
5645         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5646
5647         kfree(s);
5648
5649         return count;
5650 }
5651
5652 static const struct file_operations tracing_stats_fops = {
5653         .open           = tracing_open_generic_tr,
5654         .read           = tracing_stats_read,
5655         .llseek         = generic_file_llseek,
5656         .release        = tracing_release_generic_tr,
5657 };
5658
5659 #ifdef CONFIG_DYNAMIC_FTRACE
5660
5661 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5662 {
5663         return 0;
5664 }
5665
5666 static ssize_t
5667 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5668                   size_t cnt, loff_t *ppos)
5669 {
5670         static char ftrace_dyn_info_buffer[1024];
5671         static DEFINE_MUTEX(dyn_info_mutex);
5672         unsigned long *p = filp->private_data;
5673         char *buf = ftrace_dyn_info_buffer;
5674         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5675         int r;
5676
5677         mutex_lock(&dyn_info_mutex);
5678         r = sprintf(buf, "%ld ", *p);
5679
5680         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5681         buf[r++] = '\n';
5682
5683         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5684
5685         mutex_unlock(&dyn_info_mutex);
5686
5687         return r;
5688 }
5689
5690 static const struct file_operations tracing_dyn_info_fops = {
5691         .open           = tracing_open_generic,
5692         .read           = tracing_read_dyn_info,
5693         .llseek         = generic_file_llseek,
5694 };
5695 #endif /* CONFIG_DYNAMIC_FTRACE */
5696
5697 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5698 static void
5699 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5700 {
5701         tracing_snapshot();
5702 }
5703
5704 static void
5705 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5706 {
5707         unsigned long *count = (unsigned long *)data;
5708
5709         if (!*count)
5710                 return;
5711
5712         if (*count != -1)
5713                 (*count)--;
5714
5715         tracing_snapshot();
5716 }
5717
5718 static int
5719 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5720                       struct ftrace_probe_ops *ops, void *data)
5721 {
5722         long count = (long)data;
5723
5724         seq_printf(m, "%ps:", (void *)ip);
5725
5726         seq_printf(m, "snapshot");
5727
5728         if (count == -1)
5729                 seq_printf(m, ":unlimited\n");
5730         else
5731                 seq_printf(m, ":count=%ld\n", count);
5732
5733         return 0;
5734 }
5735
5736 static struct ftrace_probe_ops snapshot_probe_ops = {
5737         .func                   = ftrace_snapshot,
5738         .print                  = ftrace_snapshot_print,
5739 };
5740
5741 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5742         .func                   = ftrace_count_snapshot,
5743         .print                  = ftrace_snapshot_print,
5744 };
5745
5746 static int
5747 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5748                                char *glob, char *cmd, char *param, int enable)
5749 {
5750         struct ftrace_probe_ops *ops;
5751         void *count = (void *)-1;
5752         char *number;
5753         int ret;
5754
5755         /* hash funcs only work with set_ftrace_filter */
5756         if (!enable)
5757                 return -EINVAL;
5758
5759         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5760
5761         if (glob[0] == '!') {
5762                 unregister_ftrace_function_probe_func(glob+1, ops);
5763                 return 0;
5764         }
5765
5766         if (!param)
5767                 goto out_reg;
5768
5769         number = strsep(&param, ":");
5770
5771         if (!strlen(number))
5772                 goto out_reg;
5773
5774         /*
5775          * We use the callback data field (which is a pointer)
5776          * as our counter.
5777          */
5778         ret = kstrtoul(number, 0, (unsigned long *)&count);
5779         if (ret)
5780                 return ret;
5781
5782  out_reg:
5783         ret = register_ftrace_function_probe(glob, ops, count);
5784
5785         if (ret >= 0)
5786                 alloc_snapshot(&global_trace);
5787
5788         return ret < 0 ? ret : 0;
5789 }
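
/*
 * Editorial sketch (not part of trace.c): ftrace_trace_snapshot_callback()
 * above implements the "snapshot" command accepted by set_ftrace_filter, so
 * writing "<function>:snapshot[:count]" arms a probe that calls
 * tracing_snapshot() when that function is hit (a leading '!' removes it).
 * The traced function below, do_exit, is only an example.
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Take at most one snapshot, the first time do_exit() is entered. */
	const char *cmd = "do_exit:snapshot:1\n";
	int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);

	if (fd < 0) {
		perror("open set_ftrace_filter");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("arm snapshot probe");
	close(fd);
	return 0;
}
#endif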
5790
5791 static struct ftrace_func_command ftrace_snapshot_cmd = {
5792         .name                   = "snapshot",
5793         .func                   = ftrace_trace_snapshot_callback,
5794 };
5795
5796 static __init int register_snapshot_cmd(void)
5797 {
5798         return register_ftrace_command(&ftrace_snapshot_cmd);
5799 }
5800 #else
5801 static inline __init int register_snapshot_cmd(void) { return 0; }
5802 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5803
5804 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5805 {
5806         if (tr->dir)
5807                 return tr->dir;
5808
5809         if (!debugfs_initialized())
5810                 return NULL;
5811
5812         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5813                 tr->dir = debugfs_create_dir("tracing", NULL);
5814
5815         if (!tr->dir)
5816                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5817
5818         return tr->dir;
5819 }
5820
5821 struct dentry *tracing_init_dentry(void)
5822 {
5823         return tracing_init_dentry_tr(&global_trace);
5824 }
5825
5826 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5827 {
5828         struct dentry *d_tracer;
5829
5830         if (tr->percpu_dir)
5831                 return tr->percpu_dir;
5832
5833         d_tracer = tracing_init_dentry_tr(tr);
5834         if (!d_tracer)
5835                 return NULL;
5836
5837         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5838
5839         WARN_ONCE(!tr->percpu_dir,
5840                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5841
5842         return tr->percpu_dir;
5843 }
5844
5845 static struct dentry *
5846 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5847                       void *data, long cpu, const struct file_operations *fops)
5848 {
5849         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5850
5851         if (ret) /* See tracing_get_cpu() */
5852                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5853         return ret;
5854 }
5855
5856 static void
5857 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5858 {
5859         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5860         struct dentry *d_cpu;
5861         char cpu_dir[30]; /* 30 characters should be more than enough */
5862
5863         if (!d_percpu)
5864                 return;
5865
5866         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5867         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5868         if (!d_cpu) {
5869                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5870                 return;
5871         }
5872
5873         /* per cpu trace_pipe */
5874         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5875                                 tr, cpu, &tracing_pipe_fops);
5876
5877         /* per cpu trace */
5878         trace_create_cpu_file("trace", 0644, d_cpu,
5879                                 tr, cpu, &tracing_fops);
5880
5881         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5882                                 tr, cpu, &tracing_buffers_fops);
5883
5884         trace_create_cpu_file("stats", 0444, d_cpu,
5885                                 tr, cpu, &tracing_stats_fops);
5886
5887         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5888                                 tr, cpu, &tracing_entries_fops);
5889
5890 #ifdef CONFIG_TRACER_SNAPSHOT
5891         trace_create_cpu_file("snapshot", 0644, d_cpu,
5892                                 tr, cpu, &snapshot_fops);
5893
5894         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5895                                 tr, cpu, &snapshot_raw_fops);
5896 #endif
5897 }
5898
5899 #ifdef CONFIG_FTRACE_SELFTEST
5900 /* Let selftest have access to static functions in this file */
5901 #include "trace_selftest.c"
5902 #endif
5903
5904 struct trace_option_dentry {
5905         struct tracer_opt               *opt;
5906         struct tracer_flags             *flags;
5907         struct trace_array              *tr;
5908         struct dentry                   *entry;
5909 };
5910
5911 static ssize_t
5912 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5913                         loff_t *ppos)
5914 {
5915         struct trace_option_dentry *topt = filp->private_data;
5916         char *buf;
5917
5918         if (topt->flags->val & topt->opt->bit)
5919                 buf = "1\n";
5920         else
5921                 buf = "0\n";
5922
5923         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5924 }
5925
5926 static ssize_t
5927 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5928                          loff_t *ppos)
5929 {
5930         struct trace_option_dentry *topt = filp->private_data;
5931         unsigned long val;
5932         int ret;
5933
5934         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5935         if (ret)
5936                 return ret;
5937
5938         if (val != 0 && val != 1)
5939                 return -EINVAL;
5940
5941         if (!!(topt->flags->val & topt->opt->bit) != val) {
5942                 mutex_lock(&trace_types_lock);
5943                 ret = __set_tracer_option(topt->tr, topt->flags,
5944                                           topt->opt, !val);
5945                 mutex_unlock(&trace_types_lock);
5946                 if (ret)
5947                         return ret;
5948         }
5949
5950         *ppos += cnt;
5951
5952         return cnt;
5953 }
5954
5955
5956 static const struct file_operations trace_options_fops = {
5957         .open = tracing_open_generic,
5958         .read = trace_options_read,
5959         .write = trace_options_write,
5960         .llseek = generic_file_llseek,
5961 };
5962
5963 static ssize_t
5964 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5965                         loff_t *ppos)
5966 {
5967         long index = (long)filp->private_data;
5968         char *buf;
5969
5970         if (trace_flags & (1 << index))
5971                 buf = "1\n";
5972         else
5973                 buf = "0\n";
5974
5975         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5976 }
5977
5978 static ssize_t
5979 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5980                          loff_t *ppos)
5981 {
5982         struct trace_array *tr = &global_trace;
5983         long index = (long)filp->private_data;
5984         unsigned long val;
5985         int ret;
5986
5987         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5988         if (ret)
5989                 return ret;
5990
5991         if (val != 0 && val != 1)
5992                 return -EINVAL;
5993
5994         mutex_lock(&trace_types_lock);
5995         ret = set_tracer_flag(tr, 1 << index, val);
5996         mutex_unlock(&trace_types_lock);
5997
5998         if (ret < 0)
5999                 return ret;
6000
6001         *ppos += cnt;
6002
6003         return cnt;
6004 }
6005
6006 static const struct file_operations trace_options_core_fops = {
6007         .open = tracing_open_generic,
6008         .read = trace_options_core_read,
6009         .write = trace_options_core_write,
6010         .llseek = generic_file_llseek,
6011 };
6012
6013 struct dentry *trace_create_file(const char *name,
6014                                  umode_t mode,
6015                                  struct dentry *parent,
6016                                  void *data,
6017                                  const struct file_operations *fops)
6018 {
6019         struct dentry *ret;
6020
6021         ret = debugfs_create_file(name, mode, parent, data, fops);
6022         if (!ret)
6023                 pr_warning("Could not create debugfs '%s' entry\n", name);
6024
6025         return ret;
6026 }
6027
6028
6029 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6030 {
6031         struct dentry *d_tracer;
6032
6033         if (tr->options)
6034                 return tr->options;
6035
6036         d_tracer = tracing_init_dentry_tr(tr);
6037         if (!d_tracer)
6038                 return NULL;
6039
6040         tr->options = debugfs_create_dir("options", d_tracer);
6041         if (!tr->options) {
6042                 pr_warning("Could not create debugfs directory 'options'\n");
6043                 return NULL;
6044         }
6045
6046         return tr->options;
6047 }
6048
6049 static void
6050 create_trace_option_file(struct trace_array *tr,
6051                          struct trace_option_dentry *topt,
6052                          struct tracer_flags *flags,
6053                          struct tracer_opt *opt)
6054 {
6055         struct dentry *t_options;
6056
6057         t_options = trace_options_init_dentry(tr);
6058         if (!t_options)
6059                 return;
6060
6061         topt->flags = flags;
6062         topt->opt = opt;
6063         topt->tr = tr;
6064
6065         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6066                                     &trace_options_fops);
6067
6068 }
6069
6070 static struct trace_option_dentry *
6071 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6072 {
6073         struct trace_option_dentry *topts;
6074         struct tracer_flags *flags;
6075         struct tracer_opt *opts;
6076         int cnt;
6077
6078         if (!tracer)
6079                 return NULL;
6080
6081         flags = tracer->flags;
6082
6083         if (!flags || !flags->opts)
6084                 return NULL;
6085
6086         opts = flags->opts;
6087
6088         for (cnt = 0; opts[cnt].name; cnt++)
6089                 ;
6090
6091         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6092         if (!topts)
6093                 return NULL;
6094
6095         for (cnt = 0; opts[cnt].name; cnt++)
6096                 create_trace_option_file(tr, &topts[cnt], flags,
6097                                          &opts[cnt]);
6098
6099         return topts;
6100 }
6101
6102 static void
6103 destroy_trace_option_files(struct trace_option_dentry *topts)
6104 {
6105         int cnt;
6106
6107         if (!topts)
6108                 return;
6109
6110         for (cnt = 0; topts[cnt].opt; cnt++) {
6111                 if (topts[cnt].entry)
6112                         debugfs_remove(topts[cnt].entry);
6113         }
6114
6115         kfree(topts);
6116 }
6117
6118 static struct dentry *
6119 create_trace_option_core_file(struct trace_array *tr,
6120                               const char *option, long index)
6121 {
6122         struct dentry *t_options;
6123
6124         t_options = trace_options_init_dentry(tr);
6125         if (!t_options)
6126                 return NULL;
6127
6128         return trace_create_file(option, 0644, t_options, (void *)index,
6129                                     &trace_options_core_fops);
6130 }
6131
6132 static __init void create_trace_options_dir(struct trace_array *tr)
6133 {
6134         struct dentry *t_options;
6135         int i;
6136
6137         t_options = trace_options_init_dentry(tr);
6138         if (!t_options)
6139                 return;
6140
6141         for (i = 0; trace_options[i]; i++)
6142                 create_trace_option_core_file(tr, trace_options[i], i);
6143 }
6144
6145 static ssize_t
6146 rb_simple_read(struct file *filp, char __user *ubuf,
6147                size_t cnt, loff_t *ppos)
6148 {
6149         struct trace_array *tr = filp->private_data;
6150         char buf[64];
6151         int r;
6152
6153         r = tracer_tracing_is_on(tr);
6154         r = sprintf(buf, "%d\n", r);
6155
6156         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6157 }
6158
6159 static ssize_t
6160 rb_simple_write(struct file *filp, const char __user *ubuf,
6161                 size_t cnt, loff_t *ppos)
6162 {
6163         struct trace_array *tr = filp->private_data;
6164         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6165         unsigned long val;
6166         int ret;
6167
6168         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6169         if (ret)
6170                 return ret;
6171
6172         if (buffer) {
6173                 mutex_lock(&trace_types_lock);
6174                 if (val) {
6175                         tracer_tracing_on(tr);
6176                         if (tr->current_trace->start)
6177                                 tr->current_trace->start(tr);
6178                 } else {
6179                         tracer_tracing_off(tr);
6180                         if (tr->current_trace->stop)
6181                                 tr->current_trace->stop(tr);
6182                 }
6183                 mutex_unlock(&trace_types_lock);
6184         }
6185
6186         (*ppos)++;
6187
6188         return cnt;
6189 }
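
/*
 * Editorial sketch (not part of trace.c): rb_simple_write() above maps a
 * written zero or non-zero value onto tracer_tracing_off()/tracer_tracing_on()
 * plus the current tracer's stop/start callbacks, so userspace can bracket
 * just the region it cares about.  The debugfs path is an assumption.
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void set_tracing(int fd, int on)
{
	/* Any non-zero value enables the ring buffer, zero disables it. */
	if (write(fd, on ? "1" : "0", 1) < 0)
		perror("write tracing_on");
}

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);

	if (fd < 0) {
		perror("open tracing_on");
		return 1;
	}
	set_tracing(fd, 1);
	/* ... run the workload of interest ... */
	set_tracing(fd, 0);
	close(fd);
	return 0;
}
#endif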
6190
6191 static const struct file_operations rb_simple_fops = {
6192         .open           = tracing_open_generic_tr,
6193         .read           = rb_simple_read,
6194         .write          = rb_simple_write,
6195         .release        = tracing_release_generic_tr,
6196         .llseek         = default_llseek,
6197 };
6198
6199 struct dentry *trace_instance_dir;
6200
6201 static void
6202 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6203
6204 static int
6205 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6206 {
6207         enum ring_buffer_flags rb_flags;
6208
6209         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6210
6211         buf->tr = tr;
6212
6213         buf->buffer = ring_buffer_alloc(size, rb_flags);
6214         if (!buf->buffer)
6215                 return -ENOMEM;
6216
6217         buf->data = alloc_percpu(struct trace_array_cpu);
6218         if (!buf->data) {
6219                 ring_buffer_free(buf->buffer);
6220                 return -ENOMEM;
6221         }
6222
6223         /* Allocate the first page for all buffers */
6224         set_buffer_entries(&tr->trace_buffer,
6225                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6226
6227         return 0;
6228 }
6229
6230 static int allocate_trace_buffers(struct trace_array *tr, int size)
6231 {
6232         int ret;
6233
6234         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6235         if (ret)
6236                 return ret;
6237
6238 #ifdef CONFIG_TRACER_MAX_TRACE
6239         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6240                                     allocate_snapshot ? size : 1);
6241         if (WARN_ON(ret)) {
6242                 ring_buffer_free(tr->trace_buffer.buffer);
6243                 free_percpu(tr->trace_buffer.data);
6244                 return -ENOMEM;
6245         }
6246         tr->allocated_snapshot = allocate_snapshot;
6247
6248         /*
6249          * Only the top level trace array gets its snapshot allocated
6250          * from the kernel command line.
6251          */
6252         allocate_snapshot = false;
6253 #endif
6254         return 0;
6255 }
6256
6257 static void free_trace_buffer(struct trace_buffer *buf)
6258 {
6259         if (buf->buffer) {
6260                 ring_buffer_free(buf->buffer);
6261                 buf->buffer = NULL;
6262                 free_percpu(buf->data);
6263                 buf->data = NULL;
6264         }
6265 }
6266
6267 static void free_trace_buffers(struct trace_array *tr)
6268 {
6269         if (!tr)
6270                 return;
6271
6272         free_trace_buffer(&tr->trace_buffer);
6273
6274 #ifdef CONFIG_TRACER_MAX_TRACE
6275         free_trace_buffer(&tr->max_buffer);
6276 #endif
6277 }
6278
6279 static int new_instance_create(const char *name)
6280 {
6281         struct trace_array *tr;
6282         int ret;
6283
6284         mutex_lock(&trace_types_lock);
6285
6286         ret = -EEXIST;
6287         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6288                 if (tr->name && strcmp(tr->name, name) == 0)
6289                         goto out_unlock;
6290         }
6291
6292         ret = -ENOMEM;
6293         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6294         if (!tr)
6295                 goto out_unlock;
6296
6297         tr->name = kstrdup(name, GFP_KERNEL);
6298         if (!tr->name)
6299                 goto out_free_tr;
6300
6301         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6302                 goto out_free_tr;
6303
6304         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6305
6306         raw_spin_lock_init(&tr->start_lock);
6307
6308         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6309
6310         tr->current_trace = &nop_trace;
6311
6312         INIT_LIST_HEAD(&tr->systems);
6313         INIT_LIST_HEAD(&tr->events);
6314
6315         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6316                 goto out_free_tr;
6317
6318         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6319         if (!tr->dir)
6320                 goto out_free_tr;
6321
6322         ret = event_trace_add_tracer(tr->dir, tr);
6323         if (ret) {
6324                 debugfs_remove_recursive(tr->dir);
6325                 goto out_free_tr;
6326         }
6327
6328         init_tracer_debugfs(tr, tr->dir);
6329
6330         list_add(&tr->list, &ftrace_trace_arrays);
6331
6332         mutex_unlock(&trace_types_lock);
6333
6334         return 0;
6335
6336  out_free_tr:
6337         free_trace_buffers(tr);
6338         free_cpumask_var(tr->tracing_cpumask);
6339         kfree(tr->name);
6340         kfree(tr);
6341
6342  out_unlock:
6343         mutex_unlock(&trace_types_lock);
6344
6345         return ret;
6346
6347 }
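
/*
 * Editorial sketch (not part of trace.c): new_instance_create() is reached
 * through instance_mkdir() below, so an ordinary mkdir()/rmdir() under
 * tracing/instances creates and destroys an independent trace_array with its
 * own buffers and event files.  The instance name "demo" and the debugfs path
 * are assumptions.
 */
#if 0	/* illustrative standalone example, not kernel code */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const char *inst = "/sys/kernel/debug/tracing/instances/demo";

	/* mkdir() ends up in instance_mkdir() -> new_instance_create(). */
	if (mkdir(inst, 0755) < 0) {
		perror("mkdir instance");
		return 1;
	}

	/* The new directory now holds its own trace, trace_pipe, events/, ... */

	/* rmdir() ends up in instance_rmdir() -> instance_delete(). */
	if (rmdir(inst) < 0)
		perror("rmdir instance");

	return 0;
}
#endif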
6348
6349 static int instance_delete(const char *name)
6350 {
6351         struct trace_array *tr;
6352         int found = 0;
6353         int ret;
6354
6355         mutex_lock(&trace_types_lock);
6356
6357         ret = -ENODEV;
6358         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6359                 if (tr->name && strcmp(tr->name, name) == 0) {
6360                         found = 1;
6361                         break;
6362                 }
6363         }
6364         if (!found)
6365                 goto out_unlock;
6366
6367         ret = -EBUSY;
6368         if (tr->ref)
6369                 goto out_unlock;
6370
6371         list_del(&tr->list);
6372
6373         tracing_set_nop(tr);
6374         event_trace_del_tracer(tr);
6375         ftrace_destroy_function_files(tr);
6376         debugfs_remove_recursive(tr->dir);
6377         free_trace_buffers(tr);
6378
6379         kfree(tr->name);
6380         kfree(tr);
6381
6382         ret = 0;
6383
6384  out_unlock:
6385         mutex_unlock(&trace_types_lock);
6386
6387         return ret;
6388 }
6389
6390 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6391 {
6392         struct dentry *parent;
6393         int ret;
6394
6395         /* Paranoid: Make sure the parent is the "instances" directory */
6396         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6397         if (WARN_ON_ONCE(parent != trace_instance_dir))
6398                 return -ENOENT;
6399
6400         /*
6401          * The inode mutex is locked, but debugfs_create_dir() will also
6402          * take the mutex. As the instances directory can not be destroyed
6403          * or changed in any other way, it is safe to unlock it, and
6404          * let the dentry try. If two users try to make the same dir at
6405          * the same time, then the new_instance_create() will determine the
6406          * winner.
6407          */
6408         mutex_unlock(&inode->i_mutex);
6409
6410         ret = new_instance_create(dentry->d_iname);
6411
6412         mutex_lock(&inode->i_mutex);
6413
6414         return ret;
6415 }
6416
6417 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6418 {
6419         struct dentry *parent;
6420         int ret;
6421
6422         /* Paranoid: Make sure the parent is the "instances" directory */
6423         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6424         if (WARN_ON_ONCE(parent != trace_instance_dir))
6425                 return -ENOENT;
6426
6427         /* The caller did a dget() on dentry */
6428         mutex_unlock(&dentry->d_inode->i_mutex);
6429
6430         /*
6431          * The inode mutex is locked, but debugfs_remove_recursive() will
6432          * also take the mutex. As the instances directory can not be
6433          * destroyed or changed in any other way, it is safe to unlock it,
6434          * and let the dentry try. If two users try to remove the same dir
6435          * at the same time, then instance_delete() will determine the
6436          * winner.
6437          */
6438         mutex_unlock(&inode->i_mutex);
6439
6440         ret = instance_delete(dentry->d_iname);
6441
6442         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6443         mutex_lock(&dentry->d_inode->i_mutex);
6444
6445         return ret;
6446 }
6447
6448 static const struct inode_operations instance_dir_inode_operations = {
6449         .lookup         = simple_lookup,
6450         .mkdir          = instance_mkdir,
6451         .rmdir          = instance_rmdir,
6452 };
6453
6454 static __init void create_trace_instances(struct dentry *d_tracer)
6455 {
6456         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6457         if (WARN_ON(!trace_instance_dir))
6458                 return;
6459
6460         /* Hijack the dir inode operations, to allow mkdir */
6461         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6462 }
6463
6464 static void
6465 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6466 {
6467         int cpu;
6468
6469         trace_create_file("available_tracers", 0444, d_tracer,
6470                         tr, &show_traces_fops);
6471
6472         trace_create_file("current_tracer", 0644, d_tracer,
6473                         tr, &set_tracer_fops);
6474
6475         trace_create_file("tracing_cpumask", 0644, d_tracer,
6476                           tr, &tracing_cpumask_fops);
6477
6478         trace_create_file("trace_options", 0644, d_tracer,
6479                           tr, &tracing_iter_fops);
6480
6481         trace_create_file("trace", 0644, d_tracer,
6482                           tr, &tracing_fops);
6483
6484         trace_create_file("trace_pipe", 0444, d_tracer,
6485                           tr, &tracing_pipe_fops);
6486
6487         trace_create_file("buffer_size_kb", 0644, d_tracer,
6488                           tr, &tracing_entries_fops);
6489
6490         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6491                           tr, &tracing_total_entries_fops);
6492
6493         trace_create_file("free_buffer", 0200, d_tracer,
6494                           tr, &tracing_free_buffer_fops);
6495
6496         trace_create_file("trace_marker", 0220, d_tracer,
6497                           tr, &tracing_mark_fops);
6498
6499         trace_create_file("trace_clock", 0644, d_tracer, tr,
6500                           &trace_clock_fops);
6501
6502         trace_create_file("tracing_on", 0644, d_tracer,
6503                           tr, &rb_simple_fops);
6504
6505 #ifdef CONFIG_TRACER_MAX_TRACE
6506         trace_create_file("tracing_max_latency", 0644, d_tracer,
6507                         &tr->max_latency, &tracing_max_lat_fops);
6508 #endif
6509
6510         if (ftrace_create_function_files(tr, d_tracer))
6511                 WARN(1, "Could not allocate function filter files");
6512
6513 #ifdef CONFIG_TRACER_SNAPSHOT
6514         trace_create_file("snapshot", 0644, d_tracer,
6515                           tr, &snapshot_fops);
6516 #endif
6517
6518         for_each_tracing_cpu(cpu)
6519                 tracing_init_debugfs_percpu(tr, cpu);
6520
6521 }
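
/*
 * For the global trace_array the files above appear directly under the
 * tracing directory (e.g. /sys/kernel/debug/tracing/trace with a
 * default debugfs mount); for a named instance they appear under
 * instances/<name>/ instead.
 */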
6522
6523 static __init int tracer_init_debugfs(void)
6524 {
6525         struct dentry *d_tracer;
6526
6527         trace_access_lock_init();
6528
6529         d_tracer = tracing_init_dentry();
6530         if (!d_tracer)
6531                 return 0;
6532
6533         init_tracer_debugfs(&global_trace, d_tracer);
6534
6535         trace_create_file("tracing_thresh", 0644, d_tracer,
6536                         &tracing_thresh, &tracing_max_lat_fops);
6537
6538         trace_create_file("README", 0444, d_tracer,
6539                         NULL, &tracing_readme_fops);
6540
6541         trace_create_file("saved_cmdlines", 0444, d_tracer,
6542                         NULL, &tracing_saved_cmdlines_fops);
6543
6544         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6545                           NULL, &tracing_saved_cmdlines_size_fops);
6546
6547 #ifdef CONFIG_DYNAMIC_FTRACE
6548         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6549                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6550 #endif
6551
6552         create_trace_instances(d_tracer);
6553
6554         create_trace_options_dir(&global_trace);
6555
6556         return 0;
6557 }
6558
6559 static int trace_panic_handler(struct notifier_block *this,
6560                                unsigned long event, void *unused)
6561 {
6562         if (ftrace_dump_on_oops)
6563                 ftrace_dump(ftrace_dump_on_oops);
6564         return NOTIFY_OK;
6565 }
6566
6567 static struct notifier_block trace_panic_notifier = {
6568         .notifier_call  = trace_panic_handler,
6569         .next           = NULL,
6570         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6571 };
6572
6573 static int trace_die_handler(struct notifier_block *self,
6574                              unsigned long val,
6575                              void *data)
6576 {
6577         switch (val) {
6578         case DIE_OOPS:
6579                 if (ftrace_dump_on_oops)
6580                         ftrace_dump(ftrace_dump_on_oops);
6581                 break;
6582         default:
6583                 break;
6584         }
6585         return NOTIFY_OK;
6586 }
6587
6588 static struct notifier_block trace_die_notifier = {
6589         .notifier_call = trace_die_handler,
6590         .priority = 200
6591 };
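
/*
 * The two notifiers above only act when ftrace_dump_on_oops is set.
 * It is typically enabled with the "ftrace_dump_on_oops" kernel command
 * line parameter (optionally "ftrace_dump_on_oops=orig_cpu" to dump
 * only the CPU that triggered the oops) or at run time through the
 * kernel.ftrace_dump_on_oops sysctl.
 */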
6592
6593 /*
6594  * printk is limited to a max of 1024 characters; we really don't need it that big.
6595  * Nothing should be printing 1000 characters anyway.
6596  */
6597 #define TRACE_MAX_PRINT         1000
6598
6599 /*
6600  * Define KERN_TRACE here so that we have one place to modify
6601  * it if we decide to change what log level the ftrace dump
6602  * should be at.
6603  */
6604 #define KERN_TRACE              KERN_EMERG
6605
6606 void
6607 trace_printk_seq(struct trace_seq *s)
6608 {
6609         /* Probably should print a warning here. */
6610         if (s->len >= TRACE_MAX_PRINT)
6611                 s->len = TRACE_MAX_PRINT;
6612
6613         /* should be zero terminated, but we are paranoid. */
6614         s->buffer[s->len] = 0;
6615
6616         printk(KERN_TRACE "%s", s->buffer);
6617
6618         trace_seq_init(s);
6619 }
6620
6621 void trace_init_global_iter(struct trace_iterator *iter)
6622 {
6623         iter->tr = &global_trace;
6624         iter->trace = iter->tr->current_trace;
6625         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6626         iter->trace_buffer = &global_trace.trace_buffer;
6627
6628         if (iter->trace && iter->trace->open)
6629                 iter->trace->open(iter);
6630
6631         /* Annotate start of buffers if we had overruns */
6632         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6633                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6634
6635         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6636         if (trace_clocks[iter->tr->clock_id].in_ns)
6637                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6638 }
6639
6640 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6641 {
6642         /* use static because iter can be a bit big for the stack */
6643         static struct trace_iterator iter;
6644         static atomic_t dump_running;
6645         unsigned int old_userobj;
6646         unsigned long flags;
6647         int cnt = 0, cpu;
6648
6649         /* Only allow one dump user at a time. */
6650         if (atomic_inc_return(&dump_running) != 1) {
6651                 atomic_dec(&dump_running);
6652                 return;
6653         }
6654
6655         /*
6656          * Always turn off tracing when we dump.
6657          * We don't need to show trace output of what happens
6658          * between multiple crashes.
6659          *
6660          * If the user does a sysrq-z, then they can re-enable
6661          * tracing with echo 1 > tracing_on.
6662          */
6663         tracing_off();
6664
6665         local_irq_save(flags);
6666
6667         /* Simulate the iterator */
6668         trace_init_global_iter(&iter);
6669
6670         for_each_tracing_cpu(cpu) {
6671                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6672         }
6673
6674         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6675
6676         /* don't look at user memory in panic mode */
6677         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6678
6679         switch (oops_dump_mode) {
6680         case DUMP_ALL:
6681                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6682                 break;
6683         case DUMP_ORIG:
6684                 iter.cpu_file = raw_smp_processor_id();
6685                 break;
6686         case DUMP_NONE:
6687                 goto out_enable;
6688         default:
6689                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6690                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6691         }
6692
6693         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6694
6695         /* Did function tracer already get disabled? */
6696         if (ftrace_is_dead()) {
6697                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6698                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6699         }
6700
6701         /*
6702          * We need to stop all tracing on all CPUs to read
6703          * the next buffer. This is a bit expensive, but is
6704          * not done often. We print all that we can read,
6705          * and then release the locks again.
6706          */
6707
6708         while (!trace_empty(&iter)) {
6709
6710                 if (!cnt)
6711                         printk(KERN_TRACE "---------------------------------\n");
6712
6713                 cnt++;
6714
6715                 /* reset all but tr, trace, and overruns */
6716                 memset(&iter.seq, 0,
6717                        sizeof(struct trace_iterator) -
6718                        offsetof(struct trace_iterator, seq));
6719                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6720                 iter.pos = -1;
6721
6722                 if (trace_find_next_entry_inc(&iter) != NULL) {
6723                         int ret;
6724
6725                         ret = print_trace_line(&iter);
6726                         if (ret != TRACE_TYPE_NO_CONSUME)
6727                                 trace_consume(&iter);
6728                 }
6729                 touch_nmi_watchdog();
6730
6731                 trace_printk_seq(&iter.seq);
6732         }
6733
6734         if (!cnt)
6735                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6736         else
6737                 printk(KERN_TRACE "---------------------------------\n");
6738
6739  out_enable:
6740         trace_flags |= old_userobj;
6741
6742         for_each_tracing_cpu(cpu) {
6743                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6744         }
6745         atomic_dec(&dump_running);
6746         local_irq_restore(flags);
6747 }
6748 EXPORT_SYMBOL_GPL(ftrace_dump);
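
/*
 * Since ftrace_dump() is exported, other kernel code can dump the
 * trace buffers when it detects a fatal condition. A minimal sketch,
 * where "bad_state" stands for whatever condition the caller checks:
 *
 *   if (WARN_ON(bad_state))
 *           ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG can be used instead of DUMP_ALL to dump only the buffer
 * of the CPU that made the call.
 */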
6749
6750 __init static int tracer_alloc_buffers(void)
6751 {
6752         int ring_buf_size;
6753         int ret = -ENOMEM;
6754
6755
6756         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6757                 goto out;
6758
6759         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6760                 goto out_free_buffer_mask;
6761
6762         /* Only allocate trace_printk buffers if a trace_printk exists */
6763         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6764                 /* Must be called before global_trace.buffer is allocated */
6765                 trace_printk_init_buffers();
6766
6767         /* To save memory, keep the ring buffer at its minimum size */
6768         if (ring_buffer_expanded)
6769                 ring_buf_size = trace_buf_size;
6770         else
6771                 ring_buf_size = 1;
6772
6773         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6774         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6775
6776         raw_spin_lock_init(&global_trace.start_lock);
6777
6778         /* Used for event triggers */
6779         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6780         if (!temp_buffer)
6781                 goto out_free_cpumask;
6782
6783         if (trace_create_savedcmd() < 0)
6784                 goto out_free_temp_buffer;
6785
6786         /* TODO: make the number of buffers hot pluggable with CPUs */
6787         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6788                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6789                 WARN_ON(1);
6790                 goto out_free_savedcmd;
6791         }
6792
6793         if (global_trace.buffer_disabled)
6794                 tracing_off();
6795
6796         if (trace_boot_clock) {
6797                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6798                 if (ret < 0)
6799                         pr_warning("Trace clock %s not defined, going back to default\n",
6800                                    trace_boot_clock);
6801         }
6802
6803         /*
6804          * register_tracer() might reference current_trace, so it
6805          * needs to be set before we register anything. This is
6806          * just a bootstrap of current_trace anyway.
6807          */
6808         global_trace.current_trace = &nop_trace;
6809
6810         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6811
6812         ftrace_init_global_array_ops(&global_trace);
6813
6814         register_tracer(&nop_trace);
6815
6816         /* All seems OK, enable tracing */
6817         tracing_disabled = 0;
6818
6819         atomic_notifier_chain_register(&panic_notifier_list,
6820                                        &trace_panic_notifier);
6821
6822         register_die_notifier(&trace_die_notifier);
6823
6824         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6825
6826         INIT_LIST_HEAD(&global_trace.systems);
6827         INIT_LIST_HEAD(&global_trace.events);
6828         list_add(&global_trace.list, &ftrace_trace_arrays);
6829
6830         while (trace_boot_options) {
6831                 char *option;
6832
6833                 option = strsep(&trace_boot_options, ",");
6834                 trace_set_options(&global_trace, option);
6835         }
6836
6837         register_snapshot_cmd();
6838
6839         return 0;
6840
6841 out_free_savedcmd:
6842         free_saved_cmdlines_buffer(savedcmd);
6843 out_free_temp_buffer:
6844         ring_buffer_free(temp_buffer);
6845 out_free_cpumask:
6846         free_cpumask_var(global_trace.tracing_cpumask);
6847 out_free_buffer_mask:
6848         free_cpumask_var(tracing_buffer_mask);
6849 out:
6850         return ret;
6851 }
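
/*
 * Several of the values consumed above come from the kernel command
 * line. For example (spellings as parsed by the __setup() handlers
 * earlier in this file):
 *
 *   ftrace=function          selects default_bootup_tracer
 *   trace_buf_size=1M        sets trace_buf_size and marks the ring
 *                            buffer as expanded
 *   trace_clock=global       sets trace_boot_clock
 *   trace_options=sym-addr   feeds the trace_boot_options list
 */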
6852
6853 __init static int clear_boot_tracer(void)
6854 {
6855         /*
6856          * The default bootup tracer name lives in a buffer placed in
6857          * an init section. This function is called as a late_initcall.
6858          * If we did not find the boot tracer, clear the name out to prevent
6859          * later registration from accessing the buffer that is
6860          * about to be freed.
6861          */
6862         if (!default_bootup_tracer)
6863                 return 0;
6864
6865         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6866                default_bootup_tracer);
6867         default_bootup_tracer = NULL;
6868
6869         return 0;
6870 }
6871
6872 early_initcall(tracer_alloc_buffers);
6873 fs_initcall(tracer_init_debugfs);
6874 late_initcall(clear_boot_tracer);
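
/*
 * Initcall ordering matters here: tracer_alloc_buffers() runs at
 * early_initcall time so the ring buffers exist before any tracer
 * registers, tracer_init_debugfs() runs at fs_initcall time once
 * debugfs can be populated, and clear_boot_tracer() runs last at
 * late_initcall time, after every built-in tracer has had a chance
 * to register and claim the bootup tracer name.
 */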