tracing: Do not inherit event-fork option for instances
[cascardo/linux.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but is set back to zero if the initialization
93  * of the tracer is successful. That is the only place that sets
94  * it back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
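/*
 * Editorial example (not part of the original file): given the parser
 * above and the /proc knob documented earlier, ftrace_dump_on_oops can
 * be enabled roughly like this (a sketch; exact paths depend on the
 * running kernel's configuration):
 *
 *   # on the kernel command line, dump all CPU buffers on an oops:
 *   ftrace_dump_on_oops
 *
 *   # or only the buffer of the CPU that triggered the oops:
 *   ftrace_dump_on_oops=orig_cpu
 *
 *   # or at run time via the sysctl:
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */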
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
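/*
 * Editorial note (not part of the original file): the "+ 500" above is
 * round-to-nearest before the divide, e.g. ns2usecs(1499) == 1 while
 * ns2usecs(1500) == 2.
 */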
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int filter_check_discard(struct trace_event_file *file, void *rec,
310                          struct ring_buffer *buffer,
311                          struct ring_buffer_event *event)
312 {
313         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
314             !filter_match_preds(file->filter, rec)) {
315                 ring_buffer_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321 EXPORT_SYMBOL_GPL(filter_check_discard);
322
323 int call_filter_check_discard(struct trace_event_call *call, void *rec,
324                               struct ring_buffer *buffer,
325                               struct ring_buffer_event *event)
326 {
327         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
328             !filter_match_preds(call->filter, rec)) {
329                 ring_buffer_discard_commit(buffer, event);
330                 return 1;
331         }
332
333         return 0;
334 }
335 EXPORT_SYMBOL_GPL(call_filter_check_discard);
336
337 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
338 {
339         u64 ts;
340
341         /* Early boot up does not have a buffer yet */
342         if (!buf->buffer)
343                 return trace_clock_local();
344
345         ts = ring_buffer_time_stamp(buf->buffer, cpu);
346         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
347
348         return ts;
349 }
350
351 cycle_t ftrace_now(int cpu)
352 {
353         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
354 }
355
356 /**
357  * tracing_is_enabled - Show if global_trace has been enabled
358  *
359  * Shows if the global trace has been enabled or not. It uses the
360  * mirror flag "buffer_disabled" so it can be used in fast paths such as
361  * the irqsoff tracer. But it may be inaccurate due to races. If you
362  * need to know the accurate state, use tracing_is_on() which is a little
363  * slower, but accurate.
364  */
365 int tracing_is_enabled(void)
366 {
367         /*
368          * For quick access (irqsoff uses this in fast path), just
369          * return the mirror variable of the state of the ring buffer.
370          * It's a little racy, but we don't really care.
371          */
372         smp_rmb();
373         return !global_trace.buffer_disabled;
374 }
375
376 /*
377  * trace_buf_size is the size in bytes that is allocated
378  * for a buffer. Note, the number of bytes is always rounded
379  * to page size.
380  *
381  * This number is purposely set to a low value of 16384.
382  * If a dump on oops happens, it is much nicer not to have
383  * to wait for all that output. In any case, this is
384  * configurable at both boot time and run time.
385  */
386 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
387
388 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
389
390 /* trace_types holds a link list of available tracers. */
391 static struct tracer            *trace_types __read_mostly;
392
393 /*
394  * trace_types_lock is used to protect the trace_types list.
395  */
396 DEFINE_MUTEX(trace_types_lock);
397
398 /*
399  * Serialize access to the ring buffer.
400  *
401  * The ring buffer serializes readers, but that is only low level protection.
402  * The validity of events (as returned by ring_buffer_peek(), etc.)
403  * is not protected by the ring buffer.
404  *
405  * The content of events may become garbage if we allow other processes
406  * to consume these events concurrently:
407  *   A) the page of the consumed events may become a normal page
408  *      (not a reader page) in the ring buffer, and this page will be
409  *      rewritten by the events producer.
410  *   B) the page of the consumed events may become a page for splice_read,
411  *      and this page will be returned to the system.
412  *
413  * These primitives allow multiple processes to access different CPU ring
414  * buffers concurrently.
415  *
416  * These primitives don't distinguish read-only and read-consume access.
417  * Multiple read-only accesses are also serialized.
418  */
419
420 #ifdef CONFIG_SMP
421 static DECLARE_RWSEM(all_cpu_access_lock);
422 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
423
424 static inline void trace_access_lock(int cpu)
425 {
426         if (cpu == RING_BUFFER_ALL_CPUS) {
427                 /* gain it for accessing the whole ring buffer. */
428                 down_write(&all_cpu_access_lock);
429         } else {
430                 /* gain it for accessing a cpu ring buffer. */
431
432                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
433                 down_read(&all_cpu_access_lock);
434
435                 /* Secondly block other access to this @cpu ring buffer. */
436                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
437         }
438 }
439
440 static inline void trace_access_unlock(int cpu)
441 {
442         if (cpu == RING_BUFFER_ALL_CPUS) {
443                 up_write(&all_cpu_access_lock);
444         } else {
445                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
446                 up_read(&all_cpu_access_lock);
447         }
448 }
449
450 static inline void trace_access_lock_init(void)
451 {
452         int cpu;
453
454         for_each_possible_cpu(cpu)
455                 mutex_init(&per_cpu(cpu_access_lock, cpu));
456 }
457
458 #else
459
460 static DEFINE_MUTEX(access_lock);
461
462 static inline void trace_access_lock(int cpu)
463 {
464         (void)cpu;
465         mutex_lock(&access_lock);
466 }
467
468 static inline void trace_access_unlock(int cpu)
469 {
470         (void)cpu;
471         mutex_unlock(&access_lock);
472 }
473
474 static inline void trace_access_lock_init(void)
475 {
476 }
477
478 #endif
479
480 #ifdef CONFIG_STACKTRACE
481 static void __ftrace_trace_stack(struct ring_buffer *buffer,
482                                  unsigned long flags,
483                                  int skip, int pc, struct pt_regs *regs);
484 static inline void ftrace_trace_stack(struct trace_array *tr,
485                                       struct ring_buffer *buffer,
486                                       unsigned long flags,
487                                       int skip, int pc, struct pt_regs *regs);
488
489 #else
490 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
491                                         unsigned long flags,
492                                         int skip, int pc, struct pt_regs *regs)
493 {
494 }
495 static inline void ftrace_trace_stack(struct trace_array *tr,
496                                       struct ring_buffer *buffer,
497                                       unsigned long flags,
498                                       int skip, int pc, struct pt_regs *regs)
499 {
500 }
501
502 #endif
503
504 static void tracer_tracing_on(struct trace_array *tr)
505 {
506         if (tr->trace_buffer.buffer)
507                 ring_buffer_record_on(tr->trace_buffer.buffer);
508         /*
509          * This flag is looked at when buffers haven't been allocated
510          * yet, or by some tracers (like irqsoff) that just want to
511          * know if the ring buffer has been disabled, but can handle
512          * races where it gets disabled while we still do a record.
513          * As the check is in the fast path of the tracers, it is more
514          * important to be fast than accurate.
515          */
516         tr->buffer_disabled = 0;
517         /* Make the flag seen by readers */
518         smp_wmb();
519 }
520
521 /**
522  * tracing_on - enable tracing buffers
523  *
524  * This function enables tracing buffers that may have been
525  * disabled with tracing_off.
526  */
527 void tracing_on(void)
528 {
529         tracer_tracing_on(&global_trace);
530 }
531 EXPORT_SYMBOL_GPL(tracing_on);
532
533 /**
534  * __trace_puts - write a constant string into the trace buffer.
535  * @ip:    The address of the caller
536  * @str:   The constant string to write
537  * @size:  The size of the string.
538  */
539 int __trace_puts(unsigned long ip, const char *str, int size)
540 {
541         struct ring_buffer_event *event;
542         struct ring_buffer *buffer;
543         struct print_entry *entry;
544         unsigned long irq_flags;
545         int alloc;
546         int pc;
547
548         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
549                 return 0;
550
551         pc = preempt_count();
552
553         if (unlikely(tracing_selftest_running || tracing_disabled))
554                 return 0;
555
556         alloc = sizeof(*entry) + size + 2; /* possible \n added */
557
558         local_save_flags(irq_flags);
559         buffer = global_trace.trace_buffer.buffer;
560         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
561                                           irq_flags, pc);
562         if (!event)
563                 return 0;
564
565         entry = ring_buffer_event_data(event);
566         entry->ip = ip;
567
568         memcpy(&entry->buf, str, size);
569
570         /* Add a newline if necessary */
571         if (entry->buf[size - 1] != '\n') {
572                 entry->buf[size] = '\n';
573                 entry->buf[size + 1] = '\0';
574         } else
575                 entry->buf[size] = '\0';
576
577         __buffer_unlock_commit(buffer, event);
578         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
579
580         return size;
581 }
582 EXPORT_SYMBOL_GPL(__trace_puts);
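/*
 * Editorial example (not part of the original file): a minimal sketch of
 * emitting a constant string into the trace buffer from other kernel
 * code, assuming the trace_puts() helper that wraps __trace_puts() /
 * __trace_bputs(). my_driver_probe() is a hypothetical caller.
 */
#if 0	/* illustrative sketch only */
static int my_driver_probe(void)
{
	trace_puts("my_driver: probe reached\n");
	return 0;
}
#endif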
583
584 /**
585  * __trace_bputs - write the pointer to a constant string into trace buffer
586  * @ip:    The address of the caller
587  * @str:   The constant string whose pointer is written to the buffer
588  */
589 int __trace_bputs(unsigned long ip, const char *str)
590 {
591         struct ring_buffer_event *event;
592         struct ring_buffer *buffer;
593         struct bputs_entry *entry;
594         unsigned long irq_flags;
595         int size = sizeof(struct bputs_entry);
596         int pc;
597
598         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
599                 return 0;
600
601         pc = preempt_count();
602
603         if (unlikely(tracing_selftest_running || tracing_disabled))
604                 return 0;
605
606         local_save_flags(irq_flags);
607         buffer = global_trace.trace_buffer.buffer;
608         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
609                                           irq_flags, pc);
610         if (!event)
611                 return 0;
612
613         entry = ring_buffer_event_data(event);
614         entry->ip                       = ip;
615         entry->str                      = str;
616
617         __buffer_unlock_commit(buffer, event);
618         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
619
620         return 1;
621 }
622 EXPORT_SYMBOL_GPL(__trace_bputs);
623
624 #ifdef CONFIG_TRACER_SNAPSHOT
625 /**
626  * tracing_snapshot - take a snapshot of the current buffer.
627  *
628  * This causes a swap between the snapshot buffer and the current live
629  * tracing buffer. You can use this to take snapshots of the live
630  * trace when some condition is triggered, but continue to trace.
631  *
632  * Note, make sure to allocate the snapshot either with
633  * tracing_snapshot_alloc(), or manually with:
634  * echo 1 > /sys/kernel/debug/tracing/snapshot
635  *
636  * If the snapshot buffer is not allocated, tracing will be stopped,
637  * basically making the current trace a permanent snapshot.
638  */
639 void tracing_snapshot(void)
640 {
641         struct trace_array *tr = &global_trace;
642         struct tracer *tracer = tr->current_trace;
643         unsigned long flags;
644
645         if (in_nmi()) {
646                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
647                 internal_trace_puts("*** snapshot is being ignored        ***\n");
648                 return;
649         }
650
651         if (!tr->allocated_snapshot) {
652                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
653                 internal_trace_puts("*** stopping trace here!   ***\n");
654                 tracing_off();
655                 return;
656         }
657
658         /* Note, snapshot can not be used when the tracer uses it */
659         if (tracer->use_max_tr) {
660                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
661                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
662                 return;
663         }
664
665         local_irq_save(flags);
666         update_max_tr(tr, current, smp_processor_id());
667         local_irq_restore(flags);
668 }
669 EXPORT_SYMBOL_GPL(tracing_snapshot);
670
671 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
672                                         struct trace_buffer *size_buf, int cpu_id);
673 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
674
675 static int alloc_snapshot(struct trace_array *tr)
676 {
677         int ret;
678
679         if (!tr->allocated_snapshot) {
680
681                 /* allocate spare buffer */
682                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
683                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
684                 if (ret < 0)
685                         return ret;
686
687                 tr->allocated_snapshot = true;
688         }
689
690         return 0;
691 }
692
693 static void free_snapshot(struct trace_array *tr)
694 {
695         /*
696          * We don't free the ring buffer; instead, we resize it because
697          * the max_tr ring buffer has some state (e.g. ring->clock) and
698          * we want to preserve it.
699          */
700         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
701         set_buffer_entries(&tr->max_buffer, 1);
702         tracing_reset_online_cpus(&tr->max_buffer);
703         tr->allocated_snapshot = false;
704 }
705
706 /**
707  * tracing_alloc_snapshot - allocate snapshot buffer.
708  *
709  * This only allocates the snapshot buffer if it isn't already
710  * allocated - it doesn't also take a snapshot.
711  *
712  * This is meant to be used in cases where the snapshot buffer needs
713  * to be set up for events that can't sleep but need to be able to
714  * trigger a snapshot.
715  */
716 int tracing_alloc_snapshot(void)
717 {
718         struct trace_array *tr = &global_trace;
719         int ret;
720
721         ret = alloc_snapshot(tr);
722         WARN_ON(ret < 0);
723
724         return ret;
725 }
726 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
727
728 /**
729  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
730  *
731  * This is similar to tracing_snapshot(), but it will allocate the
732  * snapshot buffer if it isn't already allocated. Use this only
733  * where it is safe to sleep, as the allocation may sleep.
734  *
735  * This causes a swap between the snapshot buffer and the current live
736  * tracing buffer. You can use this to take snapshots of the live
737  * trace when some condition is triggered, but continue to trace.
738  */
739 void tracing_snapshot_alloc(void)
740 {
741         int ret;
742
743         ret = tracing_alloc_snapshot();
744         if (ret < 0)
745                 return;
746
747         tracing_snapshot();
748 }
749 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
750 #else
751 void tracing_snapshot(void)
752 {
753         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
754 }
755 EXPORT_SYMBOL_GPL(tracing_snapshot);
756 int tracing_alloc_snapshot(void)
757 {
758         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
759         return -ENODEV;
760 }
761 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
762 void tracing_snapshot_alloc(void)
763 {
764         /* Give warning */
765         tracing_snapshot();
766 }
767 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
768 #endif /* CONFIG_TRACER_SNAPSHOT */
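/*
 * Editorial example (not part of the original file): a sketch of the
 * intended split between the sleepable allocation and the atomic
 * snapshot itself, using the two exported helpers above. The my_*()
 * functions are hypothetical.
 */
#if 0	/* illustrative sketch only */
static int my_setup(void)		/* sleepable context */
{
	return tracing_alloc_snapshot();
}

static void my_condition_hit(void)	/* may run in atomic context */
{
	tracing_snapshot();		/* swap the live buffer with the spare */
}
#endif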
769
770 static void tracer_tracing_off(struct trace_array *tr)
771 {
772         if (tr->trace_buffer.buffer)
773                 ring_buffer_record_off(tr->trace_buffer.buffer);
774         /*
775          * This flag is looked at when buffers haven't been allocated
776          * yet, or by some tracers (like irqsoff) that just want to
777          * know if the ring buffer has been disabled, but can handle
778          * races where it gets disabled while we still do a record.
779          * As the check is in the fast path of the tracers, it is more
780          * important to be fast than accurate.
781          */
782         tr->buffer_disabled = 1;
783         /* Make the flag seen by readers */
784         smp_wmb();
785 }
786
787 /**
788  * tracing_off - turn off tracing buffers
789  *
790  * This function stops the tracing buffers from recording data.
791  * It does not disable any overhead the tracers themselves may
792  * be causing. This function simply causes all recording to
793  * the ring buffers to fail.
794  */
795 void tracing_off(void)
796 {
797         tracer_tracing_off(&global_trace);
798 }
799 EXPORT_SYMBOL_GPL(tracing_off);
800
801 void disable_trace_on_warning(void)
802 {
803         if (__disable_trace_on_warning)
804                 tracing_off();
805 }
806
807 /**
808  * tracer_tracing_is_on - show real state of ring buffer enabled
809  * @tr: the trace array whose ring buffer state is checked
810  *
811  * Shows the real state of the ring buffer, whether it is enabled or not.
812  */
813 static int tracer_tracing_is_on(struct trace_array *tr)
814 {
815         if (tr->trace_buffer.buffer)
816                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
817         return !tr->buffer_disabled;
818 }
819
820 /**
821  * tracing_is_on - show state of ring buffers enabled
822  */
823 int tracing_is_on(void)
824 {
825         return tracer_tracing_is_on(&global_trace);
826 }
827 EXPORT_SYMBOL_GPL(tracing_is_on);
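/*
 * Editorial example (not part of the original file): a sketch of
 * bracketing a suspect code path with the helpers above, so only that
 * window is kept in the ring buffer. do_suspect_work() is hypothetical.
 */
#if 0	/* illustrative sketch only */
static void my_capture_window(void)
{
	tracing_on();		/* let the ring buffers record */
	do_suspect_work();	/* hypothetical code under investigation */
	tracing_off();		/* freeze what was just recorded */

	if (!tracing_is_on())
		pr_info("trace buffers frozen for inspection\n");
}
#endif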
828
829 static int __init set_buf_size(char *str)
830 {
831         unsigned long buf_size;
832
833         if (!str)
834                 return 0;
835         buf_size = memparse(str, &str);
836         /* nr_entries can not be zero */
837         if (buf_size == 0)
838                 return 0;
839         trace_buf_size = buf_size;
840         return 1;
841 }
842 __setup("trace_buf_size=", set_buf_size);
843
844 static int __init set_tracing_thresh(char *str)
845 {
846         unsigned long threshold;
847         int ret;
848
849         if (!str)
850                 return 0;
851         ret = kstrtoul(str, 0, &threshold);
852         if (ret < 0)
853                 return 0;
854         tracing_thresh = threshold * 1000;
855         return 1;
856 }
857 __setup("tracing_thresh=", set_tracing_thresh);
858
859 unsigned long nsecs_to_usecs(unsigned long nsecs)
860 {
861         return nsecs / 1000;
862 }
863
864 /*
865  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
866  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
867  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
868  * of strings in the order that the enums were defined.
869  */
870 #undef C
871 #define C(a, b) b
872
873 /* These must match the bit positions in trace_iterator_flags */
874 static const char *trace_options[] = {
875         TRACE_FLAGS
876         NULL
877 };
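/*
 * Editorial sketch (not part of the original file) of the C() x-macro
 * pattern used above, with made-up flags (FOO/BAR are not real trace
 * options): the same list expands once into enum bit positions and once
 * into the matching strings, keeping the two in sync.
 */
#if 0	/* illustrative sketch only */
#define MY_FLAGS		\
	C(FOO,	"foo"),		\
	C(BAR,	"bar"),

#undef C
#define C(a, b) MY_ITER_##a##_BIT
enum { MY_FLAGS MY_ITER_LAST_BIT };	/* FOO_BIT = 0, BAR_BIT = 1, ... */

#undef C
#define C(a, b) b
static const char *my_options[] = { MY_FLAGS NULL };	/* "foo", "bar", NULL */
#endif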
878
879 static struct {
880         u64 (*func)(void);
881         const char *name;
882         int in_ns;              /* is this clock in nanoseconds? */
883 } trace_clocks[] = {
884         { trace_clock_local,            "local",        1 },
885         { trace_clock_global,           "global",       1 },
886         { trace_clock_counter,          "counter",      0 },
887         { trace_clock_jiffies,          "uptime",       0 },
888         { trace_clock,                  "perf",         1 },
889         { ktime_get_mono_fast_ns,       "mono",         1 },
890         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
891         ARCH_TRACE_CLOCKS
892 };
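/*
 * Editorial example (not part of the original file): the clock names in
 * the table above can be selected with the "trace_clock=" boot option
 * parsed earlier, or (on a typical tracefs mount) at run time, e.g.:
 *
 *   trace_clock=global                                (kernel command line)
 *   echo mono > /sys/kernel/debug/tracing/trace_clock
 */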
893
894 /*
895  * trace_parser_get_init - gets the buffer for trace parser
896  */
897 int trace_parser_get_init(struct trace_parser *parser, int size)
898 {
899         memset(parser, 0, sizeof(*parser));
900
901         parser->buffer = kmalloc(size, GFP_KERNEL);
902         if (!parser->buffer)
903                 return 1;
904
905         parser->size = size;
906         return 0;
907 }
908
909 /*
910  * trace_parser_put - frees the buffer for trace parser
911  */
912 void trace_parser_put(struct trace_parser *parser)
913 {
914         kfree(parser->buffer);
915 }
916
917 /*
918  * trace_get_user - reads the user input string separated by space
919  * (matched by isspace(ch))
920  *
921  * For each string found the 'struct trace_parser' is updated,
922  * and the function returns.
923  *
924  * Returns number of bytes read.
925  *
926  * See kernel/trace/trace.h for 'struct trace_parser' details.
927  */
928 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
929         size_t cnt, loff_t *ppos)
930 {
931         char ch;
932         size_t read = 0;
933         ssize_t ret;
934
935         if (!*ppos)
936                 trace_parser_clear(parser);
937
938         ret = get_user(ch, ubuf++);
939         if (ret)
940                 goto out;
941
942         read++;
943         cnt--;
944
945         /*
946          * If the parser is not finished with the last write,
947          * continue reading the user input without skipping spaces.
948          */
949         if (!parser->cont) {
950                 /* skip white space */
951                 while (cnt && isspace(ch)) {
952                         ret = get_user(ch, ubuf++);
953                         if (ret)
954                                 goto out;
955                         read++;
956                         cnt--;
957                 }
958
959                 /* only spaces were written */
960                 if (isspace(ch)) {
961                         *ppos += read;
962                         ret = read;
963                         goto out;
964                 }
965
966                 parser->idx = 0;
967         }
968
969         /* read the non-space input */
970         while (cnt && !isspace(ch)) {
971                 if (parser->idx < parser->size - 1)
972                         parser->buffer[parser->idx++] = ch;
973                 else {
974                         ret = -EINVAL;
975                         goto out;
976                 }
977                 ret = get_user(ch, ubuf++);
978                 if (ret)
979                         goto out;
980                 read++;
981                 cnt--;
982         }
983
984         /* We either got finished input or we have to wait for another call. */
985         if (isspace(ch)) {
986                 parser->buffer[parser->idx] = 0;
987                 parser->cont = false;
988         } else if (parser->idx < parser->size - 1) {
989                 parser->cont = true;
990                 parser->buffer[parser->idx++] = ch;
991         } else {
992                 ret = -EINVAL;
993                 goto out;
994         }
995
996         *ppos += read;
997         ret = read;
998
999 out:
1000         return ret;
1001 }
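/*
 * Editorial example (not part of the original file): a sketch of how a
 * ->write() handler typically drives the parser above, one
 * space-separated token per call. my_filter_write() is hypothetical;
 * the helpers and TASK_COMM_LEN are real.
 */
#if 0	/* illustrative sketch only */
static ssize_t my_filter_write(struct file *filp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, TASK_COMM_LEN))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser))
		pr_info("token: %s\n", parser.buffer);	/* NUL-terminated token */

	trace_parser_put(&parser);
	return read;
}
#endif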
1002
1003 /* TODO add a seq_buf_to_buffer() */
1004 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1005 {
1006         int len;
1007
1008         if (trace_seq_used(s) <= s->seq.readpos)
1009                 return -EBUSY;
1010
1011         len = trace_seq_used(s) - s->seq.readpos;
1012         if (cnt > len)
1013                 cnt = len;
1014         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1015
1016         s->seq.readpos += cnt;
1017         return cnt;
1018 }
1019
1020 unsigned long __read_mostly     tracing_thresh;
1021
1022 #ifdef CONFIG_TRACER_MAX_TRACE
1023 /*
1024  * Copy the new maximum trace into the separate maximum-trace
1025  * structure. (this way the maximum trace is permanently saved,
1026  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1027  */
1028 static void
1029 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1030 {
1031         struct trace_buffer *trace_buf = &tr->trace_buffer;
1032         struct trace_buffer *max_buf = &tr->max_buffer;
1033         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1034         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1035
1036         max_buf->cpu = cpu;
1037         max_buf->time_start = data->preempt_timestamp;
1038
1039         max_data->saved_latency = tr->max_latency;
1040         max_data->critical_start = data->critical_start;
1041         max_data->critical_end = data->critical_end;
1042
1043         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1044         max_data->pid = tsk->pid;
1045         /*
1046          * If tsk == current, then use current_uid(), as that does not use
1047          * RCU. The irq tracer can be called out of RCU scope.
1048          */
1049         if (tsk == current)
1050                 max_data->uid = current_uid();
1051         else
1052                 max_data->uid = task_uid(tsk);
1053
1054         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1055         max_data->policy = tsk->policy;
1056         max_data->rt_priority = tsk->rt_priority;
1057
1058         /* record this task's comm */
1059         tracing_record_cmdline(tsk);
1060 }
1061
1062 /**
1063  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1064  * @tr: tracer
1065  * @tsk: the task with the latency
1066  * @cpu: The cpu that initiated the trace.
1067  *
1068  * Flip the buffers between the @tr and the max_tr and record information
1069  * about which task was the cause of this latency.
1070  */
1071 void
1072 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1073 {
1074         struct ring_buffer *buf;
1075
1076         if (tr->stop_count)
1077                 return;
1078
1079         WARN_ON_ONCE(!irqs_disabled());
1080
1081         if (!tr->allocated_snapshot) {
1082                 /* Only the nop tracer should hit this when disabling */
1083                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1084                 return;
1085         }
1086
1087         arch_spin_lock(&tr->max_lock);
1088
1089         buf = tr->trace_buffer.buffer;
1090         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1091         tr->max_buffer.buffer = buf;
1092
1093         __update_max_tr(tr, tsk, cpu);
1094         arch_spin_unlock(&tr->max_lock);
1095 }
1096
1097 /**
1098  * update_max_tr_single - only copy one trace over, and reset the rest
1099  * @tr - tracer
1100  * @tsk - task with the latency
1101  * @cpu - the cpu of the buffer to copy.
1102  *
1103  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1104  */
1105 void
1106 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1107 {
1108         int ret;
1109
1110         if (tr->stop_count)
1111                 return;
1112
1113         WARN_ON_ONCE(!irqs_disabled());
1114         if (!tr->allocated_snapshot) {
1115                 /* Only the nop tracer should hit this when disabling */
1116                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1117                 return;
1118         }
1119
1120         arch_spin_lock(&tr->max_lock);
1121
1122         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1123
1124         if (ret == -EBUSY) {
1125                 /*
1126                  * We failed to swap the buffer due to a commit taking
1127                  * place on this CPU. We fail to record, but we reset
1128                  * the max trace buffer (no one writes directly to it)
1129                  * and flag that it failed.
1130                  */
1131                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1132                         "Failed to swap buffers due to commit in progress\n");
1133         }
1134
1135         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1136
1137         __update_max_tr(tr, tsk, cpu);
1138         arch_spin_unlock(&tr->max_lock);
1139 }
1140 #endif /* CONFIG_TRACER_MAX_TRACE */
1141
1142 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1143 {
1144         /* Iterators are static, they should be filled or empty */
1145         if (trace_buffer_iter(iter, iter->cpu_file))
1146                 return 0;
1147
1148         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1149                                 full);
1150 }
1151
1152 #ifdef CONFIG_FTRACE_STARTUP_TEST
1153 static int run_tracer_selftest(struct tracer *type)
1154 {
1155         struct trace_array *tr = &global_trace;
1156         struct tracer *saved_tracer = tr->current_trace;
1157         int ret;
1158
1159         if (!type->selftest || tracing_selftest_disabled)
1160                 return 0;
1161
1162         /*
1163          * Run a selftest on this tracer.
1164          * Here we reset the trace buffer, and set the current
1165          * tracer to be this tracer. The tracer can then run some
1166          * internal tracing to verify that everything is in order.
1167          * If we fail, we do not register this tracer.
1168          */
1169         tracing_reset_online_cpus(&tr->trace_buffer);
1170
1171         tr->current_trace = type;
1172
1173 #ifdef CONFIG_TRACER_MAX_TRACE
1174         if (type->use_max_tr) {
1175                 /* If we expanded the buffers, make sure the max is expanded too */
1176                 if (ring_buffer_expanded)
1177                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1178                                            RING_BUFFER_ALL_CPUS);
1179                 tr->allocated_snapshot = true;
1180         }
1181 #endif
1182
1183         /* the test is responsible for initializing and enabling */
1184         pr_info("Testing tracer %s: ", type->name);
1185         ret = type->selftest(type, tr);
1186         /* the test is responsible for resetting too */
1187         tr->current_trace = saved_tracer;
1188         if (ret) {
1189                 printk(KERN_CONT "FAILED!\n");
1190                 /* Add the warning after printing 'FAILED' */
1191                 WARN_ON(1);
1192                 return -1;
1193         }
1194         /* Only reset on passing, to avoid touching corrupted buffers */
1195         tracing_reset_online_cpus(&tr->trace_buffer);
1196
1197 #ifdef CONFIG_TRACER_MAX_TRACE
1198         if (type->use_max_tr) {
1199                 tr->allocated_snapshot = false;
1200
1201                 /* Shrink the max buffer again */
1202                 if (ring_buffer_expanded)
1203                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1204                                            RING_BUFFER_ALL_CPUS);
1205         }
1206 #endif
1207
1208         printk(KERN_CONT "PASSED\n");
1209         return 0;
1210 }
1211 #else
1212 static inline int run_tracer_selftest(struct tracer *type)
1213 {
1214         return 0;
1215 }
1216 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1217
1218 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1219
1220 static void __init apply_trace_boot_options(void);
1221
1222 /**
1223  * register_tracer - register a tracer with the ftrace system.
1224  * @type - the plugin for the tracer
1225  *
1226  * Register a new plugin tracer.
1227  */
1228 int __init register_tracer(struct tracer *type)
1229 {
1230         struct tracer *t;
1231         int ret = 0;
1232
1233         if (!type->name) {
1234                 pr_info("Tracer must have a name\n");
1235                 return -1;
1236         }
1237
1238         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1239                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1240                 return -1;
1241         }
1242
1243         mutex_lock(&trace_types_lock);
1244
1245         tracing_selftest_running = true;
1246
1247         for (t = trace_types; t; t = t->next) {
1248                 if (strcmp(type->name, t->name) == 0) {
1249                         /* already found */
1250                         pr_info("Tracer %s already registered\n",
1251                                 type->name);
1252                         ret = -1;
1253                         goto out;
1254                 }
1255         }
1256
1257         if (!type->set_flag)
1258                 type->set_flag = &dummy_set_flag;
1259         if (!type->flags) {
1260                 /* allocate a dummy tracer_flags */
1261                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1262                 if (!type->flags) {
1263                         ret = -ENOMEM;
1264                         goto out;
1265                 }
1266                 type->flags->val = 0;
1267                 type->flags->opts = dummy_tracer_opt;
1268         } else
1269                 if (!type->flags->opts)
1270                         type->flags->opts = dummy_tracer_opt;
1271
1272         /* store the tracer for __set_tracer_option */
1273         type->flags->trace = type;
1274
1275         ret = run_tracer_selftest(type);
1276         if (ret < 0)
1277                 goto out;
1278
1279         type->next = trace_types;
1280         trace_types = type;
1281         add_tracer_options(&global_trace, type);
1282
1283  out:
1284         tracing_selftest_running = false;
1285         mutex_unlock(&trace_types_lock);
1286
1287         if (ret || !default_bootup_tracer)
1288                 goto out_unlock;
1289
1290         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1291                 goto out_unlock;
1292
1293         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1294         /* Do we want this tracer to start on bootup? */
1295         tracing_set_tracer(&global_trace, type->name);
1296         default_bootup_tracer = NULL;
1297
1298         apply_trace_boot_options();
1299
1300         /* disable other selftests, since this will break them. */
1301         tracing_selftest_disabled = true;
1302 #ifdef CONFIG_FTRACE_STARTUP_TEST
1303         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1304                type->name);
1305 #endif
1306
1307  out_unlock:
1308         return ret;
1309 }
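/*
 * Editorial example (not part of the original file): the smallest shape
 * of a tracer plugin that the registration path above accepts. The
 * "my_tracer" names are hypothetical; real tracers also provide flags,
 * selftests, output hooks, etc.
 */
#if 0	/* illustrative sketch only */
static int my_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void my_tracer_reset(struct trace_array *tr)
{
}

static struct tracer my_tracer __read_mostly = {
	.name	= "my_tracer",
	.init	= my_tracer_init,
	.reset	= my_tracer_reset,
};

static __init int init_my_tracer(void)
{
	return register_tracer(&my_tracer);
}
core_initcall(init_my_tracer);
#endif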
1310
1311 void tracing_reset(struct trace_buffer *buf, int cpu)
1312 {
1313         struct ring_buffer *buffer = buf->buffer;
1314
1315         if (!buffer)
1316                 return;
1317
1318         ring_buffer_record_disable(buffer);
1319
1320         /* Make sure all commits have finished */
1321         synchronize_sched();
1322         ring_buffer_reset_cpu(buffer, cpu);
1323
1324         ring_buffer_record_enable(buffer);
1325 }
1326
1327 void tracing_reset_online_cpus(struct trace_buffer *buf)
1328 {
1329         struct ring_buffer *buffer = buf->buffer;
1330         int cpu;
1331
1332         if (!buffer)
1333                 return;
1334
1335         ring_buffer_record_disable(buffer);
1336
1337         /* Make sure all commits have finished */
1338         synchronize_sched();
1339
1340         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1341
1342         for_each_online_cpu(cpu)
1343                 ring_buffer_reset_cpu(buffer, cpu);
1344
1345         ring_buffer_record_enable(buffer);
1346 }
1347
1348 /* Must have trace_types_lock held */
1349 void tracing_reset_all_online_cpus(void)
1350 {
1351         struct trace_array *tr;
1352
1353         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1354                 tracing_reset_online_cpus(&tr->trace_buffer);
1355 #ifdef CONFIG_TRACER_MAX_TRACE
1356                 tracing_reset_online_cpus(&tr->max_buffer);
1357 #endif
1358         }
1359 }
1360
1361 #define SAVED_CMDLINES_DEFAULT 128
1362 #define NO_CMDLINE_MAP UINT_MAX
1363 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1364 struct saved_cmdlines_buffer {
1365         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1366         unsigned *map_cmdline_to_pid;
1367         unsigned cmdline_num;
1368         int cmdline_idx;
1369         char *saved_cmdlines;
1370 };
1371 static struct saved_cmdlines_buffer *savedcmd;
1372
1373 /* temporarily disable recording */
1374 static atomic_t trace_record_cmdline_disabled __read_mostly;
1375
1376 static inline char *get_saved_cmdlines(int idx)
1377 {
1378         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1379 }
1380
1381 static inline void set_cmdline(int idx, const char *cmdline)
1382 {
1383         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1384 }
1385
1386 static int allocate_cmdlines_buffer(unsigned int val,
1387                                     struct saved_cmdlines_buffer *s)
1388 {
1389         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1390                                         GFP_KERNEL);
1391         if (!s->map_cmdline_to_pid)
1392                 return -ENOMEM;
1393
1394         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1395         if (!s->saved_cmdlines) {
1396                 kfree(s->map_cmdline_to_pid);
1397                 return -ENOMEM;
1398         }
1399
1400         s->cmdline_idx = 0;
1401         s->cmdline_num = val;
1402         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1403                sizeof(s->map_pid_to_cmdline));
1404         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1405                val * sizeof(*s->map_cmdline_to_pid));
1406
1407         return 0;
1408 }
1409
1410 static int trace_create_savedcmd(void)
1411 {
1412         int ret;
1413
1414         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1415         if (!savedcmd)
1416                 return -ENOMEM;
1417
1418         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1419         if (ret < 0) {
1420                 kfree(savedcmd);
1421                 savedcmd = NULL;
1422                 return -ENOMEM;
1423         }
1424
1425         return 0;
1426 }
1427
1428 int is_tracing_stopped(void)
1429 {
1430         return global_trace.stop_count;
1431 }
1432
1433 /**
1434  * tracing_start - quick start of the tracer
1435  *
1436  * If tracing is enabled but was stopped by tracing_stop,
1437  * this will start the tracer back up.
1438  */
1439 void tracing_start(void)
1440 {
1441         struct ring_buffer *buffer;
1442         unsigned long flags;
1443
1444         if (tracing_disabled)
1445                 return;
1446
1447         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1448         if (--global_trace.stop_count) {
1449                 if (global_trace.stop_count < 0) {
1450                         /* Someone screwed up their debugging */
1451                         WARN_ON_ONCE(1);
1452                         global_trace.stop_count = 0;
1453                 }
1454                 goto out;
1455         }
1456
1457         /* Prevent the buffers from switching */
1458         arch_spin_lock(&global_trace.max_lock);
1459
1460         buffer = global_trace.trace_buffer.buffer;
1461         if (buffer)
1462                 ring_buffer_record_enable(buffer);
1463
1464 #ifdef CONFIG_TRACER_MAX_TRACE
1465         buffer = global_trace.max_buffer.buffer;
1466         if (buffer)
1467                 ring_buffer_record_enable(buffer);
1468 #endif
1469
1470         arch_spin_unlock(&global_trace.max_lock);
1471
1472  out:
1473         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1474 }
1475
1476 static void tracing_start_tr(struct trace_array *tr)
1477 {
1478         struct ring_buffer *buffer;
1479         unsigned long flags;
1480
1481         if (tracing_disabled)
1482                 return;
1483
1484         /* If global, we need to also start the max tracer */
1485         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1486                 return tracing_start();
1487
1488         raw_spin_lock_irqsave(&tr->start_lock, flags);
1489
1490         if (--tr->stop_count) {
1491                 if (tr->stop_count < 0) {
1492                         /* Someone screwed up their debugging */
1493                         WARN_ON_ONCE(1);
1494                         tr->stop_count = 0;
1495                 }
1496                 goto out;
1497         }
1498
1499         buffer = tr->trace_buffer.buffer;
1500         if (buffer)
1501                 ring_buffer_record_enable(buffer);
1502
1503  out:
1504         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1505 }
1506
1507 /**
1508  * tracing_stop - quick stop of the tracer
1509  *
1510  * Light weight way to stop tracing. Use in conjunction with
1511  * tracing_start.
1512  */
1513 void tracing_stop(void)
1514 {
1515         struct ring_buffer *buffer;
1516         unsigned long flags;
1517
1518         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1519         if (global_trace.stop_count++)
1520                 goto out;
1521
1522         /* Prevent the buffers from switching */
1523         arch_spin_lock(&global_trace.max_lock);
1524
1525         buffer = global_trace.trace_buffer.buffer;
1526         if (buffer)
1527                 ring_buffer_record_disable(buffer);
1528
1529 #ifdef CONFIG_TRACER_MAX_TRACE
1530         buffer = global_trace.max_buffer.buffer;
1531         if (buffer)
1532                 ring_buffer_record_disable(buffer);
1533 #endif
1534
1535         arch_spin_unlock(&global_trace.max_lock);
1536
1537  out:
1538         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1539 }
1540
1541 static void tracing_stop_tr(struct trace_array *tr)
1542 {
1543         struct ring_buffer *buffer;
1544         unsigned long flags;
1545
1546         /* If global, we need to also stop the max tracer */
1547         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1548                 return tracing_stop();
1549
1550         raw_spin_lock_irqsave(&tr->start_lock, flags);
1551         if (tr->stop_count++)
1552                 goto out;
1553
1554         buffer = tr->trace_buffer.buffer;
1555         if (buffer)
1556                 ring_buffer_record_disable(buffer);
1557
1558  out:
1559         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1560 }
1561
1562 void trace_stop_cmdline_recording(void);
1563
1564 static int trace_save_cmdline(struct task_struct *tsk)
1565 {
1566         unsigned pid, idx;
1567
1568         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1569                 return 0;
1570
1571         /*
1572          * It's not the end of the world if we don't get
1573          * the lock, but we also don't want to spin
1574          * nor do we want to disable interrupts,
1575          * so if we miss here, then better luck next time.
1576          */
1577         if (!arch_spin_trylock(&trace_cmdline_lock))
1578                 return 0;
1579
1580         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1581         if (idx == NO_CMDLINE_MAP) {
1582                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1583
1584                 /*
1585                  * Check whether the cmdline buffer at idx has a pid
1586                  * mapped. We are going to overwrite that entry so we
1587                  * need to clear the map_pid_to_cmdline. Otherwise we
1588                  * would read the new comm for the old pid.
1589                  */
1590                 pid = savedcmd->map_cmdline_to_pid[idx];
1591                 if (pid != NO_CMDLINE_MAP)
1592                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1593
1594                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1595                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1596
1597                 savedcmd->cmdline_idx = idx;
1598         }
1599
1600         set_cmdline(idx, tsk->comm);
1601
1602         arch_spin_unlock(&trace_cmdline_lock);
1603
1604         return 1;
1605 }
1606
1607 static void __trace_find_cmdline(int pid, char comm[])
1608 {
1609         unsigned map;
1610
1611         if (!pid) {
1612                 strcpy(comm, "<idle>");
1613                 return;
1614         }
1615
1616         if (WARN_ON_ONCE(pid < 0)) {
1617                 strcpy(comm, "<XXX>");
1618                 return;
1619         }
1620
1621         if (pid > PID_MAX_DEFAULT) {
1622                 strcpy(comm, "<...>");
1623                 return;
1624         }
1625
1626         map = savedcmd->map_pid_to_cmdline[pid];
1627         if (map != NO_CMDLINE_MAP)
1628                 strcpy(comm, get_saved_cmdlines(map));
1629         else
1630                 strcpy(comm, "<...>");
1631 }
1632
1633 void trace_find_cmdline(int pid, char comm[])
1634 {
1635         preempt_disable();
1636         arch_spin_lock(&trace_cmdline_lock);
1637
1638         __trace_find_cmdline(pid, comm);
1639
1640         arch_spin_unlock(&trace_cmdline_lock);
1641         preempt_enable();
1642 }
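/*
 * Editorial example (not part of the original file): resolving a pid
 * recorded in a trace entry back to a comm via the cache above.
 * my_print_comm() is hypothetical.
 */
#if 0	/* illustrative sketch only */
static void my_print_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);	/* "<...>" if the pid was never saved */
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif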
1643
1644 void tracing_record_cmdline(struct task_struct *tsk)
1645 {
1646         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1647                 return;
1648
1649         if (!__this_cpu_read(trace_cmdline_save))
1650                 return;
1651
1652         if (trace_save_cmdline(tsk))
1653                 __this_cpu_write(trace_cmdline_save, false);
1654 }
1655
1656 void
1657 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1658                              int pc)
1659 {
1660         struct task_struct *tsk = current;
1661
1662         entry->preempt_count            = pc & 0xff;
1663         entry->pid                      = (tsk) ? tsk->pid : 0;
1664         entry->flags =
1665 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1666                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1667 #else
1668                 TRACE_FLAG_IRQS_NOSUPPORT |
1669 #endif
1670                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1671                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1672                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1673                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1674                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1675 }
1676 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1677
1678 struct ring_buffer_event *
1679 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1680                           int type,
1681                           unsigned long len,
1682                           unsigned long flags, int pc)
1683 {
1684         struct ring_buffer_event *event;
1685
1686         event = ring_buffer_lock_reserve(buffer, len);
1687         if (event != NULL) {
1688                 struct trace_entry *ent = ring_buffer_event_data(event);
1689
1690                 tracing_generic_entry_update(ent, flags, pc);
1691                 ent->type = type;
1692         }
1693
1694         return event;
1695 }
1696
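/*
 * Commit a reserved event and re-arm the per-CPU trace_cmdline_save
 * flag so that the next call to tracing_record_cmdline() records a
 * comm again.
 */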
1697 void
1698 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1699 {
1700         __this_cpu_write(trace_cmdline_save, true);
1701         ring_buffer_unlock_commit(buffer, event);
1702 }
1703
1704 void trace_buffer_unlock_commit(struct trace_array *tr,
1705                                 struct ring_buffer *buffer,
1706                                 struct ring_buffer_event *event,
1707                                 unsigned long flags, int pc)
1708 {
1709         __buffer_unlock_commit(buffer, event);
1710
1711         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1712         ftrace_trace_userstack(buffer, flags, pc);
1713 }
1714 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1715
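/*
 * Scratch buffer used by trace_event_buffer_lock_reserve() when the real
 * buffer refuses the reservation (e.g. tracing is off) but an event
 * trigger still needs to look at the event data.
 */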
1716 static struct ring_buffer *temp_buffer;
1717
1718 struct ring_buffer_event *
1719 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1720                           struct trace_event_file *trace_file,
1721                           int type, unsigned long len,
1722                           unsigned long flags, int pc)
1723 {
1724         struct ring_buffer_event *entry;
1725
1726         *current_rb = trace_file->tr->trace_buffer.buffer;
1727         entry = trace_buffer_lock_reserve(*current_rb,
1728                                          type, len, flags, pc);
1729         /*
1730          * If tracing is off, but we have triggers enabled,
1731          * we still need to look at the event data. Use the temp_buffer
1732          * to store the trace event for the trigger to use. It's recursion
1733          * safe and will not be recorded anywhere.
1734          */
1735         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1736                 *current_rb = temp_buffer;
1737                 entry = trace_buffer_lock_reserve(*current_rb,
1738                                                   type, len, flags, pc);
1739         }
1740         return entry;
1741 }
1742 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1743
1744 struct ring_buffer_event *
1745 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1746                                   int type, unsigned long len,
1747                                   unsigned long flags, int pc)
1748 {
1749         *current_rb = global_trace.trace_buffer.buffer;
1750         return trace_buffer_lock_reserve(*current_rb,
1751                                          type, len, flags, pc);
1752 }
1753 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1754
1755 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1756                                      struct ring_buffer *buffer,
1757                                      struct ring_buffer_event *event,
1758                                      unsigned long flags, int pc,
1759                                      struct pt_regs *regs)
1760 {
1761         __buffer_unlock_commit(buffer, event);
1762
1763         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1764         ftrace_trace_userstack(buffer, flags, pc);
1765 }
1766 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1767
1768 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1769                                          struct ring_buffer_event *event)
1770 {
1771         ring_buffer_discard_commit(buffer, event);
1772 }
1773 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1774
1775 void
1776 trace_function(struct trace_array *tr,
1777                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1778                int pc)
1779 {
1780         struct trace_event_call *call = &event_function;
1781         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1782         struct ring_buffer_event *event;
1783         struct ftrace_entry *entry;
1784
1785         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1786                                           flags, pc);
1787         if (!event)
1788                 return;
1789         entry   = ring_buffer_event_data(event);
1790         entry->ip                       = ip;
1791         entry->parent_ip                = parent_ip;
1792
1793         if (!call_filter_check_discard(call, entry, buffer, event))
1794                 __buffer_unlock_commit(buffer, event);
1795 }
1796
1797 #ifdef CONFIG_STACKTRACE
1798
1799 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1800 struct ftrace_stack {
1801         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1802 };
1803
1804 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1805 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1806
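/*
 * Record a kernel stack trace into @buffer. The first user on a CPU
 * borrows the large per-CPU ftrace_stack as scratch space; a nested
 * user (an interrupt or NMI interrupting the save) writes directly
 * into the ring buffer event at the smaller default depth.
 */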
1807 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1808                                  unsigned long flags,
1809                                  int skip, int pc, struct pt_regs *regs)
1810 {
1811         struct trace_event_call *call = &event_kernel_stack;
1812         struct ring_buffer_event *event;
1813         struct stack_entry *entry;
1814         struct stack_trace trace;
1815         int use_stack;
1816         int size = FTRACE_STACK_ENTRIES;
1817
1818         trace.nr_entries        = 0;
1819         trace.skip              = skip;
1820
1821         /*
1822          * Since events can happen in NMIs, there's no safe way to
1823          * use the per-CPU ftrace_stacks. We reserve it, and if an interrupt
1824          * or NMI comes in, it will just have to use the default
1825          * FTRACE_STACK_ENTRIES depth.
1826          */
1827         preempt_disable_notrace();
1828
1829         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1830         /*
1831          * We don't need any atomic variables, just a barrier.
1832          * If an interrupt comes in, we don't care, because it would
1833          * have exited and put the counter back to what we want.
1834          * We just need a barrier to keep gcc from moving things
1835          * around.
1836          */
1837         barrier();
1838         if (use_stack == 1) {
1839                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1840                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1841
1842                 if (regs)
1843                         save_stack_trace_regs(regs, &trace);
1844                 else
1845                         save_stack_trace(&trace);
1846
1847                 if (trace.nr_entries > size)
1848                         size = trace.nr_entries;
1849         } else
1850                 /* From now on, use_stack is a boolean */
1851                 use_stack = 0;
1852
1853         size *= sizeof(unsigned long);
1854
1855         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1856                                           sizeof(*entry) + size, flags, pc);
1857         if (!event)
1858                 goto out;
1859         entry = ring_buffer_event_data(event);
1860
1861         memset(&entry->caller, 0, size);
1862
1863         if (use_stack)
1864                 memcpy(&entry->caller, trace.entries,
1865                        trace.nr_entries * sizeof(unsigned long));
1866         else {
1867                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1868                 trace.entries           = entry->caller;
1869                 if (regs)
1870                         save_stack_trace_regs(regs, &trace);
1871                 else
1872                         save_stack_trace(&trace);
1873         }
1874
1875         entry->size = trace.nr_entries;
1876
1877         if (!call_filter_check_discard(call, entry, buffer, event))
1878                 __buffer_unlock_commit(buffer, event);
1879
1880  out:
1881         /* Again, don't let gcc optimize things here */
1882         barrier();
1883         __this_cpu_dec(ftrace_stack_reserve);
1884         preempt_enable_notrace();
1885
1886 }
1887
1888 static inline void ftrace_trace_stack(struct trace_array *tr,
1889                                       struct ring_buffer *buffer,
1890                                       unsigned long flags,
1891                                       int skip, int pc, struct pt_regs *regs)
1892 {
1893         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1894                 return;
1895
1896         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1897 }
1898
1899 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1900                    int pc)
1901 {
1902         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1903 }
1904
1905 /**
1906  * trace_dump_stack - record a stack back trace in the trace buffer
1907  * @skip: Number of functions to skip (helper handlers)
1908  */
1909 void trace_dump_stack(int skip)
1910 {
1911         unsigned long flags;
1912
1913         if (tracing_disabled || tracing_selftest_running)
1914                 return;
1915
1916         local_save_flags(flags);
1917
1918         /*
1919          * Skip 3 more, seems to get us at the caller of
1920          * this function.
1921          */
1922         skip += 3;
1923         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1924                              flags, skip, preempt_count(), NULL);
1925 }
1926
1927 static DEFINE_PER_CPU(int, user_stack_count);
1928
1929 void
1930 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1931 {
1932         struct trace_event_call *call = &event_user_stack;
1933         struct ring_buffer_event *event;
1934         struct userstack_entry *entry;
1935         struct stack_trace trace;
1936
1937         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1938                 return;
1939
1940         /*
1941          * NMIs can not handle page faults, even with fixups.
1942          * Saving the user stack can (and often does) fault.
1943          */
1944         if (unlikely(in_nmi()))
1945                 return;
1946
1947         /*
1948          * prevent recursion, since the user stack tracing may
1949          * trigger other kernel events.
1950          */
1951         preempt_disable();
1952         if (__this_cpu_read(user_stack_count))
1953                 goto out;
1954
1955         __this_cpu_inc(user_stack_count);
1956
1957         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1958                                           sizeof(*entry), flags, pc);
1959         if (!event)
1960                 goto out_drop_count;
1961         entry   = ring_buffer_event_data(event);
1962
1963         entry->tgid             = current->tgid;
1964         memset(&entry->caller, 0, sizeof(entry->caller));
1965
1966         trace.nr_entries        = 0;
1967         trace.max_entries       = FTRACE_STACK_ENTRIES;
1968         trace.skip              = 0;
1969         trace.entries           = entry->caller;
1970
1971         save_stack_trace_user(&trace);
1972         if (!call_filter_check_discard(call, entry, buffer, event))
1973                 __buffer_unlock_commit(buffer, event);
1974
1975  out_drop_count:
1976         __this_cpu_dec(user_stack_count);
1977  out:
1978         preempt_enable();
1979 }
1980
1981 #ifdef UNUSED
1982 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1983 {
1984         ftrace_trace_userstack(tr, flags, preempt_count());
1985 }
1986 #endif /* UNUSED */
1987
1988 #endif /* CONFIG_STACKTRACE */
1989
1990 /* created for use with alloc_percpu */
1991 struct trace_buffer_struct {
1992         char buffer[TRACE_BUF_SIZE];
1993 };
1994
1995 static struct trace_buffer_struct *trace_percpu_buffer;
1996 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1997 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1998 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1999
2000 /*
2001  * The buffer used depends on the context. There is a per-CPU
2002  * buffer for normal context, softirq context, hard irq context and
2003  * for NMI context. This allows for lockless recording.
2004  *
2005  * Note, if the buffers failed to be allocated, then this returns NULL
2006  */
2007 static char *get_trace_buf(void)
2008 {
2009         struct trace_buffer_struct *percpu_buffer;
2010
2011         /*
2012          * If we have allocated per cpu buffers, then we do not
2013          * need to do any locking.
2014          */
2015         if (in_nmi())
2016                 percpu_buffer = trace_percpu_nmi_buffer;
2017         else if (in_irq())
2018                 percpu_buffer = trace_percpu_irq_buffer;
2019         else if (in_softirq())
2020                 percpu_buffer = trace_percpu_sirq_buffer;
2021         else
2022                 percpu_buffer = trace_percpu_buffer;
2023
2024         if (!percpu_buffer)
2025                 return NULL;
2026
2027         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2028 }
2029
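/*
 * Allocate the four per-CPU trace_printk() scratch buffers (normal,
 * softirq, hardirq and NMI context). On failure, everything that was
 * already allocated is freed and -ENOMEM is returned.
 */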
2030 static int alloc_percpu_trace_buffer(void)
2031 {
2032         struct trace_buffer_struct *buffers;
2033         struct trace_buffer_struct *sirq_buffers;
2034         struct trace_buffer_struct *irq_buffers;
2035         struct trace_buffer_struct *nmi_buffers;
2036
2037         buffers = alloc_percpu(struct trace_buffer_struct);
2038         if (!buffers)
2039                 goto err_warn;
2040
2041         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2042         if (!sirq_buffers)
2043                 goto err_sirq;
2044
2045         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2046         if (!irq_buffers)
2047                 goto err_irq;
2048
2049         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2050         if (!nmi_buffers)
2051                 goto err_nmi;
2052
2053         trace_percpu_buffer = buffers;
2054         trace_percpu_sirq_buffer = sirq_buffers;
2055         trace_percpu_irq_buffer = irq_buffers;
2056         trace_percpu_nmi_buffer = nmi_buffers;
2057
2058         return 0;
2059
2060  err_nmi:
2061         free_percpu(irq_buffers);
2062  err_irq:
2063         free_percpu(sirq_buffers);
2064  err_sirq:
2065         free_percpu(buffers);
2066  err_warn:
2067         WARN(1, "Could not allocate percpu trace_printk buffer");
2068         return -ENOMEM;
2069 }
2070
2071 static int buffers_allocated;
2072
2073 void trace_printk_init_buffers(void)
2074 {
2075         if (buffers_allocated)
2076                 return;
2077
2078         if (alloc_percpu_trace_buffer())
2079                 return;
2080
2081         /* trace_printk() is for debug use only. Don't use it in production. */
2082
2083         pr_warn("\n");
2084         pr_warn("**********************************************************\n");
2085         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2086         pr_warn("**                                                      **\n");
2087         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2088         pr_warn("**                                                      **\n");
2089         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2090         pr_warn("** unsafe for production use.                           **\n");
2091         pr_warn("**                                                      **\n");
2092         pr_warn("** If you see this message and you are not debugging    **\n");
2093         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2094         pr_warn("**                                                      **\n");
2095         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2096         pr_warn("**********************************************************\n");
2097
2098         /* Expand the buffers to set size */
2099         tracing_update_buffers();
2100
2101         buffers_allocated = 1;
2102
2103         /*
2104          * trace_printk_init_buffers() can be called by modules.
2105          * If that happens, then we need to start cmdline recording
2106          * directly here. If the global_trace.buffer is already
2107          * allocated here, then this was called by module code.
2108          */
2109         if (global_trace.trace_buffer.buffer)
2110                 tracing_start_cmdline_record();
2111 }
2112
2113 void trace_printk_start_comm(void)
2114 {
2115         /* Start tracing comms if trace printk is set */
2116         if (!buffers_allocated)
2117                 return;
2118         tracing_start_cmdline_record();
2119 }
2120
2121 static void trace_printk_start_stop_comm(int enabled)
2122 {
2123         if (!buffers_allocated)
2124                 return;
2125
2126         if (enabled)
2127                 tracing_start_cmdline_record();
2128         else
2129                 tracing_stop_cmdline_record();
2130 }
2131
2132 /**
2133  * trace_vbprintk - write binary msg to tracing buffer
2134  * @ip: the address of the caller (recorded in the event)
 * @fmt: the trace_printk() format string
 * @args: the arguments for @fmt
2135  */
2136 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2137 {
2138         struct trace_event_call *call = &event_bprint;
2139         struct ring_buffer_event *event;
2140         struct ring_buffer *buffer;
2141         struct trace_array *tr = &global_trace;
2142         struct bprint_entry *entry;
2143         unsigned long flags;
2144         char *tbuffer;
2145         int len = 0, size, pc;
2146
2147         if (unlikely(tracing_selftest_running || tracing_disabled))
2148                 return 0;
2149
2150         /* Don't pollute graph traces with trace_vprintk internals */
2151         pause_graph_tracing();
2152
2153         pc = preempt_count();
2154         preempt_disable_notrace();
2155
2156         tbuffer = get_trace_buf();
2157         if (!tbuffer) {
2158                 len = 0;
2159                 goto out;
2160         }
2161
2162         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2163
2164         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2165                 goto out;
2166
2167         local_save_flags(flags);
2168         size = sizeof(*entry) + sizeof(u32) * len;
2169         buffer = tr->trace_buffer.buffer;
2170         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2171                                           flags, pc);
2172         if (!event)
2173                 goto out;
2174         entry = ring_buffer_event_data(event);
2175         entry->ip                       = ip;
2176         entry->fmt                      = fmt;
2177
2178         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2179         if (!call_filter_check_discard(call, entry, buffer, event)) {
2180                 __buffer_unlock_commit(buffer, event);
2181                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2182         }
2183
2184 out:
2185         preempt_enable_notrace();
2186         unpause_graph_tracing();
2187
2188         return len;
2189 }
2190 EXPORT_SYMBOL_GPL(trace_vbprintk);
2191
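/*
 * Plain-text counterpart of trace_vbprintk(): format the message into
 * a per-CPU scratch buffer with vscnprintf() and copy the result into
 * a TRACE_PRINT event on @buffer.
 */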
2192 static int
2193 __trace_array_vprintk(struct ring_buffer *buffer,
2194                       unsigned long ip, const char *fmt, va_list args)
2195 {
2196         struct trace_event_call *call = &event_print;
2197         struct ring_buffer_event *event;
2198         int len = 0, size, pc;
2199         struct print_entry *entry;
2200         unsigned long flags;
2201         char *tbuffer;
2202
2203         if (tracing_disabled || tracing_selftest_running)
2204                 return 0;
2205
2206         /* Don't pollute graph traces with trace_vprintk internals */
2207         pause_graph_tracing();
2208
2209         pc = preempt_count();
2210         preempt_disable_notrace();
2211
2212
2213         tbuffer = get_trace_buf();
2214         if (!tbuffer) {
2215                 len = 0;
2216                 goto out;
2217         }
2218
2219         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2220
2221         local_save_flags(flags);
2222         size = sizeof(*entry) + len + 1;
2223         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2224                                           flags, pc);
2225         if (!event)
2226                 goto out;
2227         entry = ring_buffer_event_data(event);
2228         entry->ip = ip;
2229
2230         memcpy(&entry->buf, tbuffer, len + 1);
2231         if (!call_filter_check_discard(call, entry, buffer, event)) {
2232                 __buffer_unlock_commit(buffer, event);
2233                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2234         }
2235  out:
2236         preempt_enable_notrace();
2237         unpause_graph_tracing();
2238
2239         return len;
2240 }
2241
2242 int trace_array_vprintk(struct trace_array *tr,
2243                         unsigned long ip, const char *fmt, va_list args)
2244 {
2245         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2246 }
2247
2248 int trace_array_printk(struct trace_array *tr,
2249                        unsigned long ip, const char *fmt, ...)
2250 {
2251         int ret;
2252         va_list ap;
2253
2254         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2255                 return 0;
2256
2257         va_start(ap, fmt);
2258         ret = trace_array_vprintk(tr, ip, fmt, ap);
2259         va_end(ap);
2260         return ret;
2261 }
2262
2263 int trace_array_printk_buf(struct ring_buffer *buffer,
2264                            unsigned long ip, const char *fmt, ...)
2265 {
2266         int ret;
2267         va_list ap;
2268
2269         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2270                 return 0;
2271
2272         va_start(ap, fmt);
2273         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2274         va_end(ap);
2275         return ret;
2276 }
2277
2278 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2279 {
2280         return trace_array_vprintk(&global_trace, ip, fmt, args);
2281 }
2282 EXPORT_SYMBOL_GPL(trace_vprintk);
2283
2284 static void trace_iterator_increment(struct trace_iterator *iter)
2285 {
2286         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2287
2288         iter->idx++;
2289         if (buf_iter)
2290                 ring_buffer_read(buf_iter, NULL);
2291 }
2292
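/*
 * Peek at the next entry for @cpu without consuming it. The entry size
 * is recorded in iter->ent_size and, when reading without an iterator,
 * the number of lost events is reported through @lost_events.
 */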
2293 static struct trace_entry *
2294 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2295                 unsigned long *lost_events)
2296 {
2297         struct ring_buffer_event *event;
2298         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2299
2300         if (buf_iter)
2301                 event = ring_buffer_iter_peek(buf_iter, ts);
2302         else
2303                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2304                                          lost_events);
2305
2306         if (event) {
2307                 iter->ent_size = ring_buffer_event_length(event);
2308                 return ring_buffer_event_data(event);
2309         }
2310         iter->ent_size = 0;
2311         return NULL;
2312 }
2313
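/*
 * Find the oldest pending entry (the one with the smallest timestamp)
 * among the CPUs this iterator covers, and report its CPU, timestamp
 * and any lost events.
 */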
2314 static struct trace_entry *
2315 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2316                   unsigned long *missing_events, u64 *ent_ts)
2317 {
2318         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2319         struct trace_entry *ent, *next = NULL;
2320         unsigned long lost_events = 0, next_lost = 0;
2321         int cpu_file = iter->cpu_file;
2322         u64 next_ts = 0, ts;
2323         int next_cpu = -1;
2324         int next_size = 0;
2325         int cpu;
2326
2327         /*
2328          * If we are in a per_cpu trace file, don't bother iterating over
2329          * all CPUs; just peek at the requested CPU directly.
2330          */
2331         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2332                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2333                         return NULL;
2334                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2335                 if (ent_cpu)
2336                         *ent_cpu = cpu_file;
2337
2338                 return ent;
2339         }
2340
2341         for_each_tracing_cpu(cpu) {
2342
2343                 if (ring_buffer_empty_cpu(buffer, cpu))
2344                         continue;
2345
2346                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2347
2348                 /*
2349                  * Pick the entry with the smallest timestamp:
2350                  */
2351                 if (ent && (!next || ts < next_ts)) {
2352                         next = ent;
2353                         next_cpu = cpu;
2354                         next_ts = ts;
2355                         next_lost = lost_events;
2356                         next_size = iter->ent_size;
2357                 }
2358         }
2359
2360         iter->ent_size = next_size;
2361
2362         if (ent_cpu)
2363                 *ent_cpu = next_cpu;
2364
2365         if (ent_ts)
2366                 *ent_ts = next_ts;
2367
2368         if (missing_events)
2369                 *missing_events = next_lost;
2370
2371         return next;
2372 }
2373
2374 /* Find the next real entry, without updating the iterator itself */
2375 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2376                                           int *ent_cpu, u64 *ent_ts)
2377 {
2378         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2379 }
2380
2381 /* Find the next real entry, and increment the iterator to the next entry */
2382 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2383 {
2384         iter->ent = __find_next_entry(iter, &iter->cpu,
2385                                       &iter->lost_events, &iter->ts);
2386
2387         if (iter->ent)
2388                 trace_iterator_increment(iter);
2389
2390         return iter->ent ? iter : NULL;
2391 }
2392
2393 static void trace_consume(struct trace_iterator *iter)
2394 {
2395         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2396                             &iter->lost_events);
2397 }
2398
2399 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2400 {
2401         struct trace_iterator *iter = m->private;
2402         int i = (int)*pos;
2403         void *ent;
2404
2405         WARN_ON_ONCE(iter->leftover);
2406
2407         (*pos)++;
2408
2409         /* can't go backwards */
2410         if (iter->idx > i)
2411                 return NULL;
2412
2413         if (iter->idx < 0)
2414                 ent = trace_find_next_entry_inc(iter);
2415         else
2416                 ent = iter;
2417
2418         while (ent && iter->idx < i)
2419                 ent = trace_find_next_entry_inc(iter);
2420
2421         iter->pos = *pos;
2422
2423         return ent;
2424 }
2425
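/*
 * Reset the ring buffer iterator for @cpu and skip over entries that
 * were recorded before the current trace window started. The number of
 * skipped entries is remembered so the buffer statistics stay correct.
 */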
2426 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2427 {
2428         struct ring_buffer_event *event;
2429         struct ring_buffer_iter *buf_iter;
2430         unsigned long entries = 0;
2431         u64 ts;
2432
2433         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2434
2435         buf_iter = trace_buffer_iter(iter, cpu);
2436         if (!buf_iter)
2437                 return;
2438
2439         ring_buffer_iter_reset(buf_iter);
2440
2441         /*
2442          * With the max latency tracers we can have the case that a
2443          * reset never took place on a cpu. This is evident from the
2444          * timestamp being before the start of the buffer.
2445          */
2446         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2447                 if (ts >= iter->trace_buffer->time_start)
2448                         break;
2449                 entries++;
2450                 ring_buffer_read(buf_iter, NULL);
2451         }
2452
2453         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2454 }
2455
2456 /*
2457  * The current tracer is copied to avoid taking a global lock
2458  * all around.
2459  */
2460 static void *s_start(struct seq_file *m, loff_t *pos)
2461 {
2462         struct trace_iterator *iter = m->private;
2463         struct trace_array *tr = iter->tr;
2464         int cpu_file = iter->cpu_file;
2465         void *p = NULL;
2466         loff_t l = 0;
2467         int cpu;
2468
2469         /*
2470          * copy the tracer to avoid using a global lock all around.
2471          * iter->trace is a copy of current_trace; the pointer to the
2472          * name may be used instead of a strcmp(), as iter->trace->name
2473          * will point to the same string as current_trace->name.
2474          */
2475         mutex_lock(&trace_types_lock);
2476         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2477                 *iter->trace = *tr->current_trace;
2478         mutex_unlock(&trace_types_lock);
2479
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481         if (iter->snapshot && iter->trace->use_max_tr)
2482                 return ERR_PTR(-EBUSY);
2483 #endif
2484
2485         if (!iter->snapshot)
2486                 atomic_inc(&trace_record_cmdline_disabled);
2487
2488         if (*pos != iter->pos) {
2489                 iter->ent = NULL;
2490                 iter->cpu = 0;
2491                 iter->idx = -1;
2492
2493                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2494                         for_each_tracing_cpu(cpu)
2495                                 tracing_iter_reset(iter, cpu);
2496                 } else
2497                         tracing_iter_reset(iter, cpu_file);
2498
2499                 iter->leftover = 0;
2500                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2501                         ;
2502
2503         } else {
2504                 /*
2505                  * If we overflowed the seq_file before, then we want
2506                  * to just reuse the trace_seq buffer again.
2507                  */
2508                 if (iter->leftover)
2509                         p = iter;
2510                 else {
2511                         l = *pos - 1;
2512                         p = s_next(m, p, &l);
2513                 }
2514         }
2515
2516         trace_event_read_lock();
2517         trace_access_lock(cpu_file);
2518         return p;
2519 }
2520
2521 static void s_stop(struct seq_file *m, void *p)
2522 {
2523         struct trace_iterator *iter = m->private;
2524
2525 #ifdef CONFIG_TRACER_MAX_TRACE
2526         if (iter->snapshot && iter->trace->use_max_tr)
2527                 return;
2528 #endif
2529
2530         if (!iter->snapshot)
2531                 atomic_dec(&trace_record_cmdline_disabled);
2532
2533         trace_access_unlock(iter->cpu_file);
2534         trace_event_read_unlock();
2535 }
2536
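/*
 * Count the events in @buf: *entries is what is currently readable,
 * while *total additionally includes events that were overwritten
 * (overruns), except on CPUs where old entries were deliberately
 * skipped.
 */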
2537 static void
2538 get_total_entries(struct trace_buffer *buf,
2539                   unsigned long *total, unsigned long *entries)
2540 {
2541         unsigned long count;
2542         int cpu;
2543
2544         *total = 0;
2545         *entries = 0;
2546
2547         for_each_tracing_cpu(cpu) {
2548                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2549                 /*
2550                  * If this buffer has skipped entries, then we hold all
2551                  * entries for the trace and we need to ignore the
2552                  * ones before the time stamp.
2553                  */
2554                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2555                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2556                         /* total is the same as the entries */
2557                         *total += count;
2558                 } else
2559                         *total += count +
2560                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2561                 *entries += count;
2562         }
2563 }
2564
2565 static void print_lat_help_header(struct seq_file *m)
2566 {
2567         seq_puts(m, "#                  _------=> CPU#            \n"
2568                     "#                 / _-----=> irqs-off        \n"
2569                     "#                | / _----=> need-resched    \n"
2570                     "#                || / _---=> hardirq/softirq \n"
2571                     "#                ||| / _--=> preempt-depth   \n"
2572                     "#                |||| /     delay            \n"
2573                     "#  cmd     pid   ||||| time  |   caller      \n"
2574                     "#     \\   /      |||||  \\    |   /         \n");
2575 }
2576
2577 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2578 {
2579         unsigned long total;
2580         unsigned long entries;
2581
2582         get_total_entries(buf, &total, &entries);
2583         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2584                    entries, total, num_online_cpus());
2585         seq_puts(m, "#\n");
2586 }
2587
2588 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2589 {
2590         print_event_info(buf, m);
2591         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2592                     "#              | |       |          |         |\n");
2593 }
2594
2595 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2596 {
2597         print_event_info(buf, m);
2598         seq_puts(m, "#                              _-----=> irqs-off\n"
2599                     "#                             / _----=> need-resched\n"
2600                     "#                            | / _---=> hardirq/softirq\n"
2601                     "#                            || / _--=> preempt-depth\n"
2602                     "#                            ||| /     delay\n"
2603                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2604                     "#              | |       |   ||||       |         |\n");
2605 }
2606
2607 void
2608 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2609 {
2610         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2611         struct trace_buffer *buf = iter->trace_buffer;
2612         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2613         struct tracer *type = iter->trace;
2614         unsigned long entries;
2615         unsigned long total;
2616         const char *name = "preemption";
2617
2618         name = type->name;
2619
2620         get_total_entries(buf, &total, &entries);
2621
2622         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2623                    name, UTS_RELEASE);
2624         seq_puts(m, "# -----------------------------------"
2625                  "---------------------------------\n");
2626         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2627                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2628                    nsecs_to_usecs(data->saved_latency),
2629                    entries,
2630                    total,
2631                    buf->cpu,
2632 #if defined(CONFIG_PREEMPT_NONE)
2633                    "server",
2634 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2635                    "desktop",
2636 #elif defined(CONFIG_PREEMPT)
2637                    "preempt",
2638 #else
2639                    "unknown",
2640 #endif
2641                    /* These are reserved for later use */
2642                    0, 0, 0, 0);
2643 #ifdef CONFIG_SMP
2644         seq_printf(m, " #P:%d)\n", num_online_cpus());
2645 #else
2646         seq_puts(m, ")\n");
2647 #endif
2648         seq_puts(m, "#    -----------------\n");
2649         seq_printf(m, "#    | task: %.16s-%d "
2650                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2651                    data->comm, data->pid,
2652                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2653                    data->policy, data->rt_priority);
2654         seq_puts(m, "#    -----------------\n");
2655
2656         if (data->critical_start) {
2657                 seq_puts(m, "#  => started at: ");
2658                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2659                 trace_print_seq(m, &iter->seq);
2660                 seq_puts(m, "\n#  => ended at:   ");
2661                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2662                 trace_print_seq(m, &iter->seq);
2663                 seq_puts(m, "\n#\n");
2664         }
2665
2666         seq_puts(m, "#\n");
2667 }
2668
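/*
 * When the buffers have overrun, print a marker the first time output
 * from a given CPU shows up, so the reader knows where that CPU's data
 * begins.
 */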
2669 static void test_cpu_buff_start(struct trace_iterator *iter)
2670 {
2671         struct trace_seq *s = &iter->seq;
2672         struct trace_array *tr = iter->tr;
2673
2674         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2675                 return;
2676
2677         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2678                 return;
2679
2680         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2681                 return;
2682
2683         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2684                 return;
2685
2686         if (iter->started)
2687                 cpumask_set_cpu(iter->cpu, iter->started);
2688
2689         /* Don't print started cpu buffer for the first entry of the trace */
2690         if (iter->idx > 1)
2691                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2692                                 iter->cpu);
2693 }
2694
2695 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2696 {
2697         struct trace_array *tr = iter->tr;
2698         struct trace_seq *s = &iter->seq;
2699         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2700         struct trace_entry *entry;
2701         struct trace_event *event;
2702
2703         entry = iter->ent;
2704
2705         test_cpu_buff_start(iter);
2706
2707         event = ftrace_find_event(entry->type);
2708
2709         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2710                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2711                         trace_print_lat_context(iter);
2712                 else
2713                         trace_print_context(iter);
2714         }
2715
2716         if (trace_seq_has_overflowed(s))
2717                 return TRACE_TYPE_PARTIAL_LINE;
2718
2719         if (event)
2720                 return event->funcs->trace(iter, sym_flags, event);
2721
2722         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2723
2724         return trace_handle_return(s);
2725 }
2726
2727 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2728 {
2729         struct trace_array *tr = iter->tr;
2730         struct trace_seq *s = &iter->seq;
2731         struct trace_entry *entry;
2732         struct trace_event *event;
2733
2734         entry = iter->ent;
2735
2736         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2737                 trace_seq_printf(s, "%d %d %llu ",
2738                                  entry->pid, iter->cpu, iter->ts);
2739
2740         if (trace_seq_has_overflowed(s))
2741                 return TRACE_TYPE_PARTIAL_LINE;
2742
2743         event = ftrace_find_event(entry->type);
2744         if (event)
2745                 return event->funcs->raw(iter, 0, event);
2746
2747         trace_seq_printf(s, "%d ?\n", entry->type);
2748
2749         return trace_handle_return(s);
2750 }
2751
2752 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2753 {
2754         struct trace_array *tr = iter->tr;
2755         struct trace_seq *s = &iter->seq;
2756         unsigned char newline = '\n';
2757         struct trace_entry *entry;
2758         struct trace_event *event;
2759
2760         entry = iter->ent;
2761
2762         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2763                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2764                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2765                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2766                 if (trace_seq_has_overflowed(s))
2767                         return TRACE_TYPE_PARTIAL_LINE;
2768         }
2769
2770         event = ftrace_find_event(entry->type);
2771         if (event) {
2772                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2773                 if (ret != TRACE_TYPE_HANDLED)
2774                         return ret;
2775         }
2776
2777         SEQ_PUT_FIELD(s, newline);
2778
2779         return trace_handle_return(s);
2780 }
2781
2782 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2783 {
2784         struct trace_array *tr = iter->tr;
2785         struct trace_seq *s = &iter->seq;
2786         struct trace_entry *entry;
2787         struct trace_event *event;
2788
2789         entry = iter->ent;
2790
2791         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2792                 SEQ_PUT_FIELD(s, entry->pid);
2793                 SEQ_PUT_FIELD(s, iter->cpu);
2794                 SEQ_PUT_FIELD(s, iter->ts);
2795                 if (trace_seq_has_overflowed(s))
2796                         return TRACE_TYPE_PARTIAL_LINE;
2797         }
2798
2799         event = ftrace_find_event(entry->type);
2800         return event ? event->funcs->binary(iter, 0, event) :
2801                 TRACE_TYPE_HANDLED;
2802 }
2803
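/*
 * Return 1 if there is nothing left to read: either in the single CPU
 * buffer selected by iter->cpu_file or, for RING_BUFFER_ALL_CPUS, in
 * all of them.
 */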
2804 int trace_empty(struct trace_iterator *iter)
2805 {
2806         struct ring_buffer_iter *buf_iter;
2807         int cpu;
2808
2809         /* If we are looking at one CPU buffer, only check that one */
2810         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2811                 cpu = iter->cpu_file;
2812                 buf_iter = trace_buffer_iter(iter, cpu);
2813                 if (buf_iter) {
2814                         if (!ring_buffer_iter_empty(buf_iter))
2815                                 return 0;
2816                 } else {
2817                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2818                                 return 0;
2819                 }
2820                 return 1;
2821         }
2822
2823         for_each_tracing_cpu(cpu) {
2824                 buf_iter = trace_buffer_iter(iter, cpu);
2825                 if (buf_iter) {
2826                         if (!ring_buffer_iter_empty(buf_iter))
2827                                 return 0;
2828                 } else {
2829                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2830                                 return 0;
2831                 }
2832         }
2833
2834         return 1;
2835 }
2836
2837 /*  Called with trace_event_read_lock() held. */
2838 enum print_line_t print_trace_line(struct trace_iterator *iter)
2839 {
2840         struct trace_array *tr = iter->tr;
2841         unsigned long trace_flags = tr->trace_flags;
2842         enum print_line_t ret;
2843
2844         if (iter->lost_events) {
2845                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2846                                  iter->cpu, iter->lost_events);
2847                 if (trace_seq_has_overflowed(&iter->seq))
2848                         return TRACE_TYPE_PARTIAL_LINE;
2849         }
2850
2851         if (iter->trace && iter->trace->print_line) {
2852                 ret = iter->trace->print_line(iter);
2853                 if (ret != TRACE_TYPE_UNHANDLED)
2854                         return ret;
2855         }
2856
2857         if (iter->ent->type == TRACE_BPUTS &&
2858                         trace_flags & TRACE_ITER_PRINTK &&
2859                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2860                 return trace_print_bputs_msg_only(iter);
2861
2862         if (iter->ent->type == TRACE_BPRINT &&
2863                         trace_flags & TRACE_ITER_PRINTK &&
2864                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2865                 return trace_print_bprintk_msg_only(iter);
2866
2867         if (iter->ent->type == TRACE_PRINT &&
2868                         trace_flags & TRACE_ITER_PRINTK &&
2869                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2870                 return trace_print_printk_msg_only(iter);
2871
2872         if (trace_flags & TRACE_ITER_BIN)
2873                 return print_bin_fmt(iter);
2874
2875         if (trace_flags & TRACE_ITER_HEX)
2876                 return print_hex_fmt(iter);
2877
2878         if (trace_flags & TRACE_ITER_RAW)
2879                 return print_raw_fmt(iter);
2880
2881         return print_trace_fmt(iter);
2882 }
2883
2884 void trace_latency_header(struct seq_file *m)
2885 {
2886         struct trace_iterator *iter = m->private;
2887         struct trace_array *tr = iter->tr;
2888
2889         /* print nothing if the buffers are empty */
2890         if (trace_empty(iter))
2891                 return;
2892
2893         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2894                 print_trace_header(m, iter);
2895
2896         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2897                 print_lat_help_header(m);
2898 }
2899
2900 void trace_default_header(struct seq_file *m)
2901 {
2902         struct trace_iterator *iter = m->private;
2903         struct trace_array *tr = iter->tr;
2904         unsigned long trace_flags = tr->trace_flags;
2905
2906         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2907                 return;
2908
2909         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2910                 /* print nothing if the buffers are empty */
2911                 if (trace_empty(iter))
2912                         return;
2913                 print_trace_header(m, iter);
2914                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2915                         print_lat_help_header(m);
2916         } else {
2917                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2918                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2919                                 print_func_help_header_irq(iter->trace_buffer, m);
2920                         else
2921                                 print_func_help_header(iter->trace_buffer, m);
2922                 }
2923         }
2924 }
2925
2926 static void test_ftrace_alive(struct seq_file *m)
2927 {
2928         if (!ftrace_is_dead())
2929                 return;
2930         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2931                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2932 }
2933
2934 #ifdef CONFIG_TRACER_MAX_TRACE
2935 static void show_snapshot_main_help(struct seq_file *m)
2936 {
2937         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2938                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2939                     "#                      Takes a snapshot of the main buffer.\n"
2940                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2941                     "#                      (Doesn't have to be '2' works with any number that\n"
2942                     "#                       is not a '0' or '1')\n");
2943 }
2944
2945 static void show_snapshot_percpu_help(struct seq_file *m)
2946 {
2947         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2948 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2949         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2950                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2951 #else
2952         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2953                     "#                     Must use main snapshot file to allocate.\n");
2954 #endif
2955         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2956                     "#                      (Doesn't have to be '2' works with any number that\n"
2957                     "#                       is not a '0' or '1')\n");
2958 }
2959
2960 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2961 {
2962         if (iter->tr->allocated_snapshot)
2963                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2964         else
2965                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2966
2967         seq_puts(m, "# Snapshot commands:\n");
2968         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2969                 show_snapshot_main_help(m);
2970         else
2971                 show_snapshot_percpu_help(m);
2972 }
2973 #else
2974 /* Should never be called */
2975 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2976 #endif
2977
2978 static int s_show(struct seq_file *m, void *v)
2979 {
2980         struct trace_iterator *iter = v;
2981         int ret;
2982
2983         if (iter->ent == NULL) {
2984                 if (iter->tr) {
2985                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2986                         seq_puts(m, "#\n");
2987                         test_ftrace_alive(m);
2988                 }
2989                 if (iter->snapshot && trace_empty(iter))
2990                         print_snapshot_help(m, iter);
2991                 else if (iter->trace && iter->trace->print_header)
2992                         iter->trace->print_header(m);
2993                 else
2994                         trace_default_header(m);
2995
2996         } else if (iter->leftover) {
2997                 /*
2998                  * If we filled the seq_file buffer earlier, we
2999                  * want to just show it now.
3000                  */
3001                 ret = trace_print_seq(m, &iter->seq);
3002
3003                 /* ret should this time be zero, but you never know */
3004                 iter->leftover = ret;
3005
3006         } else {
3007                 print_trace_line(iter);
3008                 ret = trace_print_seq(m, &iter->seq);
3009                 /*
3010                  * If we overflow the seq_file buffer, then it will
3011                  * ask us for this data again at start up.
3012                  * Use that instead.
3013                  *  ret is 0 if seq_file write succeeded.
3014                  *        -1 otherwise.
3015                  */
3016                 iter->leftover = ret;
3017         }
3018
3019         return 0;
3020 }
3021
3022 /*
3023  * Should be used after trace_array_get(), trace_types_lock
3024  * ensures that i_cdev was already initialized.
3025  */
3026 static inline int tracing_get_cpu(struct inode *inode)
3027 {
3028         if (inode->i_cdev) /* See trace_create_cpu_file() */
3029                 return (long)inode->i_cdev - 1;
3030         return RING_BUFFER_ALL_CPUS;
3031 }
3032
3033 static const struct seq_operations tracer_seq_ops = {
3034         .start          = s_start,
3035         .next           = s_next,
3036         .stop           = s_stop,
3037         .show           = s_show,
3038 };
3039
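/*
 * Set up a trace_iterator for reading the trace: copy the current
 * tracer, allocate a ring buffer iterator per CPU and, unless the
 * "snapshot" file is being opened, stop tracing for the duration of
 * the read (tracing_release() restarts it).
 */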
3040 static struct trace_iterator *
3041 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3042 {
3043         struct trace_array *tr = inode->i_private;
3044         struct trace_iterator *iter;
3045         int cpu;
3046
3047         if (tracing_disabled)
3048                 return ERR_PTR(-ENODEV);
3049
3050         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3051         if (!iter)
3052                 return ERR_PTR(-ENOMEM);
3053
3054         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3055                                     GFP_KERNEL);
3056         if (!iter->buffer_iter)
3057                 goto release;
3058
3059         /*
3060          * We make a copy of the current tracer to avoid concurrent
3061          * changes on it while we are reading.
3062          */
3063         mutex_lock(&trace_types_lock);
3064         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3065         if (!iter->trace)
3066                 goto fail;
3067
3068         *iter->trace = *tr->current_trace;
3069
3070         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3071                 goto fail;
3072
3073         iter->tr = tr;
3074
3075 #ifdef CONFIG_TRACER_MAX_TRACE
3076         /* Currently only the top directory has a snapshot */
3077         if (tr->current_trace->print_max || snapshot)
3078                 iter->trace_buffer = &tr->max_buffer;
3079         else
3080 #endif
3081                 iter->trace_buffer = &tr->trace_buffer;
3082         iter->snapshot = snapshot;
3083         iter->pos = -1;
3084         iter->cpu_file = tracing_get_cpu(inode);
3085         mutex_init(&iter->mutex);
3086
3087         /* Notify the tracer early; before we stop tracing. */
3088         if (iter->trace && iter->trace->open)
3089                 iter->trace->open(iter);
3090
3091         /* Annotate start of buffers if we had overruns */
3092         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3093                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3094
3095         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3096         if (trace_clocks[tr->clock_id].in_ns)
3097                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3098
3099         /* stop the trace while dumping if we are not opening "snapshot" */
3100         if (!iter->snapshot)
3101                 tracing_stop_tr(tr);
3102
3103         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3104                 for_each_tracing_cpu(cpu) {
3105                         iter->buffer_iter[cpu] =
3106                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3107                 }
3108                 ring_buffer_read_prepare_sync();
3109                 for_each_tracing_cpu(cpu) {
3110                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3111                         tracing_iter_reset(iter, cpu);
3112                 }
3113         } else {
3114                 cpu = iter->cpu_file;
3115                 iter->buffer_iter[cpu] =
3116                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3117                 ring_buffer_read_prepare_sync();
3118                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3119                 tracing_iter_reset(iter, cpu);
3120         }
3121
3122         mutex_unlock(&trace_types_lock);
3123
3124         return iter;
3125
3126  fail:
3127         mutex_unlock(&trace_types_lock);
3128         kfree(iter->trace);
3129         kfree(iter->buffer_iter);
3130 release:
3131         seq_release_private(inode, file);
3132         return ERR_PTR(-ENOMEM);
3133 }
3134
3135 int tracing_open_generic(struct inode *inode, struct file *filp)
3136 {
3137         if (tracing_disabled)
3138                 return -ENODEV;
3139
3140         filp->private_data = inode->i_private;
3141         return 0;
3142 }
3143
3144 bool tracing_is_disabled(void)
3145 {
3146         return tracing_disabled ? true : false;
3147 }
3148
3149 /*
3150  * Open and update trace_array ref count.
3151  * Must have the current trace_array passed to it.
3152  */
3153 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3154 {
3155         struct trace_array *tr = inode->i_private;
3156
3157         if (tracing_disabled)
3158                 return -ENODEV;
3159
3160         if (trace_array_get(tr) < 0)
3161                 return -ENODEV;
3162
3163         filp->private_data = inode->i_private;
3164
3165         return 0;
3166 }
3167
3168 static int tracing_release(struct inode *inode, struct file *file)
3169 {
3170         struct trace_array *tr = inode->i_private;
3171         struct seq_file *m = file->private_data;
3172         struct trace_iterator *iter;
3173         int cpu;
3174
3175         if (!(file->f_mode & FMODE_READ)) {
3176                 trace_array_put(tr);
3177                 return 0;
3178         }
3179
3180         /* Writes do not use seq_file */
3181         iter = m->private;
3182         mutex_lock(&trace_types_lock);
3183
3184         for_each_tracing_cpu(cpu) {
3185                 if (iter->buffer_iter[cpu])
3186                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3187         }
3188
3189         if (iter->trace && iter->trace->close)
3190                 iter->trace->close(iter);
3191
3192         if (!iter->snapshot)
3193                 /* reenable tracing if it was previously enabled */
3194                 tracing_start_tr(tr);
3195
3196         __trace_array_put(tr);
3197
3198         mutex_unlock(&trace_types_lock);
3199
3200         mutex_destroy(&iter->mutex);
3201         free_cpumask_var(iter->started);
3202         kfree(iter->trace);
3203         kfree(iter->buffer_iter);
3204         seq_release_private(inode, file);
3205
3206         return 0;
3207 }
3208
3209 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3210 {
3211         struct trace_array *tr = inode->i_private;
3212
3213         trace_array_put(tr);
3214         return 0;
3215 }
3216
3217 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3218 {
3219         struct trace_array *tr = inode->i_private;
3220
3221         trace_array_put(tr);
3222
3223         return single_release(inode, file);
3224 }
3225
3226 static int tracing_open(struct inode *inode, struct file *file)
3227 {
3228         struct trace_array *tr = inode->i_private;
3229         struct trace_iterator *iter;
3230         int ret = 0;
3231
3232         if (trace_array_get(tr) < 0)
3233                 return -ENODEV;
3234
3235         /* If this file was open for write, then erase contents */
3236         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3237                 int cpu = tracing_get_cpu(inode);
3238
3239                 if (cpu == RING_BUFFER_ALL_CPUS)
3240                         tracing_reset_online_cpus(&tr->trace_buffer);
3241                 else
3242                         tracing_reset(&tr->trace_buffer, cpu);
3243         }
3244
3245         if (file->f_mode & FMODE_READ) {
3246                 iter = __tracing_open(inode, file, false);
3247                 if (IS_ERR(iter))
3248                         ret = PTR_ERR(iter);
3249                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3250                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3251         }
3252
3253         if (ret < 0)
3254                 trace_array_put(tr);
3255
3256         return ret;
3257 }
3258
3259 /*
3260  * Some tracers are not suitable for instance buffers.
3261  * A tracer is always available for the global array (toplevel)
3262  * or if it explicitly states that it is.
3263  */
3264 static bool
3265 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3266 {
3267         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3268 }
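/*
 * Illustrative sketch (not part of the original source): a tracer opts in
 * to instance buffers by setting .allow_instances in its struct tracer,
 * for example:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		 = "my_tracer",
 *		.init		 = my_tracer_init,
 *		.allow_instances = true,
 *	};
 *
 * The tracer name and init callback above are hypothetical; tracers that
 * leave the flag clear are only offered for the top-level trace array.
 */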
3269
3270 /* Find the next tracer that this trace array may use */
3271 static struct tracer *
3272 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3273 {
3274         while (t && !trace_ok_for_array(t, tr))
3275                 t = t->next;
3276
3277         return t;
3278 }
3279
3280 static void *
3281 t_next(struct seq_file *m, void *v, loff_t *pos)
3282 {
3283         struct trace_array *tr = m->private;
3284         struct tracer *t = v;
3285
3286         (*pos)++;
3287
3288         if (t)
3289                 t = get_tracer_for_array(tr, t->next);
3290
3291         return t;
3292 }
3293
3294 static void *t_start(struct seq_file *m, loff_t *pos)
3295 {
3296         struct trace_array *tr = m->private;
3297         struct tracer *t;
3298         loff_t l = 0;
3299
3300         mutex_lock(&trace_types_lock);
3301
3302         t = get_tracer_for_array(tr, trace_types);
3303         for (; t && l < *pos; t = t_next(m, t, &l))
3304                 ;
3305
3306         return t;
3307 }
3308
3309 static void t_stop(struct seq_file *m, void *p)
3310 {
3311         mutex_unlock(&trace_types_lock);
3312 }
3313
3314 static int t_show(struct seq_file *m, void *v)
3315 {
3316         struct tracer *t = v;
3317
3318         if (!t)
3319                 return 0;
3320
3321         seq_puts(m, t->name);
3322         if (t->next)
3323                 seq_putc(m, ' ');
3324         else
3325                 seq_putc(m, '\n');
3326
3327         return 0;
3328 }
3329
3330 static const struct seq_operations show_traces_seq_ops = {
3331         .start          = t_start,
3332         .next           = t_next,
3333         .stop           = t_stop,
3334         .show           = t_show,
3335 };
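/*
 * Illustrative example (output depends on the configured tracers): reading
 * the file backed by these seq_ops lists the tracers usable by this trace
 * array on one space-separated line, e.g.
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	blk function_graph function nop
 */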
3336
3337 static int show_traces_open(struct inode *inode, struct file *file)
3338 {
3339         struct trace_array *tr = inode->i_private;
3340         struct seq_file *m;
3341         int ret;
3342
3343         if (tracing_disabled)
3344                 return -ENODEV;
3345
3346         ret = seq_open(file, &show_traces_seq_ops);
3347         if (ret)
3348                 return ret;
3349
3350         m = file->private_data;
3351         m->private = tr;
3352
3353         return 0;
3354 }
3355
3356 static ssize_t
3357 tracing_write_stub(struct file *filp, const char __user *ubuf,
3358                    size_t count, loff_t *ppos)
3359 {
3360         return count;
3361 }
3362
3363 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3364 {
3365         int ret;
3366
3367         if (file->f_mode & FMODE_READ)
3368                 ret = seq_lseek(file, offset, whence);
3369         else
3370                 file->f_pos = ret = 0;
3371
3372         return ret;
3373 }
3374
3375 static const struct file_operations tracing_fops = {
3376         .open           = tracing_open,
3377         .read           = seq_read,
3378         .write          = tracing_write_stub,
3379         .llseek         = tracing_lseek,
3380         .release        = tracing_release,
3381 };
3382
3383 static const struct file_operations show_traces_fops = {
3384         .open           = show_traces_open,
3385         .read           = seq_read,
3386         .release        = seq_release,
3387         .llseek         = seq_lseek,
3388 };
3389
3390 /*
3391  * The tracer itself will not take this lock, but we still want
3392  * to provide a consistent cpumask to user-space:
3393  */
3394 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3395
3396 /*
3397  * Temporary storage for the character representation of the
3398  * CPU bitmask (and one more byte for the newline):
3399  */
3400 static char mask_str[NR_CPUS + 1];
3401
3402 static ssize_t
3403 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3404                      size_t count, loff_t *ppos)
3405 {
3406         struct trace_array *tr = file_inode(filp)->i_private;
3407         int len;
3408
3409         mutex_lock(&tracing_cpumask_update_lock);
3410
3411         len = snprintf(mask_str, count, "%*pb\n",
3412                        cpumask_pr_args(tr->tracing_cpumask));
3413         if (len >= count) {
3414                 count = -EINVAL;
3415                 goto out_err;
3416         }
3417         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3418
3419 out_err:
3420         mutex_unlock(&tracing_cpumask_update_lock);
3421
3422         return count;
3423 }
3424
3425 static ssize_t
3426 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3427                       size_t count, loff_t *ppos)
3428 {
3429         struct trace_array *tr = file_inode(filp)->i_private;
3430         cpumask_var_t tracing_cpumask_new;
3431         int err, cpu;
3432
3433         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3434                 return -ENOMEM;
3435
3436         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3437         if (err)
3438                 goto err_unlock;
3439
3440         mutex_lock(&tracing_cpumask_update_lock);
3441
3442         local_irq_disable();
3443         arch_spin_lock(&tr->max_lock);
3444         for_each_tracing_cpu(cpu) {
3445                 /*
3446                  * Increase/decrease the disabled counter if we are
3447                  * about to flip a bit in the cpumask:
3448                  */
3449                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3450                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3451                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3452                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3453                 }
3454                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3455                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3456                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3457                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3458                 }
3459         }
3460         arch_spin_unlock(&tr->max_lock);
3461         local_irq_enable();
3462
3463         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3464
3465         mutex_unlock(&tracing_cpumask_update_lock);
3466         free_cpumask_var(tracing_cpumask_new);
3467
3468         return count;
3469
3470 err_unlock:
3471         free_cpumask_var(tracing_cpumask_new);
3472
3473         return err;
3474 }
3475
3476 static const struct file_operations tracing_cpumask_fops = {
3477         .open           = tracing_open_generic_tr,
3478         .read           = tracing_cpumask_read,
3479         .write          = tracing_cpumask_write,
3480         .release        = tracing_release_generic_tr,
3481         .llseek         = generic_file_llseek,
3482 };
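/*
 * Usage sketch (illustrative): tracing_cpumask takes a hex CPU mask, and
 * clearing a bit bumps the per-cpu disabled counter and stops ring buffer
 * recording on that CPU. For example, to restrict tracing to CPUs 0 and 1:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 */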
3483
3484 static int tracing_trace_options_show(struct seq_file *m, void *v)
3485 {
3486         struct tracer_opt *trace_opts;
3487         struct trace_array *tr = m->private;
3488         u32 tracer_flags;
3489         int i;
3490
3491         mutex_lock(&trace_types_lock);
3492         tracer_flags = tr->current_trace->flags->val;
3493         trace_opts = tr->current_trace->flags->opts;
3494
3495         for (i = 0; trace_options[i]; i++) {
3496                 if (tr->trace_flags & (1 << i))
3497                         seq_printf(m, "%s\n", trace_options[i]);
3498                 else
3499                         seq_printf(m, "no%s\n", trace_options[i]);
3500         }
3501
3502         for (i = 0; trace_opts[i].name; i++) {
3503                 if (tracer_flags & trace_opts[i].bit)
3504                         seq_printf(m, "%s\n", trace_opts[i].name);
3505                 else
3506                         seq_printf(m, "no%s\n", trace_opts[i].name);
3507         }
3508         mutex_unlock(&trace_types_lock);
3509
3510         return 0;
3511 }
3512
3513 static int __set_tracer_option(struct trace_array *tr,
3514                                struct tracer_flags *tracer_flags,
3515                                struct tracer_opt *opts, int neg)
3516 {
3517         struct tracer *trace = tracer_flags->trace;
3518         int ret;
3519
3520         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3521         if (ret)
3522                 return ret;
3523
3524         if (neg)
3525                 tracer_flags->val &= ~opts->bit;
3526         else
3527                 tracer_flags->val |= opts->bit;
3528         return 0;
3529 }
3530
3531 /* Try to assign a tracer specific option */
3532 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3533 {
3534         struct tracer *trace = tr->current_trace;
3535         struct tracer_flags *tracer_flags = trace->flags;
3536         struct tracer_opt *opts = NULL;
3537         int i;
3538
3539         for (i = 0; tracer_flags->opts[i].name; i++) {
3540                 opts = &tracer_flags->opts[i];
3541
3542                 if (strcmp(cmp, opts->name) == 0)
3543                         return __set_tracer_option(tr, trace->flags, opts, neg);
3544         }
3545
3546         return -EINVAL;
3547 }
3548
3549 /* Some tracers require overwrite to stay enabled */
3550 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3551 {
3552         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3553                 return -1;
3554
3555         return 0;
3556 }
3557
3558 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3559 {
3560         /* do nothing if the flag already matches the requested state */
3561         if (!!(tr->trace_flags & mask) == !!enabled)
3562                 return 0;
3563
3564         /* Give the tracer a chance to approve the change */
3565         if (tr->current_trace->flag_changed)
3566                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3567                         return -EINVAL;
3568
3569         if (enabled)
3570                 tr->trace_flags |= mask;
3571         else
3572                 tr->trace_flags &= ~mask;
3573
3574         if (mask == TRACE_ITER_RECORD_CMD)
3575                 trace_event_enable_cmd_record(enabled);
3576
3577         if (mask == TRACE_ITER_EVENT_FORK)
3578                 trace_event_follow_fork(tr, enabled);
3579
3580         if (mask == TRACE_ITER_OVERWRITE) {
3581                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3582 #ifdef CONFIG_TRACER_MAX_TRACE
3583                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3584 #endif
3585         }
3586
3587         if (mask == TRACE_ITER_PRINTK) {
3588                 trace_printk_start_stop_comm(enabled);
3589                 trace_printk_control(enabled);
3590         }
3591
3592         return 0;
3593 }
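/*
 * Illustrative example (file and option names assumed from the flag names
 * above): TRACE_ITER_EVENT_FORK is exposed as the "event-fork" option.
 * When it is set, children forked by tasks listed in set_event_pid are
 * traced as well:
 *
 *	# echo 1234 > set_event_pid
 *	# echo 1 > options/event-fork
 */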
3594
3595 static int trace_set_options(struct trace_array *tr, char *option)
3596 {
3597         char *cmp;
3598         int neg = 0;
3599         int ret = -ENODEV;
3600         int i;
3601         size_t orig_len = strlen(option);
3602
3603         cmp = strstrip(option);
3604
3605         if (strncmp(cmp, "no", 2) == 0) {
3606                 neg = 1;
3607                 cmp += 2;
3608         }
3609
3610         mutex_lock(&trace_types_lock);
3611
3612         for (i = 0; trace_options[i]; i++) {
3613                 if (strcmp(cmp, trace_options[i]) == 0) {
3614                         ret = set_tracer_flag(tr, 1 << i, !neg);
3615                         break;
3616                 }
3617         }
3618
3619         /* If no option could be set, test the specific tracer options */
3620         if (!trace_options[i])
3621                 ret = set_tracer_option(tr, cmp, neg);
3622
3623         mutex_unlock(&trace_types_lock);
3624
3625         /*
3626          * If the first trailing whitespace is replaced with '\0' by strstrip,
3627          * turn it back into a space.
3628          */
3629         if (orig_len > strlen(option))
3630                 option[strlen(option)] = ' ';
3631
3632         return ret;
3633 }
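/*
 * Usage sketch (illustrative): an option is toggled by writing its name,
 * or the name prefixed with "no", to trace_options:
 *
 *	# echo noprint-parent > trace_options
 *	# echo stacktrace > trace_options
 *
 * Names that do not match a core option are tried against the current
 * tracer's private options via set_tracer_option(); anything else fails
 * with -EINVAL.
 */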
3634
3635 static void __init apply_trace_boot_options(void)
3636 {
3637         char *buf = trace_boot_options_buf;
3638         char *option;
3639
3640         while (true) {
3641                 option = strsep(&buf, ",");
3642
3643                 if (!option)
3644                         break;
3645
3646                 if (*option)
3647                         trace_set_options(&global_trace, option);
3648
3649                 /* Put back the comma to allow this to be called again */
3650                 if (buf)
3651                         *(buf - 1) = ',';
3652         }
3653 }
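/*
 * Illustrative example (assuming the trace_options= kernel parameter that
 * fills trace_boot_options_buf): the same option strings can be applied at
 * boot as a comma-separated list, e.g.
 *
 *	trace_options=sym-addr,nostacktrace
 */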
3654
3655 static ssize_t
3656 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3657                         size_t cnt, loff_t *ppos)
3658 {
3659         struct seq_file *m = filp->private_data;
3660         struct trace_array *tr = m->private;
3661         char buf[64];
3662         int ret;
3663
3664         if (cnt >= sizeof(buf))
3665                 return -EINVAL;
3666
3667         if (copy_from_user(&buf, ubuf, cnt))
3668                 return -EFAULT;
3669
3670         buf[cnt] = 0;
3671
3672         ret = trace_set_options(tr, buf);
3673         if (ret < 0)
3674                 return ret;
3675
3676         *ppos += cnt;
3677
3678         return cnt;
3679 }
3680
3681 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3682 {
3683         struct trace_array *tr = inode->i_private;
3684         int ret;
3685
3686         if (tracing_disabled)
3687                 return -ENODEV;
3688
3689         if (trace_array_get(tr) < 0)
3690                 return -ENODEV;
3691
3692         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3693         if (ret < 0)
3694                 trace_array_put(tr);
3695
3696         return ret;
3697 }
3698
3699 static const struct file_operations tracing_iter_fops = {
3700         .open           = tracing_trace_options_open,
3701         .read           = seq_read,
3702         .llseek         = seq_lseek,
3703         .release        = tracing_single_release_tr,
3704         .write          = tracing_trace_options_write,
3705 };
3706
3707 static const char readme_msg[] =
3708         "tracing mini-HOWTO:\n\n"
3709         "# echo 0 > tracing_on : quick way to disable tracing\n"
3710         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3711         " Important files:\n"
3712         "  trace\t\t\t- The static contents of the buffer\n"
3713         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3714         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3715         "  current_tracer\t- function and latency tracers\n"
3716         "  available_tracers\t- list of configured tracers for current_tracer\n"
3717         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3718         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3719         "  trace_clock\t\t- change the clock used to order events\n"
3720         "       local:   Per cpu clock but may not be synced across CPUs\n"
3721         "      global:   Synced across CPUs but slows tracing down.\n"
3722         "     counter:   Not a clock, but just an increment\n"
3723         "      uptime:   Jiffy counter from time of boot\n"
3724         "        perf:   Same clock that perf events use\n"
3725 #ifdef CONFIG_X86_64
3726         "     x86-tsc:   TSC cycle counter\n"
3727 #endif
3728         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3729         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3730         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3731         "\t\t\t  Remove sub-buffer with rmdir\n"
3732         "  trace_options\t\t- Set format or modify how tracing happens\n"
3733         "\t\t\t  Disable an option by prefixing the option name\n"
3734         "\t\t\t  with 'no'\n"
3735         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3736 #ifdef CONFIG_DYNAMIC_FTRACE
3737         "\n  available_filter_functions - list of functions that can be filtered on\n"
3738         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3739         "\t\t\t  functions\n"
3740         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3741         "\t     modules: Can select a group via module\n"
3742         "\t      Format: :mod:<module-name>\n"
3743         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3744         "\t    triggers: a command to perform when function is hit\n"
3745         "\t      Format: <function>:<trigger>[:count]\n"
3746         "\t     trigger: traceon, traceoff\n"
3747         "\t\t      enable_event:<system>:<event>\n"
3748         "\t\t      disable_event:<system>:<event>\n"
3749 #ifdef CONFIG_STACKTRACE
3750         "\t\t      stacktrace\n"
3751 #endif
3752 #ifdef CONFIG_TRACER_SNAPSHOT
3753         "\t\t      snapshot\n"
3754 #endif
3755         "\t\t      dump\n"
3756         "\t\t      cpudump\n"
3757         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3758         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3759         "\t     The first one will disable tracing every time do_fault is hit\n"
3760         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3761         "\t       The first time do_trap is hit and it disables tracing, the\n"
3762         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3763         "\t       the counter will not decrement. It only decrements when the\n"
3764         "\t       trigger did work\n"
3765         "\t     To remove trigger without count:\n"
3766         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3767         "\t     To remove trigger with a count:\n"
3768         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3769         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3770         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3771         "\t    modules: Can select a group via module command :mod:\n"
3772         "\t    Does not accept triggers\n"
3773 #endif /* CONFIG_DYNAMIC_FTRACE */
3774 #ifdef CONFIG_FUNCTION_TRACER
3775         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3776         "\t\t    (function)\n"
3777 #endif
3778 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3779         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3780         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3781         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3782 #endif
3783 #ifdef CONFIG_TRACER_SNAPSHOT
3784         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3785         "\t\t\t  snapshot buffer. Read the contents for more\n"
3786         "\t\t\t  information\n"
3787 #endif
3788 #ifdef CONFIG_STACK_TRACER
3789         "  stack_trace\t\t- Shows the max stack trace when active\n"
3790         "  stack_max_size\t- Shows current max stack size that was traced\n"
3791         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3792         "\t\t\t  new trace)\n"
3793 #ifdef CONFIG_DYNAMIC_FTRACE
3794         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3795         "\t\t\t  traces\n"
3796 #endif
3797 #endif /* CONFIG_STACK_TRACER */
3798         "  events/\t\t- Directory containing all trace event subsystems:\n"
3799         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3800         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3801         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3802         "\t\t\t  events\n"
3803         "      filter\t\t- If set, only events passing filter are traced\n"
3804         "  events/<system>/<event>/\t- Directory containing control files for\n"
3805         "\t\t\t  <event>:\n"
3806         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3807         "      filter\t\t- If set, only events passing filter are traced\n"
3808         "      trigger\t\t- If set, a command to perform when event is hit\n"
3809         "\t    Format: <trigger>[:count][if <filter>]\n"
3810         "\t   trigger: traceon, traceoff\n"
3811         "\t            enable_event:<system>:<event>\n"
3812         "\t            disable_event:<system>:<event>\n"
3813 #ifdef CONFIG_HIST_TRIGGERS
3814         "\t            enable_hist:<system>:<event>\n"
3815         "\t            disable_hist:<system>:<event>\n"
3816 #endif
3817 #ifdef CONFIG_STACKTRACE
3818         "\t\t    stacktrace\n"
3819 #endif
3820 #ifdef CONFIG_TRACER_SNAPSHOT
3821         "\t\t    snapshot\n"
3822 #endif
3823 #ifdef CONFIG_HIST_TRIGGERS
3824         "\t\t    hist (see below)\n"
3825 #endif
3826         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3827         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3828         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3829         "\t                  events/block/block_unplug/trigger\n"
3830         "\t   The first disables tracing every time block_unplug is hit.\n"
3831         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3832         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3833         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3834         "\t   Like function triggers, the counter is only decremented if it\n"
3835         "\t    enabled or disabled tracing.\n"
3836         "\t   To remove a trigger without a count:\n"
3837         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3838         "\t   To remove a trigger with a count:\n"
3839         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3840         "\t   Filters can be ignored when removing a trigger.\n"
3841 #ifdef CONFIG_HIST_TRIGGERS
3842         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3843         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3844         "\t            [:values=<field1[,field2,...]>]\n"
3845         "\t            [:sort=<field1[,field2,...]>]\n"
3846         "\t            [:size=#entries]\n"
3847         "\t            [:pause][:continue][:clear]\n"
3848         "\t            [:name=histname1]\n"
3849         "\t            [if <filter>]\n\n"
3850         "\t    When a matching event is hit, an entry is added to a hash\n"
3851         "\t    table using the key(s) and value(s) named, and the value of a\n"
3852         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3853         "\t    correspond to fields in the event's format description.  Keys\n"
3854         "\t    can be any field, or the special string 'stacktrace'.\n"
3855         "\t    Compound keys consisting of up to two fields can be specified\n"
3856         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3857         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3858         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3859         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3860         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3861         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3862         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3863         "\t    its histogram data will be shared with other triggers of the\n"
3864         "\t    same name, and trigger hits will update this common data.\n\n"
3865         "\t    Reading the 'hist' file for the event will dump the hash\n"
3866         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3867         "\t    triggers attached to an event, there will be a table for each\n"
3868         "\t    trigger in the output.  The table displayed for a named\n"
3869         "\t    trigger will be the same as any other instance having the\n"
3870         "\t    same name.  The default format used to display a given field\n"
3871         "\t    can be modified by appending any of the following modifiers\n"
3872         "\t    to the field name, as applicable:\n\n"
3873         "\t            .hex        display a number as a hex value\n"
3874         "\t            .sym        display an address as a symbol\n"
3875         "\t            .sym-offset display an address as a symbol and offset\n"
3876         "\t            .execname   display a common_pid as a program name\n"
3877         "\t            .syscall    display a syscall id as a syscall name\n\n"
3878         "\t            .log2       display log2 value rather than raw number\n\n"
3879         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3880         "\t    trigger or to start a hist trigger but not log any events\n"
3881         "\t    until told to do so.  'continue' can be used to start or\n"
3882         "\t    restart a paused hist trigger.\n\n"
3883         "\t    The 'clear' parameter will clear the contents of a running\n"
3884         "\t    hist trigger and leave its current paused/active state\n"
3885         "\t    unchanged.\n\n"
3886         "\t    The enable_hist and disable_hist triggers can be used to\n"
3887         "\t    have one event conditionally start and stop another event's\n"
3888         "\t    already-attached hist trigger.  The syntax is analogous to\n"
3889         "\t    the enable_event and disable_event triggers.\n"
3890 #endif
3891 ;
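/*
 * Illustrative hist trigger example (event and field names assumed, and
 * CONFIG_HIST_TRIGGERS required) following the format described in the
 * README text above:
 *
 *	# echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req' > \
 *		events/kmem/kmalloc/trigger
 *	# cat events/kmem/kmalloc/hist
 */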
3892
3893 static ssize_t
3894 tracing_readme_read(struct file *filp, char __user *ubuf,
3895                        size_t cnt, loff_t *ppos)
3896 {
3897         return simple_read_from_buffer(ubuf, cnt, ppos,
3898                                         readme_msg, strlen(readme_msg));
3899 }
3900
3901 static const struct file_operations tracing_readme_fops = {
3902         .open           = tracing_open_generic,
3903         .read           = tracing_readme_read,
3904         .llseek         = generic_file_llseek,
3905 };
3906
3907 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3908 {
3909         unsigned int *ptr = v;
3910
3911         if (*pos || m->count)
3912                 ptr++;
3913
3914         (*pos)++;
3915
3916         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3917              ptr++) {
3918                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3919                         continue;
3920
3921                 return ptr;
3922         }
3923
3924         return NULL;
3925 }
3926
3927 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3928 {
3929         void *v;
3930         loff_t l = 0;
3931
3932         preempt_disable();
3933         arch_spin_lock(&trace_cmdline_lock);
3934
3935         v = &savedcmd->map_cmdline_to_pid[0];
3936         while (l <= *pos) {
3937                 v = saved_cmdlines_next(m, v, &l);
3938                 if (!v)
3939                         return NULL;
3940         }
3941
3942         return v;
3943 }
3944
3945 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3946 {
3947         arch_spin_unlock(&trace_cmdline_lock);
3948         preempt_enable();
3949 }
3950
3951 static int saved_cmdlines_show(struct seq_file *m, void *v)
3952 {
3953         char buf[TASK_COMM_LEN];
3954         unsigned int *pid = v;
3955
3956         __trace_find_cmdline(*pid, buf);
3957         seq_printf(m, "%d %s\n", *pid, buf);
3958         return 0;
3959 }
3960
3961 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3962         .start          = saved_cmdlines_start,
3963         .next           = saved_cmdlines_next,
3964         .stop           = saved_cmdlines_stop,
3965         .show           = saved_cmdlines_show,
3966 };
3967
3968 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3969 {
3970         if (tracing_disabled)
3971                 return -ENODEV;
3972
3973         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3974 }
3975
3976 static const struct file_operations tracing_saved_cmdlines_fops = {
3977         .open           = tracing_saved_cmdlines_open,
3978         .read           = seq_read,
3979         .llseek         = seq_lseek,
3980         .release        = seq_release,
3981 };
3982
3983 static ssize_t
3984 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3985                                  size_t cnt, loff_t *ppos)
3986 {
3987         char buf[64];
3988         int r;
3989
3990         arch_spin_lock(&trace_cmdline_lock);
3991         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3992         arch_spin_unlock(&trace_cmdline_lock);
3993
3994         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3995 }
3996
3997 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3998 {
3999         kfree(s->saved_cmdlines);
4000         kfree(s->map_cmdline_to_pid);
4001         kfree(s);
4002 }
4003
4004 static int tracing_resize_saved_cmdlines(unsigned int val)
4005 {
4006         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4007
4008         s = kmalloc(sizeof(*s), GFP_KERNEL);
4009         if (!s)
4010                 return -ENOMEM;
4011
4012         if (allocate_cmdlines_buffer(val, s) < 0) {
4013                 kfree(s);
4014                 return -ENOMEM;
4015         }
4016
4017         arch_spin_lock(&trace_cmdline_lock);
4018         savedcmd_temp = savedcmd;
4019         savedcmd = s;
4020         arch_spin_unlock(&trace_cmdline_lock);
4021         free_saved_cmdlines_buffer(savedcmd_temp);
4022
4023         return 0;
4024 }
4025
4026 static ssize_t
4027 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4028                                   size_t cnt, loff_t *ppos)
4029 {
4030         unsigned long val;
4031         int ret;
4032
4033         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4034         if (ret)
4035                 return ret;
4036
4037         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4038         if (!val || val > PID_MAX_DEFAULT)
4039                 return -EINVAL;
4040
4041         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4042         if (ret < 0)
4043                 return ret;
4044
4045         *ppos += cnt;
4046
4047         return cnt;
4048 }
4049
4050 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4051         .open           = tracing_open_generic,
4052         .read           = tracing_saved_cmdlines_size_read,
4053         .write          = tracing_saved_cmdlines_size_write,
4054 };
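/*
 * Usage sketch (illustrative): saved_cmdlines prints one "<pid> <comm>"
 * pair per recorded entry, and saved_cmdlines_size resizes the backing
 * cache (1 to PID_MAX_DEFAULT entries):
 *
 *	# echo 1024 > saved_cmdlines_size
 *	# cat saved_cmdlines
 *	1234 bash
 *
 * The pid and comm shown are hypothetical.
 */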
4055
4056 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4057 static union trace_enum_map_item *
4058 update_enum_map(union trace_enum_map_item *ptr)
4059 {
4060         if (!ptr->map.enum_string) {
4061                 if (ptr->tail.next) {
4062                         ptr = ptr->tail.next;
4063                         /* Set ptr to the next real item (skip head) */
4064                         ptr++;
4065                 } else
4066                         return NULL;
4067         }
4068         return ptr;
4069 }
4070
4071 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4072 {
4073         union trace_enum_map_item *ptr = v;
4074
4075         /*
4076          * Paranoid! If ptr points to end, we don't want to increment past it.
4077          * This really should never happen.
4078          */
4079         ptr = update_enum_map(ptr);
4080         if (WARN_ON_ONCE(!ptr))
4081                 return NULL;
4082
4083         ptr++;
4084
4085         (*pos)++;
4086
4087         ptr = update_enum_map(ptr);
4088
4089         return ptr;
4090 }
4091
4092 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4093 {
4094         union trace_enum_map_item *v;
4095         loff_t l = 0;
4096
4097         mutex_lock(&trace_enum_mutex);
4098
4099         v = trace_enum_maps;
4100         if (v)
4101                 v++;
4102
4103         while (v && l < *pos) {
4104                 v = enum_map_next(m, v, &l);
4105         }
4106
4107         return v;
4108 }
4109
4110 static void enum_map_stop(struct seq_file *m, void *v)
4111 {
4112         mutex_unlock(&trace_enum_mutex);
4113 }
4114
4115 static int enum_map_show(struct seq_file *m, void *v)
4116 {
4117         union trace_enum_map_item *ptr = v;
4118
4119         seq_printf(m, "%s %ld (%s)\n",
4120                    ptr->map.enum_string, ptr->map.enum_value,
4121                    ptr->map.system);
4122
4123         return 0;
4124 }
4125
4126 static const struct seq_operations tracing_enum_map_seq_ops = {
4127         .start          = enum_map_start,
4128         .next           = enum_map_next,
4129         .stop           = enum_map_stop,
4130         .show           = enum_map_show,
4131 };
4132
4133 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4134 {
4135         if (tracing_disabled)
4136                 return -ENODEV;
4137
4138         return seq_open(filp, &tracing_enum_map_seq_ops);
4139 }
4140
4141 static const struct file_operations tracing_enum_map_fops = {
4142         .open           = tracing_enum_map_open,
4143         .read           = seq_read,
4144         .llseek         = seq_lseek,
4145         .release        = seq_release,
4146 };
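/*
 * Illustrative example (sample entry assumed; the real contents depend on
 * which TRACE_DEFINE_ENUM() users are built in): each line of enum_map is
 * printed as "<name> <value> (<system>)", e.g.
 *
 *	# cat /sys/kernel/tracing/enum_map
 *	HI_SOFTIRQ 0 (irq)
 */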
4147
4148 static inline union trace_enum_map_item *
4149 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4150 {
4151         /* Return tail of array given the head */
4152         return ptr + ptr->head.length + 1;
4153 }
4154
4155 static void
4156 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4157                            int len)
4158 {
4159         struct trace_enum_map **stop;
4160         struct trace_enum_map **map;
4161         union trace_enum_map_item *map_array;
4162         union trace_enum_map_item *ptr;
4163
4164         stop = start + len;
4165
4166         /*
4167          * The trace_enum_maps contains the map plus a head and tail item,
4168          * where the head holds the module and length of array, and the
4169          * tail holds a pointer to the next list.
4170          */
4171         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4172         if (!map_array) {
4173                 pr_warn("Unable to allocate trace enum mapping\n");
4174                 return;
4175         }
4176
4177         mutex_lock(&trace_enum_mutex);
4178
4179         if (!trace_enum_maps)
4180                 trace_enum_maps = map_array;
4181         else {
4182                 ptr = trace_enum_maps;
4183                 for (;;) {
4184                         ptr = trace_enum_jmp_to_tail(ptr);
4185                         if (!ptr->tail.next)
4186                                 break;
4187                         ptr = ptr->tail.next;
4188
4189                 }
4190                 ptr->tail.next = map_array;
4191         }
4192         map_array->head.mod = mod;
4193         map_array->head.length = len;
4194         map_array++;
4195
4196         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4197                 map_array->map = **map;
4198                 map_array++;
4199         }
4200         memset(map_array, 0, sizeof(*map_array));
4201
4202         mutex_unlock(&trace_enum_mutex);
4203 }
4204
4205 static void trace_create_enum_file(struct dentry *d_tracer)
4206 {
4207         trace_create_file("enum_map", 0444, d_tracer,
4208                           NULL, &tracing_enum_map_fops);
4209 }
4210
4211 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4212 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4213 static inline void trace_insert_enum_map_file(struct module *mod,
4214                               struct trace_enum_map **start, int len) { }
4215 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4216
4217 static void trace_insert_enum_map(struct module *mod,
4218                                   struct trace_enum_map **start, int len)
4219 {
4220         struct trace_enum_map **map;
4221
4222         if (len <= 0)
4223                 return;
4224
4225         map = start;
4226
4227         trace_event_enum_update(map, len);
4228
4229         trace_insert_enum_map_file(mod, start, len);
4230 }
4231
4232 static ssize_t
4233 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4234                        size_t cnt, loff_t *ppos)
4235 {
4236         struct trace_array *tr = filp->private_data;
4237         char buf[MAX_TRACER_SIZE+2];
4238         int r;
4239
4240         mutex_lock(&trace_types_lock);
4241         r = sprintf(buf, "%s\n", tr->current_trace->name);
4242         mutex_unlock(&trace_types_lock);
4243
4244         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4245 }
4246
4247 int tracer_init(struct tracer *t, struct trace_array *tr)
4248 {
4249         tracing_reset_online_cpus(&tr->trace_buffer);
4250         return t->init(tr);
4251 }
4252
4253 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4254 {
4255         int cpu;
4256
4257         for_each_tracing_cpu(cpu)
4258                 per_cpu_ptr(buf->data, cpu)->entries = val;
4259 }
4260
4261 #ifdef CONFIG_TRACER_MAX_TRACE
4262 /* resize @trace_buf's per-cpu buffers to the size of @size_buf's entries */
4263 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4264                                         struct trace_buffer *size_buf, int cpu_id)
4265 {
4266         int cpu, ret = 0;
4267
4268         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4269                 for_each_tracing_cpu(cpu) {
4270                         ret = ring_buffer_resize(trace_buf->buffer,
4271                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4272                         if (ret < 0)
4273                                 break;
4274                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4275                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4276                 }
4277         } else {
4278                 ret = ring_buffer_resize(trace_buf->buffer,
4279                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4280                 if (ret == 0)
4281                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4282                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4283         }
4284
4285         return ret;
4286 }
4287 #endif /* CONFIG_TRACER_MAX_TRACE */
4288
4289 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4290                                         unsigned long size, int cpu)
4291 {
4292         int ret;
4293
4294         /*
4295          * If the kernel or the user changes the size of the ring buffer,
4296          * we use the size that was given, and we can forget about
4297          * expanding it later.
4298          */
4299         ring_buffer_expanded = true;
4300
4301         /* May be called before buffers are initialized */
4302         if (!tr->trace_buffer.buffer)
4303                 return 0;
4304
4305         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4306         if (ret < 0)
4307                 return ret;
4308
4309 #ifdef CONFIG_TRACER_MAX_TRACE
4310         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4311             !tr->current_trace->use_max_tr)
4312                 goto out;
4313
4314         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4315         if (ret < 0) {
4316                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4317                                                      &tr->trace_buffer, cpu);
4318                 if (r < 0) {
4319                         /*
4320                          * AARGH! We are left with different
4321                          * size max buffer!!!!
4322                          * The max buffer is our "snapshot" buffer.
4323                          * When a tracer needs a snapshot (one of the
4324                          * latency tracers), it swaps the max buffer
4325                          * with the saved snapshot. We succeeded in updating
4326                          * the size of the main buffer, but failed to
4327                          * update the size of the max buffer. But when we tried
4328                          * to reset the main buffer to the original size, we
4329                          * failed there too. This is very unlikely to
4330                          * happen, but if it does, warn and kill all
4331                          * tracing.
4332                          */
4333                         WARN_ON(1);
4334                         tracing_disabled = 1;
4335                 }
4336                 return ret;
4337         }
4338
4339         if (cpu == RING_BUFFER_ALL_CPUS)
4340                 set_buffer_entries(&tr->max_buffer, size);
4341         else
4342                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4343
4344  out:
4345 #endif /* CONFIG_TRACER_MAX_TRACE */
4346
4347         if (cpu == RING_BUFFER_ALL_CPUS)
4348                 set_buffer_entries(&tr->trace_buffer, size);
4349         else
4350                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4351
4352         return ret;
4353 }
4354
4355 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4356                                           unsigned long size, int cpu_id)
4357 {
4358         int ret = size;
4359
4360         mutex_lock(&trace_types_lock);
4361
4362         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4363                 /* make sure this cpu is enabled in the mask */
4364                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4365                         ret = -EINVAL;
4366                         goto out;
4367                 }
4368         }
4369
4370         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4371         if (ret < 0)
4372                 ret = -ENOMEM;
4373
4374 out:
4375         mutex_unlock(&trace_types_lock);
4376
4377         return ret;
4378 }
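/*
 * Usage sketch (illustrative, file names assumed): this resize path is
 * typically exercised through the buffer_size_kb files, where the
 * top-level file resizes every CPU and the per_cpu variant a single one:
 *
 *	# echo 4096 > buffer_size_kb
 *	# echo 1024 > per_cpu/cpu0/buffer_size_kb
 */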
4379
4380
4381 /**
4382  * tracing_update_buffers - used by tracing facility to expand ring buffers
4383  *
4384  * To save memory when tracing is never used on a system that has it
4385  * configured in, the ring buffers are set to a minimum size. But once
4386  * a user starts to use the tracing facility, they need to grow
4387  * to their default size.
4388  *
4389  * This function is to be called when a tracer is about to be used.
4390  */
4391 int tracing_update_buffers(void)
4392 {
4393         int ret = 0;
4394
4395         mutex_lock(&trace_types_lock);
4396         if (!ring_buffer_expanded)
4397                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4398                                                 RING_BUFFER_ALL_CPUS);
4399         mutex_unlock(&trace_types_lock);
4400
4401         return ret;
4402 }
4403
4404 struct trace_option_dentry;
4405
4406 static void
4407 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4408
4409 /*
4410  * Used to clear out the tracer before deletion of an instance.
4411  * Must have trace_types_lock held.
4412  */
4413 static void tracing_set_nop(struct trace_array *tr)
4414 {
4415         if (tr->current_trace == &nop_trace)
4416                 return;
4417
4418         tr->current_trace->enabled--;
4419
4420         if (tr->current_trace->reset)
4421                 tr->current_trace->reset(tr);
4422
4423         tr->current_trace = &nop_trace;
4424 }
4425
4426 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4427 {
4428         /* Only enable if the directory has been created already. */
4429         if (!tr->dir)
4430                 return;
4431
4432         create_trace_option_files(tr, t);
4433 }
4434
4435 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4436 {
4437         struct tracer *t;
4438 #ifdef CONFIG_TRACER_MAX_TRACE
4439         bool had_max_tr;
4440 #endif
4441         int ret = 0;
4442
4443         mutex_lock(&trace_types_lock);
4444
4445         if (!ring_buffer_expanded) {
4446                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4447                                                 RING_BUFFER_ALL_CPUS);
4448                 if (ret < 0)
4449                         goto out;
4450                 ret = 0;
4451         }
4452
4453         for (t = trace_types; t; t = t->next) {
4454                 if (strcmp(t->name, buf) == 0)
4455                         break;
4456         }
4457         if (!t) {
4458                 ret = -EINVAL;
4459                 goto out;
4460         }
4461         if (t == tr->current_trace)
4462                 goto out;
4463
4464         /* Some tracers are only allowed for the top level buffer */
4465         if (!trace_ok_for_array(t, tr)) {
4466                 ret = -EINVAL;
4467                 goto out;
4468         }
4469
4470         /* If trace pipe files are being read, we can't change the tracer */
4471         if (tr->current_trace->ref) {
4472                 ret = -EBUSY;
4473                 goto out;
4474         }
4475
4476         trace_branch_disable();
4477
4478         tr->current_trace->enabled--;
4479
4480         if (tr->current_trace->reset)
4481                 tr->current_trace->reset(tr);
4482
4483         /* Current trace needs to be nop_trace before synchronize_sched */
4484         tr->current_trace = &nop_trace;
4485
4486 #ifdef CONFIG_TRACER_MAX_TRACE
4487         had_max_tr = tr->allocated_snapshot;
4488
4489         if (had_max_tr && !t->use_max_tr) {
4490                 /*
4491                  * We need to make sure that the update_max_tr sees that
4492                  * current_trace changed to nop_trace to keep it from
4493                  * swapping the buffers after we resize it.
4494                  * The update_max_tr is called with interrupts disabled,
4495                  * so a synchronize_sched() is sufficient.
4496                  */
4497                 synchronize_sched();
4498                 free_snapshot(tr);
4499         }
4500 #endif
4501
4502 #ifdef CONFIG_TRACER_MAX_TRACE
4503         if (t->use_max_tr && !had_max_tr) {
4504                 ret = alloc_snapshot(tr);
4505                 if (ret < 0)
4506                         goto out;
4507         }
4508 #endif
4509
4510         if (t->init) {
4511                 ret = tracer_init(t, tr);
4512                 if (ret)
4513                         goto out;
4514         }
4515
4516         tr->current_trace = t;
4517         tr->current_trace->enabled++;
4518         trace_branch_enable(tr);
4519  out:
4520         mutex_unlock(&trace_types_lock);
4521
4522         return ret;
4523 }
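/*
 * Usage sketch (illustrative): the current tracer is switched by writing a
 * name from available_tracers to current_tracer, and reset with "nop":
 *
 *	# echo function_graph > current_tracer
 *	# echo nop > current_tracer
 *
 * The write fails with -EBUSY while trace_pipe readers hold a reference,
 * and with -EINVAL for tracers not allowed in this trace array.
 */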
4524
4525 static ssize_t
4526 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4527                         size_t cnt, loff_t *ppos)
4528 {
4529         struct trace_array *tr = filp->private_data;
4530         char buf[MAX_TRACER_SIZE+1];
4531         int i;
4532         size_t ret;
4533         int err;
4534
4535         ret = cnt;
4536
4537         if (cnt > MAX_TRACER_SIZE)
4538                 cnt = MAX_TRACER_SIZE;
4539
4540         if (copy_from_user(&buf, ubuf, cnt))
4541                 return -EFAULT;
4542
4543         buf[cnt] = 0;
4544
4545         /* strip trailing whitespace */
4546         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4547                 buf[i] = 0;
4548
4549         err = tracing_set_tracer(tr, buf);
4550         if (err)
4551                 return err;
4552
4553         *ppos += ret;
4554
4555         return ret;
4556 }
4557
4558 static ssize_t
4559 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4560                    size_t cnt, loff_t *ppos)
4561 {
4562         char buf[64];
4563         int r;
4564
4565         r = snprintf(buf, sizeof(buf), "%ld\n",
4566                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4567         if (r > sizeof(buf))
4568                 r = sizeof(buf);
4569         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4570 }
4571
4572 static ssize_t
4573 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4574                     size_t cnt, loff_t *ppos)
4575 {
4576         unsigned long val;
4577         int ret;
4578
4579         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4580         if (ret)
4581                 return ret;
4582
4583         *ptr = val * 1000;
4584
4585         return cnt;
4586 }
4587
4588 static ssize_t
4589 tracing_thresh_read(struct file *filp, char __user *ubuf,
4590                     size_t cnt, loff_t *ppos)
4591 {
4592         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4593 }
4594
4595 static ssize_t
4596 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4597                      size_t cnt, loff_t *ppos)
4598 {
4599         struct trace_array *tr = filp->private_data;
4600         int ret;
4601
4602         mutex_lock(&trace_types_lock);
4603         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4604         if (ret < 0)
4605                 goto out;
4606
4607         if (tr->current_trace->update_thresh) {
4608                 ret = tr->current_trace->update_thresh(tr);
4609                 if (ret < 0)
4610                         goto out;
4611         }
4612
4613         ret = cnt;
4614 out:
4615         mutex_unlock(&trace_types_lock);
4616
4617         return ret;
4618 }
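/*
 * Usage sketch (illustrative): tracing_thresh is read and written in
 * microseconds (tracing_nsecs_write() stores nanoseconds), so
 *
 *	# echo 100 > tracing_thresh
 *
 * sets a 100 usec threshold for tracers that honor it.
 */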
4619
4620 #ifdef CONFIG_TRACER_MAX_TRACE
4621
4622 static ssize_t
4623 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4624                      size_t cnt, loff_t *ppos)
4625 {
4626         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4627 }
4628
4629 static ssize_t
4630 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4631                       size_t cnt, loff_t *ppos)
4632 {
4633         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4634 }
4635
4636 #endif
4637
4638 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4639 {
4640         struct trace_array *tr = inode->i_private;
4641         struct trace_iterator *iter;
4642         int ret = 0;
4643
4644         if (tracing_disabled)
4645                 return -ENODEV;
4646
4647         if (trace_array_get(tr) < 0)
4648                 return -ENODEV;
4649
4650         mutex_lock(&trace_types_lock);
4651
4652         /* create a buffer to store the information to pass to userspace */
4653         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4654         if (!iter) {
4655                 ret = -ENOMEM;
4656                 __trace_array_put(tr);
4657                 goto out;
4658         }
4659
4660         trace_seq_init(&iter->seq);
4661         iter->trace = tr->current_trace;
4662
4663         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4664                 ret = -ENOMEM;
4665                 goto fail;
4666         }
4667
4668         /* trace pipe does not show start of buffer */
4669         cpumask_setall(iter->started);
4670
4671         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4672                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4673
4674         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4675         if (trace_clocks[tr->clock_id].in_ns)
4676                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4677
4678         iter->tr = tr;
4679         iter->trace_buffer = &tr->trace_buffer;
4680         iter->cpu_file = tracing_get_cpu(inode);
4681         mutex_init(&iter->mutex);
4682         filp->private_data = iter;
4683
4684         if (iter->trace->pipe_open)
4685                 iter->trace->pipe_open(iter);
4686
4687         nonseekable_open(inode, filp);
4688
4689         tr->current_trace->ref++;
4690 out:
4691         mutex_unlock(&trace_types_lock);
4692         return ret;
4693
4694 fail:
4695         kfree(iter->trace);
4696         kfree(iter);
4697         __trace_array_put(tr);
4698         mutex_unlock(&trace_types_lock);
4699         return ret;
4700 }
4701
4702 static int tracing_release_pipe(struct inode *inode, struct file *file)
4703 {
4704         struct trace_iterator *iter = file->private_data;
4705         struct trace_array *tr = inode->i_private;
4706
4707         mutex_lock(&trace_types_lock);
4708
4709         tr->current_trace->ref--;
4710
4711         if (iter->trace->pipe_close)
4712                 iter->trace->pipe_close(iter);
4713
4714         mutex_unlock(&trace_types_lock);
4715
4716         free_cpumask_var(iter->started);
4717         mutex_destroy(&iter->mutex);
4718         kfree(iter);
4719
4720         trace_array_put(tr);
4721
4722         return 0;
4723 }
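/*
 * Usage sketch (illustrative): trace_pipe is a consuming, blocking reader,
 * so it is usually left running while events are generated:
 *
 *	# cat trace_pipe > /tmp/trace.txt &
 *	# echo 1 > tracing_on
 *
 * The output path is arbitrary; closing the reader drops the tracer
 * reference taken in tracing_open_pipe().
 */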
4724
4725 static unsigned int
4726 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4727 {
4728         struct trace_array *tr = iter->tr;
4729
4730         /* Iterators are static, they should be filled or empty */
4731         if (trace_buffer_iter(iter, iter->cpu_file))
4732                 return POLLIN | POLLRDNORM;
4733
4734         if (tr->trace_flags & TRACE_ITER_BLOCK)
4735                 /*
4736                  * Always select as readable when in blocking mode
4737                  */
4738                 return POLLIN | POLLRDNORM;
4739         else
4740                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4741                                              filp, poll_table);
4742 }
4743
4744 static unsigned int
4745 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4746 {
4747         struct trace_iterator *iter = filp->private_data;
4748
4749         return trace_poll(iter, filp, poll_table);
4750 }
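
/*
 * Illustrative userspace sketch (not part of trace.c): waiting for trace data
 * with poll(2), which ends up in tracing_poll_pipe() above.  POLLIN is
 * reported once the ring buffer has data (or unconditionally when the
 * TRACE_ITER_BLOCK option is set).  The tracefs mount point used below is an
 * assumption; it is commonly /sys/kernel/tracing or /sys/kernel/debug/tracing.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct pollfd pfd;
        char buf[4096];
        ssize_t n;

        pfd.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        if (pfd.fd < 0)
                return 1;
        pfd.events = POLLIN;

        /* Sleep until the ring buffer has something for this reader. */
        if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
                n = read(pfd.fd, buf, sizeof(buf));
                if (n > 0)
                        fwrite(buf, 1, n, stdout);
        }
        close(pfd.fd);
        return 0;
}
#endif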
4751
4752 /* Must be called with iter->mutex held. */
4753 static int tracing_wait_pipe(struct file *filp)
4754 {
4755         struct trace_iterator *iter = filp->private_data;
4756         int ret;
4757
4758         while (trace_empty(iter)) {
4759
4760                 if ((filp->f_flags & O_NONBLOCK)) {
4761                         return -EAGAIN;
4762                 }
4763
4764                 /*
4765                  * Block until we have read something and tracing has been
4766                  * disabled. If tracing is disabled but nothing has been
4767                  * read yet, keep blocking; this allows a user to cat this
4768                  * file and then enable tracing. Once something has been
4769                  * read, return EOF the next time tracing is disabled.
4770                  *
4771                  * iter->pos will be 0 if we haven't read anything.
4772                  */
4773                 if (!tracing_is_on() && iter->pos)
4774                         break;
4775
4776                 mutex_unlock(&iter->mutex);
4777
4778                 ret = wait_on_pipe(iter, false);
4779
4780                 mutex_lock(&iter->mutex);
4781
4782                 if (ret)
4783                         return ret;
4784         }
4785
4786         return 1;
4787 }
4788
4789 /*
4790  * Consumer reader.
4791  */
4792 static ssize_t
4793 tracing_read_pipe(struct file *filp, char __user *ubuf,
4794                   size_t cnt, loff_t *ppos)
4795 {
4796         struct trace_iterator *iter = filp->private_data;
4797         ssize_t sret;
4798
4799         /* return any leftover data */
4800         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4801         if (sret != -EBUSY)
4802                 return sret;
4803
4804         trace_seq_init(&iter->seq);
4805
4806         /*
4807          * Avoid more than one consumer on a single file descriptor.
4808          * This is only a matter of trace coherency; the ring buffer itself
4809          * is protected.
4810          */
4811         mutex_lock(&iter->mutex);
4812         if (iter->trace->read) {
4813                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4814                 if (sret)
4815                         goto out;
4816         }
4817
4818 waitagain:
4819         sret = tracing_wait_pipe(filp);
4820         if (sret <= 0)
4821                 goto out;
4822
4823         /* stop when tracing is finished */
4824         if (trace_empty(iter)) {
4825                 sret = 0;
4826                 goto out;
4827         }
4828
4829         if (cnt >= PAGE_SIZE)
4830                 cnt = PAGE_SIZE - 1;
4831
4832         /* reset all but tr, trace, and overruns */
4833         memset(&iter->seq, 0,
4834                sizeof(struct trace_iterator) -
4835                offsetof(struct trace_iterator, seq));
4836         cpumask_clear(iter->started);
4837         iter->pos = -1;
4838
4839         trace_event_read_lock();
4840         trace_access_lock(iter->cpu_file);
4841         while (trace_find_next_entry_inc(iter) != NULL) {
4842                 enum print_line_t ret;
4843                 int save_len = iter->seq.seq.len;
4844
4845                 ret = print_trace_line(iter);
4846                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4847                         /* don't print partial lines */
4848                         iter->seq.seq.len = save_len;
4849                         break;
4850                 }
4851                 if (ret != TRACE_TYPE_NO_CONSUME)
4852                         trace_consume(iter);
4853
4854                 if (trace_seq_used(&iter->seq) >= cnt)
4855                         break;
4856
4857                 /*
4858                  * If the full flag is set, we reached the trace_seq buffer
4859                  * size and should have left via the partial-line check above;
4860                  * one of the trace_seq_* functions is not being used properly.
4861                  */
4862                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4863                           iter->ent->type);
4864         }
4865         trace_access_unlock(iter->cpu_file);
4866         trace_event_read_unlock();
4867
4868         /* Now copy what we have to the user */
4869         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4870         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4871                 trace_seq_init(&iter->seq);
4872
4873         /*
4874          * If there was nothing to send to user, in spite of consuming trace
4875          * entries, go back to wait for more entries.
4876          */
4877         if (sret == -EBUSY)
4878                 goto waitagain;
4879
4880 out:
4881         mutex_unlock(&iter->mutex);
4882
4883         return sret;
4884 }
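
/*
 * Illustrative userspace sketch (not part of trace.c): a consuming reader of
 * trace_pipe, the path serviced by tracing_read_pipe().  With O_NONBLOCK an
 * empty buffer yields -1/EAGAIN, mirroring the check in tracing_wait_pipe();
 * without it the read blocks until events arrive.  The tracefs path is an
 * assumption about where tracefs is mounted.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
        if (fd < 0)
                return 1;

        for (;;) {
                n = read(fd, buf, sizeof(buf));
                if (n > 0)
                        fwrite(buf, 1, n, stdout);      /* events are consumed */
                else if (n < 0 && errno == EAGAIN)
                        usleep(100000);                 /* empty; try again later */
                else
                        break;                          /* error or EOF */
        }
        close(fd);
        return 0;
}
#endif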
4885
4886 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4887                                      unsigned int idx)
4888 {
4889         __free_page(spd->pages[idx]);
4890 }
4891
4892 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4893         .can_merge              = 0,
4894         .confirm                = generic_pipe_buf_confirm,
4895         .release                = generic_pipe_buf_release,
4896         .steal                  = generic_pipe_buf_steal,
4897         .get                    = generic_pipe_buf_get,
4898 };
4899
4900 static size_t
4901 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4902 {
4903         size_t count;
4904         int save_len;
4905         int ret;
4906
4907         /* Seq buffer is page-sized, exactly what we need. */
4908         for (;;) {
4909                 save_len = iter->seq.seq.len;
4910                 ret = print_trace_line(iter);
4911
4912                 if (trace_seq_has_overflowed(&iter->seq)) {
4913                         iter->seq.seq.len = save_len;
4914                         break;
4915                 }
4916
4917                 /*
4918                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
4919                  * should only be returned if iter->seq overflowed. But
4920                  * check it anyway to be safe.
4921                  */
4922                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4923                         iter->seq.seq.len = save_len;
4924                         break;
4925                 }
4926
4927                 count = trace_seq_used(&iter->seq) - save_len;
4928                 if (rem < count) {
4929                         rem = 0;
4930                         iter->seq.seq.len = save_len;
4931                         break;
4932                 }
4933
4934                 if (ret != TRACE_TYPE_NO_CONSUME)
4935                         trace_consume(iter);
4936                 rem -= count;
4937                 if (!trace_find_next_entry_inc(iter))   {
4938                         rem = 0;
4939                         iter->ent = NULL;
4940                         break;
4941                 }
4942         }
4943
4944         return rem;
4945 }
4946
4947 static ssize_t tracing_splice_read_pipe(struct file *filp,
4948                                         loff_t *ppos,
4949                                         struct pipe_inode_info *pipe,
4950                                         size_t len,
4951                                         unsigned int flags)
4952 {
4953         struct page *pages_def[PIPE_DEF_BUFFERS];
4954         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4955         struct trace_iterator *iter = filp->private_data;
4956         struct splice_pipe_desc spd = {
4957                 .pages          = pages_def,
4958                 .partial        = partial_def,
4959                 .nr_pages       = 0, /* This gets updated below. */
4960                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4961                 .flags          = flags,
4962                 .ops            = &tracing_pipe_buf_ops,
4963                 .spd_release    = tracing_spd_release_pipe,
4964         };
4965         ssize_t ret;
4966         size_t rem;
4967         unsigned int i;
4968
4969         if (splice_grow_spd(pipe, &spd))
4970                 return -ENOMEM;
4971
4972         mutex_lock(&iter->mutex);
4973
4974         if (iter->trace->splice_read) {
4975                 ret = iter->trace->splice_read(iter, filp,
4976                                                ppos, pipe, len, flags);
4977                 if (ret)
4978                         goto out_err;
4979         }
4980
4981         ret = tracing_wait_pipe(filp);
4982         if (ret <= 0)
4983                 goto out_err;
4984
4985         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4986                 ret = -EFAULT;
4987                 goto out_err;
4988         }
4989
4990         trace_event_read_lock();
4991         trace_access_lock(iter->cpu_file);
4992
4993         /* Fill as many pages as possible. */
4994         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4995                 spd.pages[i] = alloc_page(GFP_KERNEL);
4996                 if (!spd.pages[i])
4997                         break;
4998
4999                 rem = tracing_fill_pipe_page(rem, iter);
5000
5001                 /* Copy the data into the page, so we can start over. */
5002                 ret = trace_seq_to_buffer(&iter->seq,
5003                                           page_address(spd.pages[i]),
5004                                           trace_seq_used(&iter->seq));
5005                 if (ret < 0) {
5006                         __free_page(spd.pages[i]);
5007                         break;
5008                 }
5009                 spd.partial[i].offset = 0;
5010                 spd.partial[i].len = trace_seq_used(&iter->seq);
5011
5012                 trace_seq_init(&iter->seq);
5013         }
5014
5015         trace_access_unlock(iter->cpu_file);
5016         trace_event_read_unlock();
5017         mutex_unlock(&iter->mutex);
5018
5019         spd.nr_pages = i;
5020
5021         if (i)
5022                 ret = splice_to_pipe(pipe, &spd);
5023         else
5024                 ret = 0;
5025 out:
5026         splice_shrink_spd(&spd);
5027         return ret;
5028
5029 out_err:
5030         mutex_unlock(&iter->mutex);
5031         goto out;
5032 }
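
/*
 * Illustrative userspace sketch (not part of trace.c): draining trace_pipe
 * with splice(2), which lands in tracing_splice_read_pipe() above.  One end
 * of splice() must be a pipe, so the data is routed
 * trace_pipe -> pipe -> output file.  The paths and chunk size are
 * assumptions for the example.
 */
#if 0   /* example only, never compiled as part of the kernel */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int pipefd[2];
        ssize_t n;
        int in, out;

        in = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        out = open("trace-dump.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (in < 0 || out < 0 || pipe(pipefd) < 0)
                return 1;

        for (;;) {
                /* Pull formatted trace data into the pipe... */
                n = splice(in, NULL, pipefd[1], NULL, 65536, 0);
                if (n <= 0)
                        break;
                /* ...then push it on to the output file. */
                if (splice(pipefd[0], NULL, out, NULL, n, 0) < 0)
                        break;
        }
        return 0;
}
#endif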
5033
5034 static ssize_t
5035 tracing_entries_read(struct file *filp, char __user *ubuf,
5036                      size_t cnt, loff_t *ppos)
5037 {
5038         struct inode *inode = file_inode(filp);
5039         struct trace_array *tr = inode->i_private;
5040         int cpu = tracing_get_cpu(inode);
5041         char buf[64];
5042         int r = 0;
5043         ssize_t ret;
5044
5045         mutex_lock(&trace_types_lock);
5046
5047         if (cpu == RING_BUFFER_ALL_CPUS) {
5048                 int cpu, buf_size_same;
5049                 unsigned long size;
5050
5051                 size = 0;
5052                 buf_size_same = 1;
5053                 /* check if all cpu buffer sizes are the same */
5054                 for_each_tracing_cpu(cpu) {
5055                         /* fill in the size from the first enabled cpu */
5056                         if (size == 0)
5057                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5058                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5059                                 buf_size_same = 0;
5060                                 break;
5061                         }
5062                 }
5063
5064                 if (buf_size_same) {
5065                         if (!ring_buffer_expanded)
5066                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5067                                             size >> 10,
5068                                             trace_buf_size >> 10);
5069                         else
5070                                 r = sprintf(buf, "%lu\n", size >> 10);
5071                 } else
5072                         r = sprintf(buf, "X\n");
5073         } else
5074                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5075
5076         mutex_unlock(&trace_types_lock);
5077
5078         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5079         return ret;
5080 }
5081
5082 static ssize_t
5083 tracing_entries_write(struct file *filp, const char __user *ubuf,
5084                       size_t cnt, loff_t *ppos)
5085 {
5086         struct inode *inode = file_inode(filp);
5087         struct trace_array *tr = inode->i_private;
5088         unsigned long val;
5089         int ret;
5090
5091         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5092         if (ret)
5093                 return ret;
5094
5095         /* must have at least 1 entry */
5096         if (!val)
5097                 return -EINVAL;
5098
5099         /* value is in KB */
5100         val <<= 10;
5101         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5102         if (ret < 0)
5103                 return ret;
5104
5105         *ppos += cnt;
5106
5107         return cnt;
5108 }
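
/*
 * Illustrative userspace sketch (not part of trace.c): resizing the ring
 * buffer through buffer_size_kb, which is parsed by tracing_entries_write()
 * above.  The value written is interpreted in KB (note the "val <<= 10"
 * above); the tracefs path is an assumption.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *kb = "4096\n";      /* request 4 MB per-cpu buffers */
        int fd, ret = 0;

        fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
        if (fd < 0)
                return 1;
        if (write(fd, kb, strlen(kb)) < 0)
                ret = 1;
        close(fd);
        return ret;
}
#endif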
5109
5110 static ssize_t
5111 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5112                                 size_t cnt, loff_t *ppos)
5113 {
5114         struct trace_array *tr = filp->private_data;
5115         char buf[64];
5116         int r, cpu;
5117         unsigned long size = 0, expanded_size = 0;
5118
5119         mutex_lock(&trace_types_lock);
5120         for_each_tracing_cpu(cpu) {
5121                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5122                 if (!ring_buffer_expanded)
5123                         expanded_size += trace_buf_size >> 10;
5124         }
5125         if (ring_buffer_expanded)
5126                 r = sprintf(buf, "%lu\n", size);
5127         else
5128                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5129         mutex_unlock(&trace_types_lock);
5130
5131         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5132 }
5133
5134 static ssize_t
5135 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5136                           size_t cnt, loff_t *ppos)
5137 {
5138         /*
5139          * There is no need to read what the user has written; this function
5140          * exists only so that "echo" into this file does not return an error.
5141          */
5142
5143         *ppos += cnt;
5144
5145         return cnt;
5146 }
5147
5148 static int
5149 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5150 {
5151         struct trace_array *tr = inode->i_private;
5152
5153         /* disable tracing ? */
5154         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5155                 tracer_tracing_off(tr);
5156         /* resize the ring buffer to 0 */
5157         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5158
5159         trace_array_put(tr);
5160
5161         return 0;
5162 }
5163
5164 static ssize_t
5165 tracing_mark_write(struct file *filp, const char __user *ubuf,
5166                                         size_t cnt, loff_t *fpos)
5167 {
5168         unsigned long addr = (unsigned long)ubuf;
5169         struct trace_array *tr = filp->private_data;
5170         struct ring_buffer_event *event;
5171         struct ring_buffer *buffer;
5172         struct print_entry *entry;
5173         unsigned long irq_flags;
5174         struct page *pages[2];
5175         void *map_page[2];
5176         int nr_pages = 1;
5177         ssize_t written;
5178         int offset;
5179         int size;
5180         int len;
5181         int ret;
5182         int i;
5183
5184         if (tracing_disabled)
5185                 return -EINVAL;
5186
5187         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5188                 return -EINVAL;
5189
5190         if (cnt > TRACE_BUF_SIZE)
5191                 cnt = TRACE_BUF_SIZE;
5192
5193         /*
5194          * Userspace is injecting traces into the kernel trace buffer.
5195          * We want to be as non-intrusive as possible.
5196          * To do so, we do not want to allocate any special buffers
5197          * or take any locks, but instead write the userspace data
5198          * straight into the ring buffer.
5199          *
5200          * First we need to pin the userspace buffer into memory. It is
5201          * most likely already resident, because userspace just referenced
5202          * it, but there is no guarantee of that. By using
5203          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we can
5204          * access the pages directly. We then write the data straight
5205          * into the ring buffer.
5206          */
5207         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5208
5209         /* check if we cross pages */
5210         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5211                 nr_pages = 2;
5212
5213         offset = addr & (PAGE_SIZE - 1);
5214         addr &= PAGE_MASK;
5215
5216         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5217         if (ret < nr_pages) {
5218                 while (--ret >= 0)
5219                         put_page(pages[ret]);
5220                 written = -EFAULT;
5221                 goto out;
5222         }
5223
5224         for (i = 0; i < nr_pages; i++)
5225                 map_page[i] = kmap_atomic(pages[i]);
5226
5227         local_save_flags(irq_flags);
5228         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5229         buffer = tr->trace_buffer.buffer;
5230         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5231                                           irq_flags, preempt_count());
5232         if (!event) {
5233                 /* Ring buffer disabled, return as if not open for write */
5234                 written = -EBADF;
5235                 goto out_unlock;
5236         }
5237
5238         entry = ring_buffer_event_data(event);
5239         entry->ip = _THIS_IP_;
5240
5241         if (nr_pages == 2) {
5242                 len = PAGE_SIZE - offset;
5243                 memcpy(&entry->buf, map_page[0] + offset, len);
5244                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5245         } else
5246                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5247
5248         if (entry->buf[cnt - 1] != '\n') {
5249                 entry->buf[cnt] = '\n';
5250                 entry->buf[cnt + 1] = '\0';
5251         } else
5252                 entry->buf[cnt] = '\0';
5253
5254         __buffer_unlock_commit(buffer, event);
5255
5256         written = cnt;
5257
5258         *fpos += written;
5259
5260  out_unlock:
5261         for (i = nr_pages - 1; i >= 0; i--) {
5262                 kunmap_atomic(map_page[i]);
5263                 put_page(pages[i]);
5264         }
5265  out:
5266         return written;
5267 }
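
/*
 * Illustrative userspace sketch (not part of trace.c): injecting a marker
 * into the trace via trace_marker, which is handled by tracing_mark_write()
 * above.  A '\n' is appended if the write does not end with one, and writes
 * longer than TRACE_BUF_SIZE are truncated.  The tracefs path is an
 * assumption.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *msg = "myapp: starting phase 2";
        int fd, ret = 0;

        fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
        if (fd < 0)
                return 1;
        /* The message appears in the trace output as a print entry. */
        if (write(fd, msg, strlen(msg)) < 0)
                ret = 1;
        close(fd);
        return ret;
}
#endif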
5268
5269 static int tracing_clock_show(struct seq_file *m, void *v)
5270 {
5271         struct trace_array *tr = m->private;
5272         int i;
5273
5274         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5275                 seq_printf(m,
5276                         "%s%s%s%s", i ? " " : "",
5277                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5278                         i == tr->clock_id ? "]" : "");
5279         seq_putc(m, '\n');
5280
5281         return 0;
5282 }
5283
5284 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5285 {
5286         int i;
5287
5288         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5289                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5290                         break;
5291         }
5292         if (i == ARRAY_SIZE(trace_clocks))
5293                 return -EINVAL;
5294
5295         mutex_lock(&trace_types_lock);
5296
5297         tr->clock_id = i;
5298
5299         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5300
5301         /*
5302          * The new clock may not be consistent with the previous clock;
5303          * reset the buffer so it does not contain incomparable timestamps.
5304          */
5305         tracing_reset_online_cpus(&tr->trace_buffer);
5306
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5309                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5310         tracing_reset_online_cpus(&tr->max_buffer);
5311 #endif
5312
5313         mutex_unlock(&trace_types_lock);
5314
5315         return 0;
5316 }
5317
5318 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5319                                    size_t cnt, loff_t *fpos)
5320 {
5321         struct seq_file *m = filp->private_data;
5322         struct trace_array *tr = m->private;
5323         char buf[64];
5324         const char *clockstr;
5325         int ret;
5326
5327         if (cnt >= sizeof(buf))
5328                 return -EINVAL;
5329
5330         if (copy_from_user(&buf, ubuf, cnt))
5331                 return -EFAULT;
5332
5333         buf[cnt] = 0;
5334
5335         clockstr = strstrip(buf);
5336
5337         ret = tracing_set_clock(tr, clockstr);
5338         if (ret)
5339                 return ret;
5340
5341         *fpos += cnt;
5342
5343         return cnt;
5344 }
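
/*
 * Illustrative userspace sketch (not part of trace.c): selecting the trace
 * clock.  Reading trace_clock goes through tracing_clock_show() and lists
 * the available clocks with the current one in brackets; writing a name goes
 * through tracing_clock_write()/tracing_set_clock() above and resets the
 * buffers.  "global" is only an example clock name and the tracefs path is
 * an assumption.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        char buf[256];
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/tracing/trace_clock", O_RDWR);
        if (fd < 0)
                return 1;

        n = read(fd, buf, sizeof(buf) - 1);     /* e.g. "[local] global counter ..." */
        if (n > 0) {
                buf[n] = '\0';
                printf("available clocks: %s", buf);
        }

        if (write(fd, "global\n", 7) < 0)       /* switch clocks; buffers are reset */
                perror("trace_clock");
        close(fd);
        return 0;
}
#endif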
5345
5346 static int tracing_clock_open(struct inode *inode, struct file *file)
5347 {
5348         struct trace_array *tr = inode->i_private;
5349         int ret;
5350
5351         if (tracing_disabled)
5352                 return -ENODEV;
5353
5354         if (trace_array_get(tr))
5355                 return -ENODEV;
5356
5357         ret = single_open(file, tracing_clock_show, inode->i_private);
5358         if (ret < 0)
5359                 trace_array_put(tr);
5360
5361         return ret;
5362 }
5363
5364 struct ftrace_buffer_info {
5365         struct trace_iterator   iter;
5366         void                    *spare;
5367         unsigned int            read;
5368 };
5369
5370 #ifdef CONFIG_TRACER_SNAPSHOT
5371 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5372 {
5373         struct trace_array *tr = inode->i_private;
5374         struct trace_iterator *iter;
5375         struct seq_file *m;
5376         int ret = 0;
5377
5378         if (trace_array_get(tr) < 0)
5379                 return -ENODEV;
5380
5381         if (file->f_mode & FMODE_READ) {
5382                 iter = __tracing_open(inode, file, true);
5383                 if (IS_ERR(iter))
5384                         ret = PTR_ERR(iter);
5385         } else {
5386                 /* Writes still need the seq_file to hold the private data */
5387                 ret = -ENOMEM;
5388                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5389                 if (!m)
5390                         goto out;
5391                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5392                 if (!iter) {
5393                         kfree(m);
5394                         goto out;
5395                 }
5396                 ret = 0;
5397
5398                 iter->tr = tr;
5399                 iter->trace_buffer = &tr->max_buffer;
5400                 iter->cpu_file = tracing_get_cpu(inode);
5401                 m->private = iter;
5402                 file->private_data = m;
5403         }
5404 out:
5405         if (ret < 0)
5406                 trace_array_put(tr);
5407
5408         return ret;
5409 }
5410
5411 static ssize_t
5412 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5413                        loff_t *ppos)
5414 {
5415         struct seq_file *m = filp->private_data;
5416         struct trace_iterator *iter = m->private;
5417         struct trace_array *tr = iter->tr;
5418         unsigned long val;
5419         int ret;
5420
5421         ret = tracing_update_buffers();
5422         if (ret < 0)
5423                 return ret;
5424
5425         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5426         if (ret)
5427                 return ret;
5428
5429         mutex_lock(&trace_types_lock);
5430
5431         if (tr->current_trace->use_max_tr) {
5432                 ret = -EBUSY;
5433                 goto out;
5434         }
5435
5436         switch (val) {
5437         case 0:
5438                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5439                         ret = -EINVAL;
5440                         break;
5441                 }
5442                 if (tr->allocated_snapshot)
5443                         free_snapshot(tr);
5444                 break;
5445         case 1:
5446 /* Only allow per-cpu swap if the ring buffer supports it */
5447 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5448                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5449                         ret = -EINVAL;
5450                         break;
5451                 }
5452 #endif
5453                 if (!tr->allocated_snapshot) {
5454                         ret = alloc_snapshot(tr);
5455                         if (ret < 0)
5456                                 break;
5457                 }
5458                 local_irq_disable();
5459                 /* Now, we're going to swap */
5460                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5461                         update_max_tr(tr, current, smp_processor_id());
5462                 else
5463                         update_max_tr_single(tr, current, iter->cpu_file);
5464                 local_irq_enable();
5465                 break;
5466         default:
5467                 if (tr->allocated_snapshot) {
5468                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5469                                 tracing_reset_online_cpus(&tr->max_buffer);
5470                         else
5471                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5472                 }
5473                 break;
5474         }
5475
5476         if (ret >= 0) {
5477                 *ppos += cnt;
5478                 ret = cnt;
5479         }
5480 out:
5481         mutex_unlock(&trace_types_lock);
5482         return ret;
5483 }
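
/*
 * Illustrative userspace sketch (not part of trace.c): driving the snapshot
 * file handled by tracing_snapshot_write() above.  Writing "1" allocates the
 * snapshot buffer if necessary and swaps it with the live buffer, "0" frees
 * it, and any other number clears its contents; reading the file shows the
 * snapshotted trace.  Requires CONFIG_TRACER_SNAPSHOT; the tracefs path is
 * an assumption.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        /* Take a snapshot of the live buffer right now. */
        fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
        if (fd < 0)
                return 1;
        if (write(fd, "1\n", 2) < 0)
                return 1;
        close(fd);

        /* Read the snapshotted trace back out. */
        fd = open("/sys/kernel/tracing/snapshot", O_RDONLY);
        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
}
#endif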
5484
5485 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5486 {
5487         struct seq_file *m = file->private_data;
5488         int ret;
5489
5490         ret = tracing_release(inode, file);
5491
5492         if (file->f_mode & FMODE_READ)
5493                 return ret;
5494
5495         /* If write only, the seq_file is just a stub */
5496         if (m)
5497                 kfree(m->private);
5498         kfree(m);
5499
5500         return 0;
5501 }
5502
5503 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5504 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5505                                     size_t count, loff_t *ppos);
5506 static int tracing_buffers_release(struct inode *inode, struct file *file);
5507 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5508                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5509
5510 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5511 {
5512         struct ftrace_buffer_info *info;
5513         int ret;
5514
5515         ret = tracing_buffers_open(inode, filp);
5516         if (ret < 0)
5517                 return ret;
5518
5519         info = filp->private_data;
5520
5521         if (info->iter.trace->use_max_tr) {
5522                 tracing_buffers_release(inode, filp);
5523                 return -EBUSY;
5524         }
5525
5526         info->iter.snapshot = true;
5527         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5528
5529         return ret;
5530 }
5531
5532 #endif /* CONFIG_TRACER_SNAPSHOT */
5533
5534
5535 static const struct file_operations tracing_thresh_fops = {
5536         .open           = tracing_open_generic,
5537         .read           = tracing_thresh_read,
5538         .write          = tracing_thresh_write,
5539         .llseek         = generic_file_llseek,
5540 };
5541
5542 #ifdef CONFIG_TRACER_MAX_TRACE
5543 static const struct file_operations tracing_max_lat_fops = {
5544         .open           = tracing_open_generic,
5545         .read           = tracing_max_lat_read,
5546         .write          = tracing_max_lat_write,
5547         .llseek         = generic_file_llseek,
5548 };
5549 #endif
5550
5551 static const struct file_operations set_tracer_fops = {
5552         .open           = tracing_open_generic,
5553         .read           = tracing_set_trace_read,
5554         .write          = tracing_set_trace_write,
5555         .llseek         = generic_file_llseek,
5556 };
5557
5558 static const struct file_operations tracing_pipe_fops = {
5559         .open           = tracing_open_pipe,
5560         .poll           = tracing_poll_pipe,
5561         .read           = tracing_read_pipe,
5562         .splice_read    = tracing_splice_read_pipe,
5563         .release        = tracing_release_pipe,
5564         .llseek         = no_llseek,
5565 };
5566
5567 static const struct file_operations tracing_entries_fops = {
5568         .open           = tracing_open_generic_tr,
5569         .read           = tracing_entries_read,
5570         .write          = tracing_entries_write,
5571         .llseek         = generic_file_llseek,
5572         .release        = tracing_release_generic_tr,
5573 };
5574
5575 static const struct file_operations tracing_total_entries_fops = {
5576         .open           = tracing_open_generic_tr,
5577         .read           = tracing_total_entries_read,
5578         .llseek         = generic_file_llseek,
5579         .release        = tracing_release_generic_tr,
5580 };
5581
5582 static const struct file_operations tracing_free_buffer_fops = {
5583         .open           = tracing_open_generic_tr,
5584         .write          = tracing_free_buffer_write,
5585         .release        = tracing_free_buffer_release,
5586 };
5587
5588 static const struct file_operations tracing_mark_fops = {
5589         .open           = tracing_open_generic_tr,
5590         .write          = tracing_mark_write,
5591         .llseek         = generic_file_llseek,
5592         .release        = tracing_release_generic_tr,
5593 };
5594
5595 static const struct file_operations trace_clock_fops = {
5596         .open           = tracing_clock_open,
5597         .read           = seq_read,
5598         .llseek         = seq_lseek,
5599         .release        = tracing_single_release_tr,
5600         .write          = tracing_clock_write,
5601 };
5602
5603 #ifdef CONFIG_TRACER_SNAPSHOT
5604 static const struct file_operations snapshot_fops = {
5605         .open           = tracing_snapshot_open,
5606         .read           = seq_read,
5607         .write          = tracing_snapshot_write,
5608         .llseek         = tracing_lseek,
5609         .release        = tracing_snapshot_release,
5610 };
5611
5612 static const struct file_operations snapshot_raw_fops = {
5613         .open           = snapshot_raw_open,
5614         .read           = tracing_buffers_read,
5615         .release        = tracing_buffers_release,
5616         .splice_read    = tracing_buffers_splice_read,
5617         .llseek         = no_llseek,
5618 };
5619
5620 #endif /* CONFIG_TRACER_SNAPSHOT */
5621
5622 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5623 {
5624         struct trace_array *tr = inode->i_private;
5625         struct ftrace_buffer_info *info;
5626         int ret;
5627
5628         if (tracing_disabled)
5629                 return -ENODEV;
5630
5631         if (trace_array_get(tr) < 0)
5632                 return -ENODEV;
5633
5634         info = kzalloc(sizeof(*info), GFP_KERNEL);
5635         if (!info) {
5636                 trace_array_put(tr);
5637                 return -ENOMEM;
5638         }
5639
5640         mutex_lock(&trace_types_lock);
5641
5642         info->iter.tr           = tr;
5643         info->iter.cpu_file     = tracing_get_cpu(inode);
5644         info->iter.trace        = tr->current_trace;
5645         info->iter.trace_buffer = &tr->trace_buffer;
5646         info->spare             = NULL;
5647         /* Force reading ring buffer for first read */
5648         info->read              = (unsigned int)-1;
5649
5650         filp->private_data = info;
5651
5652         tr->current_trace->ref++;
5653
5654         mutex_unlock(&trace_types_lock);
5655
5656         ret = nonseekable_open(inode, filp);
5657         if (ret < 0)
5658                 trace_array_put(tr);
5659
5660         return ret;
5661 }
5662
5663 static unsigned int
5664 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5665 {
5666         struct ftrace_buffer_info *info = filp->private_data;
5667         struct trace_iterator *iter = &info->iter;
5668
5669         return trace_poll(iter, filp, poll_table);
5670 }
5671
5672 static ssize_t
5673 tracing_buffers_read(struct file *filp, char __user *ubuf,
5674                      size_t count, loff_t *ppos)
5675 {
5676         struct ftrace_buffer_info *info = filp->private_data;
5677         struct trace_iterator *iter = &info->iter;
5678         ssize_t ret;
5679         ssize_t size;
5680
5681         if (!count)
5682                 return 0;
5683
5684 #ifdef CONFIG_TRACER_MAX_TRACE
5685         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5686                 return -EBUSY;
5687 #endif
5688
5689         if (!info->spare)
5690                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5691                                                           iter->cpu_file);
5692         if (!info->spare)
5693                 return -ENOMEM;
5694
5695         /* Do we have leftover data from a previous read? */
5696         if (info->read < PAGE_SIZE)
5697                 goto read;
5698
5699  again:
5700         trace_access_lock(iter->cpu_file);
5701         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5702                                     &info->spare,
5703                                     count,
5704                                     iter->cpu_file, 0);
5705         trace_access_unlock(iter->cpu_file);
5706
5707         if (ret < 0) {
5708                 if (trace_empty(iter)) {
5709                         if ((filp->f_flags & O_NONBLOCK))
5710                                 return -EAGAIN;
5711
5712                         ret = wait_on_pipe(iter, false);
5713                         if (ret)
5714                                 return ret;
5715
5716                         goto again;
5717                 }
5718                 return 0;
5719         }
5720
5721         info->read = 0;
5722  read:
5723         size = PAGE_SIZE - info->read;
5724         if (size > count)
5725                 size = count;
5726
5727         ret = copy_to_user(ubuf, info->spare + info->read, size);
5728         if (ret == size)
5729                 return -EFAULT;
5730
5731         size -= ret;
5732
5733         *ppos += size;
5734         info->read += size;
5735
5736         return size;
5737 }
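
/*
 * Illustrative userspace sketch (not part of trace.c): consuming raw binary
 * ring-buffer pages from per_cpu/cpu0/trace_pipe_raw, the path serviced by
 * tracing_buffers_read() above.  Each successful read returns at most one
 * page of sub-buffer data; decoding that binary format is out of scope here.
 * The cpu number, the 4K page size and the tracefs path are assumptions.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char page[4096];
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
                  O_RDONLY | O_NONBLOCK);
        if (fd < 0)
                return 1;

        /* Drain whatever is currently available, one page at a time. */
        while ((n = read(fd, page, sizeof(page))) > 0)
                fwrite(page, 1, n, stdout);     /* hand off to a decoder/file */
        close(fd);
        return 0;
}
#endif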
5738
5739 static int tracing_buffers_release(struct inode *inode, struct file *file)
5740 {
5741         struct ftrace_buffer_info *info = file->private_data;
5742         struct trace_iterator *iter = &info->iter;
5743
5744         mutex_lock(&trace_types_lock);
5745
5746         iter->tr->current_trace->ref--;
5747
5748         __trace_array_put(iter->tr);
5749
5750         if (info->spare)
5751                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5752         kfree(info);
5753
5754         mutex_unlock(&trace_types_lock);
5755
5756         return 0;
5757 }
5758
5759 struct buffer_ref {
5760         struct ring_buffer      *buffer;
5761         void                    *page;
5762         int                     ref;
5763 };
5764
5765 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5766                                     struct pipe_buffer *buf)
5767 {
5768         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5769
5770         if (--ref->ref)
5771                 return;
5772
5773         ring_buffer_free_read_page(ref->buffer, ref->page);
5774         kfree(ref);
5775         buf->private = 0;
5776 }
5777
5778 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5779                                 struct pipe_buffer *buf)
5780 {
5781         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5782
5783         ref->ref++;
5784 }
5785
5786 /* Pipe buffer operations for a buffer. */
5787 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5788         .can_merge              = 0,
5789         .confirm                = generic_pipe_buf_confirm,
5790         .release                = buffer_pipe_buf_release,
5791         .steal                  = generic_pipe_buf_steal,
5792         .get                    = buffer_pipe_buf_get,
5793 };
5794
5795 /*
5796  * Callback from splice_to_pipe(); releases the remaining pages in the
5797  * spd if we errored out while filling the pipe.
5798  */
5799 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5800 {
5801         struct buffer_ref *ref =
5802                 (struct buffer_ref *)spd->partial[i].private;
5803
5804         if (--ref->ref)
5805                 return;
5806
5807         ring_buffer_free_read_page(ref->buffer, ref->page);
5808         kfree(ref);
5809         spd->partial[i].private = 0;
5810 }
5811
5812 static ssize_t
5813 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5814                             struct pipe_inode_info *pipe, size_t len,
5815                             unsigned int flags)
5816 {
5817         struct ftrace_buffer_info *info = file->private_data;
5818         struct trace_iterator *iter = &info->iter;
5819         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5820         struct page *pages_def[PIPE_DEF_BUFFERS];
5821         struct splice_pipe_desc spd = {
5822                 .pages          = pages_def,
5823                 .partial        = partial_def,
5824                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5825                 .flags          = flags,
5826                 .ops            = &buffer_pipe_buf_ops,
5827                 .spd_release    = buffer_spd_release,
5828         };
5829         struct buffer_ref *ref;
5830         int entries, size, i;
5831         ssize_t ret = 0;
5832
5833 #ifdef CONFIG_TRACER_MAX_TRACE
5834         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5835                 return -EBUSY;
5836 #endif
5837
5838         if (splice_grow_spd(pipe, &spd))
5839                 return -ENOMEM;
5840
5841         if (*ppos & (PAGE_SIZE - 1))
5842                 return -EINVAL;
5843
5844         if (len & (PAGE_SIZE - 1)) {
5845                 if (len < PAGE_SIZE)
5846                         return -EINVAL;
5847                 len &= PAGE_MASK;
5848         }
5849
5850  again:
5851         trace_access_lock(iter->cpu_file);
5852         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5853
5854         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5855                 struct page *page;
5856                 int r;
5857
5858                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5859                 if (!ref) {
5860                         ret = -ENOMEM;
5861                         break;
5862                 }
5863
5864                 ref->ref = 1;
5865                 ref->buffer = iter->trace_buffer->buffer;
5866                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5867                 if (!ref->page) {
5868                         ret = -ENOMEM;
5869                         kfree(ref);
5870                         break;
5871                 }
5872
5873                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5874                                           len, iter->cpu_file, 1);
5875                 if (r < 0) {
5876                         ring_buffer_free_read_page(ref->buffer, ref->page);
5877                         kfree(ref);
5878                         break;
5879                 }
5880
5881                 /*
5882                  * Zero out any leftover data; this page is going to
5883                  * user land.
5884                  */
5885                 size = ring_buffer_page_len(ref->page);
5886                 if (size < PAGE_SIZE)
5887                         memset(ref->page + size, 0, PAGE_SIZE - size);
5888
5889                 page = virt_to_page(ref->page);
5890
5891                 spd.pages[i] = page;
5892                 spd.partial[i].len = PAGE_SIZE;
5893                 spd.partial[i].offset = 0;
5894                 spd.partial[i].private = (unsigned long)ref;
5895                 spd.nr_pages++;
5896                 *ppos += PAGE_SIZE;
5897
5898                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5899         }
5900
5901         trace_access_unlock(iter->cpu_file);
5902         spd.nr_pages = i;
5903
5904         /* did we read anything? */
5905         if (!spd.nr_pages) {
5906                 if (ret)
5907                         return ret;
5908
5909                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5910                         return -EAGAIN;
5911
5912                 ret = wait_on_pipe(iter, true);
5913                 if (ret)
5914                         return ret;
5915
5916                 goto again;
5917         }
5918
5919         ret = splice_to_pipe(pipe, &spd);
5920         splice_shrink_spd(&spd);
5921
5922         return ret;
5923 }
5924
5925 static const struct file_operations tracing_buffers_fops = {
5926         .open           = tracing_buffers_open,
5927         .read           = tracing_buffers_read,
5928         .poll           = tracing_buffers_poll,
5929         .release        = tracing_buffers_release,
5930         .splice_read    = tracing_buffers_splice_read,
5931         .llseek         = no_llseek,
5932 };
5933
5934 static ssize_t
5935 tracing_stats_read(struct file *filp, char __user *ubuf,
5936                    size_t count, loff_t *ppos)
5937 {
5938         struct inode *inode = file_inode(filp);
5939         struct trace_array *tr = inode->i_private;
5940         struct trace_buffer *trace_buf = &tr->trace_buffer;
5941         int cpu = tracing_get_cpu(inode);
5942         struct trace_seq *s;
5943         unsigned long cnt;
5944         unsigned long long t;
5945         unsigned long usec_rem;
5946
5947         s = kmalloc(sizeof(*s), GFP_KERNEL);
5948         if (!s)
5949                 return -ENOMEM;
5950
5951         trace_seq_init(s);
5952
5953         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5954         trace_seq_printf(s, "entries: %ld\n", cnt);
5955
5956         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5957         trace_seq_printf(s, "overrun: %ld\n", cnt);
5958
5959         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5960         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5961
5962         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5963         trace_seq_printf(s, "bytes: %ld\n", cnt);
5964
5965         if (trace_clocks[tr->clock_id].in_ns) {
5966                 /* local or global for trace_clock */
5967                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5968                 usec_rem = do_div(t, USEC_PER_SEC);
5969                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5970                                                                 t, usec_rem);
5971
5972                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5973                 usec_rem = do_div(t, USEC_PER_SEC);
5974                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5975         } else {
5976                 /* counter or tsc mode for trace_clock */
5977                 trace_seq_printf(s, "oldest event ts: %llu\n",
5978                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5979
5980                 trace_seq_printf(s, "now ts: %llu\n",
5981                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5982         }
5983
5984         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5985         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5986
5987         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5988         trace_seq_printf(s, "read events: %ld\n", cnt);
5989
5990         count = simple_read_from_buffer(ubuf, count, ppos,
5991                                         s->buffer, trace_seq_used(s));
5992
5993         kfree(s);
5994
5995         return count;
5996 }
5997
5998 static const struct file_operations tracing_stats_fops = {
5999         .open           = tracing_open_generic_tr,
6000         .read           = tracing_stats_read,
6001         .llseek         = generic_file_llseek,
6002         .release        = tracing_release_generic_tr,
6003 };
6004
6005 #ifdef CONFIG_DYNAMIC_FTRACE
6006
6007 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6008 {
6009         return 0;
6010 }
6011
6012 static ssize_t
6013 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6014                   size_t cnt, loff_t *ppos)
6015 {
6016         static char ftrace_dyn_info_buffer[1024];
6017         static DEFINE_MUTEX(dyn_info_mutex);
6018         unsigned long *p = filp->private_data;
6019         char *buf = ftrace_dyn_info_buffer;
6020         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6021         int r;
6022
6023         mutex_lock(&dyn_info_mutex);
6024         r = sprintf(buf, "%ld ", *p);
6025
6026         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6027         buf[r++] = '\n';
6028
6029         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6030
6031         mutex_unlock(&dyn_info_mutex);
6032
6033         return r;
6034 }
6035
6036 static const struct file_operations tracing_dyn_info_fops = {
6037         .open           = tracing_open_generic,
6038         .read           = tracing_read_dyn_info,
6039         .llseek         = generic_file_llseek,
6040 };
6041 #endif /* CONFIG_DYNAMIC_FTRACE */
6042
6043 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6044 static void
6045 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6046 {
6047         tracing_snapshot();
6048 }
6049
6050 static void
6051 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6052 {
6053         unsigned long *count = (long *)data;
6054
6055         if (!*count)
6056                 return;
6057
6058         if (*count != -1)
6059                 (*count)--;
6060
6061         tracing_snapshot();
6062 }
6063
6064 static int
6065 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6066                       struct ftrace_probe_ops *ops, void *data)
6067 {
6068         long count = (long)data;
6069
6070         seq_printf(m, "%ps:", (void *)ip);
6071
6072         seq_puts(m, "snapshot");
6073
6074         if (count == -1)
6075                 seq_puts(m, ":unlimited\n");
6076         else
6077                 seq_printf(m, ":count=%ld\n", count);
6078
6079         return 0;
6080 }
6081
6082 static struct ftrace_probe_ops snapshot_probe_ops = {
6083         .func                   = ftrace_snapshot,
6084         .print                  = ftrace_snapshot_print,
6085 };
6086
6087 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6088         .func                   = ftrace_count_snapshot,
6089         .print                  = ftrace_snapshot_print,
6090 };
6091
6092 static int
6093 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6094                                char *glob, char *cmd, char *param, int enable)
6095 {
6096         struct ftrace_probe_ops *ops;
6097         void *count = (void *)-1;
6098         char *number;
6099         int ret;
6100
6101         /* hash funcs only work with set_ftrace_filter */
6102         if (!enable)
6103                 return -EINVAL;
6104
6105         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6106
6107         if (glob[0] == '!') {
6108                 unregister_ftrace_function_probe_func(glob+1, ops);
6109                 return 0;
6110         }
6111
6112         if (!param)
6113                 goto out_reg;
6114
6115         number = strsep(&param, ":");
6116
6117         if (!strlen(number))
6118                 goto out_reg;
6119
6120         /*
6121          * We use the callback data field (which is a pointer)
6122          * as our counter.
6123          */
6124         ret = kstrtoul(number, 0, (unsigned long *)&count);
6125         if (ret)
6126                 return ret;
6127
6128  out_reg:
6129         ret = register_ftrace_function_probe(glob, ops, count);
6130
6131         if (ret >= 0)
6132                 alloc_snapshot(&global_trace);
6133
6134         return ret < 0 ? ret : 0;
6135 }
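
/*
 * Illustrative userspace sketch (not part of trace.c): arming the "snapshot"
 * function command registered below, which is parsed by
 * ftrace_trace_snapshot_callback() above.  The string written to
 * set_ftrace_filter has the form <glob>:snapshot[:count]; prefixing it with
 * '!' removes the probe again.  The traced function name and the tracefs
 * path are assumptions for the example.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_filter(const char *cmd)
{
        int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
        int ret = -1;

        if (fd < 0)
                return -1;
        if (write(fd, cmd, strlen(cmd)) == (ssize_t)strlen(cmd))
                ret = 0;
        close(fd);
        return ret;
}

int main(void)
{
        /* Take at most three snapshots, one per hit of the chosen function. */
        if (write_filter("kfree:snapshot:3\n"))
                return 1;

        /* ... run the workload, then inspect the "snapshot" file ... */

        /* Disarm the probe again. */
        return write_filter("!kfree:snapshot:3\n");
}
#endif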
6136
6137 static struct ftrace_func_command ftrace_snapshot_cmd = {
6138         .name                   = "snapshot",
6139         .func                   = ftrace_trace_snapshot_callback,
6140 };
6141
6142 static __init int register_snapshot_cmd(void)
6143 {
6144         return register_ftrace_command(&ftrace_snapshot_cmd);
6145 }
6146 #else
6147 static inline __init int register_snapshot_cmd(void) { return 0; }
6148 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6149
6150 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6151 {
6152         if (WARN_ON(!tr->dir))
6153                 return ERR_PTR(-ENODEV);
6154
6155         /* Top directory uses NULL as the parent */
6156         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6157                 return NULL;
6158
6159         /* All sub buffers have a descriptor */
6160         return tr->dir;
6161 }
6162
6163 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6164 {
6165         struct dentry *d_tracer;
6166
6167         if (tr->percpu_dir)
6168                 return tr->percpu_dir;
6169
6170         d_tracer = tracing_get_dentry(tr);
6171         if (IS_ERR(d_tracer))
6172                 return NULL;
6173
6174         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6175
6176         WARN_ONCE(!tr->percpu_dir,
6177                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6178
6179         return tr->percpu_dir;
6180 }
6181
6182 static struct dentry *
6183 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6184                       void *data, long cpu, const struct file_operations *fops)
6185 {
6186         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6187
6188         if (ret) /* See tracing_get_cpu() */
6189                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6190         return ret;
6191 }
6192
6193 static void
6194 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6195 {
6196         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6197         struct dentry *d_cpu;
6198         char cpu_dir[30]; /* 30 characters should be more than enough */
6199
6200         if (!d_percpu)
6201                 return;
6202
6203         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6204         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6205         if (!d_cpu) {
6206                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6207                 return;
6208         }
6209
6210         /* per cpu trace_pipe */
6211         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6212                                 tr, cpu, &tracing_pipe_fops);
6213
6214         /* per cpu trace */
6215         trace_create_cpu_file("trace", 0644, d_cpu,
6216                                 tr, cpu, &tracing_fops);
6217
6218         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6219                                 tr, cpu, &tracing_buffers_fops);
6220
6221         trace_create_cpu_file("stats", 0444, d_cpu,
6222                                 tr, cpu, &tracing_stats_fops);
6223
6224         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6225                                 tr, cpu, &tracing_entries_fops);
6226
6227 #ifdef CONFIG_TRACER_SNAPSHOT
6228         trace_create_cpu_file("snapshot", 0644, d_cpu,
6229                                 tr, cpu, &snapshot_fops);
6230
6231         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6232                                 tr, cpu, &snapshot_raw_fops);
6233 #endif
6234 }
6235
6236 #ifdef CONFIG_FTRACE_SELFTEST
6237 /* Let selftest have access to static functions in this file */
6238 #include "trace_selftest.c"
6239 #endif
6240
6241 static ssize_t
6242 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6243                         loff_t *ppos)
6244 {
6245         struct trace_option_dentry *topt = filp->private_data;
6246         char *buf;
6247
6248         if (topt->flags->val & topt->opt->bit)
6249                 buf = "1\n";
6250         else
6251                 buf = "0\n";
6252
6253         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6254 }
6255
6256 static ssize_t
6257 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6258                          loff_t *ppos)
6259 {
6260         struct trace_option_dentry *topt = filp->private_data;
6261         unsigned long val;
6262         int ret;
6263
6264         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6265         if (ret)
6266                 return ret;
6267
6268         if (val != 0 && val != 1)
6269                 return -EINVAL;
6270
6271         if (!!(topt->flags->val & topt->opt->bit) != val) {
6272                 mutex_lock(&trace_types_lock);
6273                 ret = __set_tracer_option(topt->tr, topt->flags,
6274                                           topt->opt, !val);
6275                 mutex_unlock(&trace_types_lock);
6276                 if (ret)
6277                         return ret;
6278         }
6279
6280         *ppos += cnt;
6281
6282         return cnt;
6283 }
6284
6285
6286 static const struct file_operations trace_options_fops = {
6287         .open = tracing_open_generic,
6288         .read = trace_options_read,
6289         .write = trace_options_write,
6290         .llseek = generic_file_llseek,
6291 };
6292
6293 /*
6294  * In order to pass in both the trace_array descriptor as well as the index
6295  * of the flag that the trace option file represents, the trace_array
6296  * has a character array trace_flags_index[], where each element holds the
6297  * index of the bit for the flag it represents: index[0] == 0,
6298  * index[1] == 1, etc. The address of an element of this array is passed
6299  * to the flag option file read/write callbacks.
6300  *
6301  * In order to extract both the index and the trace_array descriptor,
6302  * get_tr_index() uses the following algorithm.
6303  *
6304  *   idx = *ptr;
6305  *
6306  * The pointer points at an element of the index array, and that element
6307  * stores its own index (remember index[1] == 1).
6308  *
6309  * To get the trace_array descriptor, we subtract that index from the
6310  * pointer, which takes us back to the start of the array.
6311  *
6312  *   ptr - idx == &index[0]
6313  *
6314  * A simple container_of() on that pointer then gets us to the
6315  * trace_array descriptor.
6316  */
6317 static void get_tr_index(void *data, struct trace_array **ptr,
6318                          unsigned int *pindex)
6319 {
6320         *pindex = *(unsigned char *)data;
6321
6322         *ptr = container_of(data - *pindex, struct trace_array,
6323                             trace_flags_index);
6324 }
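
/*
 * Illustrative standalone sketch (not part of trace.c) of the pointer trick
 * documented above get_tr_index(): each byte of an index array stores its
 * own position, so a pointer to one element is enough to recover both the
 * index and, via container_of(), the structure embedding the array.  The
 * structure and names below are invented for the demonstration.
 */
#if 0   /* example only, never compiled as part of the kernel */
#include <assert.h>
#include <stddef.h>

#define demo_container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_array {
        int             value;
        unsigned char   flags_index[8];         /* index[i] == i */
};

static void get_demo_index(void *data, struct demo_array **ptr,
                           unsigned int *pindex)
{
        *pindex = *(unsigned char *)data;
        /* data - *pindex == &flags_index[0]; container_of() finds the struct */
        *ptr = demo_container_of((unsigned char *)data - *pindex,
                                 struct demo_array, flags_index);
}

int main(void)
{
        struct demo_array d = { .value = 42 };
        struct demo_array *found;
        unsigned int index;
        int i;

        for (i = 0; i < 8; i++)
                d.flags_index[i] = i;

        /* Hand out only the address of element 5, as the option files do. */
        get_demo_index(&d.flags_index[5], &found, &index);
        assert(index == 5 && found == &d && found->value == 42);
        return 0;
}
#endif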
6325
6326 static ssize_t
6327 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6328                         loff_t *ppos)
6329 {
6330         void *tr_index = filp->private_data;
6331         struct trace_array *tr;
6332         unsigned int index;
6333         char *buf;
6334
6335         get_tr_index(tr_index, &tr, &index);
6336
6337         if (tr->trace_flags & (1 << index))
6338                 buf = "1\n";
6339         else
6340                 buf = "0\n";
6341
6342         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6343 }
6344
6345 static ssize_t
6346 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6347                          loff_t *ppos)
6348 {
6349         void *tr_index = filp->private_data;
6350         struct trace_array *tr;
6351         unsigned int index;
6352         unsigned long val;
6353         int ret;
6354
6355         get_tr_index(tr_index, &tr, &index);
6356
6357         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6358         if (ret)
6359                 return ret;
6360
6361         if (val != 0 && val != 1)
6362                 return -EINVAL;
6363
6364         mutex_lock(&trace_types_lock);
6365         ret = set_tracer_flag(tr, 1 << index, val);
6366         mutex_unlock(&trace_types_lock);
6367
6368         if (ret < 0)
6369                 return ret;
6370
6371         *ppos += cnt;
6372
6373         return cnt;
6374 }
6375
6376 static const struct file_operations trace_options_core_fops = {
6377         .open = tracing_open_generic,
6378         .read = trace_options_core_read,
6379         .write = trace_options_core_write,
6380         .llseek = generic_file_llseek,
6381 };
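
/*
 * trace_options_core_fops backs the files for the core trace flags
 * (one file per bit in tr->trace_flags); writes are funneled through
 * set_tracer_flag().
 */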
6382
6383 struct dentry *trace_create_file(const char *name,
6384                                  umode_t mode,
6385                                  struct dentry *parent,
6386                                  void *data,
6387                                  const struct file_operations *fops)
6388 {
6389         struct dentry *ret;
6390
6391         ret = tracefs_create_file(name, mode, parent, data, fops);
6392         if (!ret)
6393                 pr_warn("Could not create tracefs '%s' entry\n", name);
6394
6395         return ret;
6396 }
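
/*
 * A NULL return from trace_create_file() is not treated as fatal;
 * callers generally just warn (or silently continue), so a failed
 * file creation only degrades the tracefs interface.
 */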
6397
6398
6399 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6400 {
6401         struct dentry *d_tracer;
6402
6403         if (tr->options)
6404                 return tr->options;
6405
6406         d_tracer = tracing_get_dentry(tr);
6407         if (IS_ERR(d_tracer))
6408                 return NULL;
6409
6410         tr->options = tracefs_create_dir("options", d_tracer);
6411         if (!tr->options) {
6412                 pr_warn("Could not create tracefs directory 'options'\n");
6413                 return NULL;
6414         }
6415
6416         return tr->options;
6417 }
6418
6419 static void
6420 create_trace_option_file(struct trace_array *tr,
6421                          struct trace_option_dentry *topt,
6422                          struct tracer_flags *flags,
6423                          struct tracer_opt *opt)
6424 {
6425         struct dentry *t_options;
6426
6427         t_options = trace_options_init_dentry(tr);
6428         if (!t_options)
6429                 return;
6430
6431         topt->flags = flags;
6432         topt->opt = opt;
6433         topt->tr = tr;
6434
6435         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6436                                     &trace_options_fops);
6437
6438 }
6439
6440 static void
6441 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6442 {
6443         struct trace_option_dentry *topts;
6444         struct trace_options *tr_topts;
6445         struct tracer_flags *flags;
6446         struct tracer_opt *opts;
6447         int cnt;
6448         int i;
6449
6450         if (!tracer)
6451                 return;
6452
6453         flags = tracer->flags;
6454
6455         if (!flags || !flags->opts)
6456                 return;
6457
6458         /*
6459          * If this is an instance, only create flags for tracers
6460          * the instance may have.
6461          */
6462         if (!trace_ok_for_array(tracer, tr))
6463                 return;
6464
6465         for (i = 0; i < tr->nr_topts; i++) {
6466                 /* Make sure there are no duplicate flags. */
6467                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6468                         return;
6469         }
6470
6471         opts = flags->opts;
6472
6473         for (cnt = 0; opts[cnt].name; cnt++)
6474                 ;
6475
6476         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6477         if (!topts)
6478                 return;
6479
6480         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6481                             GFP_KERNEL);
6482         if (!tr_topts) {
6483                 kfree(topts);
6484                 return;
6485         }
6486
6487         tr->topts = tr_topts;
6488         tr->topts[tr->nr_topts].tracer = tracer;
6489         tr->topts[tr->nr_topts].topts = topts;
6490         tr->nr_topts++;
6491
6492         for (cnt = 0; opts[cnt].name; cnt++) {
6493                 create_trace_option_file(tr, &topts[cnt], flags,
6494                                          &opts[cnt]);
6495                 WARN_ONCE(topts[cnt].entry == NULL,
6496                           "Failed to create trace option: %s",
6497                           opts[cnt].name);
6498         }
6499 }
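
/*
 * Each tracer gets its own entry in tr->topts, so that instance_rmdir()
 * can later free the per option bookkeeping for that instance.
 */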
6500
6501 static struct dentry *
6502 create_trace_option_core_file(struct trace_array *tr,
6503                               const char *option, long index)
6504 {
6505         struct dentry *t_options;
6506
6507         t_options = trace_options_init_dentry(tr);
6508         if (!t_options)
6509                 return NULL;
6510
6511         return trace_create_file(option, 0644, t_options,
6512                                  (void *)&tr->trace_flags_index[index],
6513                                  &trace_options_core_fops);
6514 }
6515
6516 static void create_trace_options_dir(struct trace_array *tr)
6517 {
6518         struct dentry *t_options;
6519         bool top_level = tr == &global_trace;
6520         int i;
6521
6522         t_options = trace_options_init_dentry(tr);
6523         if (!t_options)
6524                 return;
6525
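        /*
         * Flags marked in TOP_LEVEL_TRACE_FLAGS only get a file in the
         * top level (global_trace) options directory.
         */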
6526         for (i = 0; trace_options[i]; i++) {
6527                 if (top_level ||
6528                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6529                         create_trace_option_core_file(tr, trace_options[i], i);
6530         }
6531 }
6532
6533 static ssize_t
6534 rb_simple_read(struct file *filp, char __user *ubuf,
6535                size_t cnt, loff_t *ppos)
6536 {
6537         struct trace_array *tr = filp->private_data;
6538         char buf[64];
6539         int r;
6540
6541         r = tracer_tracing_is_on(tr);
6542         r = sprintf(buf, "%d\n", r);
6543
6544         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6545 }
6546
6547 static ssize_t
6548 rb_simple_write(struct file *filp, const char __user *ubuf,
6549                 size_t cnt, loff_t *ppos)
6550 {
6551         struct trace_array *tr = filp->private_data;
6552         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6553         unsigned long val;
6554         int ret;
6555
6556         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6557         if (ret)
6558                 return ret;
6559
6560         if (buffer) {
6561                 mutex_lock(&trace_types_lock);
6562                 if (val) {
6563                         tracer_tracing_on(tr);
6564                         if (tr->current_trace->start)
6565                                 tr->current_trace->start(tr);
6566                 } else {
6567                         tracer_tracing_off(tr);
6568                         if (tr->current_trace->stop)
6569                                 tr->current_trace->stop(tr);
6570                 }
6571                 mutex_unlock(&trace_types_lock);
6572         }
6573
6574         (*ppos)++;
6575
6576         return cnt;
6577 }
6578
6579 static const struct file_operations rb_simple_fops = {
6580         .open           = tracing_open_generic_tr,
6581         .read           = rb_simple_read,
6582         .write          = rb_simple_write,
6583         .release        = tracing_release_generic_tr,
6584         .llseek         = default_llseek,
6585 };
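
/*
 * rb_simple_fops backs the per instance "tracing_on" file. Writing "0"
 * or "1" also invokes the current tracer's stop()/start() callbacks,
 * e.g. (assuming the usual tracefs mount point):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */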
6586
6587 struct dentry *trace_instance_dir;
6588
6589 static void
6590 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6591
6592 static int
6593 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6594 {
6595         enum ring_buffer_flags rb_flags;
6596
6597         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6598
6599         buf->tr = tr;
6600
6601         buf->buffer = ring_buffer_alloc(size, rb_flags);
6602         if (!buf->buffer)
6603                 return -ENOMEM;
6604
6605         buf->data = alloc_percpu(struct trace_array_cpu);
6606         if (!buf->data) {
6607                 ring_buffer_free(buf->buffer);
6608                 return -ENOMEM;
6609         }
6610
6611         /* Allocate the first page for all buffers */
6612         set_buffer_entries(&tr->trace_buffer,
6613                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6614
6615         return 0;
6616 }
6617
6618 static int allocate_trace_buffers(struct trace_array *tr, int size)
6619 {
6620         int ret;
6621
6622         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6623         if (ret)
6624                 return ret;
6625
6626 #ifdef CONFIG_TRACER_MAX_TRACE
6627         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6628                                     allocate_snapshot ? size : 1);
6629         if (WARN_ON(ret)) {
6630                 ring_buffer_free(tr->trace_buffer.buffer);
6631                 free_percpu(tr->trace_buffer.data);
6632                 return -ENOMEM;
6633         }
6634         tr->allocated_snapshot = allocate_snapshot;
6635
6636         /*
6637          * Only the top level trace array gets its snapshot allocated
6638          * from the kernel command line.
6639          */
6640         allocate_snapshot = false;
6641 #endif
6642         return 0;
6643 }
6644
6645 static void free_trace_buffer(struct trace_buffer *buf)
6646 {
6647         if (buf->buffer) {
6648                 ring_buffer_free(buf->buffer);
6649                 buf->buffer = NULL;
6650                 free_percpu(buf->data);
6651                 buf->data = NULL;
6652         }
6653 }
6654
6655 static void free_trace_buffers(struct trace_array *tr)
6656 {
6657         if (!tr)
6658                 return;
6659
6660         free_trace_buffer(&tr->trace_buffer);
6661
6662 #ifdef CONFIG_TRACER_MAX_TRACE
6663         free_trace_buffer(&tr->max_buffer);
6664 #endif
6665 }
6666
6667 static void init_trace_flags_index(struct trace_array *tr)
6668 {
6669         int i;
6670
6671         /* Used by the trace options files */
6672         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6673                 tr->trace_flags_index[i] = i;
6674 }
6675
6676 static void __update_tracer_options(struct trace_array *tr)
6677 {
6678         struct tracer *t;
6679
6680         for (t = trace_types; t; t = t->next)
6681                 add_tracer_options(tr, t);
6682 }
6683
6684 static void update_tracer_options(struct trace_array *tr)
6685 {
6686         mutex_lock(&trace_types_lock);
6687         __update_tracer_options(tr);
6688         mutex_unlock(&trace_types_lock);
6689 }
6690
6691 static int instance_mkdir(const char *name)
6692 {
6693         struct trace_array *tr;
6694         int ret;
6695
6696         mutex_lock(&trace_types_lock);
6697
6698         ret = -EEXIST;
6699         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6700                 if (tr->name && strcmp(tr->name, name) == 0)
6701                         goto out_unlock;
6702         }
6703
6704         ret = -ENOMEM;
6705         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6706         if (!tr)
6707                 goto out_unlock;
6708
6709         tr->name = kstrdup(name, GFP_KERNEL);
6710         if (!tr->name)
6711                 goto out_free_tr;
6712
6713         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6714                 goto out_free_tr;
6715
6716         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6717
6718         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6719
6720         raw_spin_lock_init(&tr->start_lock);
6721
6722         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6723
6724         tr->current_trace = &nop_trace;
6725
6726         INIT_LIST_HEAD(&tr->systems);
6727         INIT_LIST_HEAD(&tr->events);
6728
6729         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6730                 goto out_free_tr;
6731
6732         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6733         if (!tr->dir)
6734                 goto out_free_tr;
6735
6736         ret = event_trace_add_tracer(tr->dir, tr);
6737         if (ret) {
6738                 tracefs_remove_recursive(tr->dir);
6739                 goto out_free_tr;
6740         }
6741
6742         init_tracer_tracefs(tr, tr->dir);
6743         init_trace_flags_index(tr);
6744         __update_tracer_options(tr);
6745
6746         list_add(&tr->list, &ftrace_trace_arrays);
6747
6748         mutex_unlock(&trace_types_lock);
6749
6750         return 0;
6751
6752  out_free_tr:
6753         free_trace_buffers(tr);
6754         free_cpumask_var(tr->tracing_cpumask);
6755         kfree(tr->name);
6756         kfree(tr);
6757
6758  out_unlock:
6759         mutex_unlock(&trace_types_lock);
6760
6761         return ret;
6762
6763 }
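
/*
 * instance_mkdir() is called by tracefs when a directory is created
 * under instances/, e.g. (assuming the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 */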
6764
6765 static int instance_rmdir(const char *name)
6766 {
6767         struct trace_array *tr;
6768         int found = 0;
6769         int ret;
6770         int i;
6771
6772         mutex_lock(&trace_types_lock);
6773
6774         ret = -ENODEV;
6775         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6776                 if (tr->name && strcmp(tr->name, name) == 0) {
6777                         found = 1;
6778                         break;
6779                 }
6780         }
6781         if (!found)
6782                 goto out_unlock;
6783
6784         ret = -EBUSY;
6785         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6786                 goto out_unlock;
6787
6788         list_del(&tr->list);
6789
6790         /* Disable all the flags that were enabled coming in */
6791         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6792                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6793                         set_tracer_flag(tr, 1 << i, 0);
6794         }
6795
6796         tracing_set_nop(tr);
6797         event_trace_del_tracer(tr);
6798         ftrace_destroy_function_files(tr);
6799         tracefs_remove_recursive(tr->dir);
6800         free_trace_buffers(tr);
6801
6802         for (i = 0; i < tr->nr_topts; i++) {
6803                 kfree(tr->topts[i].topts);
6804         }
6805         kfree(tr->topts);
6806
6807         kfree(tr->name);
6808         kfree(tr);
6809
6810         ret = 0;
6811
6812  out_unlock:
6813         mutex_unlock(&trace_types_lock);
6814
6815         return ret;
6816 }
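
/*
 * Likewise, instance_rmdir() handles removal of an instance directory.
 * It fails with -EBUSY while the instance or its current tracer is
 * still referenced (i.e. some of its tracefs files are still open).
 */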
6817
6818 static __init void create_trace_instances(struct dentry *d_tracer)
6819 {
6820         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6821                                                          instance_mkdir,
6822                                                          instance_rmdir);
6823         if (WARN_ON(!trace_instance_dir))
6824                 return;
6825 }
6826
6827 static void
6828 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6829 {
6830         int cpu;
6831
6832         trace_create_file("available_tracers", 0444, d_tracer,
6833                         tr, &show_traces_fops);
6834
6835         trace_create_file("current_tracer", 0644, d_tracer,
6836                         tr, &set_tracer_fops);
6837
6838         trace_create_file("tracing_cpumask", 0644, d_tracer,
6839                           tr, &tracing_cpumask_fops);
6840
6841         trace_create_file("trace_options", 0644, d_tracer,
6842                           tr, &tracing_iter_fops);
6843
6844         trace_create_file("trace", 0644, d_tracer,
6845                           tr, &tracing_fops);
6846
6847         trace_create_file("trace_pipe", 0444, d_tracer,
6848                           tr, &tracing_pipe_fops);
6849
6850         trace_create_file("buffer_size_kb", 0644, d_tracer,
6851                           tr, &tracing_entries_fops);
6852
6853         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6854                           tr, &tracing_total_entries_fops);
6855
6856         trace_create_file("free_buffer", 0200, d_tracer,
6857                           tr, &tracing_free_buffer_fops);
6858
6859         trace_create_file("trace_marker", 0220, d_tracer,
6860                           tr, &tracing_mark_fops);
6861
6862         trace_create_file("trace_clock", 0644, d_tracer, tr,
6863                           &trace_clock_fops);
6864
6865         trace_create_file("tracing_on", 0644, d_tracer,
6866                           tr, &rb_simple_fops);
6867
6868         create_trace_options_dir(tr);
6869
6870 #ifdef CONFIG_TRACER_MAX_TRACE
6871         trace_create_file("tracing_max_latency", 0644, d_tracer,
6872                         &tr->max_latency, &tracing_max_lat_fops);
6873 #endif
6874
6875         if (ftrace_create_function_files(tr, d_tracer))
6876                 WARN(1, "Could not allocate function filter files");
6877
6878 #ifdef CONFIG_TRACER_SNAPSHOT
6879         trace_create_file("snapshot", 0644, d_tracer,
6880                           tr, &snapshot_fops);
6881 #endif
6882
6883         for_each_tracing_cpu(cpu)
6884                 tracing_init_tracefs_percpu(tr, cpu);
6885
6886 }
6887
6888 static struct vfsmount *trace_automount(void *ignore)
6889 {
6890         struct vfsmount *mnt;
6891         struct file_system_type *type;
6892
6893         /*
6894          * To maintain backward compatibility for tools that mount
6895          * debugfs to get to the tracing facility, tracefs is automatically
6896          * mounted to the debugfs/tracing directory.
6897          */
6898         type = get_fs_type("tracefs");
6899         if (!type)
6900                 return NULL;
6901         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6902         put_filesystem(type);
6903         if (IS_ERR(mnt))
6904                 return NULL;
6905         mntget(mnt);
6906
6907         return mnt;
6908 }
6909
6910 /**
6911  * tracing_init_dentry - initialize top level trace array
6912  *
6913  * This is called when creating files or directories in the tracing
6914  * directory. It is called via fs_initcall() by any of the boot up code
6915  * and expects to return the dentry of the top level tracing directory.
6916  */
6917 struct dentry *tracing_init_dentry(void)
6918 {
6919         struct trace_array *tr = &global_trace;
6920
6921         /* The top level trace array uses NULL as parent */
6922         if (tr->dir)
6923                 return NULL;
6924
6925         if (WARN_ON(!tracefs_initialized()) ||
6926                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6927                  WARN_ON(!debugfs_initialized())))
6928                 return ERR_PTR(-ENODEV);
6929
6930         /*
6931          * As there may still be users that expect the tracing
6932          * files to exist in debugfs/tracing, we must automount
6933          * the tracefs file system there, so older tools still
6934          * work with the newer kernel.
6935          */
6936         tr->dir = debugfs_create_automount("tracing", NULL,
6937                                            trace_automount, NULL);
6938         if (!tr->dir) {
6939                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6940                 return ERR_PTR(-ENOMEM);
6941         }
6942
6943         return NULL;
6944 }
6945
6946 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6947 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6948
6949 static void __init trace_enum_init(void)
6950 {
6951         int len;
6952
6953         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6954         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6955 }
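
/*
 * The maps between __start_ftrace_enum_maps and __stop_ftrace_enum_maps
 * are emitted into their own section by TRACE_DEFINE_ENUM() for the core
 * kernel; maps from modules are added by the notifier below.
 */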
6956
6957 #ifdef CONFIG_MODULES
6958 static void trace_module_add_enums(struct module *mod)
6959 {
6960         if (!mod->num_trace_enums)
6961                 return;
6962
6963         /*
6964          * Modules with bad taint do not have events created; do
6965          * not bother with enums either.
6966          */
6967         if (trace_module_has_bad_taint(mod))
6968                 return;
6969
6970         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6971 }
6972
6973 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6974 static void trace_module_remove_enums(struct module *mod)
6975 {
6976         union trace_enum_map_item *map;
6977         union trace_enum_map_item **last = &trace_enum_maps;
6978
6979         if (!mod->num_trace_enums)
6980                 return;
6981
6982         mutex_lock(&trace_enum_mutex);
6983
6984         map = trace_enum_maps;
6985
6986         while (map) {
6987                 if (map->head.mod == mod)
6988                         break;
6989                 map = trace_enum_jmp_to_tail(map);
6990                 last = &map->tail.next;
6991                 map = map->tail.next;
6992         }
6993         if (!map)
6994                 goto out;
6995
6996         *last = trace_enum_jmp_to_tail(map)->tail.next;
6997         kfree(map);
6998  out:
6999         mutex_unlock(&trace_enum_mutex);
7000 }
7001 #else
7002 static inline void trace_module_remove_enums(struct module *mod) { }
7003 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7004
7005 static int trace_module_notify(struct notifier_block *self,
7006                                unsigned long val, void *data)
7007 {
7008         struct module *mod = data;
7009
7010         switch (val) {
7011         case MODULE_STATE_COMING:
7012                 trace_module_add_enums(mod);
7013                 break;
7014         case MODULE_STATE_GOING:
7015                 trace_module_remove_enums(mod);
7016                 break;
7017         }
7018
7019         return 0;
7020 }
7021
7022 static struct notifier_block trace_module_nb = {
7023         .notifier_call = trace_module_notify,
7024         .priority = 0,
7025 };
7026 #endif /* CONFIG_MODULES */
7027
7028 static __init int tracer_init_tracefs(void)
7029 {
7030         struct dentry *d_tracer;
7031
7032         trace_access_lock_init();
7033
7034         d_tracer = tracing_init_dentry();
7035         if (IS_ERR(d_tracer))
7036                 return 0;
7037
7038         init_tracer_tracefs(&global_trace, d_tracer);
7039
7040         trace_create_file("tracing_thresh", 0644, d_tracer,
7041                         &global_trace, &tracing_thresh_fops);
7042
7043         trace_create_file("README", 0444, d_tracer,
7044                         NULL, &tracing_readme_fops);
7045
7046         trace_create_file("saved_cmdlines", 0444, d_tracer,
7047                         NULL, &tracing_saved_cmdlines_fops);
7048
7049         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7050                           NULL, &tracing_saved_cmdlines_size_fops);
7051
7052         trace_enum_init();
7053
7054         trace_create_enum_file(d_tracer);
7055
7056 #ifdef CONFIG_MODULES
7057         register_module_notifier(&trace_module_nb);
7058 #endif
7059
7060 #ifdef CONFIG_DYNAMIC_FTRACE
7061         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7062                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7063 #endif
7064
7065         create_trace_instances(d_tracer);
7066
7067         update_tracer_options(&global_trace);
7068
7069         return 0;
7070 }
7071
7072 static int trace_panic_handler(struct notifier_block *this,
7073                                unsigned long event, void *unused)
7074 {
7075         if (ftrace_dump_on_oops)
7076                 ftrace_dump(ftrace_dump_on_oops);
7077         return NOTIFY_OK;
7078 }
7079
7080 static struct notifier_block trace_panic_notifier = {
7081         .notifier_call  = trace_panic_handler,
7082         .next           = NULL,
7083         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7084 };
7085
7086 static int trace_die_handler(struct notifier_block *self,
7087                              unsigned long val,
7088                              void *data)
7089 {
7090         switch (val) {
7091         case DIE_OOPS:
7092                 if (ftrace_dump_on_oops)
7093                         ftrace_dump(ftrace_dump_on_oops);
7094                 break;
7095         default:
7096                 break;
7097         }
7098         return NOTIFY_OK;
7099 }
7100
7101 static struct notifier_block trace_die_notifier = {
7102         .notifier_call = trace_die_handler,
7103         .priority = 200
7104 };
7105
7106 /*
7107  * printk is set to a max of 1024; we really don't need it that big.
7108  * Nothing should be printing 1000 characters anyway.
7109  */
7110 #define TRACE_MAX_PRINT         1000
7111
7112 /*
7113  * Define here KERN_TRACE so that we have one place to modify
7114  * it if we decide to change what log level the ftrace dump
7115  * should be at.
7116  */
7117 #define KERN_TRACE              KERN_EMERG
7118
7119 void
7120 trace_printk_seq(struct trace_seq *s)
7121 {
7122         /* Probably should print a warning here. */
7123         if (s->seq.len >= TRACE_MAX_PRINT)
7124                 s->seq.len = TRACE_MAX_PRINT;
7125
7126         /*
7127          * More paranoid code. Although the buffer size is set to
7128          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7129          * an extra layer of protection.
7130          */
7131         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7132                 s->seq.len = s->seq.size - 1;
7133
7134         /* should already be zero terminated, but we are paranoid. */
7135         s->buffer[s->seq.len] = 0;
7136
7137         printk(KERN_TRACE "%s", s->buffer);
7138
7139         trace_seq_init(s);
7140 }
7141
7142 void trace_init_global_iter(struct trace_iterator *iter)
7143 {
7144         iter->tr = &global_trace;
7145         iter->trace = iter->tr->current_trace;
7146         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7147         iter->trace_buffer = &global_trace.trace_buffer;
7148
7149         if (iter->trace && iter->trace->open)
7150                 iter->trace->open(iter);
7151
7152         /* Annotate start of buffers if we had overruns */
7153         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7154                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7155
7156         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7157         if (trace_clocks[iter->tr->clock_id].in_ns)
7158                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7159 }
7160
7161 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7162 {
7163         /* use static because iter can be a bit big for the stack */
7164         static struct trace_iterator iter;
7165         static atomic_t dump_running;
7166         struct trace_array *tr = &global_trace;
7167         unsigned int old_userobj;
7168         unsigned long flags;
7169         int cnt = 0, cpu;
7170
7171         /* Only allow one dump user at a time. */
7172         if (atomic_inc_return(&dump_running) != 1) {
7173                 atomic_dec(&dump_running);
7174                 return;
7175         }
7176
7177         /*
7178          * Always turn off tracing when we dump.
7179          * We don't need to show trace output of what happens
7180          * between multiple crashes.
7181          *
7182          * If the user does a sysrq-z, then they can re-enable
7183          * tracing with echo 1 > tracing_on.
7184          */
7185         tracing_off();
7186
7187         local_irq_save(flags);
7188
7189         /* Simulate the iterator */
7190         trace_init_global_iter(&iter);
7191
7192         for_each_tracing_cpu(cpu) {
7193                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7194         }
7195
7196         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7197
7198         /* don't look at user memory in panic mode */
7199         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7200
7201         switch (oops_dump_mode) {
7202         case DUMP_ALL:
7203                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7204                 break;
7205         case DUMP_ORIG:
7206                 iter.cpu_file = raw_smp_processor_id();
7207                 break;
7208         case DUMP_NONE:
7209                 goto out_enable;
7210         default:
7211                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7212                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7213         }
7214
7215         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7216
7217         /* Did function tracer already get disabled? */
7218         if (ftrace_is_dead()) {
7219                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7220                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7221         }
7222
7223         /*
7224          * We need to stop all tracing on all CPUs to read
7225          * the next buffer. This is a bit expensive, but is
7226          * not done often. We read everything we can,
7227          * and then release the locks again.
7228          */
7229
7230         while (!trace_empty(&iter)) {
7231
7232                 if (!cnt)
7233                         printk(KERN_TRACE "---------------------------------\n");
7234
7235                 cnt++;
7236
7237                 /* reset all but tr, trace, and overruns */
7238                 memset(&iter.seq, 0,
7239                        sizeof(struct trace_iterator) -
7240                        offsetof(struct trace_iterator, seq));
7241                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7242                 iter.pos = -1;
7243
7244                 if (trace_find_next_entry_inc(&iter) != NULL) {
7245                         int ret;
7246
7247                         ret = print_trace_line(&iter);
7248                         if (ret != TRACE_TYPE_NO_CONSUME)
7249                                 trace_consume(&iter);
7250                 }
7251                 touch_nmi_watchdog();
7252
7253                 trace_printk_seq(&iter.seq);
7254         }
7255
7256         if (!cnt)
7257                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7258         else
7259                 printk(KERN_TRACE "---------------------------------\n");
7260
7261  out_enable:
7262         tr->trace_flags |= old_userobj;
7263
7264         for_each_tracing_cpu(cpu) {
7265                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7266         }
7267         atomic_dec(&dump_running);
7268         local_irq_restore(flags);
7269 }
7270 EXPORT_SYMBOL_GPL(ftrace_dump);
7271
7272 __init static int tracer_alloc_buffers(void)
7273 {
7274         int ring_buf_size;
7275         int ret = -ENOMEM;
7276
7277         /*
7278          * Make sure we don't accidentally add more trace options
7279          * than we have bits for.
7280          */
7281         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7282
7283         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7284                 goto out;
7285
7286         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7287                 goto out_free_buffer_mask;
7288
7289         /* Only allocate trace_printk buffers if a trace_printk exists */
7290         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7291                 /* Must be called before global_trace.buffer is allocated */
7292                 trace_printk_init_buffers();
7293
7294         /* To save memory, keep the ring buffer size to its minimum */
7295         if (ring_buffer_expanded)
7296                 ring_buf_size = trace_buf_size;
7297         else
7298                 ring_buf_size = 1;
7299
7300         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7301         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7302
7303         raw_spin_lock_init(&global_trace.start_lock);
7304
7305         /* Used for event triggers */
7306         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7307         if (!temp_buffer)
7308                 goto out_free_cpumask;
7309
7310         if (trace_create_savedcmd() < 0)
7311                 goto out_free_temp_buffer;
7312
7313         /* TODO: make the number of buffers hot pluggable with CPUs */
7314         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7315                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7316                 WARN_ON(1);
7317                 goto out_free_savedcmd;
7318         }
7319
7320         if (global_trace.buffer_disabled)
7321                 tracing_off();
7322
7323         if (trace_boot_clock) {
7324                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7325                 if (ret < 0)
7326                         pr_warn("Trace clock %s not defined, going back to default\n",
7327                                 trace_boot_clock);
7328         }
7329
7330         /*
7331          * register_tracer() might reference current_trace, so it
7332          * needs to be set before we register anything. This is
7333          * just a bootstrap of current_trace anyway.
7334          */
7335         global_trace.current_trace = &nop_trace;
7336
7337         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7338
7339         ftrace_init_global_array_ops(&global_trace);
7340
7341         init_trace_flags_index(&global_trace);
7342
7343         register_tracer(&nop_trace);
7344
7345         /* All seems OK, enable tracing */
7346         tracing_disabled = 0;
7347
7348         atomic_notifier_chain_register(&panic_notifier_list,
7349                                        &trace_panic_notifier);
7350
7351         register_die_notifier(&trace_die_notifier);
7352
7353         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7354
7355         INIT_LIST_HEAD(&global_trace.systems);
7356         INIT_LIST_HEAD(&global_trace.events);
7357         list_add(&global_trace.list, &ftrace_trace_arrays);
7358
7359         apply_trace_boot_options();
7360
7361         register_snapshot_cmd();
7362
7363         return 0;
7364
7365 out_free_savedcmd:
7366         free_saved_cmdlines_buffer(savedcmd);
7367 out_free_temp_buffer:
7368         ring_buffer_free(temp_buffer);
7369 out_free_cpumask:
7370         free_cpumask_var(global_trace.tracing_cpumask);
7371 out_free_buffer_mask:
7372         free_cpumask_var(tracing_buffer_mask);
7373 out:
7374         return ret;
7375 }
7376
7377 void __init trace_init(void)
7378 {
7379         if (tracepoint_printk) {
7380                 tracepoint_print_iter =
7381                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7382                 if (WARN_ON(!tracepoint_print_iter))
7383                         tracepoint_printk = 0;
7384         }
7385         tracer_alloc_buffers();
7386         trace_event_init();
7387 }
7388
7389 __init static int clear_boot_tracer(void)
7390 {
7391         /*
7392          * The default bootup tracer name lives in an init section.
7393          * This function is called at late_initcall time. If the boot
7394          * tracer was not registered by then, clear it out, to prevent
7395          * a later registration from accessing the buffer that is
7396          * about to be freed.
7397          */
7398         if (!default_bootup_tracer)
7399                 return 0;
7400
7401         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7402                default_bootup_tracer);
7403         default_bootup_tracer = NULL;
7404
7405         return 0;
7406 }
7407
7408 fs_initcall(tracer_init_tracefs);
7409 late_initcall(clear_boot_tracer);