kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
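
/*
 * Illustrative usage (not part of this file): the dump mode can be chosen
 * either at boot or at run time, e.g.
 *
 *     ftrace_dump_on_oops              boot: dump the buffers of all CPUs
 *     ftrace_dump_on_oops=orig_cpu     boot: dump only the oopsing CPU
 *     echo 1 > /proc/sys/kernel/ftrace_dump_on_oops     (run time)
 *
 * These correspond to DUMP_ALL and DUMP_ORIG; see set_ftrace_dump_on_oops()
 * below for the boot-parameter parsing.
 */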
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * from "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
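
/*
 * Boot command-line sketch (illustrative values): the __setup() handlers
 * above make the following kernel parameters available, e.g.
 *
 *     ftrace=function             start the "function" tracer at boot
 *     trace_options=sym-offset    apply trace options before tracing starts
 *     trace_clock=global          select a trace clock early
 *     alloc_snapshot              allocate the snapshot buffer at boot
 *     traceoff_on_warning         stop tracing when a WARN*() is hit
 *     tp_printk                   pipe tracepoints to printk
 *
 * The option values shown are examples only.
 */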
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptors of those pages are used to hold this
265  * linked list: the lru item in each page descriptor links
266  * together the pages of that CPU's buffer.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 ring_buffer_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 {
324         u64 ts;
325
326         /* Early boot up does not have a buffer yet */
327         if (!buf->buffer)
328                 return trace_clock_local();
329
330         ts = ring_buffer_time_stamp(buf->buffer, cpu);
331         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332
333         return ts;
334 }
335
336 cycle_t ftrace_now(int cpu)
337 {
338         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
339 }
340
341 /**
342  * tracing_is_enabled - Show if global_trace has been enabled
343  *
344  * Shows if the global trace has been enabled or not. It uses the
345  * mirror flag "buffer_disabled" so that it can be read in fast paths
346  * such as the irqsoff tracer, but it may be inaccurate due to races.
347  * If you need to know the accurate state, use tracing_is_on(), which
348  * is a little slower but accurate.
349  */
350 int tracing_is_enabled(void)
351 {
352         /*
353          * For quick access (irqsoff uses this in fast path), just
354          * return the mirror variable of the state of the ring buffer.
355          * It's a little racy, but we don't really care.
356          */
357         smp_rmb();
358         return !global_trace.buffer_disabled;
359 }
360
361 /*
362  * trace_buf_size is the size in bytes that is allocated
363  * for a buffer. Note, the number of bytes is always rounded
364  * to page size.
365  *
366  * This number is purposely set to a low value (16384 entries).
367  * If a dump on oops happens, not having to wait for all that
368  * output is much appreciated. In any case, this is configurable
369  * at both boot time and run time.
370  */
371 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
372
373 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374
375 /* trace_types holds a linked list of available tracers. */
376 static struct tracer            *trace_types __read_mostly;
377
378 /*
379  * trace_types_lock is used to protect the trace_types list.
380  */
381 DEFINE_MUTEX(trace_types_lock);
382
383 /*
384  * Serialize access to the ring buffer.
385  *
386  * The ring buffer serializes readers, but that is only low-level protection.
387  * The validity of the events (returned by ring_buffer_peek() etc.)
388  * is not protected by the ring buffer.
389  *
390  * The content of events may become garbage if we allow another process to
391  * consume these events concurrently:
392  *   A) the page of the consumed events may become a normal page
393  *      (not a reader page) in the ring buffer, and this page will be
394  *      rewritten by the event producer.
395  *   B) the page of the consumed events may become a page for splice_read,
396  *      and this page will be returned to the system.
397  *
398  * These primitives allow multiple processes to access different CPU ring
399  * buffers concurrently.
400  *
401  * These primitives don't distinguish read-only from read-consume access.
402  * Multiple read-only accesses are also serialized.
403  */
404
405 #ifdef CONFIG_SMP
406 static DECLARE_RWSEM(all_cpu_access_lock);
407 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
408
409 static inline void trace_access_lock(int cpu)
410 {
411         if (cpu == RING_BUFFER_ALL_CPUS) {
412                 /* gain it for accessing the whole ring buffer. */
413                 down_write(&all_cpu_access_lock);
414         } else {
415                 /* gain it for accessing a cpu ring buffer. */
416
417                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418                 down_read(&all_cpu_access_lock);
419
420                 /* Secondly block other access to this @cpu ring buffer. */
421                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
422         }
423 }
424
425 static inline void trace_access_unlock(int cpu)
426 {
427         if (cpu == RING_BUFFER_ALL_CPUS) {
428                 up_write(&all_cpu_access_lock);
429         } else {
430                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
431                 up_read(&all_cpu_access_lock);
432         }
433 }
434
435 static inline void trace_access_lock_init(void)
436 {
437         int cpu;
438
439         for_each_possible_cpu(cpu)
440                 mutex_init(&per_cpu(cpu_access_lock, cpu));
441 }
442
443 #else
444
445 static DEFINE_MUTEX(access_lock);
446
447 static inline void trace_access_lock(int cpu)
448 {
449         (void)cpu;
450         mutex_lock(&access_lock);
451 }
452
453 static inline void trace_access_unlock(int cpu)
454 {
455         (void)cpu;
456         mutex_unlock(&access_lock);
457 }
458
459 static inline void trace_access_lock_init(void)
460 {
461 }
462
463 #endif
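
/*
 * Usage sketch for the locking primitives above; readers elsewhere in this
 * file follow this pattern when consuming events:
 *
 *     trace_access_lock(cpu);          // or RING_BUFFER_ALL_CPUS
 *     ... peek at or consume events from that CPU's buffer ...
 *     trace_access_unlock(cpu);
 */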
464
465 #ifdef CONFIG_STACKTRACE
466 static void __ftrace_trace_stack(struct ring_buffer *buffer,
467                                  unsigned long flags,
468                                  int skip, int pc, struct pt_regs *regs);
469 static inline void ftrace_trace_stack(struct trace_array *tr,
470                                       struct ring_buffer *buffer,
471                                       unsigned long flags,
472                                       int skip, int pc, struct pt_regs *regs);
473
474 #else
475 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
476                                         unsigned long flags,
477                                         int skip, int pc, struct pt_regs *regs)
478 {
479 }
480 static inline void ftrace_trace_stack(struct trace_array *tr,
481                                       struct ring_buffer *buffer,
482                                       unsigned long flags,
483                                       int skip, int pc, struct pt_regs *regs)
484 {
485 }
486
487 #endif
488
489 static void tracer_tracing_on(struct trace_array *tr)
490 {
491         if (tr->trace_buffer.buffer)
492                 ring_buffer_record_on(tr->trace_buffer.buffer);
493         /*
494          * This flag is looked at when buffers haven't been allocated
495          * yet, or by some tracers (like irqsoff) that just want to
496          * know if the ring buffer has been disabled, but can handle
497          * races where it gets disabled while we still do a record.
498          * As the check is in the fast path of the tracers, it is more
499          * important to be fast than accurate.
500          */
501         tr->buffer_disabled = 0;
502         /* Make the flag seen by readers */
503         smp_wmb();
504 }
505
506 /**
507  * tracing_on - enable tracing buffers
508  *
509  * This function enables tracing buffers that may have been
510  * disabled with tracing_off.
511  */
512 void tracing_on(void)
513 {
514         tracer_tracing_on(&global_trace);
515 }
516 EXPORT_SYMBOL_GPL(tracing_on);
517
518 /**
519  * __trace_puts - write a constant string into the trace buffer.
520  * @ip:    The address of the caller
521  * @str:   The constant string to write
522  * @size:  The size of the string.
523  */
524 int __trace_puts(unsigned long ip, const char *str, int size)
525 {
526         struct ring_buffer_event *event;
527         struct ring_buffer *buffer;
528         struct print_entry *entry;
529         unsigned long irq_flags;
530         int alloc;
531         int pc;
532
533         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534                 return 0;
535
536         pc = preempt_count();
537
538         if (unlikely(tracing_selftest_running || tracing_disabled))
539                 return 0;
540
541         alloc = sizeof(*entry) + size + 2; /* possible \n added */
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip = ip;
552
553         memcpy(&entry->buf, str, size);
554
555         /* Add a newline if necessary */
556         if (entry->buf[size - 1] != '\n') {
557                 entry->buf[size] = '\n';
558                 entry->buf[size + 1] = '\0';
559         } else
560                 entry->buf[size] = '\0';
561
562         __buffer_unlock_commit(buffer, event);
563         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564
565         return size;
566 }
567 EXPORT_SYMBOL_GPL(__trace_puts);
568
569 /**
570  * __trace_bputs - write the pointer to a constant string into trace buffer
571  * @ip:    The address of the caller
572  * @str:   The constant string whose pointer is written into the buffer
573  */
574 int __trace_bputs(unsigned long ip, const char *str)
575 {
576         struct ring_buffer_event *event;
577         struct ring_buffer *buffer;
578         struct bputs_entry *entry;
579         unsigned long irq_flags;
580         int size = sizeof(struct bputs_entry);
581         int pc;
582
583         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584                 return 0;
585
586         pc = preempt_count();
587
588         if (unlikely(tracing_selftest_running || tracing_disabled))
589                 return 0;
590
591         local_save_flags(irq_flags);
592         buffer = global_trace.trace_buffer.buffer;
593         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594                                           irq_flags, pc);
595         if (!event)
596                 return 0;
597
598         entry = ring_buffer_event_data(event);
599         entry->ip                       = ip;
600         entry->str                      = str;
601
602         __buffer_unlock_commit(buffer, event);
603         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604
605         return 1;
606 }
607 EXPORT_SYMBOL_GPL(__trace_bputs);
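
/*
 * Note: __trace_puts() and __trace_bputs() are normally reached through the
 * trace_puts() macro (include/linux/kernel.h), which picks one of them based
 * on whether the string is a compile-time constant. A minimal usage sketch:
 *
 *     trace_puts("hit the slow path\n");
 */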
608
609 #ifdef CONFIG_TRACER_SNAPSHOT
610 /**
611  * tracing_snapshot - take a snapshot of the current buffer.
612  *
613  * This causes a swap between the snapshot buffer and the current live
614  * tracing buffer. You can use this to take snapshots of the live
615  * trace when some condition is triggered, but continue to trace.
616  *
617  * Note, make sure to allocate the snapshot with either
618  * a tracing_snapshot_alloc(), or by doing it manually
619  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
620  *
621  * If the snapshot buffer is not allocated, it will stop tracing.
622  * Basically making a permanent snapshot.
623  */
624 void tracing_snapshot(void)
625 {
626         struct trace_array *tr = &global_trace;
627         struct tracer *tracer = tr->current_trace;
628         unsigned long flags;
629
630         if (in_nmi()) {
631                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
632                 internal_trace_puts("*** snapshot is being ignored        ***\n");
633                 return;
634         }
635
636         if (!tr->allocated_snapshot) {
637                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
638                 internal_trace_puts("*** stopping trace here!   ***\n");
639                 tracing_off();
640                 return;
641         }
642
643         /* Note, snapshot can not be used when the tracer uses it */
644         if (tracer->use_max_tr) {
645                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
646                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647                 return;
648         }
649
650         local_irq_save(flags);
651         update_max_tr(tr, current, smp_processor_id());
652         local_irq_restore(flags);
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot);
655
656 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
657                                         struct trace_buffer *size_buf, int cpu_id);
658 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
659
660 static int alloc_snapshot(struct trace_array *tr)
661 {
662         int ret;
663
664         if (!tr->allocated_snapshot) {
665
666                 /* allocate spare buffer */
667                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
668                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
669                 if (ret < 0)
670                         return ret;
671
672                 tr->allocated_snapshot = true;
673         }
674
675         return 0;
676 }
677
678 static void free_snapshot(struct trace_array *tr)
679 {
680         /*
681          * We don't free the ring buffer; instead, we resize it, because
682          * the max_tr ring buffer has some state (e.g. ring->clock) and
683          * we want to preserve it.
684          */
685         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
686         set_buffer_entries(&tr->max_buffer, 1);
687         tracing_reset_online_cpus(&tr->max_buffer);
688         tr->allocated_snapshot = false;
689 }
690
691 /**
692  * tracing_alloc_snapshot - allocate snapshot buffer.
693  *
694  * This only allocates the snapshot buffer if it isn't already
695  * allocated - it doesn't also take a snapshot.
696  *
697  * This is meant to be used in cases where the snapshot buffer needs
698  * to be set up for events that can't sleep but need to be able to
699  * trigger a snapshot.
700  */
701 int tracing_alloc_snapshot(void)
702 {
703         struct trace_array *tr = &global_trace;
704         int ret;
705
706         ret = alloc_snapshot(tr);
707         WARN_ON(ret < 0);
708
709         return ret;
710 }
711 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
712
713 /**
714  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
715  *
716  * This is similar to tracing_snapshot(), but it will allocate the
717  * snapshot buffer if it isn't already allocated. Use this only
718  * where it is safe to sleep, as the allocation may sleep.
719  *
720  * This causes a swap between the snapshot buffer and the current live
721  * tracing buffer. You can use this to take snapshots of the live
722  * trace when some condition is triggered, but continue to trace.
723  */
724 void tracing_snapshot_alloc(void)
725 {
726         int ret;
727
728         ret = tracing_alloc_snapshot();
729         if (ret < 0)
730                 return;
731
732         tracing_snapshot();
733 }
734 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
735 #else
736 void tracing_snapshot(void)
737 {
738         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
739 }
740 EXPORT_SYMBOL_GPL(tracing_snapshot);
741 int tracing_alloc_snapshot(void)
742 {
743         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
744         return -ENODEV;
745 }
746 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 void tracing_snapshot_alloc(void)
748 {
749         /* Give warning */
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #endif /* CONFIG_TRACER_SNAPSHOT */
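
/*
 * In-kernel usage sketch (hypothetical caller): code that wants to capture
 * the moment a condition triggers could do
 *
 *     tracing_snapshot_alloc();   // may sleep: allocates (if needed), then snapshots
 *
 * or, if the snapshot buffer was set up earlier (tracing_alloc_snapshot() or
 * "echo 1 > /sys/kernel/debug/tracing/snapshot"), simply
 *
 *     tracing_snapshot();         // usable from atomic context, but not NMI
 */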
754
755 static void tracer_tracing_off(struct trace_array *tr)
756 {
757         if (tr->trace_buffer.buffer)
758                 ring_buffer_record_off(tr->trace_buffer.buffer);
759         /*
760          * This flag is looked at when buffers haven't been allocated
761          * yet, or by some tracers (like irqsoff) that just want to
762          * know if the ring buffer has been disabled, but can handle
763          * races where it gets disabled while we still do a record.
764          * As the check is in the fast path of the tracers, it is more
765          * important to be fast than accurate.
766          */
767         tr->buffer_disabled = 1;
768         /* Make the flag seen by readers */
769         smp_wmb();
770 }
771
772 /**
773  * tracing_off - turn off tracing buffers
774  *
775  * This function stops the tracing buffers from recording data.
776  * It does not disable any overhead the tracers themselves may
777  * be causing. This function simply causes all recording to
778  * the ring buffers to fail.
779  */
780 void tracing_off(void)
781 {
782         tracer_tracing_off(&global_trace);
783 }
784 EXPORT_SYMBOL_GPL(tracing_off);
785
786 void disable_trace_on_warning(void)
787 {
788         if (__disable_trace_on_warning)
789                 tracing_off();
790 }
791
792 /**
793  * tracer_tracing_is_on - show the real state of the ring buffer
794  * @tr: the trace array whose ring buffer state is queried
795  *
796  * Shows the real state of the ring buffer: whether it is enabled or not.
797  */
798 static int tracer_tracing_is_on(struct trace_array *tr)
799 {
800         if (tr->trace_buffer.buffer)
801                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
802         return !tr->buffer_disabled;
803 }
804
805 /**
806  * tracing_is_on - show whether the ring buffers are enabled
807  */
808 int tracing_is_on(void)
809 {
810         return tracer_tracing_is_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_is_on);
813
814 static int __init set_buf_size(char *str)
815 {
816         unsigned long buf_size;
817
818         if (!str)
819                 return 0;
820         buf_size = memparse(str, &str);
821         /* nr_entries can not be zero */
822         if (buf_size == 0)
823                 return 0;
824         trace_buf_size = buf_size;
825         return 1;
826 }
827 __setup("trace_buf_size=", set_buf_size);
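
/*
 * Example (illustrative): set_buf_size() uses memparse(), so the usual
 * size suffixes work on the command line, e.g.
 *
 *     trace_buf_size=1M        one megabyte per-CPU buffer
 *     trace_buf_size=65536     bytes, rounded up to page size
 */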
828
829 static int __init set_tracing_thresh(char *str)
830 {
831         unsigned long threshold;
832         int ret;
833
834         if (!str)
835                 return 0;
836         ret = kstrtoul(str, 0, &threshold);
837         if (ret < 0)
838                 return 0;
839         tracing_thresh = threshold * 1000;
840         return 1;
841 }
842 __setup("tracing_thresh=", set_tracing_thresh);
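
/*
 * Example: "tracing_thresh=100" sets the threshold to 100 microseconds
 * (stored internally as nanoseconds, hence the "* 1000" above).
 */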
843
844 unsigned long nsecs_to_usecs(unsigned long nsecs)
845 {
846         return nsecs / 1000;
847 }
848
849 /*
850  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
851  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
852  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
853  * of strings in the order that the enums were defined.
854  */
855 #undef C
856 #define C(a, b) b
857
858 /* These must match the bit positions in trace_iterator_flags */
859 static const char *trace_options[] = {
860         TRACE_FLAGS
861         NULL
862 };
863
864 static struct {
865         u64 (*func)(void);
866         const char *name;
867         int in_ns;              /* is this clock in nanoseconds? */
868 } trace_clocks[] = {
869         { trace_clock_local,            "local",        1 },
870         { trace_clock_global,           "global",       1 },
871         { trace_clock_counter,          "counter",      0 },
872         { trace_clock_jiffies,          "uptime",       0 },
873         { trace_clock,                  "perf",         1 },
874         { ktime_get_mono_fast_ns,       "mono",         1 },
875         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
876         ARCH_TRACE_CLOCKS
877 };
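
/*
 * The clock names above are what user space uses to select a clock, either
 * via the "trace_clock=" boot parameter handled earlier or, assuming tracefs
 * is mounted, at run time:
 *
 *     echo global > /sys/kernel/debug/tracing/trace_clock
 */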
878
879 /*
880  * trace_parser_get_init - gets the buffer for trace parser
881  */
882 int trace_parser_get_init(struct trace_parser *parser, int size)
883 {
884         memset(parser, 0, sizeof(*parser));
885
886         parser->buffer = kmalloc(size, GFP_KERNEL);
887         if (!parser->buffer)
888                 return 1;
889
890         parser->size = size;
891         return 0;
892 }
893
894 /*
895  * trace_parser_put - frees the buffer for trace parser
896  */
897 void trace_parser_put(struct trace_parser *parser)
898 {
899         kfree(parser->buffer);
900 }
901
902 /*
903  * trace_get_user - reads the user input string separated by space
904  * (matched by isspace(ch))
905  *
906  * For each string found the 'struct trace_parser' is updated,
907  * and the function returns.
908  *
909  * Returns number of bytes read.
910  *
911  * See kernel/trace/trace.h for 'struct trace_parser' details.
912  */
913 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
914         size_t cnt, loff_t *ppos)
915 {
916         char ch;
917         size_t read = 0;
918         ssize_t ret;
919
920         if (!*ppos)
921                 trace_parser_clear(parser);
922
923         ret = get_user(ch, ubuf++);
924         if (ret)
925                 goto out;
926
927         read++;
928         cnt--;
929
930         /*
931          * If the parser did not finish with the last write,
932          * continue reading the user input without skipping spaces.
933          */
934         if (!parser->cont) {
935                 /* skip white space */
936                 while (cnt && isspace(ch)) {
937                         ret = get_user(ch, ubuf++);
938                         if (ret)
939                                 goto out;
940                         read++;
941                         cnt--;
942                 }
943
944                 /* only spaces were written */
945                 if (isspace(ch)) {
946                         *ppos += read;
947                         ret = read;
948                         goto out;
949                 }
950
951                 parser->idx = 0;
952         }
953
954         /* read the non-space input */
955         while (cnt && !isspace(ch)) {
956                 if (parser->idx < parser->size - 1)
957                         parser->buffer[parser->idx++] = ch;
958                 else {
959                         ret = -EINVAL;
960                         goto out;
961                 }
962                 ret = get_user(ch, ubuf++);
963                 if (ret)
964                         goto out;
965                 read++;
966                 cnt--;
967         }
968
969         /* We either got finished input or we have to wait for another call. */
970         if (isspace(ch)) {
971                 parser->buffer[parser->idx] = 0;
972                 parser->cont = false;
973         } else if (parser->idx < parser->size - 1) {
974                 parser->cont = true;
975                 parser->buffer[parser->idx++] = ch;
976         } else {
977                 ret = -EINVAL;
978                 goto out;
979         }
980
981         *ppos += read;
982         ret = read;
983
984 out:
985         return ret;
986 }
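
/*
 * Worked example (sketch): if user space writes "foo bar\n" to a file whose
 * ->write handler uses trace_get_user(), the first call consumes "foo" and
 * returns a short count, so the write is retried for the remainder and the
 * next call yields "bar". A buffer that ends in the middle of a token sets
 * parser->cont, so the following call continues that token rather than
 * starting a new one.
 */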
987
988 /* TODO add a seq_buf_to_buffer() */
989 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 {
991         int len;
992
993         if (trace_seq_used(s) <= s->seq.readpos)
994                 return -EBUSY;
995
996         len = trace_seq_used(s) - s->seq.readpos;
997         if (cnt > len)
998                 cnt = len;
999         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000
1001         s->seq.readpos += cnt;
1002         return cnt;
1003 }
1004
1005 unsigned long __read_mostly     tracing_thresh;
1006
1007 #ifdef CONFIG_TRACER_MAX_TRACE
1008 /*
1009  * Copy the new maximum trace into the separate maximum-trace
1010  * structure. (this way the maximum trace is permanently saved,
1011  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1012  */
1013 static void
1014 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1015 {
1016         struct trace_buffer *trace_buf = &tr->trace_buffer;
1017         struct trace_buffer *max_buf = &tr->max_buffer;
1018         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1019         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020
1021         max_buf->cpu = cpu;
1022         max_buf->time_start = data->preempt_timestamp;
1023
1024         max_data->saved_latency = tr->max_latency;
1025         max_data->critical_start = data->critical_start;
1026         max_data->critical_end = data->critical_end;
1027
1028         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029         max_data->pid = tsk->pid;
1030         /*
1031          * If tsk == current, then use current_uid(), as that does not use
1032          * RCU. The irq tracer can be called out of RCU scope.
1033          */
1034         if (tsk == current)
1035                 max_data->uid = current_uid();
1036         else
1037                 max_data->uid = task_uid(tsk);
1038
1039         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1040         max_data->policy = tsk->policy;
1041         max_data->rt_priority = tsk->rt_priority;
1042
1043         /* record this task's comm */
1044         tracing_record_cmdline(tsk);
1045 }
1046
1047 /**
1048  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1049  * @tr: tracer
1050  * @tsk: the task with the latency
1051  * @cpu: The cpu that initiated the trace.
1052  *
1053  * Flip the buffers between the @tr and the max_tr and record information
1054  * about which task was the cause of this latency.
1055  */
1056 void
1057 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1058 {
1059         struct ring_buffer *buf;
1060
1061         if (tr->stop_count)
1062                 return;
1063
1064         WARN_ON_ONCE(!irqs_disabled());
1065
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         buf = tr->trace_buffer.buffer;
1075         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1076         tr->max_buffer.buffer = buf;
1077
1078         __update_max_tr(tr, tsk, cpu);
1079         arch_spin_unlock(&tr->max_lock);
1080 }
1081
1082 /**
1083  * update_max_tr_single - only copy one trace over, and reset the rest
1084  * @tr: tracer
1085  * @tsk: task with the latency
1086  * @cpu: the cpu of the buffer to copy.
1087  *
1088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089  */
1090 void
1091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1092 {
1093         int ret;
1094
1095         if (tr->stop_count)
1096                 return;
1097
1098         WARN_ON_ONCE(!irqs_disabled());
1099         if (!tr->allocated_snapshot) {
1100                 /* Only the nop tracer should hit this when disabling */
1101                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102                 return;
1103         }
1104
1105         arch_spin_lock(&tr->max_lock);
1106
1107         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108
1109         if (ret == -EBUSY) {
1110                 /*
1111                  * We failed to swap the buffer due to a commit taking
1112                  * place on this CPU. We fail to record, but we reset
1113                  * the max trace buffer (no one writes directly to it)
1114                  * and flag that it failed.
1115                  */
1116                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117                         "Failed to swap buffers due to commit in progress\n");
1118         }
1119
1120         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121
1122         __update_max_tr(tr, tsk, cpu);
1123         arch_spin_unlock(&tr->max_lock);
1124 }
1125 #endif /* CONFIG_TRACER_MAX_TRACE */
1126
1127 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128 {
1129         /* Iterators are static, they should be filled or empty */
1130         if (trace_buffer_iter(iter, iter->cpu_file))
1131                 return 0;
1132
1133         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1134                                 full);
1135 }
1136
1137 #ifdef CONFIG_FTRACE_STARTUP_TEST
1138 static int run_tracer_selftest(struct tracer *type)
1139 {
1140         struct trace_array *tr = &global_trace;
1141         struct tracer *saved_tracer = tr->current_trace;
1142         int ret;
1143
1144         if (!type->selftest || tracing_selftest_disabled)
1145                 return 0;
1146
1147         /*
1148          * Run a selftest on this tracer.
1149          * Here we reset the trace buffer, and set the current
1150          * tracer to be this tracer. The tracer can then run some
1151          * internal tracing to verify that everything is in order.
1152          * If we fail, we do not register this tracer.
1153          */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156         tr->current_trace = type;
1157
1158 #ifdef CONFIG_TRACER_MAX_TRACE
1159         if (type->use_max_tr) {
1160                 /* If we expanded the buffers, make sure the max is expanded too */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1163                                            RING_BUFFER_ALL_CPUS);
1164                 tr->allocated_snapshot = true;
1165         }
1166 #endif
1167
1168         /* the test is responsible for initializing and enabling */
1169         pr_info("Testing tracer %s: ", type->name);
1170         ret = type->selftest(type, tr);
1171         /* the test is responsible for resetting too */
1172         tr->current_trace = saved_tracer;
1173         if (ret) {
1174                 printk(KERN_CONT "FAILED!\n");
1175                 /* Add the warning after printing 'FAILED' */
1176                 WARN_ON(1);
1177                 return -1;
1178         }
1179         /* Only reset on passing, to avoid touching corrupted buffers */
1180         tracing_reset_online_cpus(&tr->trace_buffer);
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 tr->allocated_snapshot = false;
1185
1186                 /* Shrink the max buffer again */
1187                 if (ring_buffer_expanded)
1188                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1189                                            RING_BUFFER_ALL_CPUS);
1190         }
1191 #endif
1192
1193         printk(KERN_CONT "PASSED\n");
1194         return 0;
1195 }
1196 #else
1197 static inline int run_tracer_selftest(struct tracer *type)
1198 {
1199         return 0;
1200 }
1201 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1202
1203 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1204
1205 static void __init apply_trace_boot_options(void);
1206
1207 /**
1208  * register_tracer - register a tracer with the ftrace system.
1209  * @type: the plugin for the tracer
1210  *
1211  * Register a new plugin tracer.
1212  */
1213 int __init register_tracer(struct tracer *type)
1214 {
1215         struct tracer *t;
1216         int ret = 0;
1217
1218         if (!type->name) {
1219                 pr_info("Tracer must have a name\n");
1220                 return -1;
1221         }
1222
1223         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1225                 return -1;
1226         }
1227
1228         mutex_lock(&trace_types_lock);
1229
1230         tracing_selftest_running = true;
1231
1232         for (t = trace_types; t; t = t->next) {
1233                 if (strcmp(type->name, t->name) == 0) {
1234                         /* already found */
1235                         pr_info("Tracer %s already registered\n",
1236                                 type->name);
1237                         ret = -1;
1238                         goto out;
1239                 }
1240         }
1241
1242         if (!type->set_flag)
1243                 type->set_flag = &dummy_set_flag;
1244         if (!type->flags) {
1245                 /* allocate a dummy tracer_flags */
1246                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247                 if (!type->flags) {
1248                         ret = -ENOMEM;
1249                         goto out;
1250                 }
1251                 type->flags->val = 0;
1252                 type->flags->opts = dummy_tracer_opt;
1253         } else
1254                 if (!type->flags->opts)
1255                         type->flags->opts = dummy_tracer_opt;
1256
1257         /* store the tracer for __set_tracer_option */
1258         type->flags->trace = type;
1259
1260         ret = run_tracer_selftest(type);
1261         if (ret < 0)
1262                 goto out;
1263
1264         type->next = trace_types;
1265         trace_types = type;
1266         add_tracer_options(&global_trace, type);
1267
1268  out:
1269         tracing_selftest_running = false;
1270         mutex_unlock(&trace_types_lock);
1271
1272         if (ret || !default_bootup_tracer)
1273                 goto out_unlock;
1274
1275         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1276                 goto out_unlock;
1277
1278         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1279         /* Do we want this tracer to start on bootup? */
1280         tracing_set_tracer(&global_trace, type->name);
1281         default_bootup_tracer = NULL;
1282
1283         apply_trace_boot_options();
1284
1285         /* disable other selftests, since this will break them. */
1286         tracing_selftest_disabled = true;
1287 #ifdef CONFIG_FTRACE_STARTUP_TEST
1288         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1289                type->name);
1290 #endif
1291
1292  out_unlock:
1293         return ret;
1294 }
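
/*
 * Registration sketch (hypothetical tracer; see struct tracer in trace.h for
 * the full set of callbacks). A minimal plugin only needs a name and an init
 * callback; missing flag handling is filled in with the dummies above.
 *
 *     static int my_tracer_init(struct trace_array *tr)
 *     {
 *             return 0;
 *     }
 *
 *     static struct tracer my_tracer __read_mostly = {
 *             .name = "my_tracer",
 *             .init = my_tracer_init,
 *     };
 *
 *     // from an __init function:
 *     //     register_tracer(&my_tracer);
 */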
1295
1296 void tracing_reset(struct trace_buffer *buf, int cpu)
1297 {
1298         struct ring_buffer *buffer = buf->buffer;
1299
1300         if (!buffer)
1301                 return;
1302
1303         ring_buffer_record_disable(buffer);
1304
1305         /* Make sure all commits have finished */
1306         synchronize_sched();
1307         ring_buffer_reset_cpu(buffer, cpu);
1308
1309         ring_buffer_record_enable(buffer);
1310 }
1311
1312 void tracing_reset_online_cpus(struct trace_buffer *buf)
1313 {
1314         struct ring_buffer *buffer = buf->buffer;
1315         int cpu;
1316
1317         if (!buffer)
1318                 return;
1319
1320         ring_buffer_record_disable(buffer);
1321
1322         /* Make sure all commits have finished */
1323         synchronize_sched();
1324
1325         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326
1327         for_each_online_cpu(cpu)
1328                 ring_buffer_reset_cpu(buffer, cpu);
1329
1330         ring_buffer_record_enable(buffer);
1331 }
1332
1333 /* Must have trace_types_lock held */
1334 void tracing_reset_all_online_cpus(void)
1335 {
1336         struct trace_array *tr;
1337
1338         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339                 tracing_reset_online_cpus(&tr->trace_buffer);
1340 #ifdef CONFIG_TRACER_MAX_TRACE
1341                 tracing_reset_online_cpus(&tr->max_buffer);
1342 #endif
1343         }
1344 }
1345
1346 #define SAVED_CMDLINES_DEFAULT 128
1347 #define NO_CMDLINE_MAP UINT_MAX
1348 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 struct saved_cmdlines_buffer {
1350         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1351         unsigned *map_cmdline_to_pid;
1352         unsigned cmdline_num;
1353         int cmdline_idx;
1354         char *saved_cmdlines;
1355 };
1356 static struct saved_cmdlines_buffer *savedcmd;
1357
1358 /* temporarily disable recording */
1359 static atomic_t trace_record_cmdline_disabled __read_mostly;
1360
1361 static inline char *get_saved_cmdlines(int idx)
1362 {
1363         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1364 }
1365
1366 static inline void set_cmdline(int idx, const char *cmdline)
1367 {
1368         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1369 }
1370
1371 static int allocate_cmdlines_buffer(unsigned int val,
1372                                     struct saved_cmdlines_buffer *s)
1373 {
1374         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1375                                         GFP_KERNEL);
1376         if (!s->map_cmdline_to_pid)
1377                 return -ENOMEM;
1378
1379         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1380         if (!s->saved_cmdlines) {
1381                 kfree(s->map_cmdline_to_pid);
1382                 return -ENOMEM;
1383         }
1384
1385         s->cmdline_idx = 0;
1386         s->cmdline_num = val;
1387         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1388                sizeof(s->map_pid_to_cmdline));
1389         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1390                val * sizeof(*s->map_cmdline_to_pid));
1391
1392         return 0;
1393 }
1394
1395 static int trace_create_savedcmd(void)
1396 {
1397         int ret;
1398
1399         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400         if (!savedcmd)
1401                 return -ENOMEM;
1402
1403         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1404         if (ret < 0) {
1405                 kfree(savedcmd);
1406                 savedcmd = NULL;
1407                 return -ENOMEM;
1408         }
1409
1410         return 0;
1411 }
1412
1413 int is_tracing_stopped(void)
1414 {
1415         return global_trace.stop_count;
1416 }
1417
1418 /**
1419  * tracing_start - quick start of the tracer
1420  *
1421  * If tracing is enabled but was stopped by tracing_stop,
1422  * this will start the tracer back up.
1423  */
1424 void tracing_start(void)
1425 {
1426         struct ring_buffer *buffer;
1427         unsigned long flags;
1428
1429         if (tracing_disabled)
1430                 return;
1431
1432         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1433         if (--global_trace.stop_count) {
1434                 if (global_trace.stop_count < 0) {
1435                         /* Someone screwed up their debugging */
1436                         WARN_ON_ONCE(1);
1437                         global_trace.stop_count = 0;
1438                 }
1439                 goto out;
1440         }
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_enable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_enable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_start_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         if (tracing_disabled)
1467                 return;
1468
1469         /* If global, we need to also start the max tracer */
1470         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1471                 return tracing_start();
1472
1473         raw_spin_lock_irqsave(&tr->start_lock, flags);
1474
1475         if (--tr->stop_count) {
1476                 if (tr->stop_count < 0) {
1477                         /* Someone screwed up their debugging */
1478                         WARN_ON_ONCE(1);
1479                         tr->stop_count = 0;
1480                 }
1481                 goto out;
1482         }
1483
1484         buffer = tr->trace_buffer.buffer;
1485         if (buffer)
1486                 ring_buffer_record_enable(buffer);
1487
1488  out:
1489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 }
1491
1492 /**
1493  * tracing_stop - quick stop of the tracer
1494  *
1495  * Light weight way to stop tracing. Use in conjunction with
1496  * tracing_start.
1497  */
1498 void tracing_stop(void)
1499 {
1500         struct ring_buffer *buffer;
1501         unsigned long flags;
1502
1503         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1504         if (global_trace.stop_count++)
1505                 goto out;
1506
1507         /* Prevent the buffers from switching */
1508         arch_spin_lock(&global_trace.max_lock);
1509
1510         buffer = global_trace.trace_buffer.buffer;
1511         if (buffer)
1512                 ring_buffer_record_disable(buffer);
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515         buffer = global_trace.max_buffer.buffer;
1516         if (buffer)
1517                 ring_buffer_record_disable(buffer);
1518 #endif
1519
1520         arch_spin_unlock(&global_trace.max_lock);
1521
1522  out:
1523         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1524 }
1525
1526 static void tracing_stop_tr(struct trace_array *tr)
1527 {
1528         struct ring_buffer *buffer;
1529         unsigned long flags;
1530
1531         /* If global, we need to also stop the max tracer */
1532         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1533                 return tracing_stop();
1534
1535         raw_spin_lock_irqsave(&tr->start_lock, flags);
1536         if (tr->stop_count++)
1537                 goto out;
1538
1539         buffer = tr->trace_buffer.buffer;
1540         if (buffer)
1541                 ring_buffer_record_disable(buffer);
1542
1543  out:
1544         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 }
1546
1547 void trace_stop_cmdline_recording(void);
1548
1549 static int trace_save_cmdline(struct task_struct *tsk)
1550 {
1551         unsigned pid, idx;
1552
1553         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554                 return 0;
1555
1556         /*
1557          * It's not the end of the world if we don't get
1558          * the lock, but we also don't want to spin
1559          * nor do we want to disable interrupts,
1560          * so if we miss here, then better luck next time.
1561          */
1562         if (!arch_spin_trylock(&trace_cmdline_lock))
1563                 return 0;
1564
1565         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566         if (idx == NO_CMDLINE_MAP) {
1567                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568
1569                 /*
1570                  * Check whether the cmdline buffer at idx has a pid
1571                  * mapped. We are going to overwrite that entry so we
1572                  * need to clear the map_pid_to_cmdline. Otherwise we
1573                  * would read the new comm for the old pid.
1574                  */
1575                 pid = savedcmd->map_cmdline_to_pid[idx];
1576                 if (pid != NO_CMDLINE_MAP)
1577                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578
1579                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1580                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581
1582                 savedcmd->cmdline_idx = idx;
1583         }
1584
1585         set_cmdline(idx, tsk->comm);
1586
1587         arch_spin_unlock(&trace_cmdline_lock);
1588
1589         return 1;
1590 }
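
/*
 * Example of how the maps above relate (values are hypothetical): after
 * trace_save_cmdline() runs for pid 1234 ("bash") and slot 5 is chosen,
 *
 *     savedcmd->map_pid_to_cmdline[1234] == 5
 *     savedcmd->map_cmdline_to_pid[5]    == 1234
 *     get_saved_cmdlines(5)              == "bash"
 *
 * which lets __trace_find_cmdline() below recover the comm for that pid.
 */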
1591
1592 static void __trace_find_cmdline(int pid, char comm[])
1593 {
1594         unsigned map;
1595
1596         if (!pid) {
1597                 strcpy(comm, "<idle>");
1598                 return;
1599         }
1600
1601         if (WARN_ON_ONCE(pid < 0)) {
1602                 strcpy(comm, "<XXX>");
1603                 return;
1604         }
1605
1606         if (pid > PID_MAX_DEFAULT) {
1607                 strcpy(comm, "<...>");
1608                 return;
1609         }
1610
1611         map = savedcmd->map_pid_to_cmdline[pid];
1612         if (map != NO_CMDLINE_MAP)
1613                 strcpy(comm, get_saved_cmdlines(map));
1614         else
1615                 strcpy(comm, "<...>");
1616 }
1617
1618 void trace_find_cmdline(int pid, char comm[])
1619 {
1620         preempt_disable();
1621         arch_spin_lock(&trace_cmdline_lock);
1622
1623         __trace_find_cmdline(pid, comm);
1624
1625         arch_spin_unlock(&trace_cmdline_lock);
1626         preempt_enable();
1627 }
1628
1629 void tracing_record_cmdline(struct task_struct *tsk)
1630 {
1631         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632                 return;
1633
1634         if (!__this_cpu_read(trace_cmdline_save))
1635                 return;
1636
1637         if (trace_save_cmdline(tsk))
1638                 __this_cpu_write(trace_cmdline_save, false);
1639 }
1640
1641 void
1642 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1643                              int pc)
1644 {
1645         struct task_struct *tsk = current;
1646
1647         entry->preempt_count            = pc & 0xff;
1648         entry->pid                      = (tsk) ? tsk->pid : 0;
1649         entry->flags =
1650 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 #else
1653                 TRACE_FLAG_IRQS_NOSUPPORT |
1654 #endif
1655                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 struct ring_buffer_event *
1664 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1665                           int type,
1666                           unsigned long len,
1667                           unsigned long flags, int pc)
1668 {
1669         struct ring_buffer_event *event;
1670
1671         event = ring_buffer_lock_reserve(buffer, len);
1672         if (event != NULL) {
1673                 struct trace_entry *ent = ring_buffer_event_data(event);
1674
1675                 tracing_generic_entry_update(ent, flags, pc);
1676                 ent->type = type;
1677         }
1678
1679         return event;
1680 }
1681
1682 void
1683 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1684 {
1685         __this_cpu_write(trace_cmdline_save, true);
1686         ring_buffer_unlock_commit(buffer, event);
1687 }
1688
1689 void trace_buffer_unlock_commit(struct trace_array *tr,
1690                                 struct ring_buffer *buffer,
1691                                 struct ring_buffer_event *event,
1692                                 unsigned long flags, int pc)
1693 {
1694         __buffer_unlock_commit(buffer, event);
1695
1696         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1697         ftrace_trace_userstack(buffer, flags, pc);
1698 }
1699 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
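
/*
 * Illustrative sketch of the reserve/fill/commit pattern that the
 * helpers above implement (hypothetical caller, not code from this
 * file); trace_function() below follows the same shape:
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN,
 *					  sizeof(*entry), flags, pc);
 *	if (!event)
 *		return;			(ring buffer full or tracing off)
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;			(fill in the payload)
 *	trace_buffer_unlock_commit(tr, buffer, event, flags, pc);
 *
 * The commit also records a kernel and/or user stack trace when the
 * corresponding trace options are enabled.
 */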
1700
1701 static struct ring_buffer *temp_buffer;
1702
1703 struct ring_buffer_event *
1704 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1705                           struct trace_event_file *trace_file,
1706                           int type, unsigned long len,
1707                           unsigned long flags, int pc)
1708 {
1709         struct ring_buffer_event *entry;
1710
1711         *current_rb = trace_file->tr->trace_buffer.buffer;
1712         entry = trace_buffer_lock_reserve(*current_rb,
1713                                          type, len, flags, pc);
1714         /*
1715          * If tracing is off, but we have triggers enabled
1716          * we still need to look at the event data. Use the temp_buffer
1717          * to store the trace event for the trigger to use. It's recursion
1718          * safe and will not be recorded anywhere.
1719          */
1720         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1721                 *current_rb = temp_buffer;
1722                 entry = trace_buffer_lock_reserve(*current_rb,
1723                                                   type, len, flags, pc);
1724         }
1725         return entry;
1726 }
1727 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1728
1729 struct ring_buffer_event *
1730 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1731                                   int type, unsigned long len,
1732                                   unsigned long flags, int pc)
1733 {
1734         *current_rb = global_trace.trace_buffer.buffer;
1735         return trace_buffer_lock_reserve(*current_rb,
1736                                          type, len, flags, pc);
1737 }
1738 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1739
1740 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1741                                      struct ring_buffer *buffer,
1742                                      struct ring_buffer_event *event,
1743                                      unsigned long flags, int pc,
1744                                      struct pt_regs *regs)
1745 {
1746         __buffer_unlock_commit(buffer, event);
1747
1748         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1749         ftrace_trace_userstack(buffer, flags, pc);
1750 }
1751 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1752
1753 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1754                                          struct ring_buffer_event *event)
1755 {
1756         ring_buffer_discard_commit(buffer, event);
1757 }
1758 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1759
1760 void
1761 trace_function(struct trace_array *tr,
1762                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1763                int pc)
1764 {
1765         struct trace_event_call *call = &event_function;
1766         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1767         struct ring_buffer_event *event;
1768         struct ftrace_entry *entry;
1769
1770         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1771                                           flags, pc);
1772         if (!event)
1773                 return;
1774         entry   = ring_buffer_event_data(event);
1775         entry->ip                       = ip;
1776         entry->parent_ip                = parent_ip;
1777
1778         if (!call_filter_check_discard(call, entry, buffer, event))
1779                 __buffer_unlock_commit(buffer, event);
1780 }
1781
1782 #ifdef CONFIG_STACKTRACE
1783
1784 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1785 struct ftrace_stack {
1786         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1787 };
1788
1789 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1790 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1791
1792 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1793                                  unsigned long flags,
1794                                  int skip, int pc, struct pt_regs *regs)
1795 {
1796         struct trace_event_call *call = &event_kernel_stack;
1797         struct ring_buffer_event *event;
1798         struct stack_entry *entry;
1799         struct stack_trace trace;
1800         int use_stack;
1801         int size = FTRACE_STACK_ENTRIES;
1802
1803         trace.nr_entries        = 0;
1804         trace.skip              = skip;
1805
1806         /*
1807          * Since events can happen in NMIs there's no safe way to
1808          * use the per-CPU ftrace_stacks. We reserve it, and if an interrupt
1809          * or NMI comes in, it will just have to use the default size of
1810          * FTRACE_STACK_ENTRIES.
1811          */
1812         preempt_disable_notrace();
1813
1814         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1815         /*
1816          * We don't need any atomic variables, just a barrier.
1817          * If an interrupt comes in, we don't care, because it would
1818          * have exited and put the counter back to what we want.
1819          * We just need a barrier to keep gcc from moving things
1820          * around.
1821          */
1822         barrier();
1823         if (use_stack == 1) {
1824                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1825                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1826
1827                 if (regs)
1828                         save_stack_trace_regs(regs, &trace);
1829                 else
1830                         save_stack_trace(&trace);
1831
1832                 if (trace.nr_entries > size)
1833                         size = trace.nr_entries;
1834         } else
1835                 /* From now on, use_stack is a boolean */
1836                 use_stack = 0;
1837
1838         size *= sizeof(unsigned long);
1839
1840         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1841                                           sizeof(*entry) + size, flags, pc);
1842         if (!event)
1843                 goto out;
1844         entry = ring_buffer_event_data(event);
1845
1846         memset(&entry->caller, 0, size);
1847
1848         if (use_stack)
1849                 memcpy(&entry->caller, trace.entries,
1850                        trace.nr_entries * sizeof(unsigned long));
1851         else {
1852                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1853                 trace.entries           = entry->caller;
1854                 if (regs)
1855                         save_stack_trace_regs(regs, &trace);
1856                 else
1857                         save_stack_trace(&trace);
1858         }
1859
1860         entry->size = trace.nr_entries;
1861
1862         if (!call_filter_check_discard(call, entry, buffer, event))
1863                 __buffer_unlock_commit(buffer, event);
1864
1865  out:
1866         /* Again, don't let gcc optimize things here */
1867         barrier();
1868         __this_cpu_dec(ftrace_stack_reserve);
1869         preempt_enable_notrace();
1870
1871 }
1872
1873 static inline void ftrace_trace_stack(struct trace_array *tr,
1874                                       struct ring_buffer *buffer,
1875                                       unsigned long flags,
1876                                       int skip, int pc, struct pt_regs *regs)
1877 {
1878         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1879                 return;
1880
1881         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1882 }
1883
1884 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1885                    int pc)
1886 {
1887         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1888 }
1889
1890 /**
1891  * trace_dump_stack - record a stack back trace in the trace buffer
1892  * @skip: Number of functions to skip (helper handlers)
1893  */
1894 void trace_dump_stack(int skip)
1895 {
1896         unsigned long flags;
1897
1898         if (tracing_disabled || tracing_selftest_running)
1899                 return;
1900
1901         local_save_flags(flags);
1902
1903         /*
1904          * Skip 3 more frames, which should put us at the caller of
1905          * this function.
1906          */
1907         skip += 3;
1908         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1909                              flags, skip, preempt_count(), NULL);
1910 }
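
/*
 * Illustrative usage sketch (hypothetical call site, not code from
 * this file): a backtrace can be dropped into the trace buffer from
 * almost any context while debugging.
 *
 *	if (suspicious_condition)		(hypothetical condition)
 *		trace_dump_stack(0);
 *
 * Passing 0 records the caller and everything above it; a positive
 * skip value hides that many additional helper frames.
 */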
1911
1912 static DEFINE_PER_CPU(int, user_stack_count);
1913
1914 void
1915 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1916 {
1917         struct trace_event_call *call = &event_user_stack;
1918         struct ring_buffer_event *event;
1919         struct userstack_entry *entry;
1920         struct stack_trace trace;
1921
1922         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1923                 return;
1924
1925         /*
1926          * NMIs can not handle page faults, even with fixups.
1927          * Saving the user stack can (and often does) fault.
1928          */
1929         if (unlikely(in_nmi()))
1930                 return;
1931
1932         /*
1933          * Prevent recursion, since the user stack tracing may
1934          * trigger other kernel events.
1935          */
1936         preempt_disable();
1937         if (__this_cpu_read(user_stack_count))
1938                 goto out;
1939
1940         __this_cpu_inc(user_stack_count);
1941
1942         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1943                                           sizeof(*entry), flags, pc);
1944         if (!event)
1945                 goto out_drop_count;
1946         entry   = ring_buffer_event_data(event);
1947
1948         entry->tgid             = current->tgid;
1949         memset(&entry->caller, 0, sizeof(entry->caller));
1950
1951         trace.nr_entries        = 0;
1952         trace.max_entries       = FTRACE_STACK_ENTRIES;
1953         trace.skip              = 0;
1954         trace.entries           = entry->caller;
1955
1956         save_stack_trace_user(&trace);
1957         if (!call_filter_check_discard(call, entry, buffer, event))
1958                 __buffer_unlock_commit(buffer, event);
1959
1960  out_drop_count:
1961         __this_cpu_dec(user_stack_count);
1962  out:
1963         preempt_enable();
1964 }
1965
1966 #ifdef UNUSED
1967 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1968 {
1969         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1970 }
1971 #endif /* UNUSED */
1972
1973 #endif /* CONFIG_STACKTRACE */
1974
1975 /* created for use with alloc_percpu */
1976 struct trace_buffer_struct {
1977         char buffer[TRACE_BUF_SIZE];
1978 };
1979
1980 static struct trace_buffer_struct *trace_percpu_buffer;
1981 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1982 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1983 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1984
1985 /*
1986  * The buffer used depends on the context. There is a per-CPU
1987  * buffer for normal context, softirq context, hard irq context and
1988  * for NMI context. This allows for lockless recording.
1989  *
1990  * Note, if the buffers failed to be allocated, then this returns NULL.
1991  */
1992 static char *get_trace_buf(void)
1993 {
1994         struct trace_buffer_struct *percpu_buffer;
1995
1996         /*
1997          * If we have allocated per cpu buffers, then we do not
1998          * need to do any locking.
1999          */
2000         if (in_nmi())
2001                 percpu_buffer = trace_percpu_nmi_buffer;
2002         else if (in_irq())
2003                 percpu_buffer = trace_percpu_irq_buffer;
2004         else if (in_softirq())
2005                 percpu_buffer = trace_percpu_sirq_buffer;
2006         else
2007                 percpu_buffer = trace_percpu_buffer;
2008
2009         if (!percpu_buffer)
2010                 return NULL;
2011
2012         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2013 }
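
/*
 * Because each context (normal, softirq, irq, NMI) gets its own
 * per-CPU buffer, a trace_printk() that interrupts another one on the
 * same CPU never scribbles over the partially formatted string of the
 * interrupted caller. A minimal sketch of the caller side, as done by
 * trace_vbprintk()/__trace_array_vprintk() below:
 *
 *	char *tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		return 0;		(buffers not allocated yet)
 *	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 */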
2014
2015 static int alloc_percpu_trace_buffer(void)
2016 {
2017         struct trace_buffer_struct *buffers;
2018         struct trace_buffer_struct *sirq_buffers;
2019         struct trace_buffer_struct *irq_buffers;
2020         struct trace_buffer_struct *nmi_buffers;
2021
2022         buffers = alloc_percpu(struct trace_buffer_struct);
2023         if (!buffers)
2024                 goto err_warn;
2025
2026         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2027         if (!sirq_buffers)
2028                 goto err_sirq;
2029
2030         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2031         if (!irq_buffers)
2032                 goto err_irq;
2033
2034         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2035         if (!nmi_buffers)
2036                 goto err_nmi;
2037
2038         trace_percpu_buffer = buffers;
2039         trace_percpu_sirq_buffer = sirq_buffers;
2040         trace_percpu_irq_buffer = irq_buffers;
2041         trace_percpu_nmi_buffer = nmi_buffers;
2042
2043         return 0;
2044
2045  err_nmi:
2046         free_percpu(irq_buffers);
2047  err_irq:
2048         free_percpu(sirq_buffers);
2049  err_sirq:
2050         free_percpu(buffers);
2051  err_warn:
2052         WARN(1, "Could not allocate percpu trace_printk buffer");
2053         return -ENOMEM;
2054 }
2055
2056 static int buffers_allocated;
2057
2058 void trace_printk_init_buffers(void)
2059 {
2060         if (buffers_allocated)
2061                 return;
2062
2063         if (alloc_percpu_trace_buffer())
2064                 return;
2065
2066         /* trace_printk() is for debug use only. Don't use it in production. */
2067
2068         pr_warn("\n");
2069         pr_warn("**********************************************************\n");
2070         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2071         pr_warn("**                                                      **\n");
2072         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2073         pr_warn("**                                                      **\n");
2074         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2075         pr_warn("** unsafe for production use.                           **\n");
2076         pr_warn("**                                                      **\n");
2077         pr_warn("** If you see this message and you are not debugging    **\n");
2078         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2079         pr_warn("**                                                      **\n");
2080         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2081         pr_warn("**********************************************************\n");
2082
2083         /* Expand the buffers to set size */
2084         tracing_update_buffers();
2085
2086         buffers_allocated = 1;
2087
2088         /*
2089          * trace_printk_init_buffers() can be called by modules.
2090          * If that happens, then we need to start cmdline recording
2091          * directly here. If the global_trace.trace_buffer.buffer is
2092          * already allocated here, then this was called by module code.
2093          */
2094         if (global_trace.trace_buffer.buffer)
2095                 tracing_start_cmdline_record();
2096 }
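
/*
 * Illustrative usage sketch (hypothetical call site, not code from
 * this file): any trace_printk() compiled into the kernel or a module
 * triggers the initialization above, which is why the banner gets
 * printed.
 *
 *	trace_printk("fast path hit: cpu=%d val=%lu\n",
 *		     smp_processor_id(), val);	(val is hypothetical)
 *
 * The output lands in the ftrace ring buffer (see trace_vbprintk()
 * below), not in the printk/dmesg log.
 */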
2097
2098 void trace_printk_start_comm(void)
2099 {
2100         /* Start tracing comms if trace printk is set */
2101         if (!buffers_allocated)
2102                 return;
2103         tracing_start_cmdline_record();
2104 }
2105
2106 static void trace_printk_start_stop_comm(int enabled)
2107 {
2108         if (!buffers_allocated)
2109                 return;
2110
2111         if (enabled)
2112                 tracing_start_cmdline_record();
2113         else
2114                 tracing_stop_cmdline_record();
2115 }
2116
2117 /**
2118  * trace_vbprintk - write a binary message to the tracing buffer
2119  *
2120  */
2121 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2122 {
2123         struct trace_event_call *call = &event_bprint;
2124         struct ring_buffer_event *event;
2125         struct ring_buffer *buffer;
2126         struct trace_array *tr = &global_trace;
2127         struct bprint_entry *entry;
2128         unsigned long flags;
2129         char *tbuffer;
2130         int len = 0, size, pc;
2131
2132         if (unlikely(tracing_selftest_running || tracing_disabled))
2133                 return 0;
2134
2135         /* Don't pollute graph traces with trace_vprintk internals */
2136         pause_graph_tracing();
2137
2138         pc = preempt_count();
2139         preempt_disable_notrace();
2140
2141         tbuffer = get_trace_buf();
2142         if (!tbuffer) {
2143                 len = 0;
2144                 goto out;
2145         }
2146
2147         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2148
2149         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2150                 goto out;
2151
2152         local_save_flags(flags);
2153         size = sizeof(*entry) + sizeof(u32) * len;
2154         buffer = tr->trace_buffer.buffer;
2155         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2156                                           flags, pc);
2157         if (!event)
2158                 goto out;
2159         entry = ring_buffer_event_data(event);
2160         entry->ip                       = ip;
2161         entry->fmt                      = fmt;
2162
2163         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2164         if (!call_filter_check_discard(call, entry, buffer, event)) {
2165                 __buffer_unlock_commit(buffer, event);
2166                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2167         }
2168
2169 out:
2170         preempt_enable_notrace();
2171         unpause_graph_tracing();
2172
2173         return len;
2174 }
2175 EXPORT_SYMBOL_GPL(trace_vbprintk);
2176
2177 static int
2178 __trace_array_vprintk(struct ring_buffer *buffer,
2179                       unsigned long ip, const char *fmt, va_list args)
2180 {
2181         struct trace_event_call *call = &event_print;
2182         struct ring_buffer_event *event;
2183         int len = 0, size, pc;
2184         struct print_entry *entry;
2185         unsigned long flags;
2186         char *tbuffer;
2187
2188         if (tracing_disabled || tracing_selftest_running)
2189                 return 0;
2190
2191         /* Don't pollute graph traces with trace_vprintk internals */
2192         pause_graph_tracing();
2193
2194         pc = preempt_count();
2195         preempt_disable_notrace();
2196
2198         tbuffer = get_trace_buf();
2199         if (!tbuffer) {
2200                 len = 0;
2201                 goto out;
2202         }
2203
2204         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2205
2206         local_save_flags(flags);
2207         size = sizeof(*entry) + len + 1;
2208         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2209                                           flags, pc);
2210         if (!event)
2211                 goto out;
2212         entry = ring_buffer_event_data(event);
2213         entry->ip = ip;
2214
2215         memcpy(&entry->buf, tbuffer, len + 1);
2216         if (!call_filter_check_discard(call, entry, buffer, event)) {
2217                 __buffer_unlock_commit(buffer, event);
2218                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2219         }
2220  out:
2221         preempt_enable_notrace();
2222         unpause_graph_tracing();
2223
2224         return len;
2225 }
2226
2227 int trace_array_vprintk(struct trace_array *tr,
2228                         unsigned long ip, const char *fmt, va_list args)
2229 {
2230         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2231 }
2232
2233 int trace_array_printk(struct trace_array *tr,
2234                        unsigned long ip, const char *fmt, ...)
2235 {
2236         int ret;
2237         va_list ap;
2238
2239         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2240                 return 0;
2241
2242         va_start(ap, fmt);
2243         ret = trace_array_vprintk(tr, ip, fmt, ap);
2244         va_end(ap);
2245         return ret;
2246 }
2247
2248 int trace_array_printk_buf(struct ring_buffer *buffer,
2249                            unsigned long ip, const char *fmt, ...)
2250 {
2251         int ret;
2252         va_list ap;
2253
2254         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2255                 return 0;
2256
2257         va_start(ap, fmt);
2258         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2259         va_end(ap);
2260         return ret;
2261 }
2262
2263 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2264 {
2265         return trace_array_vprintk(&global_trace, ip, fmt, args);
2266 }
2267 EXPORT_SYMBOL_GPL(trace_vprintk);
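
/*
 * Illustrative sketch (hypothetical instance, not code from this
 * file): trace_array_printk() writes to a specific trace_array, such
 * as an instance created under tracefs instances/, instead of the
 * global buffer used by trace_printk()/trace_vprintk().
 *
 *	struct trace_array *tr = my_instance;	(hypothetical pointer)
 *
 *	trace_array_printk(tr, _THIS_IP_,
 *			   "request %d completed\n", req_id);
 */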
2268
2269 static void trace_iterator_increment(struct trace_iterator *iter)
2270 {
2271         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2272
2273         iter->idx++;
2274         if (buf_iter)
2275                 ring_buffer_read(buf_iter, NULL);
2276 }
2277
2278 static struct trace_entry *
2279 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2280                 unsigned long *lost_events)
2281 {
2282         struct ring_buffer_event *event;
2283         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2284
2285         if (buf_iter)
2286                 event = ring_buffer_iter_peek(buf_iter, ts);
2287         else
2288                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2289                                          lost_events);
2290
2291         if (event) {
2292                 iter->ent_size = ring_buffer_event_length(event);
2293                 return ring_buffer_event_data(event);
2294         }
2295         iter->ent_size = 0;
2296         return NULL;
2297 }
2298
2299 static struct trace_entry *
2300 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2301                   unsigned long *missing_events, u64 *ent_ts)
2302 {
2303         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2304         struct trace_entry *ent, *next = NULL;
2305         unsigned long lost_events = 0, next_lost = 0;
2306         int cpu_file = iter->cpu_file;
2307         u64 next_ts = 0, ts;
2308         int next_cpu = -1;
2309         int next_size = 0;
2310         int cpu;
2311
2312         /*
2313          * If we are in a per_cpu trace file, don't bother iterating over
2314          * all CPUs; peek at that CPU directly.
2315          */
2316         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2317                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2318                         return NULL;
2319                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2320                 if (ent_cpu)
2321                         *ent_cpu = cpu_file;
2322
2323                 return ent;
2324         }
2325
2326         for_each_tracing_cpu(cpu) {
2327
2328                 if (ring_buffer_empty_cpu(buffer, cpu))
2329                         continue;
2330
2331                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2332
2333                 /*
2334                  * Pick the entry with the smallest timestamp:
2335                  */
2336                 if (ent && (!next || ts < next_ts)) {
2337                         next = ent;
2338                         next_cpu = cpu;
2339                         next_ts = ts;
2340                         next_lost = lost_events;
2341                         next_size = iter->ent_size;
2342                 }
2343         }
2344
2345         iter->ent_size = next_size;
2346
2347         if (ent_cpu)
2348                 *ent_cpu = next_cpu;
2349
2350         if (ent_ts)
2351                 *ent_ts = next_ts;
2352
2353         if (missing_events)
2354                 *missing_events = next_lost;
2355
2356         return next;
2357 }
2358
2359 /* Find the next real entry, without updating the iterator itself */
2360 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2361                                           int *ent_cpu, u64 *ent_ts)
2362 {
2363         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2364 }
2365
2366 /* Find the next real entry, and increment the iterator to the next entry */
2367 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2368 {
2369         iter->ent = __find_next_entry(iter, &iter->cpu,
2370                                       &iter->lost_events, &iter->ts);
2371
2372         if (iter->ent)
2373                 trace_iterator_increment(iter);
2374
2375         return iter->ent ? iter : NULL;
2376 }
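
/*
 * Illustrative sketch of the consuming loop these helpers support
 * (hypothetical, not code from this file); a consuming reader such as
 * trace_pipe follows this shape:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);	(consuming reads only)
 *	}
 *
 * __find_next_entry() merges the per-CPU buffers by picking, on each
 * step, the pending entry with the smallest timestamp.
 */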
2377
2378 static void trace_consume(struct trace_iterator *iter)
2379 {
2380         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2381                             &iter->lost_events);
2382 }
2383
2384 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2385 {
2386         struct trace_iterator *iter = m->private;
2387         int i = (int)*pos;
2388         void *ent;
2389
2390         WARN_ON_ONCE(iter->leftover);
2391
2392         (*pos)++;
2393
2394         /* can't go backwards */
2395         if (iter->idx > i)
2396                 return NULL;
2397
2398         if (iter->idx < 0)
2399                 ent = trace_find_next_entry_inc(iter);
2400         else
2401                 ent = iter;
2402
2403         while (ent && iter->idx < i)
2404                 ent = trace_find_next_entry_inc(iter);
2405
2406         iter->pos = *pos;
2407
2408         return ent;
2409 }
2410
2411 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2412 {
2413         struct ring_buffer_event *event;
2414         struct ring_buffer_iter *buf_iter;
2415         unsigned long entries = 0;
2416         u64 ts;
2417
2418         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2419
2420         buf_iter = trace_buffer_iter(iter, cpu);
2421         if (!buf_iter)
2422                 return;
2423
2424         ring_buffer_iter_reset(buf_iter);
2425
2426         /*
2427          * With the max latency tracers, we could have the case that
2428          * a reset never took place on a CPU. This is evident by the
2429          * timestamp being before the start of the buffer.
2430          */
2431         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2432                 if (ts >= iter->trace_buffer->time_start)
2433                         break;
2434                 entries++;
2435                 ring_buffer_read(buf_iter, NULL);
2436         }
2437
2438         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2439 }
2440
2441 /*
2442  * The current tracer is copied to avoid using a global lock
2443  * all around.
2444  */
2445 static void *s_start(struct seq_file *m, loff_t *pos)
2446 {
2447         struct trace_iterator *iter = m->private;
2448         struct trace_array *tr = iter->tr;
2449         int cpu_file = iter->cpu_file;
2450         void *p = NULL;
2451         loff_t l = 0;
2452         int cpu;
2453
2454         /*
2455          * Copy the tracer to avoid using a global lock all around.
2456          * iter->trace is a copy of current_trace; the name pointer
2457          * may be compared instead of using strcmp(), as iter->trace->name
2458          * will point to the same string as current_trace->name.
2459          */
2460         mutex_lock(&trace_types_lock);
2461         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2462                 *iter->trace = *tr->current_trace;
2463         mutex_unlock(&trace_types_lock);
2464
2465 #ifdef CONFIG_TRACER_MAX_TRACE
2466         if (iter->snapshot && iter->trace->use_max_tr)
2467                 return ERR_PTR(-EBUSY);
2468 #endif
2469
2470         if (!iter->snapshot)
2471                 atomic_inc(&trace_record_cmdline_disabled);
2472
2473         if (*pos != iter->pos) {
2474                 iter->ent = NULL;
2475                 iter->cpu = 0;
2476                 iter->idx = -1;
2477
2478                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2479                         for_each_tracing_cpu(cpu)
2480                                 tracing_iter_reset(iter, cpu);
2481                 } else
2482                         tracing_iter_reset(iter, cpu_file);
2483
2484                 iter->leftover = 0;
2485                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2486                         ;
2487
2488         } else {
2489                 /*
2490                  * If we overflowed the seq_file before, then we want
2491                  * to just reuse the trace_seq buffer again.
2492                  */
2493                 if (iter->leftover)
2494                         p = iter;
2495                 else {
2496                         l = *pos - 1;
2497                         p = s_next(m, p, &l);
2498                 }
2499         }
2500
2501         trace_event_read_lock();
2502         trace_access_lock(cpu_file);
2503         return p;
2504 }
2505
2506 static void s_stop(struct seq_file *m, void *p)
2507 {
2508         struct trace_iterator *iter = m->private;
2509
2510 #ifdef CONFIG_TRACER_MAX_TRACE
2511         if (iter->snapshot && iter->trace->use_max_tr)
2512                 return;
2513 #endif
2514
2515         if (!iter->snapshot)
2516                 atomic_dec(&trace_record_cmdline_disabled);
2517
2518         trace_access_unlock(iter->cpu_file);
2519         trace_event_read_unlock();
2520 }
2521
2522 static void
2523 get_total_entries(struct trace_buffer *buf,
2524                   unsigned long *total, unsigned long *entries)
2525 {
2526         unsigned long count;
2527         int cpu;
2528
2529         *total = 0;
2530         *entries = 0;
2531
2532         for_each_tracing_cpu(cpu) {
2533                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2534                 /*
2535                  * If this buffer has skipped entries, then we hold all
2536                  * entries for the trace and we need to ignore the
2537                  * ones before the timestamp.
2538                  */
2539                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2540                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2541                         /* total is the same as the entries */
2542                         *total += count;
2543                 } else
2544                         *total += count +
2545                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2546                 *entries += count;
2547         }
2548 }
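
/*
 * Worked example of the accounting above (made-up numbers): if a CPU
 * buffer currently holds 1000 entries and 200 more were dropped due to
 * overruns, then *entries accumulates 1000 while *total accumulates
 * 1200. If skipped_entries is set instead (max latency tracers), the
 * skipped count is subtracted and no overrun is added, so that CPU
 * contributes the same amount to *total and *entries.
 */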
2549
2550 static void print_lat_help_header(struct seq_file *m)
2551 {
2552         seq_puts(m, "#                  _------=> CPU#            \n"
2553                     "#                 / _-----=> irqs-off        \n"
2554                     "#                | / _----=> need-resched    \n"
2555                     "#                || / _---=> hardirq/softirq \n"
2556                     "#                ||| / _--=> preempt-depth   \n"
2557                     "#                |||| /     delay            \n"
2558                     "#  cmd     pid   ||||| time  |   caller      \n"
2559                     "#     \\   /      |||||  \\    |   /         \n");
2560 }
2561
2562 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2563 {
2564         unsigned long total;
2565         unsigned long entries;
2566
2567         get_total_entries(buf, &total, &entries);
2568         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2569                    entries, total, num_online_cpus());
2570         seq_puts(m, "#\n");
2571 }
2572
2573 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2574 {
2575         print_event_info(buf, m);
2576         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2577                     "#              | |       |          |         |\n");
2578 }
2579
2580 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2581 {
2582         print_event_info(buf, m);
2583         seq_puts(m, "#                              _-----=> irqs-off\n"
2584                     "#                             / _----=> need-resched\n"
2585                     "#                            | / _---=> hardirq/softirq\n"
2586                     "#                            || / _--=> preempt-depth\n"
2587                     "#                            ||| /     delay\n"
2588                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2589                     "#              | |       |   ||||       |         |\n");
2590 }
2591
2592 void
2593 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2594 {
2595         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2596         struct trace_buffer *buf = iter->trace_buffer;
2597         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2598         struct tracer *type = iter->trace;
2599         unsigned long entries;
2600         unsigned long total;
2601         const char *name = type->name;
2602
2604
2605         get_total_entries(buf, &total, &entries);
2606
2607         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2608                    name, UTS_RELEASE);
2609         seq_puts(m, "# -----------------------------------"
2610                  "---------------------------------\n");
2611         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2612                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2613                    nsecs_to_usecs(data->saved_latency),
2614                    entries,
2615                    total,
2616                    buf->cpu,
2617 #if defined(CONFIG_PREEMPT_NONE)
2618                    "server",
2619 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2620                    "desktop",
2621 #elif defined(CONFIG_PREEMPT)
2622                    "preempt",
2623 #else
2624                    "unknown",
2625 #endif
2626                    /* These are reserved for later use */
2627                    0, 0, 0, 0);
2628 #ifdef CONFIG_SMP
2629         seq_printf(m, " #P:%d)\n", num_online_cpus());
2630 #else
2631         seq_puts(m, ")\n");
2632 #endif
2633         seq_puts(m, "#    -----------------\n");
2634         seq_printf(m, "#    | task: %.16s-%d "
2635                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2636                    data->comm, data->pid,
2637                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2638                    data->policy, data->rt_priority);
2639         seq_puts(m, "#    -----------------\n");
2640
2641         if (data->critical_start) {
2642                 seq_puts(m, "#  => started at: ");
2643                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2644                 trace_print_seq(m, &iter->seq);
2645                 seq_puts(m, "\n#  => ended at:   ");
2646                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2647                 trace_print_seq(m, &iter->seq);
2648                 seq_puts(m, "\n#\n");
2649         }
2650
2651         seq_puts(m, "#\n");
2652 }
2653
2654 static void test_cpu_buff_start(struct trace_iterator *iter)
2655 {
2656         struct trace_seq *s = &iter->seq;
2657         struct trace_array *tr = iter->tr;
2658
2659         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2660                 return;
2661
2662         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2663                 return;
2664
2665         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2666                 return;
2667
2668         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2669                 return;
2670
2671         if (iter->started)
2672                 cpumask_set_cpu(iter->cpu, iter->started);
2673
2674         /* Don't print started cpu buffer for the first entry of the trace */
2675         if (iter->idx > 1)
2676                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2677                                 iter->cpu);
2678 }
2679
2680 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2681 {
2682         struct trace_array *tr = iter->tr;
2683         struct trace_seq *s = &iter->seq;
2684         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2685         struct trace_entry *entry;
2686         struct trace_event *event;
2687
2688         entry = iter->ent;
2689
2690         test_cpu_buff_start(iter);
2691
2692         event = ftrace_find_event(entry->type);
2693
2694         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2695                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2696                         trace_print_lat_context(iter);
2697                 else
2698                         trace_print_context(iter);
2699         }
2700
2701         if (trace_seq_has_overflowed(s))
2702                 return TRACE_TYPE_PARTIAL_LINE;
2703
2704         if (event)
2705                 return event->funcs->trace(iter, sym_flags, event);
2706
2707         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2708
2709         return trace_handle_return(s);
2710 }
2711
2712 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2713 {
2714         struct trace_array *tr = iter->tr;
2715         struct trace_seq *s = &iter->seq;
2716         struct trace_entry *entry;
2717         struct trace_event *event;
2718
2719         entry = iter->ent;
2720
2721         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2722                 trace_seq_printf(s, "%d %d %llu ",
2723                                  entry->pid, iter->cpu, iter->ts);
2724
2725         if (trace_seq_has_overflowed(s))
2726                 return TRACE_TYPE_PARTIAL_LINE;
2727
2728         event = ftrace_find_event(entry->type);
2729         if (event)
2730                 return event->funcs->raw(iter, 0, event);
2731
2732         trace_seq_printf(s, "%d ?\n", entry->type);
2733
2734         return trace_handle_return(s);
2735 }
2736
2737 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2738 {
2739         struct trace_array *tr = iter->tr;
2740         struct trace_seq *s = &iter->seq;
2741         unsigned char newline = '\n';
2742         struct trace_entry *entry;
2743         struct trace_event *event;
2744
2745         entry = iter->ent;
2746
2747         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2748                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2749                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2750                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2751                 if (trace_seq_has_overflowed(s))
2752                         return TRACE_TYPE_PARTIAL_LINE;
2753         }
2754
2755         event = ftrace_find_event(entry->type);
2756         if (event) {
2757                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2758                 if (ret != TRACE_TYPE_HANDLED)
2759                         return ret;
2760         }
2761
2762         SEQ_PUT_FIELD(s, newline);
2763
2764         return trace_handle_return(s);
2765 }
2766
2767 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2768 {
2769         struct trace_array *tr = iter->tr;
2770         struct trace_seq *s = &iter->seq;
2771         struct trace_entry *entry;
2772         struct trace_event *event;
2773
2774         entry = iter->ent;
2775
2776         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2777                 SEQ_PUT_FIELD(s, entry->pid);
2778                 SEQ_PUT_FIELD(s, iter->cpu);
2779                 SEQ_PUT_FIELD(s, iter->ts);
2780                 if (trace_seq_has_overflowed(s))
2781                         return TRACE_TYPE_PARTIAL_LINE;
2782         }
2783
2784         event = ftrace_find_event(entry->type);
2785         return event ? event->funcs->binary(iter, 0, event) :
2786                 TRACE_TYPE_HANDLED;
2787 }
2788
2789 int trace_empty(struct trace_iterator *iter)
2790 {
2791         struct ring_buffer_iter *buf_iter;
2792         int cpu;
2793
2794         /* If we are looking at one CPU buffer, only check that one */
2795         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2796                 cpu = iter->cpu_file;
2797                 buf_iter = trace_buffer_iter(iter, cpu);
2798                 if (buf_iter) {
2799                         if (!ring_buffer_iter_empty(buf_iter))
2800                                 return 0;
2801                 } else {
2802                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2803                                 return 0;
2804                 }
2805                 return 1;
2806         }
2807
2808         for_each_tracing_cpu(cpu) {
2809                 buf_iter = trace_buffer_iter(iter, cpu);
2810                 if (buf_iter) {
2811                         if (!ring_buffer_iter_empty(buf_iter))
2812                                 return 0;
2813                 } else {
2814                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2815                                 return 0;
2816                 }
2817         }
2818
2819         return 1;
2820 }
2821
2822 /*  Called with trace_event_read_lock() held. */
2823 enum print_line_t print_trace_line(struct trace_iterator *iter)
2824 {
2825         struct trace_array *tr = iter->tr;
2826         unsigned long trace_flags = tr->trace_flags;
2827         enum print_line_t ret;
2828
2829         if (iter->lost_events) {
2830                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2831                                  iter->cpu, iter->lost_events);
2832                 if (trace_seq_has_overflowed(&iter->seq))
2833                         return TRACE_TYPE_PARTIAL_LINE;
2834         }
2835
2836         if (iter->trace && iter->trace->print_line) {
2837                 ret = iter->trace->print_line(iter);
2838                 if (ret != TRACE_TYPE_UNHANDLED)
2839                         return ret;
2840         }
2841
2842         if (iter->ent->type == TRACE_BPUTS &&
2843                         trace_flags & TRACE_ITER_PRINTK &&
2844                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2845                 return trace_print_bputs_msg_only(iter);
2846
2847         if (iter->ent->type == TRACE_BPRINT &&
2848                         trace_flags & TRACE_ITER_PRINTK &&
2849                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2850                 return trace_print_bprintk_msg_only(iter);
2851
2852         if (iter->ent->type == TRACE_PRINT &&
2853                         trace_flags & TRACE_ITER_PRINTK &&
2854                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2855                 return trace_print_printk_msg_only(iter);
2856
2857         if (trace_flags & TRACE_ITER_BIN)
2858                 return print_bin_fmt(iter);
2859
2860         if (trace_flags & TRACE_ITER_HEX)
2861                 return print_hex_fmt(iter);
2862
2863         if (trace_flags & TRACE_ITER_RAW)
2864                 return print_raw_fmt(iter);
2865
2866         return print_trace_fmt(iter);
2867 }
2868
2869 void trace_latency_header(struct seq_file *m)
2870 {
2871         struct trace_iterator *iter = m->private;
2872         struct trace_array *tr = iter->tr;
2873
2874         /* print nothing if the buffers are empty */
2875         if (trace_empty(iter))
2876                 return;
2877
2878         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2879                 print_trace_header(m, iter);
2880
2881         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2882                 print_lat_help_header(m);
2883 }
2884
2885 void trace_default_header(struct seq_file *m)
2886 {
2887         struct trace_iterator *iter = m->private;
2888         struct trace_array *tr = iter->tr;
2889         unsigned long trace_flags = tr->trace_flags;
2890
2891         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2892                 return;
2893
2894         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2895                 /* print nothing if the buffers are empty */
2896                 if (trace_empty(iter))
2897                         return;
2898                 print_trace_header(m, iter);
2899                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2900                         print_lat_help_header(m);
2901         } else {
2902                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2903                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2904                                 print_func_help_header_irq(iter->trace_buffer, m);
2905                         else
2906                                 print_func_help_header(iter->trace_buffer, m);
2907                 }
2908         }
2909 }
2910
2911 static void test_ftrace_alive(struct seq_file *m)
2912 {
2913         if (!ftrace_is_dead())
2914                 return;
2915         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2916                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2917 }
2918
2919 #ifdef CONFIG_TRACER_MAX_TRACE
2920 static void show_snapshot_main_help(struct seq_file *m)
2921 {
2922         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2923                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2924                     "#                      Takes a snapshot of the main buffer.\n"
2925                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2926                     "#                      (Doesn't have to be '2'; works with any number that\n"
2927                     "#                       is not a '0' or '1')\n");
2928 }
2929
2930 static void show_snapshot_percpu_help(struct seq_file *m)
2931 {
2932         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2933 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2934         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2935                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2936 #else
2937         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2938                     "#                     Must use main snapshot file to allocate.\n");
2939 #endif
2940         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2941                     "#                      (Doesn't have to be '2'; works with any number that\n"
2942                     "#                       is not a '0' or '1')\n");
2943 }
2944
2945 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2946 {
2947         if (iter->tr->allocated_snapshot)
2948                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2949         else
2950                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2951
2952         seq_puts(m, "# Snapshot commands:\n");
2953         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2954                 show_snapshot_main_help(m);
2955         else
2956                 show_snapshot_percpu_help(m);
2957 }
2958 #else
2959 /* Should never be called */
2960 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2961 #endif
2962
2963 static int s_show(struct seq_file *m, void *v)
2964 {
2965         struct trace_iterator *iter = v;
2966         int ret;
2967
2968         if (iter->ent == NULL) {
2969                 if (iter->tr) {
2970                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2971                         seq_puts(m, "#\n");
2972                         test_ftrace_alive(m);
2973                 }
2974                 if (iter->snapshot && trace_empty(iter))
2975                         print_snapshot_help(m, iter);
2976                 else if (iter->trace && iter->trace->print_header)
2977                         iter->trace->print_header(m);
2978                 else
2979                         trace_default_header(m);
2980
2981         } else if (iter->leftover) {
2982                 /*
2983                  * If we filled the seq_file buffer earlier, we
2984                  * want to just show it now.
2985                  */
2986                 ret = trace_print_seq(m, &iter->seq);
2987
2988                 /* ret should this time be zero, but you never know */
2989                 iter->leftover = ret;
2990
2991         } else {
2992                 print_trace_line(iter);
2993                 ret = trace_print_seq(m, &iter->seq);
2994                 /*
2995                  * If we overflow the seq_file buffer, then it will
2996                  * ask us for this data again at start up.
2997                  * Use that instead.
2998                  *  ret is 0 if seq_file write succeeded.
2999                  *        -1 otherwise.
3000                  */
3001                 iter->leftover = ret;
3002         }
3003
3004         return 0;
3005 }
3006
3007 /*
3008  * Should be used after trace_array_get(), trace_types_lock
3009  * ensures that i_cdev was already initialized.
3010  */
3011 static inline int tracing_get_cpu(struct inode *inode)
3012 {
3013         if (inode->i_cdev) /* See trace_create_cpu_file() */
3014                 return (long)inode->i_cdev - 1;
3015         return RING_BUFFER_ALL_CPUS;
3016 }
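
/*
 * Note on the encoding above: trace_create_cpu_file() stores (cpu + 1)
 * in i_cdev so that a NULL i_cdev unambiguously means "no specific
 * CPU", i.e. RING_BUFFER_ALL_CPUS. For example, the inode created for
 * CPU 2 carries i_cdev == (void *)3, and this helper returns 3 - 1 = 2.
 */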
3017
3018 static const struct seq_operations tracer_seq_ops = {
3019         .start          = s_start,
3020         .next           = s_next,
3021         .stop           = s_stop,
3022         .show           = s_show,
3023 };
3024
3025 static struct trace_iterator *
3026 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3027 {
3028         struct trace_array *tr = inode->i_private;
3029         struct trace_iterator *iter;
3030         int cpu;
3031
3032         if (tracing_disabled)
3033                 return ERR_PTR(-ENODEV);
3034
3035         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3036         if (!iter)
3037                 return ERR_PTR(-ENOMEM);
3038
3039         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3040                                     GFP_KERNEL);
3041         if (!iter->buffer_iter)
3042                 goto release;
3043
3044         /*
3045          * We make a copy of the current tracer to avoid concurrent
3046          * changes to it while we are reading.
3047          */
3048         mutex_lock(&trace_types_lock);
3049         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3050         if (!iter->trace)
3051                 goto fail;
3052
3053         *iter->trace = *tr->current_trace;
3054
3055         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3056                 goto fail;
3057
3058         iter->tr = tr;
3059
3060 #ifdef CONFIG_TRACER_MAX_TRACE
3061         /* Currently only the top directory has a snapshot */
3062         if (tr->current_trace->print_max || snapshot)
3063                 iter->trace_buffer = &tr->max_buffer;
3064         else
3065 #endif
3066                 iter->trace_buffer = &tr->trace_buffer;
3067         iter->snapshot = snapshot;
3068         iter->pos = -1;
3069         iter->cpu_file = tracing_get_cpu(inode);
3070         mutex_init(&iter->mutex);
3071
3072         /* Notify the tracer early; before we stop tracing. */
3073         if (iter->trace && iter->trace->open)
3074                 iter->trace->open(iter);
3075
3076         /* Annotate start of buffers if we had overruns */
3077         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3078                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3079
3080         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3081         if (trace_clocks[tr->clock_id].in_ns)
3082                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3083
3084         /* stop the trace while dumping if we are not opening "snapshot" */
3085         if (!iter->snapshot)
3086                 tracing_stop_tr(tr);
3087
3088         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3089                 for_each_tracing_cpu(cpu) {
3090                         iter->buffer_iter[cpu] =
3091                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3092                 }
3093                 ring_buffer_read_prepare_sync();
3094                 for_each_tracing_cpu(cpu) {
3095                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3096                         tracing_iter_reset(iter, cpu);
3097                 }
3098         } else {
3099                 cpu = iter->cpu_file;
3100                 iter->buffer_iter[cpu] =
3101                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3102                 ring_buffer_read_prepare_sync();
3103                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3104                 tracing_iter_reset(iter, cpu);
3105         }
3106
3107         mutex_unlock(&trace_types_lock);
3108
3109         return iter;
3110
3111  fail:
3112         mutex_unlock(&trace_types_lock);
3113         kfree(iter->trace);
3114         kfree(iter->buffer_iter);
3115 release:
3116         seq_release_private(inode, file);
3117         return ERR_PTR(-ENOMEM);
3118 }
3119
3120 int tracing_open_generic(struct inode *inode, struct file *filp)
3121 {
3122         if (tracing_disabled)
3123                 return -ENODEV;
3124
3125         filp->private_data = inode->i_private;
3126         return 0;
3127 }
3128
3129 bool tracing_is_disabled(void)
3130 {
3131         return tracing_disabled ? true : false;
3132 }
3133
3134 /*
3135  * Open and update trace_array ref count.
3136  * Must have the current trace_array passed to it.
3137  */
3138 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3139 {
3140         struct trace_array *tr = inode->i_private;
3141
3142         if (tracing_disabled)
3143                 return -ENODEV;
3144
3145         if (trace_array_get(tr) < 0)
3146                 return -ENODEV;
3147
3148         filp->private_data = inode->i_private;
3149
3150         return 0;
3151 }
3152
3153 static int tracing_release(struct inode *inode, struct file *file)
3154 {
3155         struct trace_array *tr = inode->i_private;
3156         struct seq_file *m = file->private_data;
3157         struct trace_iterator *iter;
3158         int cpu;
3159
3160         if (!(file->f_mode & FMODE_READ)) {
3161                 trace_array_put(tr);
3162                 return 0;
3163         }
3164
3165         /* Writes do not use seq_file */
3166         iter = m->private;
3167         mutex_lock(&trace_types_lock);
3168
3169         for_each_tracing_cpu(cpu) {
3170                 if (iter->buffer_iter[cpu])
3171                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3172         }
3173
3174         if (iter->trace && iter->trace->close)
3175                 iter->trace->close(iter);
3176
3177         if (!iter->snapshot)
3178                 /* reenable tracing if it was previously enabled */
3179                 tracing_start_tr(tr);
3180
3181         __trace_array_put(tr);
3182
3183         mutex_unlock(&trace_types_lock);
3184
3185         mutex_destroy(&iter->mutex);
3186         free_cpumask_var(iter->started);
3187         kfree(iter->trace);
3188         kfree(iter->buffer_iter);
3189         seq_release_private(inode, file);
3190
3191         return 0;
3192 }
3193
3194 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3195 {
3196         struct trace_array *tr = inode->i_private;
3197
3198         trace_array_put(tr);
3199         return 0;
3200 }
3201
3202 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3203 {
3204         struct trace_array *tr = inode->i_private;
3205
3206         trace_array_put(tr);
3207
3208         return single_release(inode, file);
3209 }
3210
3211 static int tracing_open(struct inode *inode, struct file *file)
3212 {
3213         struct trace_array *tr = inode->i_private;
3214         struct trace_iterator *iter;
3215         int ret = 0;
3216
3217         if (trace_array_get(tr) < 0)
3218                 return -ENODEV;
3219
3220         /* If this file was open for write, then erase contents */
3221         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3222                 int cpu = tracing_get_cpu(inode);
3223
3224                 if (cpu == RING_BUFFER_ALL_CPUS)
3225                         tracing_reset_online_cpus(&tr->trace_buffer);
3226                 else
3227                         tracing_reset(&tr->trace_buffer, cpu);
3228         }
3229
3230         if (file->f_mode & FMODE_READ) {
3231                 iter = __tracing_open(inode, file, false);
3232                 if (IS_ERR(iter))
3233                         ret = PTR_ERR(iter);
3234                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3235                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3236         }
3237
3238         if (ret < 0)
3239                 trace_array_put(tr);
3240
3241         return ret;
3242 }
3243
3244 /*
3245  * Some tracers are not suitable for instance buffers.
3246  * A tracer is always available for the global array (toplevel)
3247  * or if it explicitly states that it is.
3248  */
3249 static bool
3250 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3251 {
3252         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3253 }
3254
3255 /* Find the next tracer that this trace array may use */
3256 static struct tracer *
3257 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3258 {
3259         while (t && !trace_ok_for_array(t, tr))
3260                 t = t->next;
3261
3262         return t;
3263 }
3264
3265 static void *
3266 t_next(struct seq_file *m, void *v, loff_t *pos)
3267 {
3268         struct trace_array *tr = m->private;
3269         struct tracer *t = v;
3270
3271         (*pos)++;
3272
3273         if (t)
3274                 t = get_tracer_for_array(tr, t->next);
3275
3276         return t;
3277 }
3278
3279 static void *t_start(struct seq_file *m, loff_t *pos)
3280 {
3281         struct trace_array *tr = m->private;
3282         struct tracer *t;
3283         loff_t l = 0;
3284
3285         mutex_lock(&trace_types_lock);
3286
3287         t = get_tracer_for_array(tr, trace_types);
3288         for (; t && l < *pos; t = t_next(m, t, &l))
3289                 ;
3290
3291         return t;
3292 }
3293
3294 static void t_stop(struct seq_file *m, void *p)
3295 {
3296         mutex_unlock(&trace_types_lock);
3297 }
3298
3299 static int t_show(struct seq_file *m, void *v)
3300 {
3301         struct tracer *t = v;
3302
3303         if (!t)
3304                 return 0;
3305
3306         seq_puts(m, t->name);
3307         if (t->next)
3308                 seq_putc(m, ' ');
3309         else
3310                 seq_putc(m, '\n');
3311
3312         return 0;
3313 }
3314
3315 static const struct seq_operations show_traces_seq_ops = {
3316         .start          = t_start,
3317         .next           = t_next,
3318         .stop           = t_stop,
3319         .show           = t_show,
3320 };
3321
3322 static int show_traces_open(struct inode *inode, struct file *file)
3323 {
3324         struct trace_array *tr = inode->i_private;
3325         struct seq_file *m;
3326         int ret;
3327
3328         if (tracing_disabled)
3329                 return -ENODEV;
3330
3331         ret = seq_open(file, &show_traces_seq_ops);
3332         if (ret)
3333                 return ret;
3334
3335         m = file->private_data;
3336         m->private = tr;
3337
3338         return 0;
3339 }
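/*
 * Usage sketch (illustrative only): these seq_ops back the
 * "available_tracers" file, so with tracefs mounted at
 * /sys/kernel/tracing a read might show, depending on configuration:
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 *
 * t_show() above prints the names space separated on a single line.
 */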
3340
3341 static ssize_t
3342 tracing_write_stub(struct file *filp, const char __user *ubuf,
3343                    size_t count, loff_t *ppos)
3344 {
3345         return count;
3346 }
3347
3348 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3349 {
3350         int ret;
3351
3352         if (file->f_mode & FMODE_READ)
3353                 ret = seq_lseek(file, offset, whence);
3354         else
3355                 file->f_pos = ret = 0;
3356
3357         return ret;
3358 }
3359
3360 static const struct file_operations tracing_fops = {
3361         .open           = tracing_open,
3362         .read           = seq_read,
3363         .write          = tracing_write_stub,
3364         .llseek         = tracing_lseek,
3365         .release        = tracing_release,
3366 };
3367
3368 static const struct file_operations show_traces_fops = {
3369         .open           = show_traces_open,
3370         .read           = seq_read,
3371         .release        = seq_release,
3372         .llseek         = seq_lseek,
3373 };
3374
3375 /*
3376  * The tracer itself will not take this lock, but we still want
3377  * to provide a consistent cpumask to user-space:
3378  */
3379 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3380
3381 /*
3382  * Temporary storage for the character representation of the
3383  * CPU bitmask (and one more byte for the newline):
3384  */
3385 static char mask_str[NR_CPUS + 1];
3386
3387 static ssize_t
3388 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3389                      size_t count, loff_t *ppos)
3390 {
3391         struct trace_array *tr = file_inode(filp)->i_private;
3392         int len;
3393
3394         mutex_lock(&tracing_cpumask_update_lock);
3395
3396         len = snprintf(mask_str, count, "%*pb\n",
3397                        cpumask_pr_args(tr->tracing_cpumask));
3398         if (len >= count) {
3399                 count = -EINVAL;
3400                 goto out_err;
3401         }
3402         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3403
3404 out_err:
3405         mutex_unlock(&tracing_cpumask_update_lock);
3406
3407         return count;
3408 }
3409
3410 static ssize_t
3411 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3412                       size_t count, loff_t *ppos)
3413 {
3414         struct trace_array *tr = file_inode(filp)->i_private;
3415         cpumask_var_t tracing_cpumask_new;
3416         int err, cpu;
3417
3418         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3419                 return -ENOMEM;
3420
3421         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3422         if (err)
3423                 goto err_unlock;
3424
3425         mutex_lock(&tracing_cpumask_update_lock);
3426
3427         local_irq_disable();
3428         arch_spin_lock(&tr->max_lock);
3429         for_each_tracing_cpu(cpu) {
3430                 /*
3431                  * Increase/decrease the disabled counter if we are
3432                  * about to flip a bit in the cpumask:
3433                  */
3434                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3435                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3436                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3437                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3438                 }
3439                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3440                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3441                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3442                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3443                 }
3444         }
3445         arch_spin_unlock(&tr->max_lock);
3446         local_irq_enable();
3447
3448         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3449
3450         mutex_unlock(&tracing_cpumask_update_lock);
3451         free_cpumask_var(tracing_cpumask_new);
3452
3453         return count;
3454
3455 err_unlock:
3456         free_cpumask_var(tracing_cpumask_new);
3457
3458         return err;
3459 }
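/*
 * Usage sketch: the buffer is parsed by cpumask_parse_user() as a hex
 * CPU mask, so (assuming tracefs is mounted at /sys/kernel/tracing)
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1; recording is disabled on the CPUs
 * whose bits were cleared and re-enabled on the ones newly set.
 */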
3460
3461 static const struct file_operations tracing_cpumask_fops = {
3462         .open           = tracing_open_generic_tr,
3463         .read           = tracing_cpumask_read,
3464         .write          = tracing_cpumask_write,
3465         .release        = tracing_release_generic_tr,
3466         .llseek         = generic_file_llseek,
3467 };
3468
3469 static int tracing_trace_options_show(struct seq_file *m, void *v)
3470 {
3471         struct tracer_opt *trace_opts;
3472         struct trace_array *tr = m->private;
3473         u32 tracer_flags;
3474         int i;
3475
3476         mutex_lock(&trace_types_lock);
3477         tracer_flags = tr->current_trace->flags->val;
3478         trace_opts = tr->current_trace->flags->opts;
3479
3480         for (i = 0; trace_options[i]; i++) {
3481                 if (tr->trace_flags & (1 << i))
3482                         seq_printf(m, "%s\n", trace_options[i]);
3483                 else
3484                         seq_printf(m, "no%s\n", trace_options[i]);
3485         }
3486
3487         for (i = 0; trace_opts[i].name; i++) {
3488                 if (tracer_flags & trace_opts[i].bit)
3489                         seq_printf(m, "%s\n", trace_opts[i].name);
3490                 else
3491                         seq_printf(m, "no%s\n", trace_opts[i].name);
3492         }
3493         mutex_unlock(&trace_types_lock);
3494
3495         return 0;
3496 }
3497
3498 static int __set_tracer_option(struct trace_array *tr,
3499                                struct tracer_flags *tracer_flags,
3500                                struct tracer_opt *opts, int neg)
3501 {
3502         struct tracer *trace = tracer_flags->trace;
3503         int ret;
3504
3505         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3506         if (ret)
3507                 return ret;
3508
3509         if (neg)
3510                 tracer_flags->val &= ~opts->bit;
3511         else
3512                 tracer_flags->val |= opts->bit;
3513         return 0;
3514 }
3515
3516 /* Try to assign a tracer specific option */
3517 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3518 {
3519         struct tracer *trace = tr->current_trace;
3520         struct tracer_flags *tracer_flags = trace->flags;
3521         struct tracer_opt *opts = NULL;
3522         int i;
3523
3524         for (i = 0; tracer_flags->opts[i].name; i++) {
3525                 opts = &tracer_flags->opts[i];
3526
3527                 if (strcmp(cmp, opts->name) == 0)
3528                         return __set_tracer_option(tr, trace->flags, opts, neg);
3529         }
3530
3531         return -EINVAL;
3532 }
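/*
 * Illustration (option name is an example; the valid names depend on
 * the current tracer): tracer specific flags reach here through the
 * same trace_options file as the core flags, so with function_graph
 * selected something like
 *
 *   # echo nofuncgraph-duration > /sys/kernel/tracing/trace_options
 *
 * clears that tracer's "funcgraph-duration" option bit.
 */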
3533
3534 /* Some tracers require overwrite to stay enabled */
3535 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3536 {
3537         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3538                 return -1;
3539
3540         return 0;
3541 }
3542
3543 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3544 {
3545         /* do nothing if flag is already set */
3546         if (!!(tr->trace_flags & mask) == !!enabled)
3547                 return 0;
3548
3549         /* Give the tracer a chance to approve the change */
3550         if (tr->current_trace->flag_changed)
3551                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3552                         return -EINVAL;
3553
3554         if (enabled)
3555                 tr->trace_flags |= mask;
3556         else
3557                 tr->trace_flags &= ~mask;
3558
3559         if (mask == TRACE_ITER_RECORD_CMD)
3560                 trace_event_enable_cmd_record(enabled);
3561
3562         if (mask == TRACE_ITER_EVENT_FORK)
3563                 trace_event_follow_fork(tr, enabled);
3564
3565         if (mask == TRACE_ITER_OVERWRITE) {
3566                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3567 #ifdef CONFIG_TRACER_MAX_TRACE
3568                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3569 #endif
3570         }
3571
3572         if (mask == TRACE_ITER_PRINTK) {
3573                 trace_printk_start_stop_comm(enabled);
3574                 trace_printk_control(enabled);
3575         }
3576
3577         return 0;
3578 }
3579
3580 static int trace_set_options(struct trace_array *tr, char *option)
3581 {
3582         char *cmp;
3583         int neg = 0;
3584         int ret = -ENODEV;
3585         int i;
3586         size_t orig_len = strlen(option);
3587
3588         cmp = strstrip(option);
3589
3590         if (strncmp(cmp, "no", 2) == 0) {
3591                 neg = 1;
3592                 cmp += 2;
3593         }
3594
3595         mutex_lock(&trace_types_lock);
3596
3597         for (i = 0; trace_options[i]; i++) {
3598                 if (strcmp(cmp, trace_options[i]) == 0) {
3599                         ret = set_tracer_flag(tr, 1 << i, !neg);
3600                         break;
3601                 }
3602         }
3603
3604         /* If no option could be set, test the specific tracer options */
3605         if (!trace_options[i])
3606                 ret = set_tracer_option(tr, cmp, neg);
3607
3608         mutex_unlock(&trace_types_lock);
3609
3610         /*
3611          * If the first trailing whitespace is replaced with '\0' by strstrip,
3612          * turn it back into a space.
3613          */
3614         if (orig_len > strlen(option))
3615                 option[strlen(option)] = ' ';
3616
3617         return ret;
3618 }
3619
3620 static void __init apply_trace_boot_options(void)
3621 {
3622         char *buf = trace_boot_options_buf;
3623         char *option;
3624
3625         while (true) {
3626                 option = strsep(&buf, ",");
3627
3628                 if (!option)
3629                         break;
3630
3631                 if (*option)
3632                         trace_set_options(&global_trace, option);
3633
3634                 /* Put back the comma to allow this to be called again */
3635                 if (buf)
3636                         *(buf - 1) = ',';
3637         }
3638 }
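/*
 * Boot-time sketch: trace_boot_options_buf is filled from the
 * "trace_options=" kernel command line parameter, so for example
 *
 *   trace_options=sym-addr,stacktrace,noprint-parent
 *
 * applies each comma separated option via trace_set_options() before
 * user-space is up (option names are those in trace_options[] plus any
 * tracer specific ones; the above is an example set only).
 */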
3639
3640 static ssize_t
3641 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3642                         size_t cnt, loff_t *ppos)
3643 {
3644         struct seq_file *m = filp->private_data;
3645         struct trace_array *tr = m->private;
3646         char buf[64];
3647         int ret;
3648
3649         if (cnt >= sizeof(buf))
3650                 return -EINVAL;
3651
3652         if (copy_from_user(buf, ubuf, cnt))
3653                 return -EFAULT;
3654
3655         buf[cnt] = 0;
3656
3657         ret = trace_set_options(tr, buf);
3658         if (ret < 0)
3659                 return ret;
3660
3661         *ppos += cnt;
3662
3663         return cnt;
3664 }
3665
3666 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3667 {
3668         struct trace_array *tr = inode->i_private;
3669         int ret;
3670
3671         if (tracing_disabled)
3672                 return -ENODEV;
3673
3674         if (trace_array_get(tr) < 0)
3675                 return -ENODEV;
3676
3677         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3678         if (ret < 0)
3679                 trace_array_put(tr);
3680
3681         return ret;
3682 }
3683
3684 static const struct file_operations tracing_iter_fops = {
3685         .open           = tracing_trace_options_open,
3686         .read           = seq_read,
3687         .llseek         = seq_lseek,
3688         .release        = tracing_single_release_tr,
3689         .write          = tracing_trace_options_write,
3690 };
3691
3692 static const char readme_msg[] =
3693         "tracing mini-HOWTO:\n\n"
3694         "# echo 0 > tracing_on : quick way to disable tracing\n"
3695         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3696         " Important files:\n"
3697         "  trace\t\t\t- The static contents of the buffer\n"
3698         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3699         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3700         "  current_tracer\t- function and latency tracers\n"
3701         "  available_tracers\t- list of configured tracers for current_tracer\n"
3702         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3703         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3704         "  trace_clock\t\t- change the clock used to order events\n"
3705         "       local:   Per cpu clock but may not be synced across CPUs\n"
3706         "      global:   Synced across CPUs but slows tracing down.\n"
3707         "     counter:   Not a clock, but just an increment\n"
3708         "      uptime:   Jiffy counter from time of boot\n"
3709         "        perf:   Same clock that perf events use\n"
3710 #ifdef CONFIG_X86_64
3711         "     x86-tsc:   TSC cycle counter\n"
3712 #endif
3713         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3714         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3715         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3716         "\t\t\t  Remove sub-buffer with rmdir\n"
3717         "  trace_options\t\t- Set format or modify how tracing happens\n"
3718         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3719         "\t\t\t  option name\n"
3720         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3721 #ifdef CONFIG_DYNAMIC_FTRACE
3722         "\n  available_filter_functions - list of functions that can be filtered on\n"
3723         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3724         "\t\t\t  functions\n"
3725         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3726         "\t     modules: Can select a group via module\n"
3727         "\t      Format: :mod:<module-name>\n"
3728         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3729         "\t    triggers: a command to perform when function is hit\n"
3730         "\t      Format: <function>:<trigger>[:count]\n"
3731         "\t     trigger: traceon, traceoff\n"
3732         "\t\t      enable_event:<system>:<event>\n"
3733         "\t\t      disable_event:<system>:<event>\n"
3734 #ifdef CONFIG_STACKTRACE
3735         "\t\t      stacktrace\n"
3736 #endif
3737 #ifdef CONFIG_TRACER_SNAPSHOT
3738         "\t\t      snapshot\n"
3739 #endif
3740         "\t\t      dump\n"
3741         "\t\t      cpudump\n"
3742         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3743         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3744         "\t     The first one will disable tracing every time do_fault is hit\n"
3745         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3746         "\t       The first time do_trap is hit and it disables tracing, the\n"
3747         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3748         "\t       the counter will not decrement. It only decrements when the\n"
3749         "\t       trigger did work\n"
3750         "\t     To remove trigger without count:\n"
3751         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3752         "\t     To remove trigger with a count:\n"
3753         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3754         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3755         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3756         "\t    modules: Can select a group via module command :mod:\n"
3757         "\t    Does not accept triggers\n"
3758 #endif /* CONFIG_DYNAMIC_FTRACE */
3759 #ifdef CONFIG_FUNCTION_TRACER
3760         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3761         "\t\t    (function)\n"
3762 #endif
3763 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3764         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3765         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3766         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3767 #endif
3768 #ifdef CONFIG_TRACER_SNAPSHOT
3769         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3770         "\t\t\t  snapshot buffer. Read the contents for more\n"
3771         "\t\t\t  information\n"
3772 #endif
3773 #ifdef CONFIG_STACK_TRACER
3774         "  stack_trace\t\t- Shows the max stack trace when active\n"
3775         "  stack_max_size\t- Shows current max stack size that was traced\n"
3776         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3777         "\t\t\t  new trace)\n"
3778 #ifdef CONFIG_DYNAMIC_FTRACE
3779         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3780         "\t\t\t  traces\n"
3781 #endif
3782 #endif /* CONFIG_STACK_TRACER */
3783         "  events/\t\t- Directory containing all trace event subsystems:\n"
3784         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3785         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3786         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3787         "\t\t\t  events\n"
3788         "      filter\t\t- If set, only events passing filter are traced\n"
3789         "  events/<system>/<event>/\t- Directory containing control files for\n"
3790         "\t\t\t  <event>:\n"
3791         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3792         "      filter\t\t- If set, only events passing filter are traced\n"
3793         "      trigger\t\t- If set, a command to perform when event is hit\n"
3794         "\t    Format: <trigger>[:count][if <filter>]\n"
3795         "\t   trigger: traceon, traceoff\n"
3796         "\t            enable_event:<system>:<event>\n"
3797         "\t            disable_event:<system>:<event>\n"
3798 #ifdef CONFIG_HIST_TRIGGERS
3799         "\t            enable_hist:<system>:<event>\n"
3800         "\t            disable_hist:<system>:<event>\n"
3801 #endif
3802 #ifdef CONFIG_STACKTRACE
3803         "\t\t    stacktrace\n"
3804 #endif
3805 #ifdef CONFIG_TRACER_SNAPSHOT
3806         "\t\t    snapshot\n"
3807 #endif
3808 #ifdef CONFIG_HIST_TRIGGERS
3809         "\t\t    hist (see below)\n"
3810 #endif
3811         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3812         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3813         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3814         "\t                  events/block/block_unplug/trigger\n"
3815         "\t   The first disables tracing every time block_unplug is hit.\n"
3816         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3817         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3818         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3819         "\t   Like function triggers, the counter is only decremented if it\n"
3820         "\t    enabled or disabled tracing.\n"
3821         "\t   To remove a trigger without a count:\n"
3822         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3823         "\t   To remove a trigger with a count:\n"
3824         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3825         "\t   Filters can be ignored when removing a trigger.\n"
3826 #ifdef CONFIG_HIST_TRIGGERS
3827         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3828         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3829         "\t            [:values=<field1[,field2,...]>]\n"
3830         "\t            [:sort=<field1[,field2,...]>]\n"
3831         "\t            [:size=#entries]\n"
3832         "\t            [:pause][:continue][:clear]\n"
3833         "\t            [:name=histname1]\n"
3834         "\t            [if <filter>]\n\n"
3835         "\t    When a matching event is hit, an entry is added to a hash\n"
3836         "\t    table using the key(s) and value(s) named, and the value of a\n"
3837         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3838         "\t    correspond to fields in the event's format description.  Keys\n"
3839         "\t    can be any field, or the special string 'stacktrace'.\n"
3840         "\t    Compound keys consisting of up to two fields can be specified\n"
3841         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3842         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3843         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3844         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3845         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3846         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3847         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3848         "\t    its histogram data will be shared with other triggers of the\n"
3849         "\t    same name, and trigger hits will update this common data.\n\n"
3850         "\t    Reading the 'hist' file for the event will dump the hash\n"
3851         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3852         "\t    triggers attached to an event, there will be a table for each\n"
3853         "\t    trigger in the output.  The table displayed for a named\n"
3854         "\t    trigger will be the same as any other instance having the\n"
3855         "\t    same name.  The default format used to display a given field\n"
3856         "\t    can be modified by appending any of the following modifiers\n"
3857         "\t    to the field name, as applicable:\n\n"
3858         "\t            .hex        display a number as a hex value\n"
3859         "\t            .sym        display an address as a symbol\n"
3860         "\t            .sym-offset display an address as a symbol and offset\n"
3861         "\t            .execname   display a common_pid as a program name\n"
3862         "\t            .syscall    display a syscall id as a syscall name\n"
3863         "\t            .log2       display log2 value rather than raw number\n\n"
3864         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3865         "\t    trigger or to start a hist trigger but not log any events\n"
3866         "\t    until told to do so.  'continue' can be used to start or\n"
3867         "\t    restart a paused hist trigger.\n\n"
3868         "\t    The 'clear' parameter will clear the contents of a running\n"
3869         "\t    hist trigger and leave its current paused/active state\n"
3870         "\t    unchanged.\n\n"
3871         "\t    The enable_hist and disable_hist triggers can be used to\n"
3872         "\t    have one event conditionally start and stop another event's\n"
3873         "\t    already-attached hist trigger.  The syntax is analogous to\n"
3874         "\t    the enable_event and disable_event triggers.\n"
3875 #endif
3876 ;
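/*
 * A worked example of the hist trigger syntax documented above, using
 * field names from the kmem:kmalloc event (illustrative only):
 *
 *   # echo 'hist:keys=call_site:values=bytes_req,bytes_alloc' > \
 *         events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *
 * aggregates hits per call site and sums the requested and allocated
 * byte counts; paths are relative to the tracefs mount point.
 */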
3877
3878 static ssize_t
3879 tracing_readme_read(struct file *filp, char __user *ubuf,
3880                        size_t cnt, loff_t *ppos)
3881 {
3882         return simple_read_from_buffer(ubuf, cnt, ppos,
3883                                         readme_msg, strlen(readme_msg));
3884 }
3885
3886 static const struct file_operations tracing_readme_fops = {
3887         .open           = tracing_open_generic,
3888         .read           = tracing_readme_read,
3889         .llseek         = generic_file_llseek,
3890 };
3891
3892 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3893 {
3894         unsigned int *ptr = v;
3895
3896         if (*pos || m->count)
3897                 ptr++;
3898
3899         (*pos)++;
3900
3901         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3902              ptr++) {
3903                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3904                         continue;
3905
3906                 return ptr;
3907         }
3908
3909         return NULL;
3910 }
3911
3912 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3913 {
3914         void *v;
3915         loff_t l = 0;
3916
3917         preempt_disable();
3918         arch_spin_lock(&trace_cmdline_lock);
3919
3920         v = &savedcmd->map_cmdline_to_pid[0];
3921         while (l <= *pos) {
3922                 v = saved_cmdlines_next(m, v, &l);
3923                 if (!v)
3924                         return NULL;
3925         }
3926
3927         return v;
3928 }
3929
3930 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3931 {
3932         arch_spin_unlock(&trace_cmdline_lock);
3933         preempt_enable();
3934 }
3935
3936 static int saved_cmdlines_show(struct seq_file *m, void *v)
3937 {
3938         char buf[TASK_COMM_LEN];
3939         unsigned int *pid = v;
3940
3941         __trace_find_cmdline(*pid, buf);
3942         seq_printf(m, "%d %s\n", *pid, buf);
3943         return 0;
3944 }
3945
3946 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3947         .start          = saved_cmdlines_start,
3948         .next           = saved_cmdlines_next,
3949         .stop           = saved_cmdlines_stop,
3950         .show           = saved_cmdlines_show,
3951 };
3952
3953 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3954 {
3955         if (tracing_disabled)
3956                 return -ENODEV;
3957
3958         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3959 }
3960
3961 static const struct file_operations tracing_saved_cmdlines_fops = {
3962         .open           = tracing_saved_cmdlines_open,
3963         .read           = seq_read,
3964         .llseek         = seq_lseek,
3965         .release        = seq_release,
3966 };
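/*
 * Reading sketch: each line of "saved_cmdlines" is a "<pid> <comm>"
 * pair emitted by saved_cmdlines_show(); the contents vary per system,
 * e.g.
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1 systemd
 *   642 kworker/0:2
 */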
3967
3968 static ssize_t
3969 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3970                                  size_t cnt, loff_t *ppos)
3971 {
3972         char buf[64];
3973         int r;
3974
3975         arch_spin_lock(&trace_cmdline_lock);
3976         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3977         arch_spin_unlock(&trace_cmdline_lock);
3978
3979         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3980 }
3981
3982 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3983 {
3984         kfree(s->saved_cmdlines);
3985         kfree(s->map_cmdline_to_pid);
3986         kfree(s);
3987 }
3988
3989 static int tracing_resize_saved_cmdlines(unsigned int val)
3990 {
3991         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3992
3993         s = kmalloc(sizeof(*s), GFP_KERNEL);
3994         if (!s)
3995                 return -ENOMEM;
3996
3997         if (allocate_cmdlines_buffer(val, s) < 0) {
3998                 kfree(s);
3999                 return -ENOMEM;
4000         }
4001
4002         arch_spin_lock(&trace_cmdline_lock);
4003         savedcmd_temp = savedcmd;
4004         savedcmd = s;
4005         arch_spin_unlock(&trace_cmdline_lock);
4006         free_saved_cmdlines_buffer(savedcmd_temp);
4007
4008         return 0;
4009 }
4010
4011 static ssize_t
4012 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4013                                   size_t cnt, loff_t *ppos)
4014 {
4015         unsigned long val;
4016         int ret;
4017
4018         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4019         if (ret)
4020                 return ret;
4021
4022         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4023         if (!val || val > PID_MAX_DEFAULT)
4024                 return -EINVAL;
4025
4026         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4027         if (ret < 0)
4028                 return ret;
4029
4030         *ppos += cnt;
4031
4032         return cnt;
4033 }
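/*
 * Usage sketch: the value written is the number of comm<->pid slots to
 * keep (1 .. PID_MAX_DEFAULT), e.g.
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * which swaps in a freshly allocated cmdline cache through
 * tracing_resize_saved_cmdlines() above.
 */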
4034
4035 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4036         .open           = tracing_open_generic,
4037         .read           = tracing_saved_cmdlines_size_read,
4038         .write          = tracing_saved_cmdlines_size_write,
4039 };
4040
4041 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4042 static union trace_enum_map_item *
4043 update_enum_map(union trace_enum_map_item *ptr)
4044 {
4045         if (!ptr->map.enum_string) {
4046                 if (ptr->tail.next) {
4047                         ptr = ptr->tail.next;
4048                         /* Set ptr to the next real item (skip head) */
4049                         ptr++;
4050                 } else
4051                         return NULL;
4052         }
4053         return ptr;
4054 }
4055
4056 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4057 {
4058         union trace_enum_map_item *ptr = v;
4059
4060         /*
4061          * Paranoid! If ptr points to end, we don't want to increment past it.
4062          * This really should never happen.
4063          */
4064         ptr = update_enum_map(ptr);
4065         if (WARN_ON_ONCE(!ptr))
4066                 return NULL;
4067
4068         ptr++;
4069
4070         (*pos)++;
4071
4072         ptr = update_enum_map(ptr);
4073
4074         return ptr;
4075 }
4076
4077 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4078 {
4079         union trace_enum_map_item *v;
4080         loff_t l = 0;
4081
4082         mutex_lock(&trace_enum_mutex);
4083
4084         v = trace_enum_maps;
4085         if (v)
4086                 v++;
4087
4088         while (v && l < *pos) {
4089                 v = enum_map_next(m, v, &l);
4090         }
4091
4092         return v;
4093 }
4094
4095 static void enum_map_stop(struct seq_file *m, void *v)
4096 {
4097         mutex_unlock(&trace_enum_mutex);
4098 }
4099
4100 static int enum_map_show(struct seq_file *m, void *v)
4101 {
4102         union trace_enum_map_item *ptr = v;
4103
4104         seq_printf(m, "%s %ld (%s)\n",
4105                    ptr->map.enum_string, ptr->map.enum_value,
4106                    ptr->map.system);
4107
4108         return 0;
4109 }
4110
4111 static const struct seq_operations tracing_enum_map_seq_ops = {
4112         .start          = enum_map_start,
4113         .next           = enum_map_next,
4114         .stop           = enum_map_stop,
4115         .show           = enum_map_show,
4116 };
4117
4118 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4119 {
4120         if (tracing_disabled)
4121                 return -ENODEV;
4122
4123         return seq_open(filp, &tracing_enum_map_seq_ops);
4124 }
4125
4126 static const struct file_operations tracing_enum_map_fops = {
4127         .open           = tracing_enum_map_open,
4128         .read           = seq_read,
4129         .llseek         = seq_lseek,
4130         .release        = seq_release,
4131 };
4132
4133 static inline union trace_enum_map_item *
4134 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4135 {
4136         /* Return tail of array given the head */
4137         return ptr + ptr->head.length + 1;
4138 }
4139
4140 static void
4141 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4142                            int len)
4143 {
4144         struct trace_enum_map **stop;
4145         struct trace_enum_map **map;
4146         union trace_enum_map_item *map_array;
4147         union trace_enum_map_item *ptr;
4148
4149         stop = start + len;
4150
4151         /*
4152          * The trace_enum_maps contains the map plus a head and tail item,
4153          * where the head holds the module and length of array, and the
4154          * tail holds a pointer to the next list.
4155          */
4156         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4157         if (!map_array) {
4158                 pr_warn("Unable to allocate trace enum mapping\n");
4159                 return;
4160         }
4161
4162         mutex_lock(&trace_enum_mutex);
4163
4164         if (!trace_enum_maps)
4165                 trace_enum_maps = map_array;
4166         else {
4167                 ptr = trace_enum_maps;
4168                 for (;;) {
4169                         ptr = trace_enum_jmp_to_tail(ptr);
4170                         if (!ptr->tail.next)
4171                                 break;
4172                         ptr = ptr->tail.next;
4173
4174                 }
4175                 ptr->tail.next = map_array;
4176         }
4177         map_array->head.mod = mod;
4178         map_array->head.length = len;
4179         map_array++;
4180
4181         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4182                 map_array->map = **map;
4183                 map_array++;
4184         }
4185         memset(map_array, 0, sizeof(*map_array));
4186
4187         mutex_unlock(&trace_enum_mutex);
4188 }
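/*
 * Layout sketch of one map_array chunk built above (len == 3 shown):
 *
 *   [ head: mod, length ] [ map 0 ] [ map 1 ] [ map 2 ] [ tail: next ]
 *
 * The zeroed tail item doubles as the "enum_string == NULL" marker that
 * update_enum_map() uses to decide whether to hop to the next chunk,
 * and trace_enum_jmp_to_tail() lands on it via head.length + 1.
 */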
4189
4190 static void trace_create_enum_file(struct dentry *d_tracer)
4191 {
4192         trace_create_file("enum_map", 0444, d_tracer,
4193                           NULL, &tracing_enum_map_fops);
4194 }
4195
4196 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4197 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4198 static inline void trace_insert_enum_map_file(struct module *mod,
4199                               struct trace_enum_map **start, int len) { }
4200 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4201
4202 static void trace_insert_enum_map(struct module *mod,
4203                                   struct trace_enum_map **start, int len)
4204 {
4205         struct trace_enum_map **map;
4206
4207         if (len <= 0)
4208                 return;
4209
4210         map = start;
4211
4212         trace_event_enum_update(map, len);
4213
4214         trace_insert_enum_map_file(mod, start, len);
4215 }
4216
4217 static ssize_t
4218 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4219                        size_t cnt, loff_t *ppos)
4220 {
4221         struct trace_array *tr = filp->private_data;
4222         char buf[MAX_TRACER_SIZE+2];
4223         int r;
4224
4225         mutex_lock(&trace_types_lock);
4226         r = sprintf(buf, "%s\n", tr->current_trace->name);
4227         mutex_unlock(&trace_types_lock);
4228
4229         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4230 }
4231
4232 int tracer_init(struct tracer *t, struct trace_array *tr)
4233 {
4234         tracing_reset_online_cpus(&tr->trace_buffer);
4235         return t->init(tr);
4236 }
4237
4238 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4239 {
4240         int cpu;
4241
4242         for_each_tracing_cpu(cpu)
4243                 per_cpu_ptr(buf->data, cpu)->entries = val;
4244 }
4245
4246 #ifdef CONFIG_TRACER_MAX_TRACE
4247 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4248 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4249                                         struct trace_buffer *size_buf, int cpu_id)
4250 {
4251         int cpu, ret = 0;
4252
4253         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4254                 for_each_tracing_cpu(cpu) {
4255                         ret = ring_buffer_resize(trace_buf->buffer,
4256                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4257                         if (ret < 0)
4258                                 break;
4259                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4260                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4261                 }
4262         } else {
4263                 ret = ring_buffer_resize(trace_buf->buffer,
4264                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4265                 if (ret == 0)
4266                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4267                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4268         }
4269
4270         return ret;
4271 }
4272 #endif /* CONFIG_TRACER_MAX_TRACE */
4273
4274 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4275                                         unsigned long size, int cpu)
4276 {
4277         int ret;
4278
4279         /*
4280          * If kernel or user changes the size of the ring buffer
4281          * we use the size that was given, and we can forget about
4282          * expanding it later.
4283          */
4284         ring_buffer_expanded = true;
4285
4286         /* May be called before buffers are initialized */
4287         if (!tr->trace_buffer.buffer)
4288                 return 0;
4289
4290         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4291         if (ret < 0)
4292                 return ret;
4293
4294 #ifdef CONFIG_TRACER_MAX_TRACE
4295         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4296             !tr->current_trace->use_max_tr)
4297                 goto out;
4298
4299         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4300         if (ret < 0) {
4301                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4302                                                      &tr->trace_buffer, cpu);
4303                 if (r < 0) {
4304                         /*
4305                          * AARGH! We are left with a different
4306                          * sized max buffer!!!!
4307                          * The max buffer is our "snapshot" buffer.
4308                          * When a tracer needs a snapshot (one of the
4309                          * latency tracers), it swaps the max buffer
4310                          * with the saved snapshot. We succeeded in
4311                          * updating the size of the main buffer, but failed to
4312                          * update the size of the max buffer. But when we tried
4313                          * to reset the main buffer to the original size, we
4314                          * failed there too. This is very unlikely to
4315                          * happen, but if it does, warn and kill all
4316                          * tracing.
4317                          */
4318                         WARN_ON(1);
4319                         tracing_disabled = 1;
4320                 }
4321                 return ret;
4322         }
4323
4324         if (cpu == RING_BUFFER_ALL_CPUS)
4325                 set_buffer_entries(&tr->max_buffer, size);
4326         else
4327                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4328
4329  out:
4330 #endif /* CONFIG_TRACER_MAX_TRACE */
4331
4332         if (cpu == RING_BUFFER_ALL_CPUS)
4333                 set_buffer_entries(&tr->trace_buffer, size);
4334         else
4335                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4336
4337         return ret;
4338 }
4339
4340 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4341                                           unsigned long size, int cpu_id)
4342 {
4343         int ret = size;
4344
4345         mutex_lock(&trace_types_lock);
4346
4347         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4348                 /* make sure, this cpu is enabled in the mask */
4349                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4350                         ret = -EINVAL;
4351                         goto out;
4352                 }
4353         }
4354
4355         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4356         if (ret < 0)
4357                 ret = -ENOMEM;
4358
4359 out:
4360         mutex_unlock(&trace_types_lock);
4361
4362         return ret;
4363 }
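/*
 * User-space sketch: writes to "buffer_size_kb" end up here (the write
 * handler converts the kilobyte value to bytes first), e.g.
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The first form resizes every per-cpu buffer, the second only CPU 1.
 */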
4364
4365
4366 /**
4367  * tracing_update_buffers - used by tracing facility to expand ring buffers
4368  *
4369  * To save memory when tracing is never used on a system that has it
4370  * configured in, the ring buffers are set to a minimum size. Once a
4371  * user starts to use the tracing facility, the buffers need to grow
4372  * to their default size.
4373  *
4374  * This function is to be called when a tracer is about to be used.
4375  */
4376 int tracing_update_buffers(void)
4377 {
4378         int ret = 0;
4379
4380         mutex_lock(&trace_types_lock);
4381         if (!ring_buffer_expanded)
4382                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4383                                                 RING_BUFFER_ALL_CPUS);
4384         mutex_unlock(&trace_types_lock);
4385
4386         return ret;
4387 }
4388
4389 struct trace_option_dentry;
4390
4391 static void
4392 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4393
4394 /*
4395  * Used to clear out the tracer before deletion of an instance.
4396  * Must have trace_types_lock held.
4397  */
4398 static void tracing_set_nop(struct trace_array *tr)
4399 {
4400         if (tr->current_trace == &nop_trace)
4401                 return;
4402         
4403
4404
4405         if (tr->current_trace->reset)
4406                 tr->current_trace->reset(tr);
4407
4408         tr->current_trace = &nop_trace;
4409 }
4410
4411 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4412 {
4413         /* Only enable if the directory has been created already. */
4414         if (!tr->dir)
4415                 return;
4416
4417         create_trace_option_files(tr, t);
4418 }
4419
4420 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4421 {
4422         struct tracer *t;
4423 #ifdef CONFIG_TRACER_MAX_TRACE
4424         bool had_max_tr;
4425 #endif
4426         int ret = 0;
4427
4428         mutex_lock(&trace_types_lock);
4429
4430         if (!ring_buffer_expanded) {
4431                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4432                                                 RING_BUFFER_ALL_CPUS);
4433                 if (ret < 0)
4434                         goto out;
4435                 ret = 0;
4436         }
4437
4438         for (t = trace_types; t; t = t->next) {
4439                 if (strcmp(t->name, buf) == 0)
4440                         break;
4441         }
4442         if (!t) {
4443                 ret = -EINVAL;
4444                 goto out;
4445         }
4446         if (t == tr->current_trace)
4447                 goto out;
4448
4449         /* Some tracers are only allowed for the top level buffer */
4450         if (!trace_ok_for_array(t, tr)) {
4451                 ret = -EINVAL;
4452                 goto out;
4453         }
4454
4455         /* If trace pipe files are being read, we can't change the tracer */
4456         if (tr->current_trace->ref) {
4457                 ret = -EBUSY;
4458                 goto out;
4459         }
4460
4461         trace_branch_disable();
4462
4463         tr->current_trace->enabled--;
4464
4465         if (tr->current_trace->reset)
4466                 tr->current_trace->reset(tr);
4467
4468         /* Current trace needs to be nop_trace before synchronize_sched */
4469         tr->current_trace = &nop_trace;
4470
4471 #ifdef CONFIG_TRACER_MAX_TRACE
4472         had_max_tr = tr->allocated_snapshot;
4473
4474         if (had_max_tr && !t->use_max_tr) {
4475                 /*
4476                  * We need to make sure that the update_max_tr sees that
4477                  * current_trace changed to nop_trace to keep it from
4478                  * swapping the buffers after we resize it.
4479                  * update_max_tr() is called with interrupts disabled,
4480                  * so a synchronize_sched() is sufficient.
4481                  */
4482                 synchronize_sched();
4483                 free_snapshot(tr);
4484         }
4485 #endif
4486
4487 #ifdef CONFIG_TRACER_MAX_TRACE
4488         if (t->use_max_tr && !had_max_tr) {
4489                 ret = alloc_snapshot(tr);
4490                 if (ret < 0)
4491                         goto out;
4492         }
4493 #endif
4494
4495         if (t->init) {
4496                 ret = tracer_init(t, tr);
4497                 if (ret)
4498                         goto out;
4499         }
4500
4501         tr->current_trace = t;
4502         tr->current_trace->enabled++;
4503         trace_branch_enable(tr);
4504  out:
4505         mutex_unlock(&trace_types_lock);
4506
4507         return ret;
4508 }
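/*
 * Usage sketch: "current_tracer" writes are routed through
 * tracing_set_trace_write() below into this function, e.g.
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing "nop" is the usual way to switch the current tracer off.
 */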
4509
4510 static ssize_t
4511 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4512                         size_t cnt, loff_t *ppos)
4513 {
4514         struct trace_array *tr = filp->private_data;
4515         char buf[MAX_TRACER_SIZE+1];
4516         int i;
4517         size_t ret;
4518         int err;
4519
4520         ret = cnt;
4521
4522         if (cnt > MAX_TRACER_SIZE)
4523                 cnt = MAX_TRACER_SIZE;
4524
4525         if (copy_from_user(buf, ubuf, cnt))
4526                 return -EFAULT;
4527
4528         buf[cnt] = 0;
4529
4530         /* strip trailing whitespace. */
4531         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4532                 buf[i] = 0;
4533
4534         err = tracing_set_tracer(tr, buf);
4535         if (err)
4536                 return err;
4537
4538         *ppos += ret;
4539
4540         return ret;
4541 }
4542
4543 static ssize_t
4544 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4545                    size_t cnt, loff_t *ppos)
4546 {
4547         char buf[64];
4548         int r;
4549
4550         r = snprintf(buf, sizeof(buf), "%ld\n",
4551                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4552         if (r > sizeof(buf))
4553                 r = sizeof(buf);
4554         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4555 }
4556
4557 static ssize_t
4558 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4559                     size_t cnt, loff_t *ppos)
4560 {
4561         unsigned long val;
4562         int ret;
4563
4564         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4565         if (ret)
4566                 return ret;
4567
4568         *ptr = val * 1000;
4569
4570         return cnt;
4571 }
4572
4573 static ssize_t
4574 tracing_thresh_read(struct file *filp, char __user *ubuf,
4575                     size_t cnt, loff_t *ppos)
4576 {
4577         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4578 }
4579
4580 static ssize_t
4581 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4582                      size_t cnt, loff_t *ppos)
4583 {
4584         struct trace_array *tr = filp->private_data;
4585         int ret;
4586
4587         mutex_lock(&trace_types_lock);
4588         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4589         if (ret < 0)
4590                 goto out;
4591
4592         if (tr->current_trace->update_thresh) {
4593                 ret = tr->current_trace->update_thresh(tr);
4594                 if (ret < 0)
4595                         goto out;
4596         }
4597
4598         ret = cnt;
4599 out:
4600         mutex_unlock(&trace_types_lock);
4601
4602         return ret;
4603 }
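/*
 * Usage sketch: "tracing_thresh" is written in microseconds and stored
 * in nanoseconds (tracing_nsecs_write() multiplies by 1000), e.g.
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * asks latency tracers that honor the threshold to record only
 * latencies above 100 usecs.
 */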
4604
4605 #ifdef CONFIG_TRACER_MAX_TRACE
4606
4607 static ssize_t
4608 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4609                      size_t cnt, loff_t *ppos)
4610 {
4611         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4612 }
4613
4614 static ssize_t
4615 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4616                       size_t cnt, loff_t *ppos)
4617 {
4618         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4619 }
4620
4621 #endif
4622
4623 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4624 {
4625         struct trace_array *tr = inode->i_private;
4626         struct trace_iterator *iter;
4627         int ret = 0;
4628
4629         if (tracing_disabled)
4630                 return -ENODEV;
4631
4632         if (trace_array_get(tr) < 0)
4633                 return -ENODEV;
4634
4635         mutex_lock(&trace_types_lock);
4636
4637         /* create a buffer to store the information to pass to userspace */
4638         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4639         if (!iter) {
4640                 ret = -ENOMEM;
4641                 __trace_array_put(tr);
4642                 goto out;
4643         }
4644
4645         trace_seq_init(&iter->seq);
4646         iter->trace = tr->current_trace;
4647
4648         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4649                 ret = -ENOMEM;
4650                 goto fail;
4651         }
4652
4653         /* trace pipe does not show start of buffer */
4654         cpumask_setall(iter->started);
4655
4656         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4657                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4658
4659         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4660         if (trace_clocks[tr->clock_id].in_ns)
4661                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4662
4663         iter->tr = tr;
4664         iter->trace_buffer = &tr->trace_buffer;
4665         iter->cpu_file = tracing_get_cpu(inode);
4666         mutex_init(&iter->mutex);
4667         filp->private_data = iter;
4668
4669         if (iter->trace->pipe_open)
4670                 iter->trace->pipe_open(iter);
4671
4672         nonseekable_open(inode, filp);
4673
4674         tr->current_trace->ref++;
4675 out:
4676         mutex_unlock(&trace_types_lock);
4677         return ret;
4678
4679 fail:
4680         kfree(iter->trace);
4681         kfree(iter);
4682         __trace_array_put(tr);
4683         mutex_unlock(&trace_types_lock);
4684         return ret;
4685 }
4686
4687 static int tracing_release_pipe(struct inode *inode, struct file *file)
4688 {
4689         struct trace_iterator *iter = file->private_data;
4690         struct trace_array *tr = inode->i_private;
4691
4692         mutex_lock(&trace_types_lock);
4693
4694         tr->current_trace->ref--;
4695
4696         if (iter->trace->pipe_close)
4697                 iter->trace->pipe_close(iter);
4698
4699         mutex_unlock(&trace_types_lock);
4700
4701         free_cpumask_var(iter->started);
4702         mutex_destroy(&iter->mutex);
4703         kfree(iter);
4704
4705         trace_array_put(tr);
4706
4707         return 0;
4708 }
4709
4710 static unsigned int
4711 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4712 {
4713         struct trace_array *tr = iter->tr;
4714
4715         /* Iterators are static, they should be filled or empty */
4716         if (trace_buffer_iter(iter, iter->cpu_file))
4717                 return POLLIN | POLLRDNORM;
4718
4719         if (tr->trace_flags & TRACE_ITER_BLOCK)
4720                 /*
4721                  * Always select as readable when in blocking mode
4722                  */
4723                 return POLLIN | POLLRDNORM;
4724         else
4725                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4726                                              filp, poll_table);
4727 }
4728
4729 static unsigned int
4730 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4731 {
4732         struct trace_iterator *iter = filp->private_data;
4733
4734         return trace_poll(iter, filp, poll_table);
4735 }
4736
4737 /* Must be called with iter->mutex held. */
4738 static int tracing_wait_pipe(struct file *filp)
4739 {
4740         struct trace_iterator *iter = filp->private_data;
4741         int ret;
4742
4743         while (trace_empty(iter)) {
4744
4745                 if (filp->f_flags & O_NONBLOCK) {
4746                         return -EAGAIN;
4747                 }
4748
4749                 /*
4750                  * Block until there is something to read, or until tracing
4751                  * is disabled after something has already been read. If
4752                  * tracing is disabled but nothing has been read yet, keep
4753                  * blocking: this lets a user cat this file and then enable
4754                  * tracing. Once data has been read, disabling it gives EOF.
4755                  *
4756                  * iter->pos will be 0 if we haven't read anything.
4757                  */
4758                 if (!tracing_is_on() && iter->pos)
4759                         break;
4760
4761                 mutex_unlock(&iter->mutex);
4762
4763                 ret = wait_on_pipe(iter, false);
4764
4765                 mutex_lock(&iter->mutex);
4766
4767                 if (ret)
4768                         return ret;
4769         }
4770
4771         return 1;
4772 }
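/*
 * Usage note (not part of the original source): the blocking behaviour
 * above is what makes trace_pipe usable as a live consumer. A sketch of
 * the expected interaction, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/trace_pipe      # blocks until events arrive
 *   echo 1 > /sys/kernel/tracing/tracing_on
 *
 * The cat keeps blocking while tracing is off and nothing has been read
 * yet; once some data has been returned, disabling tracing ends the read
 * with EOF.
 */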
4773
4774 /*
4775  * Consumer reader.
4776  */
4777 static ssize_t
4778 tracing_read_pipe(struct file *filp, char __user *ubuf,
4779                   size_t cnt, loff_t *ppos)
4780 {
4781         struct trace_iterator *iter = filp->private_data;
4782         ssize_t sret;
4783
4784         /* return any leftover data */
4785         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4786         if (sret != -EBUSY)
4787                 return sret;
4788
4789         trace_seq_init(&iter->seq);
4790
4791         /*
4792          * Avoid more than one consumer on a single file descriptor.
4793          * This is just a matter of trace coherency; the ring buffer
4794          * itself is protected.
4795          */
4796         mutex_lock(&iter->mutex);
4797         if (iter->trace->read) {
4798                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4799                 if (sret)
4800                         goto out;
4801         }
4802
4803 waitagain:
4804         sret = tracing_wait_pipe(filp);
4805         if (sret <= 0)
4806                 goto out;
4807
4808         /* stop when tracing is finished */
4809         if (trace_empty(iter)) {
4810                 sret = 0;
4811                 goto out;
4812         }
4813
4814         if (cnt >= PAGE_SIZE)
4815                 cnt = PAGE_SIZE - 1;
4816
4817         /* reset all but tr, trace, and overruns */
4818         memset(&iter->seq, 0,
4819                sizeof(struct trace_iterator) -
4820                offsetof(struct trace_iterator, seq));
4821         cpumask_clear(iter->started);
4822         iter->pos = -1;
4823
4824         trace_event_read_lock();
4825         trace_access_lock(iter->cpu_file);
4826         while (trace_find_next_entry_inc(iter) != NULL) {
4827                 enum print_line_t ret;
4828                 int save_len = iter->seq.seq.len;
4829
4830                 ret = print_trace_line(iter);
4831                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4832                         /* don't print partial lines */
4833                         iter->seq.seq.len = save_len;
4834                         break;
4835                 }
4836                 if (ret != TRACE_TYPE_NO_CONSUME)
4837                         trace_consume(iter);
4838
4839                 if (trace_seq_used(&iter->seq) >= cnt)
4840                         break;
4841
4842                 /*
4843                  * Setting the full flag means we reached the trace_seq buffer
4844                  * size and should have left via the partial-line check above;
4845                  * one of the trace_seq_* functions is not being used properly.
4846                  */
4847                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4848                           iter->ent->type);
4849         }
4850         trace_access_unlock(iter->cpu_file);
4851         trace_event_read_unlock();
4852
4853         /* Now copy what we have to the user */
4854         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4855         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4856                 trace_seq_init(&iter->seq);
4857
4858         /*
4859          * If there was nothing to send to the user despite consuming
4860          * trace entries, go back and wait for more entries.
4861          */
4862         if (sret == -EBUSY)
4863                 goto waitagain;
4864
4865 out:
4866         mutex_unlock(&iter->mutex);
4867
4868         return sret;
4869 }
4870
4871 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4872                                      unsigned int idx)
4873 {
4874         __free_page(spd->pages[idx]);
4875 }
4876
4877 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4878         .can_merge              = 0,
4879         .confirm                = generic_pipe_buf_confirm,
4880         .release                = generic_pipe_buf_release,
4881         .steal                  = generic_pipe_buf_steal,
4882         .get                    = generic_pipe_buf_get,
4883 };
4884
4885 static size_t
4886 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4887 {
4888         size_t count;
4889         int save_len;
4890         int ret;
4891
4892         /* Seq buffer is page-sized, exactly what we need. */
4893         for (;;) {
4894                 save_len = iter->seq.seq.len;
4895                 ret = print_trace_line(iter);
4896
4897                 if (trace_seq_has_overflowed(&iter->seq)) {
4898                         iter->seq.seq.len = save_len;
4899                         break;
4900                 }
4901
4902                 /*
4903                  * This should not be hit, because a partial line should
4904                  * only be returned when iter->seq has overflowed, which
4905                  * was handled above. But check it anyway to be safe.
4906                  */
4907                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4908                         iter->seq.seq.len = save_len;
4909                         break;
4910                 }
4911
4912                 count = trace_seq_used(&iter->seq) - save_len;
4913                 if (rem < count) {
4914                         rem = 0;
4915                         iter->seq.seq.len = save_len;
4916                         break;
4917                 }
4918
4919                 if (ret != TRACE_TYPE_NO_CONSUME)
4920                         trace_consume(iter);
4921                 rem -= count;
4922                 if (!trace_find_next_entry_inc(iter))   {
4923                         rem = 0;
4924                         iter->ent = NULL;
4925                         break;
4926                 }
4927         }
4928
4929         return rem;
4930 }
4931
4932 static ssize_t tracing_splice_read_pipe(struct file *filp,
4933                                         loff_t *ppos,
4934                                         struct pipe_inode_info *pipe,
4935                                         size_t len,
4936                                         unsigned int flags)
4937 {
4938         struct page *pages_def[PIPE_DEF_BUFFERS];
4939         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4940         struct trace_iterator *iter = filp->private_data;
4941         struct splice_pipe_desc spd = {
4942                 .pages          = pages_def,
4943                 .partial        = partial_def,
4944                 .nr_pages       = 0, /* This gets updated below. */
4945                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4946                 .flags          = flags,
4947                 .ops            = &tracing_pipe_buf_ops,
4948                 .spd_release    = tracing_spd_release_pipe,
4949         };
4950         ssize_t ret;
4951         size_t rem;
4952         unsigned int i;
4953
4954         if (splice_grow_spd(pipe, &spd))
4955                 return -ENOMEM;
4956
4957         mutex_lock(&iter->mutex);
4958
4959         if (iter->trace->splice_read) {
4960                 ret = iter->trace->splice_read(iter, filp,
4961                                                ppos, pipe, len, flags);
4962                 if (ret)
4963                         goto out_err;
4964         }
4965
4966         ret = tracing_wait_pipe(filp);
4967         if (ret <= 0)
4968                 goto out_err;
4969
4970         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4971                 ret = -EFAULT;
4972                 goto out_err;
4973         }
4974
4975         trace_event_read_lock();
4976         trace_access_lock(iter->cpu_file);
4977
4978         /* Fill as many pages as possible. */
4979         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4980                 spd.pages[i] = alloc_page(GFP_KERNEL);
4981                 if (!spd.pages[i])
4982                         break;
4983
4984                 rem = tracing_fill_pipe_page(rem, iter);
4985
4986                 /* Copy the data into the page, so we can start over. */
4987                 ret = trace_seq_to_buffer(&iter->seq,
4988                                           page_address(spd.pages[i]),
4989                                           trace_seq_used(&iter->seq));
4990                 if (ret < 0) {
4991                         __free_page(spd.pages[i]);
4992                         break;
4993                 }
4994                 spd.partial[i].offset = 0;
4995                 spd.partial[i].len = trace_seq_used(&iter->seq);
4996
4997                 trace_seq_init(&iter->seq);
4998         }
4999
5000         trace_access_unlock(iter->cpu_file);
5001         trace_event_read_unlock();
5002         mutex_unlock(&iter->mutex);
5003
5004         spd.nr_pages = i;
5005
5006         if (i)
5007                 ret = splice_to_pipe(pipe, &spd);
5008         else
5009                 ret = 0;
5010 out:
5011         splice_shrink_spd(&spd);
5012         return ret;
5013
5014 out_err:
5015         mutex_unlock(&iter->mutex);
5016         goto out;
5017 }
5018
5019 static ssize_t
5020 tracing_entries_read(struct file *filp, char __user *ubuf,
5021                      size_t cnt, loff_t *ppos)
5022 {
5023         struct inode *inode = file_inode(filp);
5024         struct trace_array *tr = inode->i_private;
5025         int cpu = tracing_get_cpu(inode);
5026         char buf[64];
5027         int r = 0;
5028         ssize_t ret;
5029
5030         mutex_lock(&trace_types_lock);
5031
5032         if (cpu == RING_BUFFER_ALL_CPUS) {
5033                 int cpu, buf_size_same;
5034                 unsigned long size;
5035
5036                 size = 0;
5037                 buf_size_same = 1;
5038                 /* check if all cpu sizes are the same */
5039                 for_each_tracing_cpu(cpu) {
5040                         /* fill in the size from the first enabled cpu */
5041                         if (size == 0)
5042                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5043                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5044                                 buf_size_same = 0;
5045                                 break;
5046                         }
5047                 }
5048
5049                 if (buf_size_same) {
5050                         if (!ring_buffer_expanded)
5051                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5052                                             size >> 10,
5053                                             trace_buf_size >> 10);
5054                         else
5055                                 r = sprintf(buf, "%lu\n", size >> 10);
5056                 } else
5057                         r = sprintf(buf, "X\n");
5058         } else
5059                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5060
5061         mutex_unlock(&trace_types_lock);
5062
5063         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5064         return ret;
5065 }
5066
5067 static ssize_t
5068 tracing_entries_write(struct file *filp, const char __user *ubuf,
5069                       size_t cnt, loff_t *ppos)
5070 {
5071         struct inode *inode = file_inode(filp);
5072         struct trace_array *tr = inode->i_private;
5073         unsigned long val;
5074         int ret;
5075
5076         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5077         if (ret)
5078                 return ret;
5079
5080         /* must have at least 1 entry */
5081         if (!val)
5082                 return -EINVAL;
5083
5084         /* value is in KB */
5085         val <<= 10;
5086         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5087         if (ret < 0)
5088                 return ret;
5089
5090         *ppos += cnt;
5091
5092         return cnt;
5093 }
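/*
 * Usage sketch (not part of the original source): buffer_size_kb takes a
 * decimal value in kilobytes and resizes either a single CPU buffer (the
 * per_cpu/cpuN/buffer_size_kb file) or all of them (the top-level file).
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb   # 4 MB per CPU
 *
 * Writing 0 is rejected with -EINVAL; see the "must have at least 1
 * entry" check above.
 */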
5094
5095 static ssize_t
5096 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5097                                 size_t cnt, loff_t *ppos)
5098 {
5099         struct trace_array *tr = filp->private_data;
5100         char buf[64];
5101         int r, cpu;
5102         unsigned long size = 0, expanded_size = 0;
5103
5104         mutex_lock(&trace_types_lock);
5105         for_each_tracing_cpu(cpu) {
5106                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5107                 if (!ring_buffer_expanded)
5108                         expanded_size += trace_buf_size >> 10;
5109         }
5110         if (ring_buffer_expanded)
5111                 r = sprintf(buf, "%lu\n", size);
5112         else
5113                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5114         mutex_unlock(&trace_types_lock);
5115
5116         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5117 }
5118
5119 static ssize_t
5120 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5121                           size_t cnt, loff_t *ppos)
5122 {
5123         /*
5124          * There is no need to read what the user has written; this function
5125          * just makes sure that there is no error when "echo" is used.
5126          */
5127
5128         *ppos += cnt;
5129
5130         return cnt;
5131 }
5132
5133 static int
5134 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5135 {
5136         struct trace_array *tr = inode->i_private;
5137
5138         /* Disable tracing if the stop-on-free option is set */
5139         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5140                 tracer_tracing_off(tr);
5141         /* resize the ring buffer to 0 */
5142         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5143
5144         trace_array_put(tr);
5145
5146         return 0;
5147 }
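/*
 * Usage sketch (not part of the original source): any write to the
 * free_buffer file is accepted; the real work happens on release, where
 * the ring buffer is resized to zero for all CPUs and, if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off as well:
 *
 *   echo > /sys/kernel/tracing/free_buffer
 *
 * frees the ring buffer memory once the shell closes the file.
 */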
5148
5149 static ssize_t
5150 tracing_mark_write(struct file *filp, const char __user *ubuf,
5151                                         size_t cnt, loff_t *fpos)
5152 {
5153         unsigned long addr = (unsigned long)ubuf;
5154         struct trace_array *tr = filp->private_data;
5155         struct ring_buffer_event *event;
5156         struct ring_buffer *buffer;
5157         struct print_entry *entry;
5158         unsigned long irq_flags;
5159         struct page *pages[2];
5160         void *map_page[2];
5161         int nr_pages = 1;
5162         ssize_t written;
5163         int offset;
5164         int size;
5165         int len;
5166         int ret;
5167         int i;
5168
5169         if (tracing_disabled)
5170                 return -EINVAL;
5171
5172         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5173                 return -EINVAL;
5174
5175         if (cnt > TRACE_BUF_SIZE)
5176                 cnt = TRACE_BUF_SIZE;
5177
5178         /*
5179          * Userspace is injecting traces into the kernel trace buffer.
5180          * We want to be as non-intrusive as possible.
5181          * To do so, we do not want to allocate any special buffers
5182          * or take any locks, but instead write the userspace data
5183          * straight into the ring buffer.
5184          *
5185          * First we need to pin the userspace buffer into memory.
5186          * It most likely already is, because userspace just referenced
5187          * it, but there is no guarantee of that. By using
5188          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we
5189          * can get access to the pages directly. We then write the
5190          * data directly into the ring buffer.
5191          */
5192         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5193
5194         /* check if the user buffer crosses a page boundary */
5195         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5196                 nr_pages = 2;
5197
5198         offset = addr & (PAGE_SIZE - 1);
5199         addr &= PAGE_MASK;
5200
5201         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5202         if (ret < nr_pages) {
5203                 while (--ret >= 0)
5204                         put_page(pages[ret]);
5205                 written = -EFAULT;
5206                 goto out;
5207         }
5208
5209         for (i = 0; i < nr_pages; i++)
5210                 map_page[i] = kmap_atomic(pages[i]);
5211
5212         local_save_flags(irq_flags);
5213         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5214         buffer = tr->trace_buffer.buffer;
5215         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5216                                           irq_flags, preempt_count());
5217         if (!event) {
5218                 /* Ring buffer disabled, return as if not open for write */
5219                 written = -EBADF;
5220                 goto out_unlock;
5221         }
5222
5223         entry = ring_buffer_event_data(event);
5224         entry->ip = _THIS_IP_;
5225
5226         if (nr_pages == 2) {
5227                 len = PAGE_SIZE - offset;
5228                 memcpy(&entry->buf, map_page[0] + offset, len);
5229                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5230         } else
5231                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5232
5233         if (entry->buf[cnt - 1] != '\n') {
5234                 entry->buf[cnt] = '\n';
5235                 entry->buf[cnt + 1] = '\0';
5236         } else
5237                 entry->buf[cnt] = '\0';
5238
5239         __buffer_unlock_commit(buffer, event);
5240
5241         written = cnt;
5242
5243         *fpos += written;
5244
5245  out_unlock:
5246         for (i = nr_pages - 1; i >= 0; i--) {
5247                 kunmap_atomic(map_page[i]);
5248                 put_page(pages[i]);
5249         }
5250  out:
5251         return written;
5252 }
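/*
 * Usage sketch (not part of the original source): trace_marker lets
 * userspace inject its own annotations into the trace as TRACE_PRINT
 * events, e.g.:
 *
 *   echo "starting benchmark phase 2" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, a trailing newline is
 * added when missing, and the write fails with -EINVAL if the "markers"
 * trace option has been cleared.
 */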
5253
5254 static int tracing_clock_show(struct seq_file *m, void *v)
5255 {
5256         struct trace_array *tr = m->private;
5257         int i;
5258
5259         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5260                 seq_printf(m,
5261                         "%s%s%s%s", i ? " " : "",
5262                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5263                         i == tr->clock_id ? "]" : "");
5264         seq_putc(m, '\n');
5265
5266         return 0;
5267 }
5268
5269 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5270 {
5271         int i;
5272
5273         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5274                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5275                         break;
5276         }
5277         if (i == ARRAY_SIZE(trace_clocks))
5278                 return -EINVAL;
5279
5280         mutex_lock(&trace_types_lock);
5281
5282         tr->clock_id = i;
5283
5284         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5285
5286         /*
5287          * New clock may not be consistent with the previous clock.
5288          * Reset the buffer so that it doesn't have incomparable timestamps.
5289          */
5290         tracing_reset_online_cpus(&tr->trace_buffer);
5291
5292 #ifdef CONFIG_TRACER_MAX_TRACE
5293         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5294                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5295         tracing_reset_online_cpus(&tr->max_buffer);
5296 #endif
5297
5298         mutex_unlock(&trace_types_lock);
5299
5300         return 0;
5301 }
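/*
 * Usage sketch (not part of the original source): reading trace_clock
 * lists the available clocks with the current one in brackets, and
 * writing a name selects it:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the ring buffer (see above), since timestamps
 * taken with different clocks are not comparable.
 */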
5302
5303 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5304                                    size_t cnt, loff_t *fpos)
5305 {
5306         struct seq_file *m = filp->private_data;
5307         struct trace_array *tr = m->private;
5308         char buf[64];
5309         const char *clockstr;
5310         int ret;
5311
5312         if (cnt >= sizeof(buf))
5313                 return -EINVAL;
5314
5315         if (copy_from_user(buf, ubuf, cnt))
5316                 return -EFAULT;
5317
5318         buf[cnt] = 0;
5319
5320         clockstr = strstrip(buf);
5321
5322         ret = tracing_set_clock(tr, clockstr);
5323         if (ret)
5324                 return ret;
5325
5326         *fpos += cnt;
5327
5328         return cnt;
5329 }
5330
5331 static int tracing_clock_open(struct inode *inode, struct file *file)
5332 {
5333         struct trace_array *tr = inode->i_private;
5334         int ret;
5335
5336         if (tracing_disabled)
5337                 return -ENODEV;
5338
5339         if (trace_array_get(tr))
5340                 return -ENODEV;
5341
5342         ret = single_open(file, tracing_clock_show, inode->i_private);
5343         if (ret < 0)
5344                 trace_array_put(tr);
5345
5346         return ret;
5347 }
5348
5349 struct ftrace_buffer_info {
5350         struct trace_iterator   iter;
5351         void                    *spare;
5352         unsigned int            read;
5353 };
5354
5355 #ifdef CONFIG_TRACER_SNAPSHOT
5356 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5357 {
5358         struct trace_array *tr = inode->i_private;
5359         struct trace_iterator *iter;
5360         struct seq_file *m;
5361         int ret = 0;
5362
5363         if (trace_array_get(tr) < 0)
5364                 return -ENODEV;
5365
5366         if (file->f_mode & FMODE_READ) {
5367                 iter = __tracing_open(inode, file, true);
5368                 if (IS_ERR(iter))
5369                         ret = PTR_ERR(iter);
5370         } else {
5371                 /* Writes still need the seq_file to hold the private data */
5372                 ret = -ENOMEM;
5373                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5374                 if (!m)
5375                         goto out;
5376                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5377                 if (!iter) {
5378                         kfree(m);
5379                         goto out;
5380                 }
5381                 ret = 0;
5382
5383                 iter->tr = tr;
5384                 iter->trace_buffer = &tr->max_buffer;
5385                 iter->cpu_file = tracing_get_cpu(inode);
5386                 m->private = iter;
5387                 file->private_data = m;
5388         }
5389 out:
5390         if (ret < 0)
5391                 trace_array_put(tr);
5392
5393         return ret;
5394 }
5395
5396 static ssize_t
5397 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5398                        loff_t *ppos)
5399 {
5400         struct seq_file *m = filp->private_data;
5401         struct trace_iterator *iter = m->private;
5402         struct trace_array *tr = iter->tr;
5403         unsigned long val;
5404         int ret;
5405
5406         ret = tracing_update_buffers();
5407         if (ret < 0)
5408                 return ret;
5409
5410         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5411         if (ret)
5412                 return ret;
5413
5414         mutex_lock(&trace_types_lock);
5415
5416         if (tr->current_trace->use_max_tr) {
5417                 ret = -EBUSY;
5418                 goto out;
5419         }
5420
5421         switch (val) {
5422         case 0:
5423                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5424                         ret = -EINVAL;
5425                         break;
5426                 }
5427                 if (tr->allocated_snapshot)
5428                         free_snapshot(tr);
5429                 break;
5430         case 1:
5431 /* Only allow per-cpu swap if the ring buffer supports it */
5432 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5433                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5434                         ret = -EINVAL;
5435                         break;
5436                 }
5437 #endif
5438                 if (!tr->allocated_snapshot) {
5439                         ret = alloc_snapshot(tr);
5440                         if (ret < 0)
5441                                 break;
5442                 }
5443                 local_irq_disable();
5444                 /* Now, we're going to swap */
5445                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5446                         update_max_tr(tr, current, smp_processor_id());
5447                 else
5448                         update_max_tr_single(tr, current, iter->cpu_file);
5449                 local_irq_enable();
5450                 break;
5451         default:
5452                 if (tr->allocated_snapshot) {
5453                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5454                                 tracing_reset_online_cpus(&tr->max_buffer);
5455                         else
5456                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5457                 }
5458                 break;
5459         }
5460
5461         if (ret >= 0) {
5462                 *ppos += cnt;
5463                 ret = cnt;
5464         }
5465 out:
5466         mutex_unlock(&trace_types_lock);
5467         return ret;
5468 }
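/*
 * Usage sketch (not part of the original source), matching the switch
 * statement above:
 *
 *   echo 0 > /sys/kernel/tracing/snapshot  # free the snapshot buffer
 *   echo 1 > /sys/kernel/tracing/snapshot  # allocate if needed and swap
 *   echo 2 > /sys/kernel/tracing/snapshot  # clear the snapshot contents
 *   cat  /sys/kernel/tracing/snapshot      # read the snapshotted trace
 *
 * Writes fail with -EBUSY while the current tracer itself uses the max
 * buffer, and 0 is only accepted on the all-CPU snapshot file.
 */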
5469
5470 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5471 {
5472         struct seq_file *m = file->private_data;
5473         int ret;
5474
5475         ret = tracing_release(inode, file);
5476
5477         if (file->f_mode & FMODE_READ)
5478                 return ret;
5479
5480         /* If write only, the seq_file is just a stub */
5481         if (m)
5482                 kfree(m->private);
5483         kfree(m);
5484
5485         return 0;
5486 }
5487
5488 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5489 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5490                                     size_t count, loff_t *ppos);
5491 static int tracing_buffers_release(struct inode *inode, struct file *file);
5492 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5493                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5494
5495 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5496 {
5497         struct ftrace_buffer_info *info;
5498         int ret;
5499
5500         ret = tracing_buffers_open(inode, filp);
5501         if (ret < 0)
5502                 return ret;
5503
5504         info = filp->private_data;
5505
5506         if (info->iter.trace->use_max_tr) {
5507                 tracing_buffers_release(inode, filp);
5508                 return -EBUSY;
5509         }
5510
5511         info->iter.snapshot = true;
5512         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5513
5514         return ret;
5515 }
5516
5517 #endif /* CONFIG_TRACER_SNAPSHOT */
5518
5519
5520 static const struct file_operations tracing_thresh_fops = {
5521         .open           = tracing_open_generic,
5522         .read           = tracing_thresh_read,
5523         .write          = tracing_thresh_write,
5524         .llseek         = generic_file_llseek,
5525 };
5526
5527 #ifdef CONFIG_TRACER_MAX_TRACE
5528 static const struct file_operations tracing_max_lat_fops = {
5529         .open           = tracing_open_generic,
5530         .read           = tracing_max_lat_read,
5531         .write          = tracing_max_lat_write,
5532         .llseek         = generic_file_llseek,
5533 };
5534 #endif
5535
5536 static const struct file_operations set_tracer_fops = {
5537         .open           = tracing_open_generic,
5538         .read           = tracing_set_trace_read,
5539         .write          = tracing_set_trace_write,
5540         .llseek         = generic_file_llseek,
5541 };
5542
5543 static const struct file_operations tracing_pipe_fops = {
5544         .open           = tracing_open_pipe,
5545         .poll           = tracing_poll_pipe,
5546         .read           = tracing_read_pipe,
5547         .splice_read    = tracing_splice_read_pipe,
5548         .release        = tracing_release_pipe,
5549         .llseek         = no_llseek,
5550 };
5551
5552 static const struct file_operations tracing_entries_fops = {
5553         .open           = tracing_open_generic_tr,
5554         .read           = tracing_entries_read,
5555         .write          = tracing_entries_write,
5556         .llseek         = generic_file_llseek,
5557         .release        = tracing_release_generic_tr,
5558 };
5559
5560 static const struct file_operations tracing_total_entries_fops = {
5561         .open           = tracing_open_generic_tr,
5562         .read           = tracing_total_entries_read,
5563         .llseek         = generic_file_llseek,
5564         .release        = tracing_release_generic_tr,
5565 };
5566
5567 static const struct file_operations tracing_free_buffer_fops = {
5568         .open           = tracing_open_generic_tr,
5569         .write          = tracing_free_buffer_write,
5570         .release        = tracing_free_buffer_release,
5571 };
5572
5573 static const struct file_operations tracing_mark_fops = {
5574         .open           = tracing_open_generic_tr,
5575         .write          = tracing_mark_write,
5576         .llseek         = generic_file_llseek,
5577         .release        = tracing_release_generic_tr,
5578 };
5579
5580 static const struct file_operations trace_clock_fops = {
5581         .open           = tracing_clock_open,
5582         .read           = seq_read,
5583         .llseek         = seq_lseek,
5584         .release        = tracing_single_release_tr,
5585         .write          = tracing_clock_write,
5586 };
5587
5588 #ifdef CONFIG_TRACER_SNAPSHOT
5589 static const struct file_operations snapshot_fops = {
5590         .open           = tracing_snapshot_open,
5591         .read           = seq_read,
5592         .write          = tracing_snapshot_write,
5593         .llseek         = tracing_lseek,
5594         .release        = tracing_snapshot_release,
5595 };
5596
5597 static const struct file_operations snapshot_raw_fops = {
5598         .open           = snapshot_raw_open,
5599         .read           = tracing_buffers_read,
5600         .release        = tracing_buffers_release,
5601         .splice_read    = tracing_buffers_splice_read,
5602         .llseek         = no_llseek,
5603 };
5604
5605 #endif /* CONFIG_TRACER_SNAPSHOT */
5606
5607 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5608 {
5609         struct trace_array *tr = inode->i_private;
5610         struct ftrace_buffer_info *info;
5611         int ret;
5612
5613         if (tracing_disabled)
5614                 return -ENODEV;
5615
5616         if (trace_array_get(tr) < 0)
5617                 return -ENODEV;
5618
5619         info = kzalloc(sizeof(*info), GFP_KERNEL);
5620         if (!info) {
5621                 trace_array_put(tr);
5622                 return -ENOMEM;
5623         }
5624
5625         mutex_lock(&trace_types_lock);
5626
5627         info->iter.tr           = tr;
5628         info->iter.cpu_file     = tracing_get_cpu(inode);
5629         info->iter.trace        = tr->current_trace;
5630         info->iter.trace_buffer = &tr->trace_buffer;
5631         info->spare             = NULL;
5632         /* Force reading ring buffer for first read */
5633         info->read              = (unsigned int)-1;
5634
5635         filp->private_data = info;
5636
5637         tr->current_trace->ref++;
5638
5639         mutex_unlock(&trace_types_lock);
5640
5641         ret = nonseekable_open(inode, filp);
5642         if (ret < 0)
5643                 trace_array_put(tr);
5644
5645         return ret;
5646 }
5647
5648 static unsigned int
5649 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5650 {
5651         struct ftrace_buffer_info *info = filp->private_data;
5652         struct trace_iterator *iter = &info->iter;
5653
5654         return trace_poll(iter, filp, poll_table);
5655 }
5656
5657 static ssize_t
5658 tracing_buffers_read(struct file *filp, char __user *ubuf,
5659                      size_t count, loff_t *ppos)
5660 {
5661         struct ftrace_buffer_info *info = filp->private_data;
5662         struct trace_iterator *iter = &info->iter;
5663         ssize_t ret;
5664         ssize_t size;
5665
5666         if (!count)
5667                 return 0;
5668
5669 #ifdef CONFIG_TRACER_MAX_TRACE
5670         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5671                 return -EBUSY;
5672 #endif
5673
5674         if (!info->spare)
5675                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5676                                                           iter->cpu_file);
5677         if (!info->spare)
5678                 return -ENOMEM;
5679
5680         /* Do we have previous read data to read? */
5681         if (info->read < PAGE_SIZE)
5682                 goto read;
5683
5684  again:
5685         trace_access_lock(iter->cpu_file);
5686         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5687                                     &info->spare,
5688                                     count,
5689                                     iter->cpu_file, 0);
5690         trace_access_unlock(iter->cpu_file);
5691
5692         if (ret < 0) {
5693                 if (trace_empty(iter)) {
5694                         if (filp->f_flags & O_NONBLOCK)
5695                                 return -EAGAIN;
5696
5697                         ret = wait_on_pipe(iter, false);
5698                         if (ret)
5699                                 return ret;
5700
5701                         goto again;
5702                 }
5703                 return 0;
5704         }
5705
5706         info->read = 0;
5707  read:
5708         size = PAGE_SIZE - info->read;
5709         if (size > count)
5710                 size = count;
5711
5712         ret = copy_to_user(ubuf, info->spare + info->read, size);
5713         if (ret == size)
5714                 return -EFAULT;
5715
5716         size -= ret;
5717
5718         *ppos += size;
5719         info->read += size;
5720
5721         return size;
5722 }
5723
5724 static int tracing_buffers_release(struct inode *inode, struct file *file)
5725 {
5726         struct ftrace_buffer_info *info = file->private_data;
5727         struct trace_iterator *iter = &info->iter;
5728
5729         mutex_lock(&trace_types_lock);
5730
5731         iter->tr->current_trace->ref--;
5732
5733         __trace_array_put(iter->tr);
5734
5735         if (info->spare)
5736                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5737         kfree(info);
5738
5739         mutex_unlock(&trace_types_lock);
5740
5741         return 0;
5742 }
5743
5744 struct buffer_ref {
5745         struct ring_buffer      *buffer;
5746         void                    *page;
5747         int                     ref;
5748 };
5749
5750 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5751                                     struct pipe_buffer *buf)
5752 {
5753         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5754
5755         if (--ref->ref)
5756                 return;
5757
5758         ring_buffer_free_read_page(ref->buffer, ref->page);
5759         kfree(ref);
5760         buf->private = 0;
5761 }
5762
5763 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5764                                 struct pipe_buffer *buf)
5765 {
5766         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5767
5768         ref->ref++;
5769 }
5770
5771 /* Pipe buffer operations for ring buffer pages. */
5772 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5773         .can_merge              = 0,
5774         .confirm                = generic_pipe_buf_confirm,
5775         .release                = buffer_pipe_buf_release,
5776         .steal                  = generic_pipe_buf_steal,
5777         .get                    = buffer_pipe_buf_get,
5778 };
5779
5780 /*
5781  * Callback from splice_to_pipe(): release pages left over in the spd
5782  * in case we errored out while filling the pipe.
5783  */
5784 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5785 {
5786         struct buffer_ref *ref =
5787                 (struct buffer_ref *)spd->partial[i].private;
5788
5789         if (--ref->ref)
5790                 return;
5791
5792         ring_buffer_free_read_page(ref->buffer, ref->page);
5793         kfree(ref);
5794         spd->partial[i].private = 0;
5795 }
5796
5797 static ssize_t
5798 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5799                             struct pipe_inode_info *pipe, size_t len,
5800                             unsigned int flags)
5801 {
5802         struct ftrace_buffer_info *info = file->private_data;
5803         struct trace_iterator *iter = &info->iter;
5804         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5805         struct page *pages_def[PIPE_DEF_BUFFERS];
5806         struct splice_pipe_desc spd = {
5807                 .pages          = pages_def,
5808                 .partial        = partial_def,
5809                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5810                 .flags          = flags,
5811                 .ops            = &buffer_pipe_buf_ops,
5812                 .spd_release    = buffer_spd_release,
5813         };
5814         struct buffer_ref *ref;
5815         int entries, size, i;
5816         ssize_t ret = 0;
5817
5818 #ifdef CONFIG_TRACER_MAX_TRACE
5819         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5820                 return -EBUSY;
5821 #endif
5822
5823         /* Check alignment before splice_grow_spd() allocates anything. */
5824         if (*ppos & (PAGE_SIZE - 1))
5825                 return -EINVAL;
5826         if (len & (PAGE_SIZE - 1)) {
5827                 if (len < PAGE_SIZE)
5828                         return -EINVAL;
5829                 len &= PAGE_MASK;
5830         }
5831
5832         if (splice_grow_spd(pipe, &spd))
5833                 return -ENOMEM;
5834
5835  again:
5836         trace_access_lock(iter->cpu_file);
5837         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5838
5839         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5840                 struct page *page;
5841                 int r;
5842
5843                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5844                 if (!ref) {
5845                         ret = -ENOMEM;
5846                         break;
5847                 }
5848
5849                 ref->ref = 1;
5850                 ref->buffer = iter->trace_buffer->buffer;
5851                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5852                 if (!ref->page) {
5853                         ret = -ENOMEM;
5854                         kfree(ref);
5855                         break;
5856                 }
5857
5858                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5859                                           len, iter->cpu_file, 1);
5860                 if (r < 0) {
5861                         ring_buffer_free_read_page(ref->buffer, ref->page);
5862                         kfree(ref);
5863                         break;
5864                 }
5865
5866                 /*
5867                  * Zero out any leftover data; this page is headed to
5868                  * user land.
5869                  */
5870                 size = ring_buffer_page_len(ref->page);
5871                 if (size < PAGE_SIZE)
5872                         memset(ref->page + size, 0, PAGE_SIZE - size);
5873
5874                 page = virt_to_page(ref->page);
5875
5876                 spd.pages[i] = page;
5877                 spd.partial[i].len = PAGE_SIZE;
5878                 spd.partial[i].offset = 0;
5879                 spd.partial[i].private = (unsigned long)ref;
5880                 spd.nr_pages++;
5881                 *ppos += PAGE_SIZE;
5882
5883                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5884         }
5885
5886         trace_access_unlock(iter->cpu_file);
5887         spd.nr_pages = i;
5888
5889         /* did we read anything? */
5890         if (!spd.nr_pages) {
5891                 if (ret)
5892                         return ret;
5893
5894                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5895                         return -EAGAIN;
5896
5897                 ret = wait_on_pipe(iter, true);
5898                 if (ret)
5899                         return ret;
5900
5901                 goto again;
5902         }
5903
5904         ret = splice_to_pipe(pipe, &spd);
5905         splice_shrink_spd(&spd);
5906
5907         return ret;
5908 }
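/*
 * Usage note (not part of the original source): trace_pipe_raw hands out
 * whole ring-buffer pages in their binary format, which is what tools
 * such as trace-cmd consume. For splice() the offset and length must be
 * page aligned (see the -EINVAL checks above), and each spliced page
 * carries a buffer_ref so the ring-buffer page is only returned once the
 * last pipe reference to it is dropped.
 */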
5909
5910 static const struct file_operations tracing_buffers_fops = {
5911         .open           = tracing_buffers_open,
5912         .read           = tracing_buffers_read,
5913         .poll           = tracing_buffers_poll,
5914         .release        = tracing_buffers_release,
5915         .splice_read    = tracing_buffers_splice_read,
5916         .llseek         = no_llseek,
5917 };
5918
5919 static ssize_t
5920 tracing_stats_read(struct file *filp, char __user *ubuf,
5921                    size_t count, loff_t *ppos)
5922 {
5923         struct inode *inode = file_inode(filp);
5924         struct trace_array *tr = inode->i_private;
5925         struct trace_buffer *trace_buf = &tr->trace_buffer;
5926         int cpu = tracing_get_cpu(inode);
5927         struct trace_seq *s;
5928         unsigned long cnt;
5929         unsigned long long t;
5930         unsigned long usec_rem;
5931
5932         s = kmalloc(sizeof(*s), GFP_KERNEL);
5933         if (!s)
5934                 return -ENOMEM;
5935
5936         trace_seq_init(s);
5937
5938         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5939         trace_seq_printf(s, "entries: %ld\n", cnt);
5940
5941         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5942         trace_seq_printf(s, "overrun: %ld\n", cnt);
5943
5944         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5945         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5946
5947         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5948         trace_seq_printf(s, "bytes: %ld\n", cnt);
5949
5950         if (trace_clocks[tr->clock_id].in_ns) {
5951                 /* local or global for trace_clock */
5952                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5953                 usec_rem = do_div(t, USEC_PER_SEC);
5954                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5955                                                                 t, usec_rem);
5956
5957                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5958                 usec_rem = do_div(t, USEC_PER_SEC);
5959                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5960         } else {
5961                 /* counter or tsc mode for trace_clock */
5962                 trace_seq_printf(s, "oldest event ts: %llu\n",
5963                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5964
5965                 trace_seq_printf(s, "now ts: %llu\n",
5966                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5967         }
5968
5969         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5970         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5971
5972         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5973         trace_seq_printf(s, "read events: %ld\n", cnt);
5974
5975         count = simple_read_from_buffer(ubuf, count, ppos,
5976                                         s->buffer, trace_seq_used(s));
5977
5978         kfree(s);
5979
5980         return count;
5981 }
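/*
 * Example output sketch for per_cpu/cpuN/stats (values are made up, not
 * from the original source), as built by the trace_seq_printf() calls
 * above:
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53312
 *   oldest event ts:  2543.187987
 *   now ts:  2583.730292
 *   dropped events: 0
 *   read events: 128
 *
 * The two timestamps are printed as seconds.microseconds only when the
 * selected trace clock counts in nanoseconds; otherwise the raw counter
 * values are shown.
 */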
5982
5983 static const struct file_operations tracing_stats_fops = {
5984         .open           = tracing_open_generic_tr,
5985         .read           = tracing_stats_read,
5986         .llseek         = generic_file_llseek,
5987         .release        = tracing_release_generic_tr,
5988 };
5989
5990 #ifdef CONFIG_DYNAMIC_FTRACE
5991
5992 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5993 {
5994         return 0;
5995 }
5996
5997 static ssize_t
5998 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5999                   size_t cnt, loff_t *ppos)
6000 {
6001         static char ftrace_dyn_info_buffer[1024];
6002         static DEFINE_MUTEX(dyn_info_mutex);
6003         unsigned long *p = filp->private_data;
6004         char *buf = ftrace_dyn_info_buffer;
6005         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6006         int r;
6007
6008         mutex_lock(&dyn_info_mutex);
6009         r = sprintf(buf, "%ld ", *p);
6010
6011         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6012         buf[r++] = '\n';
6013
6014         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6015
6016         mutex_unlock(&dyn_info_mutex);
6017
6018         return r;
6019 }
6020
6021 static const struct file_operations tracing_dyn_info_fops = {
6022         .open           = tracing_open_generic,
6023         .read           = tracing_read_dyn_info,
6024         .llseek         = generic_file_llseek,
6025 };
6026 #endif /* CONFIG_DYNAMIC_FTRACE */
6027
6028 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6029 static void
6030 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6031 {
6032         tracing_snapshot();
6033 }
6034
6035 static void
6036 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6037 {
6038         unsigned long *count = (unsigned long *)data;
6039
6040         if (!*count)
6041                 return;
6042
6043         if (*count != -1)
6044                 (*count)--;
6045
6046         tracing_snapshot();
6047 }
6048
6049 static int
6050 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6051                       struct ftrace_probe_ops *ops, void *data)
6052 {
6053         long count = (long)data;
6054
6055         seq_printf(m, "%ps:", (void *)ip);
6056
6057         seq_puts(m, "snapshot");
6058
6059         if (count == -1)
6060                 seq_puts(m, ":unlimited\n");
6061         else
6062                 seq_printf(m, ":count=%ld\n", count);
6063
6064         return 0;
6065 }
6066
6067 static struct ftrace_probe_ops snapshot_probe_ops = {
6068         .func                   = ftrace_snapshot,
6069         .print                  = ftrace_snapshot_print,
6070 };
6071
6072 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6073         .func                   = ftrace_count_snapshot,
6074         .print                  = ftrace_snapshot_print,
6075 };
6076
6077 static int
6078 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6079                                char *glob, char *cmd, char *param, int enable)
6080 {
6081         struct ftrace_probe_ops *ops;
6082         void *count = (void *)-1;
6083         char *number;
6084         int ret;
6085
6086         /* hash funcs only work with set_ftrace_filter */
6087         if (!enable)
6088                 return -EINVAL;
6089
6090         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6091
6092         if (glob[0] == '!') {
6093                 unregister_ftrace_function_probe_func(glob+1, ops);
6094                 return 0;
6095         }
6096
6097         if (!param)
6098                 goto out_reg;
6099
6100         number = strsep(&param, ":");
6101
6102         if (!strlen(number))
6103                 goto out_reg;
6104
6105         /*
6106          * We use the callback data field (which is a pointer)
6107          * as our counter.
6108          */
6109         ret = kstrtoul(number, 0, (unsigned long *)&count);
6110         if (ret)
6111                 return ret;
6112
6113  out_reg:
6114         ret = register_ftrace_function_probe(glob, ops, count);
6115
6116         if (ret >= 0)
6117                 alloc_snapshot(&global_trace);
6118
6119         return ret < 0 ? ret : 0;
6120 }
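/*
 * Usage sketch (not part of the original source): this callback backs the
 * "snapshot" command of set_ftrace_filter. With <func> standing in for
 * any traceable function:
 *
 *   echo '<func>:snapshot'   > /sys/kernel/tracing/set_ftrace_filter
 *   echo '<func>:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *   echo '!<func>:snapshot'  > /sys/kernel/tracing/set_ftrace_filter
 *
 * The first takes a snapshot every time <func> is hit, the second only
 * for the first three hits, and the '!' form removes the probe again.
 * Registering a probe also preallocates the snapshot buffer.
 */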
6121
6122 static struct ftrace_func_command ftrace_snapshot_cmd = {
6123         .name                   = "snapshot",
6124         .func                   = ftrace_trace_snapshot_callback,
6125 };
6126
6127 static __init int register_snapshot_cmd(void)
6128 {
6129         return register_ftrace_command(&ftrace_snapshot_cmd);
6130 }
6131 #else
6132 static inline __init int register_snapshot_cmd(void) { return 0; }
6133 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6134
6135 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6136 {
6137         if (WARN_ON(!tr->dir))
6138                 return ERR_PTR(-ENODEV);
6139
6140         /* Top directory uses NULL as the parent */
6141         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6142                 return NULL;
6143
6144         /* All sub buffers have a descriptor */
6145         return tr->dir;
6146 }
6147
6148 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6149 {
6150         struct dentry *d_tracer;
6151
6152         if (tr->percpu_dir)
6153                 return tr->percpu_dir;
6154
6155         d_tracer = tracing_get_dentry(tr);
6156         if (IS_ERR(d_tracer))
6157                 return NULL;
6158
6159         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6160
6161         WARN_ONCE(!tr->percpu_dir,
6162                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6163
6164         return tr->percpu_dir;
6165 }
6166
6167 static struct dentry *
6168 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6169                       void *data, long cpu, const struct file_operations *fops)
6170 {
6171         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6172
6173         if (ret) /* See tracing_get_cpu() */
6174                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6175         return ret;
6176 }
6177
6178 static void
6179 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6180 {
6181         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6182         struct dentry *d_cpu;
6183         char cpu_dir[30]; /* 30 characters should be more than enough */
6184
6185         if (!d_percpu)
6186                 return;
6187
6188         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6189         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6190         if (!d_cpu) {
6191                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6192                 return;
6193         }
6194
6195         /* per cpu trace_pipe */
6196         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6197                                 tr, cpu, &tracing_pipe_fops);
6198
6199         /* per cpu trace */
6200         trace_create_cpu_file("trace", 0644, d_cpu,
6201                                 tr, cpu, &tracing_fops);
6202
6203         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6204                                 tr, cpu, &tracing_buffers_fops);
6205
6206         trace_create_cpu_file("stats", 0444, d_cpu,
6207                                 tr, cpu, &tracing_stats_fops);
6208
6209         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6210                                 tr, cpu, &tracing_entries_fops);
6211
6212 #ifdef CONFIG_TRACER_SNAPSHOT
6213         trace_create_cpu_file("snapshot", 0644, d_cpu,
6214                                 tr, cpu, &snapshot_fops);
6215
6216         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6217                                 tr, cpu, &snapshot_raw_fops);
6218 #endif
6219 }
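/*
 * Summary (not part of the original source): for the given CPU this
 * creates per_cpu/cpu<N>/ containing trace_pipe, trace, trace_pipe_raw,
 * stats and buffer_size_kb, plus snapshot and snapshot_raw when
 * CONFIG_TRACER_SNAPSHOT is enabled.
 */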
6220
6221 #ifdef CONFIG_FTRACE_SELFTEST
6222 /* Let selftest have access to static functions in this file */
6223 #include "trace_selftest.c"
6224 #endif
6225
6226 static ssize_t
6227 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6228                         loff_t *ppos)
6229 {
6230         struct trace_option_dentry *topt = filp->private_data;
6231         char *buf;
6232
6233         if (topt->flags->val & topt->opt->bit)
6234                 buf = "1\n";
6235         else
6236                 buf = "0\n";
6237
6238         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6239 }
6240
6241 static ssize_t
6242 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6243                          loff_t *ppos)
6244 {
6245         struct trace_option_dentry *topt = filp->private_data;
6246         unsigned long val;
6247         int ret;
6248
6249         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6250         if (ret)
6251                 return ret;
6252
6253         if (val != 0 && val != 1)
6254                 return -EINVAL;
6255
6256         if (!!(topt->flags->val & topt->opt->bit) != val) {
6257                 mutex_lock(&trace_types_lock);
6258                 ret = __set_tracer_option(topt->tr, topt->flags,
6259                                           topt->opt, !val);
6260                 mutex_unlock(&trace_types_lock);
6261                 if (ret)
6262                         return ret;
6263         }
6264
6265         *ppos += cnt;
6266
6267         return cnt;
6268 }
6269
6270
6271 static const struct file_operations trace_options_fops = {
6272         .open = tracing_open_generic,
6273         .read = trace_options_read,
6274         .write = trace_options_write,
6275         .llseek = generic_file_llseek,
6276 };
6277
6278 /*
6279  * In order to pass in both the trace_array descriptor as well as the index
6280  * to the flag that the trace option file represents, the trace_array
6281  * has a character array of trace_flags_index[], which holds the index
6282  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6283  * The address of this character array is passed to the flag option file
6284  * read/write callbacks.
6285  *
6286  * In order to extract both the index and the trace_array descriptor,
6287  * get_tr_index() uses the following algorithm.
6288  *
6289  *   idx = *ptr;
6290  *
6291  * As the pointer itself contains the address of the index (remember
6292  * This works because ptr points at its own entry in the index array,
6293  * and that entry's value is its position (remember index[1] == 1).
6294  *
6295  * Then, to get the trace_array descriptor, we subtract that index from
6296  * ptr, which takes us back to the start of the index array:
6297  *   ptr - idx == &index[0]
6298  *
6299  * Then a simple container_of() from that pointer gets us to the
6300  * trace_array descriptor.
6301  */
6302 static void get_tr_index(void *data, struct trace_array **ptr,
6303                          unsigned int *pindex)
6304 {
6305         *pindex = *(unsigned char *)data;
6306
6307         *ptr = container_of(data - *pindex, struct trace_array,
6308                             trace_flags_index);
6309 }
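/*
 * Worked example (not part of the original source), assuming the option
 * file for bit 5 was created with &tr->trace_flags_index[5] as its data:
 *
 *   *pindex = *(unsigned char *)data;    yields 5
 *   data - *pindex                       yields &tr->trace_flags_index[0]
 *   container_of(data - *pindex, ...)    yields the owning trace_array
 */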
6310
6311 static ssize_t
6312 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6313                         loff_t *ppos)
6314 {
6315         void *tr_index = filp->private_data;
6316         struct trace_array *tr;
6317         unsigned int index;
6318         char *buf;
6319
6320         get_tr_index(tr_index, &tr, &index);
6321
6322         if (tr->trace_flags & (1 << index))
6323                 buf = "1\n";
6324         else
6325                 buf = "0\n";
6326
6327         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6328 }
6329
6330 static ssize_t
6331 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6332                          loff_t *ppos)
6333 {
6334         void *tr_index = filp->private_data;
6335         struct trace_array *tr;
6336         unsigned int index;
6337         unsigned long val;
6338         int ret;
6339
6340         get_tr_index(tr_index, &tr, &index);
6341
6342         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6343         if (ret)
6344                 return ret;
6345
6346         if (val != 0 && val != 1)
6347                 return -EINVAL;
6348
6349         mutex_lock(&trace_types_lock);
6350         ret = set_tracer_flag(tr, 1 << index, val);
6351         mutex_unlock(&trace_types_lock);
6352
6353         if (ret < 0)
6354                 return ret;
6355
6356         *ppos += cnt;
6357
6358         return cnt;
6359 }
6360
6361 static const struct file_operations trace_options_core_fops = {
6362         .open = tracing_open_generic,
6363         .read = trace_options_core_read,
6364         .write = trace_options_core_write,
6365         .llseek = generic_file_llseek,
6366 };
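
/*
 * A sketch of how the core option files are used from user space,
 * assuming tracefs is mounted at /sys/kernel/tracing (the mount point
 * may differ) and using the "overwrite" flag as an example:
 *
 *   cat /sys/kernel/tracing/options/overwrite        # prints 0 or 1
 *   echo 1 > /sys/kernel/tracing/options/overwrite   # set the flag
 *
 * Any value other than 0 or 1 is rejected with -EINVAL by
 * trace_options_core_write() above.
 */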
6367
6368 struct dentry *trace_create_file(const char *name,
6369                                  umode_t mode,
6370                                  struct dentry *parent,
6371                                  void *data,
6372                                  const struct file_operations *fops)
6373 {
6374         struct dentry *ret;
6375
6376         ret = tracefs_create_file(name, mode, parent, data, fops);
6377         if (!ret)
6378                 pr_warn("Could not create tracefs '%s' entry\n", name);
6379
6380         return ret;
6381 }
6382
6383
6384 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6385 {
6386         struct dentry *d_tracer;
6387
6388         if (tr->options)
6389                 return tr->options;
6390
6391         d_tracer = tracing_get_dentry(tr);
6392         if (IS_ERR(d_tracer))
6393                 return NULL;
6394
6395         tr->options = tracefs_create_dir("options", d_tracer);
6396         if (!tr->options) {
6397                 pr_warn("Could not create tracefs directory 'options'\n");
6398                 return NULL;
6399         }
6400
6401         return tr->options;
6402 }
6403
6404 static void
6405 create_trace_option_file(struct trace_array *tr,
6406                          struct trace_option_dentry *topt,
6407                          struct tracer_flags *flags,
6408                          struct tracer_opt *opt)
6409 {
6410         struct dentry *t_options;
6411
6412         t_options = trace_options_init_dentry(tr);
6413         if (!t_options)
6414                 return;
6415
6416         topt->flags = flags;
6417         topt->opt = opt;
6418         topt->tr = tr;
6419
6420         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6421                                     &trace_options_fops);
6422
6423 }
6424
6425 static void
6426 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6427 {
6428         struct trace_option_dentry *topts;
6429         struct trace_options *tr_topts;
6430         struct tracer_flags *flags;
6431         struct tracer_opt *opts;
6432         int cnt;
6433         int i;
6434
6435         if (!tracer)
6436                 return;
6437
6438         flags = tracer->flags;
6439
6440         if (!flags || !flags->opts)
6441                 return;
6442
6443         /*
6444          * If this is an instance, only create flags for tracers
6445          * the instance may have.
6446          */
6447         if (!trace_ok_for_array(tracer, tr))
6448                 return;
6449
6450         for (i = 0; i < tr->nr_topts; i++) {
6451                 /* Make sure there are no duplicate flags. */
6452                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6453                         return;
6454         }
6455
6456         opts = flags->opts;
6457
6458         for (cnt = 0; opts[cnt].name; cnt++)
6459                 ;
6460
6461         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6462         if (!topts)
6463                 return;
6464
6465         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6466                             GFP_KERNEL);
6467         if (!tr_topts) {
6468                 kfree(topts);
6469                 return;
6470         }
6471
6472         tr->topts = tr_topts;
6473         tr->topts[tr->nr_topts].tracer = tracer;
6474         tr->topts[tr->nr_topts].topts = topts;
6475         tr->nr_topts++;
6476
6477         for (cnt = 0; opts[cnt].name; cnt++) {
6478                 create_trace_option_file(tr, &topts[cnt], flags,
6479                                          &opts[cnt]);
6480                 WARN_ONCE(topts[cnt].entry == NULL,
6481                           "Failed to create trace option: %s",
6482                           opts[cnt].name);
6483         }
6484 }
6485
6486 static struct dentry *
6487 create_trace_option_core_file(struct trace_array *tr,
6488                               const char *option, long index)
6489 {
6490         struct dentry *t_options;
6491
6492         t_options = trace_options_init_dentry(tr);
6493         if (!t_options)
6494                 return NULL;
6495
6496         return trace_create_file(option, 0644, t_options,
6497                                  (void *)&tr->trace_flags_index[index],
6498                                  &trace_options_core_fops);
6499 }
6500
6501 static void create_trace_options_dir(struct trace_array *tr)
6502 {
6503         struct dentry *t_options;
6504         bool top_level = tr == &global_trace;
6505         int i;
6506
6507         t_options = trace_options_init_dentry(tr);
6508         if (!t_options)
6509                 return;
6510
6511         for (i = 0; trace_options[i]; i++) {
6512                 if (top_level ||
6513                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6514                         create_trace_option_core_file(tr, trace_options[i], i);
6515         }
6516 }
6517
6518 static ssize_t
6519 rb_simple_read(struct file *filp, char __user *ubuf,
6520                size_t cnt, loff_t *ppos)
6521 {
6522         struct trace_array *tr = filp->private_data;
6523         char buf[64];
6524         int r;
6525
6526         r = tracer_tracing_is_on(tr);
6527         r = sprintf(buf, "%d\n", r);
6528
6529         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6530 }
6531
6532 static ssize_t
6533 rb_simple_write(struct file *filp, const char __user *ubuf,
6534                 size_t cnt, loff_t *ppos)
6535 {
6536         struct trace_array *tr = filp->private_data;
6537         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6538         unsigned long val;
6539         int ret;
6540
6541         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6542         if (ret)
6543                 return ret;
6544
6545         if (buffer) {
6546                 mutex_lock(&trace_types_lock);
6547                 if (val) {
6548                         tracer_tracing_on(tr);
6549                         if (tr->current_trace->start)
6550                                 tr->current_trace->start(tr);
6551                 } else {
6552                         tracer_tracing_off(tr);
6553                         if (tr->current_trace->stop)
6554                                 tr->current_trace->stop(tr);
6555                 }
6556                 mutex_unlock(&trace_types_lock);
6557         }
6558
6559         (*ppos)++;
6560
6561         return cnt;
6562 }
6563
6564 static const struct file_operations rb_simple_fops = {
6565         .open           = tracing_open_generic_tr,
6566         .read           = rb_simple_read,
6567         .write          = rb_simple_write,
6568         .release        = tracing_release_generic_tr,
6569         .llseek         = default_llseek,
6570 };
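
/*
 * Example use of the "tracing_on" file that rb_simple_fops backs
 * (created in init_tracer_tracefs() below; path assumes tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 *   cat /sys/kernel/tracing/tracing_on        # prints 0 or 1
 */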
6571
6572 struct dentry *trace_instance_dir;
6573
6574 static void
6575 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6576
6577 static int
6578 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6579 {
6580         enum ring_buffer_flags rb_flags;
6581
6582         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6583
6584         buf->tr = tr;
6585
6586         buf->buffer = ring_buffer_alloc(size, rb_flags);
6587         if (!buf->buffer)
6588                 return -ENOMEM;
6589
6590         buf->data = alloc_percpu(struct trace_array_cpu);
6591         if (!buf->data) {
6592                 ring_buffer_free(buf->buffer);
6593                 return -ENOMEM;
6594         }
6595
6596         /* Allocate the first page for all buffers */
6597         set_buffer_entries(&tr->trace_buffer,
6598                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6599
6600         return 0;
6601 }
6602
6603 static int allocate_trace_buffers(struct trace_array *tr, int size)
6604 {
6605         int ret;
6606
6607         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6608         if (ret)
6609                 return ret;
6610
6611 #ifdef CONFIG_TRACER_MAX_TRACE
6612         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6613                                     allocate_snapshot ? size : 1);
6614         if (WARN_ON(ret)) {
6615                 ring_buffer_free(tr->trace_buffer.buffer);
6616                 free_percpu(tr->trace_buffer.data);
6617                 return -ENOMEM;
6618         }
6619         tr->allocated_snapshot = allocate_snapshot;
6620
6621         /*
6622          * Only the top level trace array gets its snapshot allocated
6623          * from the kernel command line.
6624          */
6625         allocate_snapshot = false;
6626 #endif
6627         return 0;
6628 }
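
/*
 * Note: allocate_snapshot is normally set from the kernel command line
 * (the "alloc_snapshot" boot option handled earlier in this file), e.g.
 * booting with:
 *
 *   alloc_snapshot
 *
 * As the comment above explains, only the top level trace array
 * consumes it; instances start with a minimal max_buffer.
 */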
6629
6630 static void free_trace_buffer(struct trace_buffer *buf)
6631 {
6632         if (buf->buffer) {
6633                 ring_buffer_free(buf->buffer);
6634                 buf->buffer = NULL;
6635                 free_percpu(buf->data);
6636                 buf->data = NULL;
6637         }
6638 }
6639
6640 static void free_trace_buffers(struct trace_array *tr)
6641 {
6642         if (!tr)
6643                 return;
6644
6645         free_trace_buffer(&tr->trace_buffer);
6646
6647 #ifdef CONFIG_TRACER_MAX_TRACE
6648         free_trace_buffer(&tr->max_buffer);
6649 #endif
6650 }
6651
6652 static void init_trace_flags_index(struct trace_array *tr)
6653 {
6654         int i;
6655
6656         /* Used by the trace options files */
6657         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6658                 tr->trace_flags_index[i] = i;
6659 }
6660
6661 static void __update_tracer_options(struct trace_array *tr)
6662 {
6663         struct tracer *t;
6664
6665         for (t = trace_types; t; t = t->next)
6666                 add_tracer_options(tr, t);
6667 }
6668
6669 static void update_tracer_options(struct trace_array *tr)
6670 {
6671         mutex_lock(&trace_types_lock);
6672         __update_tracer_options(tr);
6673         mutex_unlock(&trace_types_lock);
6674 }
6675
6676 static int instance_mkdir(const char *name)
6677 {
6678         struct trace_array *tr;
6679         int ret;
6680
6681         mutex_lock(&trace_types_lock);
6682
6683         ret = -EEXIST;
6684         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6685                 if (tr->name && strcmp(tr->name, name) == 0)
6686                         goto out_unlock;
6687         }
6688
6689         ret = -ENOMEM;
6690         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6691         if (!tr)
6692                 goto out_unlock;
6693
6694         tr->name = kstrdup(name, GFP_KERNEL);
6695         if (!tr->name)
6696                 goto out_free_tr;
6697
6698         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6699                 goto out_free_tr;
6700
6701         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6702
6703         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6704
6705         raw_spin_lock_init(&tr->start_lock);
6706
6707         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6708
6709         tr->current_trace = &nop_trace;
6710
6711         INIT_LIST_HEAD(&tr->systems);
6712         INIT_LIST_HEAD(&tr->events);
6713
6714         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6715                 goto out_free_tr;
6716
6717         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6718         if (!tr->dir)
6719                 goto out_free_tr;
6720
6721         ret = event_trace_add_tracer(tr->dir, tr);
6722         if (ret) {
6723                 tracefs_remove_recursive(tr->dir);
6724                 goto out_free_tr;
6725         }
6726
6727         init_tracer_tracefs(tr, tr->dir);
6728         init_trace_flags_index(tr);
6729         __update_tracer_options(tr);
6730
6731         list_add(&tr->list, &ftrace_trace_arrays);
6732
6733         mutex_unlock(&trace_types_lock);
6734
6735         return 0;
6736
6737  out_free_tr:
6738         free_trace_buffers(tr);
6739         free_cpumask_var(tr->tracing_cpumask);
6740         kfree(tr->name);
6741         kfree(tr);
6742
6743  out_unlock:
6744         mutex_unlock(&trace_types_lock);
6745
6746         return ret;
6747
6748 }
6749
6750 static int instance_rmdir(const char *name)
6751 {
6752         struct trace_array *tr;
6753         int found = 0;
6754         int ret;
6755         int i;
6756
6757         mutex_lock(&trace_types_lock);
6758
6759         ret = -ENODEV;
6760         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6761                 if (tr->name && strcmp(tr->name, name) == 0) {
6762                         found = 1;
6763                         break;
6764                 }
6765         }
6766         if (!found)
6767                 goto out_unlock;
6768
6769         ret = -EBUSY;
6770         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6771                 goto out_unlock;
6772
6773         list_del(&tr->list);
6774
6775         /* Disable all the flags that were enabled coming in */
6776         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6777                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6778                         set_tracer_flag(tr, 1 << i, 0);
6779         }
6780
6781         tracing_set_nop(tr);
6782         event_trace_del_tracer(tr);
6783         ftrace_destroy_function_files(tr);
6784         tracefs_remove_recursive(tr->dir);
6785         free_trace_buffers(tr);
6786
6787         for (i = 0; i < tr->nr_topts; i++) {
6788                 kfree(tr->topts[i].topts);
6789         }
6790         kfree(tr->topts);
6791
6792         kfree(tr->name);
6793         kfree(tr);
6794
6795         ret = 0;
6796
6797  out_unlock:
6798         mutex_unlock(&trace_types_lock);
6799
6800         return ret;
6801 }
6802
6803 static __init void create_trace_instances(struct dentry *d_tracer)
6804 {
6805         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6806                                                          instance_mkdir,
6807                                                          instance_rmdir);
6808         if (WARN_ON(!trace_instance_dir))
6809                 return;
6810 }
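
/*
 * The "instances" directory registered above lets user space create and
 * remove trace arrays with plain mkdir/rmdir (illustrative paths,
 * assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance is still referenced.
 */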
6811
6812 static void
6813 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6814 {
6815         int cpu;
6816
6817         trace_create_file("available_tracers", 0444, d_tracer,
6818                         tr, &show_traces_fops);
6819
6820         trace_create_file("current_tracer", 0644, d_tracer,
6821                         tr, &set_tracer_fops);
6822
6823         trace_create_file("tracing_cpumask", 0644, d_tracer,
6824                           tr, &tracing_cpumask_fops);
6825
6826         trace_create_file("trace_options", 0644, d_tracer,
6827                           tr, &tracing_iter_fops);
6828
6829         trace_create_file("trace", 0644, d_tracer,
6830                           tr, &tracing_fops);
6831
6832         trace_create_file("trace_pipe", 0444, d_tracer,
6833                           tr, &tracing_pipe_fops);
6834
6835         trace_create_file("buffer_size_kb", 0644, d_tracer,
6836                           tr, &tracing_entries_fops);
6837
6838         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6839                           tr, &tracing_total_entries_fops);
6840
6841         trace_create_file("free_buffer", 0200, d_tracer,
6842                           tr, &tracing_free_buffer_fops);
6843
6844         trace_create_file("trace_marker", 0220, d_tracer,
6845                           tr, &tracing_mark_fops);
6846
6847         trace_create_file("trace_clock", 0644, d_tracer, tr,
6848                           &trace_clock_fops);
6849
6850         trace_create_file("tracing_on", 0644, d_tracer,
6851                           tr, &rb_simple_fops);
6852
6853         create_trace_options_dir(tr);
6854
6855 #ifdef CONFIG_TRACER_MAX_TRACE
6856         trace_create_file("tracing_max_latency", 0644, d_tracer,
6857                         &tr->max_latency, &tracing_max_lat_fops);
6858 #endif
6859
6860         if (ftrace_create_function_files(tr, d_tracer))
6861                 WARN(1, "Could not allocate function filter files");
6862
6863 #ifdef CONFIG_TRACER_SNAPSHOT
6864         trace_create_file("snapshot", 0644, d_tracer,
6865                           tr, &snapshot_fops);
6866 #endif
6867
6868         for_each_tracing_cpu(cpu)
6869                 tracing_init_tracefs_percpu(tr, cpu);
6870
6871 }
6872
6873 static struct vfsmount *trace_automount(void *ignore)
6874 {
6875         struct vfsmount *mnt;
6876         struct file_system_type *type;
6877
6878         /*
6879          * To maintain backward compatibility for tools that mount
6880          * debugfs to get to the tracing facility, tracefs is automatically
6881          * mounted to the debugfs/tracing directory.
6882          */
6883         type = get_fs_type("tracefs");
6884         if (!type)
6885                 return NULL;
6886         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6887         put_filesystem(type);
6888         if (IS_ERR(mnt))
6889                 return NULL;
6890         mntget(mnt);
6891
6892         return mnt;
6893 }
6894
6895 /**
6896  * tracing_init_dentry - initialize top level trace array
6897  *
6898  * This is called when creating files or directories in the tracing
6899  * directory. It is called via fs_initcall() by the boot-up code and
6900  * is expected to return the dentry of the top level tracing directory.
6901  */
6902 struct dentry *tracing_init_dentry(void)
6903 {
6904         struct trace_array *tr = &global_trace;
6905
6906         /* The top level trace array uses NULL as parent */
6907         if (tr->dir)
6908                 return NULL;
6909
6910         if (WARN_ON(!tracefs_initialized()) ||
6911                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6912                  WARN_ON(!debugfs_initialized())))
6913                 return ERR_PTR(-ENODEV);
6914
6915         /*
6916          * As there may still be users that expect the tracing
6917          * files to exist in debugfs/tracing, we must automount
6918          * the tracefs file system there, so older tools still
6919          * work with the newer kernel.
6920          */
6921         tr->dir = debugfs_create_automount("tracing", NULL,
6922                                            trace_automount, NULL);
6923         if (!tr->dir) {
6924                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6925                 return ERR_PTR(-ENOMEM);
6926         }
6927
6928         return NULL;
6929 }
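
/*
 * For reference, two common ways user space reaches these files (the
 * second relies on the automount set up above and assumes debugfs is
 * mounted at /sys/kernel/debug):
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 *   ls /sys/kernel/debug/tracing        # triggers the tracefs automount
 */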
6930
6931 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6932 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6933
6934 static void __init trace_enum_init(void)
6935 {
6936         int len;
6937
6938         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6939         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6940 }
6941
6942 #ifdef CONFIG_MODULES
6943 static void trace_module_add_enums(struct module *mod)
6944 {
6945         if (!mod->num_trace_enums)
6946                 return;
6947
6948         /*
6949          * Modules with bad taint do not have events created, so do
6950          * not bother with their enums either.
6951          */
6952         if (trace_module_has_bad_taint(mod))
6953                 return;
6954
6955         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6956 }
6957
6958 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6959 static void trace_module_remove_enums(struct module *mod)
6960 {
6961         union trace_enum_map_item *map;
6962         union trace_enum_map_item **last = &trace_enum_maps;
6963
6964         if (!mod->num_trace_enums)
6965                 return;
6966
6967         mutex_lock(&trace_enum_mutex);
6968
6969         map = trace_enum_maps;
6970
6971         while (map) {
6972                 if (map->head.mod == mod)
6973                         break;
6974                 map = trace_enum_jmp_to_tail(map);
6975                 last = &map->tail.next;
6976                 map = map->tail.next;
6977         }
6978         if (!map)
6979                 goto out;
6980
6981         *last = trace_enum_jmp_to_tail(map)->tail.next;
6982         kfree(map);
6983  out:
6984         mutex_unlock(&trace_enum_mutex);
6985 }
6986 #else
6987 static inline void trace_module_remove_enums(struct module *mod) { }
6988 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6989
6990 static int trace_module_notify(struct notifier_block *self,
6991                                unsigned long val, void *data)
6992 {
6993         struct module *mod = data;
6994
6995         switch (val) {
6996         case MODULE_STATE_COMING:
6997                 trace_module_add_enums(mod);
6998                 break;
6999         case MODULE_STATE_GOING:
7000                 trace_module_remove_enums(mod);
7001                 break;
7002         }
7003
7004         return 0;
7005 }
7006
7007 static struct notifier_block trace_module_nb = {
7008         .notifier_call = trace_module_notify,
7009         .priority = 0,
7010 };
7011 #endif /* CONFIG_MODULES */
7012
7013 static __init int tracer_init_tracefs(void)
7014 {
7015         struct dentry *d_tracer;
7016
7017         trace_access_lock_init();
7018
7019         d_tracer = tracing_init_dentry();
7020         if (IS_ERR(d_tracer))
7021                 return 0;
7022
7023         init_tracer_tracefs(&global_trace, d_tracer);
7024
7025         trace_create_file("tracing_thresh", 0644, d_tracer,
7026                         &global_trace, &tracing_thresh_fops);
7027
7028         trace_create_file("README", 0444, d_tracer,
7029                         NULL, &tracing_readme_fops);
7030
7031         trace_create_file("saved_cmdlines", 0444, d_tracer,
7032                         NULL, &tracing_saved_cmdlines_fops);
7033
7034         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7035                           NULL, &tracing_saved_cmdlines_size_fops);
7036
7037         trace_enum_init();
7038
7039         trace_create_enum_file(d_tracer);
7040
7041 #ifdef CONFIG_MODULES
7042         register_module_notifier(&trace_module_nb);
7043 #endif
7044
7045 #ifdef CONFIG_DYNAMIC_FTRACE
7046         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7047                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7048 #endif
7049
7050         create_trace_instances(d_tracer);
7051
7052         update_tracer_options(&global_trace);
7053
7054         return 0;
7055 }
7056
7057 static int trace_panic_handler(struct notifier_block *this,
7058                                unsigned long event, void *unused)
7059 {
7060         if (ftrace_dump_on_oops)
7061                 ftrace_dump(ftrace_dump_on_oops);
7062         return NOTIFY_OK;
7063 }
7064
7065 static struct notifier_block trace_panic_notifier = {
7066         .notifier_call  = trace_panic_handler,
7067         .next           = NULL,
7068         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7069 };
7070
7071 static int trace_die_handler(struct notifier_block *self,
7072                              unsigned long val,
7073                              void *data)
7074 {
7075         switch (val) {
7076         case DIE_OOPS:
7077                 if (ftrace_dump_on_oops)
7078                         ftrace_dump(ftrace_dump_on_oops);
7079                 break;
7080         default:
7081                 break;
7082         }
7083         return NOTIFY_OK;
7084 }
7085
7086 static struct notifier_block trace_die_notifier = {
7087         .notifier_call = trace_die_handler,
7088         .priority = 200
7089 };
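
/*
 * The two notifiers above only dump the buffers when ftrace_dump_on_oops
 * is set.  That is done either on the kernel command line or, at run
 * time, via the usual sysctl interface, e.g.:
 *
 *   ftrace_dump_on_oops                              # boot parameter
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    # sysctl
 */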
7090
7091 /*
7092  * printk is set to a max of 1024; we really don't need it that big.
7093  * Nothing should be printing 1000 characters anyway.
7094  */
7095 #define TRACE_MAX_PRINT         1000
7096
7097 /*
7098  * Define here KERN_TRACE so that we have one place to modify
7099  * it if we decide to change what log level the ftrace dump
7100  * should be at.
7101  */
7102 #define KERN_TRACE              KERN_EMERG
7103
7104 void
7105 trace_printk_seq(struct trace_seq *s)
7106 {
7107         /* Probably should print a warning here. */
7108         if (s->seq.len >= TRACE_MAX_PRINT)
7109                 s->seq.len = TRACE_MAX_PRINT;
7110
7111         /*
7112          * More paranoid code. Although the buffer size is set to
7113          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7114          * an extra layer of protection.
7115          */
7116         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7117                 s->seq.len = s->seq.size - 1;
7118
7119         /* Should already be '\0' terminated, but we are paranoid. */
7120         s->buffer[s->seq.len] = 0;
7121
7122         printk(KERN_TRACE "%s", s->buffer);
7123
7124         trace_seq_init(s);
7125 }
7126
7127 void trace_init_global_iter(struct trace_iterator *iter)
7128 {
7129         iter->tr = &global_trace;
7130         iter->trace = iter->tr->current_trace;
7131         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7132         iter->trace_buffer = &global_trace.trace_buffer;
7133
7134         if (iter->trace && iter->trace->open)
7135                 iter->trace->open(iter);
7136
7137         /* Annotate start of buffers if we had overruns */
7138         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7139                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7140
7141         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7142         if (trace_clocks[iter->tr->clock_id].in_ns)
7143                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7144 }
7145
7146 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7147 {
7148         /* use static because iter can be a bit big for the stack */
7149         static struct trace_iterator iter;
7150         static atomic_t dump_running;
7151         struct trace_array *tr = &global_trace;
7152         unsigned int old_userobj;
7153         unsigned long flags;
7154         int cnt = 0, cpu;
7155
7156         /* Only allow one dump user at a time. */
7157         if (atomic_inc_return(&dump_running) != 1) {
7158                 atomic_dec(&dump_running);
7159                 return;
7160         }
7161
7162         /*
7163          * Always turn off tracing when we dump.
7164          * We don't need to show trace output of what happens
7165          * between multiple crashes.
7166          *
7167          * If the user does a sysrq-z, then they can re-enable
7168          * tracing with echo 1 > tracing_on.
7169          */
7170         tracing_off();
7171
7172         local_irq_save(flags);
7173
7174         /* Simulate the iterator */
7175         trace_init_global_iter(&iter);
7176
7177         for_each_tracing_cpu(cpu) {
7178                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7179         }
7180
7181         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7182
7183         /* don't look at user memory in panic mode */
7184         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7185
7186         switch (oops_dump_mode) {
7187         case DUMP_ALL:
7188                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7189                 break;
7190         case DUMP_ORIG:
7191                 iter.cpu_file = raw_smp_processor_id();
7192                 break;
7193         case DUMP_NONE:
7194                 goto out_enable;
7195         default:
7196                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7197                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7198         }
7199
7200         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7201
7202         /* Did function tracer already get disabled? */
7203         if (ftrace_is_dead()) {
7204                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7205                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7206         }
7207
7208         /*
7209          * We need to stop all tracing on all CPUs to read
7210          * the next buffer. This is a bit expensive, but is
7211          * not done often. We print all that we can read,
7212          * and then release the locks again.
7213          */
7214
7215         while (!trace_empty(&iter)) {
7216
7217                 if (!cnt)
7218                         printk(KERN_TRACE "---------------------------------\n");
7219
7220                 cnt++;
7221
7222                 /* reset all but tr, trace, and overruns */
7223                 memset(&iter.seq, 0,
7224                        sizeof(struct trace_iterator) -
7225                        offsetof(struct trace_iterator, seq));
7226                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7227                 iter.pos = -1;
7228
7229                 if (trace_find_next_entry_inc(&iter) != NULL) {
7230                         int ret;
7231
7232                         ret = print_trace_line(&iter);
7233                         if (ret != TRACE_TYPE_NO_CONSUME)
7234                                 trace_consume(&iter);
7235                 }
7236                 touch_nmi_watchdog();
7237
7238                 trace_printk_seq(&iter.seq);
7239         }
7240
7241         if (!cnt)
7242                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7243         else
7244                 printk(KERN_TRACE "---------------------------------\n");
7245
7246  out_enable:
7247         tr->trace_flags |= old_userobj;
7248
7249         for_each_tracing_cpu(cpu) {
7250                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7251         }
7252         atomic_dec(&dump_running);
7253         local_irq_restore(flags);
7254 }
7255 EXPORT_SYMBOL_GPL(ftrace_dump);
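
/*
 * Besides the oops/panic notifiers, ftrace_dump() can also be triggered
 * by hand with the 'z' sysrq (if sysrq is enabled), e.g.:
 *
 *   echo z > /proc/sysrq-trigger
 */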
7256
7257 __init static int tracer_alloc_buffers(void)
7258 {
7259         int ring_buf_size;
7260         int ret = -ENOMEM;
7261
7262         /*
7263          * Make sure we don't accidentally add more trace options
7264          * than we have bits for.
7265          */
7266         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7267
7268         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7269                 goto out;
7270
7271         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7272                 goto out_free_buffer_mask;
7273
7274         /* Only allocate trace_printk buffers if a trace_printk exists */
7275         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7276                 /* Must be called before global_trace.buffer is allocated */
7277                 trace_printk_init_buffers();
7278
7279         /* To save memory, keep the ring buffer size to its minimum */
7280         if (ring_buffer_expanded)
7281                 ring_buf_size = trace_buf_size;
7282         else
7283                 ring_buf_size = 1;
7284
7285         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7286         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7287
7288         raw_spin_lock_init(&global_trace.start_lock);
7289
7290         /* Used for event triggers */
7291         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7292         if (!temp_buffer)
7293                 goto out_free_cpumask;
7294
7295         if (trace_create_savedcmd() < 0)
7296                 goto out_free_temp_buffer;
7297
7298         /* TODO: make the number of buffers hot pluggable with CPUs */
7299         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7300                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7301                 WARN_ON(1);
7302                 goto out_free_savedcmd;
7303         }
7304
7305         if (global_trace.buffer_disabled)
7306                 tracing_off();
7307
7308         if (trace_boot_clock) {
7309                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7310                 if (ret < 0)
7311                         pr_warn("Trace clock %s not defined, going back to default\n",
7312                                 trace_boot_clock);
7313         }
7314
7315         /*
7316          * register_tracer() might reference current_trace, so it
7317          * needs to be set before we register anything. This is
7318          * just a bootstrap of current_trace anyway.
7319          */
7320         global_trace.current_trace = &nop_trace;
7321
7322         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7323
7324         ftrace_init_global_array_ops(&global_trace);
7325
7326         init_trace_flags_index(&global_trace);
7327
7328         register_tracer(&nop_trace);
7329
7330         /* All seems OK, enable tracing */
7331         tracing_disabled = 0;
7332
7333         atomic_notifier_chain_register(&panic_notifier_list,
7334                                        &trace_panic_notifier);
7335
7336         register_die_notifier(&trace_die_notifier);
7337
7338         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7339
7340         INIT_LIST_HEAD(&global_trace.systems);
7341         INIT_LIST_HEAD(&global_trace.events);
7342         list_add(&global_trace.list, &ftrace_trace_arrays);
7343
7344         apply_trace_boot_options();
7345
7346         register_snapshot_cmd();
7347
7348         return 0;
7349
7350 out_free_savedcmd:
7351         free_saved_cmdlines_buffer(savedcmd);
7352 out_free_temp_buffer:
7353         ring_buffer_free(temp_buffer);
7354 out_free_cpumask:
7355         free_cpumask_var(global_trace.tracing_cpumask);
7356 out_free_buffer_mask:
7357         free_cpumask_var(tracing_buffer_mask);
7358 out:
7359         return ret;
7360 }
7361
7362 void __init trace_init(void)
7363 {
7364         if (tracepoint_printk) {
7365                 tracepoint_print_iter =
7366                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7367                 if (WARN_ON(!tracepoint_print_iter))
7368                         tracepoint_printk = 0;
7369         }
7370         tracer_alloc_buffers();
7371         trace_event_init();
7372 }
7373
7374 __init static int clear_boot_tracer(void)
7375 {
7376         /*
7377          * The default bootup tracer name points into an init section.
7378          * This function is called at late_initcall time. If the boot
7379          * tracer has not been registered by now, clear it out, to
7380          * prevent a later registration from accessing the buffer that
7381          * is about to be freed.
7382          */
7383         if (!default_bootup_tracer)
7384                 return 0;
7385
7386         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7387                default_bootup_tracer);
7388         default_bootup_tracer = NULL;
7389
7390         return 0;
7391 }
7392
7393 fs_initcall(tracer_init_tracefs);
7394 late_initcall(clear_boot_tracer);