tracing: Have trace_buffer_unlock_commit() call the _regs version with NULL
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest. However, some concurrent
58  * insertions into the ring-buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurs.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL, as it must be different
133          * from "mod" or "enum_string".
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
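/*
 * ns2usecs - convert nanoseconds to microseconds, rounding to nearest.
 *
 * Adding 500 before dividing by 1000 rounds to the nearest microsecond,
 * e.g. 1499 ns -> 1 us and 1500 ns -> 2 us.
 */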
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
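/*
 * call_filter_check_discard - discard an event that fails its event filter
 *
 * Returns 1 (and discards the reserved ring buffer event) when the call has
 * a filter attached and the record does not match it; returns 0 otherwise.
 * Callers typically pair it with the commit, as trace_function() does below:
 *
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */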
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 ring_buffer_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 {
324         u64 ts;
325
326         /* Early boot up does not have a buffer yet */
327         if (!buf->buffer)
328                 return trace_clock_local();
329
330         ts = ring_buffer_time_stamp(buf->buffer, cpu);
331         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332
333         return ts;
334 }
335
336 cycle_t ftrace_now(int cpu)
337 {
338         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
339 }
340
341 /**
342  * tracing_is_enabled - Show if global_trace has been enabled
343  *
344  * Shows if the global trace has been enabled or not. It uses the
345  * mirror flag "buffer_disabled" to be used in fast paths such as for
346  * the irqsoff tracer. But it may be inaccurate due to races. If you
347  * need to know the accurate state, use tracing_is_on() which is a little
348  * slower, but accurate.
349  */
350 int tracing_is_enabled(void)
351 {
352         /*
353          * For quick access (irqsoff uses this in fast path), just
354          * return the mirror variable of the state of the ring buffer.
355          * It's a little racy, but we don't really care.
356          */
357         smp_rmb();
358         return !global_trace.buffer_disabled;
359 }
360
361 /*
362  * trace_buf_size is the size in bytes that is allocated
363  * for a buffer. Note, the number of bytes is always rounded
364  * to page size.
365  *
366  * This number is purposely set to a low number of 16384.
367  * If a dump on oops happens, it is much appreciated not to have to
368  * wait for all that output. In any case, this is configurable at both
369  * boot time and run time.
370  */
371 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
372
373 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374
375 /* trace_types holds a link list of available tracers. */
376 static struct tracer            *trace_types __read_mostly;
377
378 /*
379  * trace_types_lock is used to protect the trace_types list.
380  */
381 DEFINE_MUTEX(trace_types_lock);
382
383 /*
384  * serialize the access of the ring buffer
385  *
386  * The ring buffer serializes readers, but that is only low-level
387  * protection. The validity of the events (returned by
388  * ring_buffer_peek(), etc.) is not protected by the ring buffer.
389  *
390  * The content of events may become garbage if we allow other processes
391  * to consume these events concurrently:
392  *   A) the page of the consumed events may become a normal page
393  *      (not a reader page) in the ring buffer, and this page will be
394  *      rewritten by the event producer.
395  *   B) the page of the consumed events may become a page for splice_read,
396  *      and this page will be returned to the system.
397  *
398  * These primitives allow multiple processes to access different CPU
399  * ring buffers concurrently.
400  *
401  * These primitives don't distinguish read-only and read-consume access.
402  * Multiple read-only accesses are also serialized.
403  */
404
405 #ifdef CONFIG_SMP
406 static DECLARE_RWSEM(all_cpu_access_lock);
407 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
408
409 static inline void trace_access_lock(int cpu)
410 {
411         if (cpu == RING_BUFFER_ALL_CPUS) {
412                 /* gain it for accessing the whole ring buffer. */
413                 down_write(&all_cpu_access_lock);
414         } else {
415                 /* gain it for accessing a cpu ring buffer. */
416
417                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418                 down_read(&all_cpu_access_lock);
419
420                 /* Secondly block other access to this @cpu ring buffer. */
421                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
422         }
423 }
424
425 static inline void trace_access_unlock(int cpu)
426 {
427         if (cpu == RING_BUFFER_ALL_CPUS) {
428                 up_write(&all_cpu_access_lock);
429         } else {
430                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
431                 up_read(&all_cpu_access_lock);
432         }
433 }
434
435 static inline void trace_access_lock_init(void)
436 {
437         int cpu;
438
439         for_each_possible_cpu(cpu)
440                 mutex_init(&per_cpu(cpu_access_lock, cpu));
441 }
442
443 #else
444
445 static DEFINE_MUTEX(access_lock);
446
447 static inline void trace_access_lock(int cpu)
448 {
449         (void)cpu;
450         mutex_lock(&access_lock);
451 }
452
453 static inline void trace_access_unlock(int cpu)
454 {
455         (void)cpu;
456         mutex_unlock(&access_lock);
457 }
458
459 static inline void trace_access_lock_init(void)
460 {
461 }
462
463 #endif
464
465 #ifdef CONFIG_STACKTRACE
466 static void __ftrace_trace_stack(struct ring_buffer *buffer,
467                                  unsigned long flags,
468                                  int skip, int pc, struct pt_regs *regs);
469 static inline void ftrace_trace_stack(struct trace_array *tr,
470                                       struct ring_buffer *buffer,
471                                       unsigned long flags,
472                                       int skip, int pc, struct pt_regs *regs);
473
474 #else
475 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
476                                         unsigned long flags,
477                                         int skip, int pc, struct pt_regs *regs)
478 {
479 }
480 static inline void ftrace_trace_stack(struct trace_array *tr,
481                                       struct ring_buffer *buffer,
482                                       unsigned long flags,
483                                       int skip, int pc, struct pt_regs *regs)
484 {
485 }
486
487 #endif
488
489 static void tracer_tracing_on(struct trace_array *tr)
490 {
491         if (tr->trace_buffer.buffer)
492                 ring_buffer_record_on(tr->trace_buffer.buffer);
493         /*
494          * This flag is looked at when buffers haven't been allocated
495          * yet, or by some tracers (like irqsoff), that just want to
496          * know if the ring buffer has been disabled, but it can handle
497          * races where it gets disabled while we still do a record.
498          * As the check is in the fast path of the tracers, it is more
499          * important to be fast than accurate.
500          */
501         tr->buffer_disabled = 0;
502         /* Make the flag seen by readers */
503         smp_wmb();
504 }
505
506 /**
507  * tracing_on - enable tracing buffers
508  *
509  * This function enables tracing buffers that may have been
510  * disabled with tracing_off.
511  */
512 void tracing_on(void)
513 {
514         tracer_tracing_on(&global_trace);
515 }
516 EXPORT_SYMBOL_GPL(tracing_on);
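/*
 * A typical (illustrative) use is to bracket a region of interest so that
 * only that window is recorded:
 *
 *	tracing_on();
 *	do_something_interesting();	// hypothetical helper
 *	tracing_off();
 *
 * The same switch is also exposed to user space via the "tracing_on" file
 * in the tracefs directory.
 */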
517
518 /**
519  * __trace_puts - write a constant string into the trace buffer.
520  * @ip:    The address of the caller
521  * @str:   The constant string to write
522  * @size:  The size of the string.
523  */
524 int __trace_puts(unsigned long ip, const char *str, int size)
525 {
526         struct ring_buffer_event *event;
527         struct ring_buffer *buffer;
528         struct print_entry *entry;
529         unsigned long irq_flags;
530         int alloc;
531         int pc;
532
533         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534                 return 0;
535
536         pc = preempt_count();
537
538         if (unlikely(tracing_selftest_running || tracing_disabled))
539                 return 0;
540
541         alloc = sizeof(*entry) + size + 2; /* possible \n added */
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip = ip;
552
553         memcpy(&entry->buf, str, size);
554
555         /* Add a newline if necessary */
556         if (entry->buf[size - 1] != '\n') {
557                 entry->buf[size] = '\n';
558                 entry->buf[size + 1] = '\0';
559         } else
560                 entry->buf[size] = '\0';
561
562         __buffer_unlock_commit(buffer, event);
563         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564
565         return size;
566 }
567 EXPORT_SYMBOL_GPL(__trace_puts);
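/*
 * Note: callers normally do not invoke __trace_puts() directly but go
 * through the trace_puts() macro, which supplies _THIS_IP_ and the string
 * length, and which may use __trace_bputs() instead when the string is a
 * compile-time constant (see the macro definition in the core headers).
 */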
568
569 /**
570  * __trace_bputs - write the pointer to a constant string into trace buffer
571  * @ip:    The address of the caller
572  * @str:   The constant string to write to the buffer
573  */
574 int __trace_bputs(unsigned long ip, const char *str)
575 {
576         struct ring_buffer_event *event;
577         struct ring_buffer *buffer;
578         struct bputs_entry *entry;
579         unsigned long irq_flags;
580         int size = sizeof(struct bputs_entry);
581         int pc;
582
583         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584                 return 0;
585
586         pc = preempt_count();
587
588         if (unlikely(tracing_selftest_running || tracing_disabled))
589                 return 0;
590
591         local_save_flags(irq_flags);
592         buffer = global_trace.trace_buffer.buffer;
593         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594                                           irq_flags, pc);
595         if (!event)
596                 return 0;
597
598         entry = ring_buffer_event_data(event);
599         entry->ip                       = ip;
600         entry->str                      = str;
601
602         __buffer_unlock_commit(buffer, event);
603         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604
605         return 1;
606 }
607 EXPORT_SYMBOL_GPL(__trace_bputs);
608
609 #ifdef CONFIG_TRACER_SNAPSHOT
610 /**
611  * tracing_snapshot - take a snapshot of the current buffer.
612  *
613  * This causes a swap between the snapshot buffer and the current live
614  * tracing buffer. You can use this to take snapshots of the live
615  * trace when some condition is triggered, but continue to trace.
616  *
617  * Note, make sure to allocate the snapshot with either
618  * a tracing_snapshot_alloc(), or by doing it manually
619  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
620  *
621  * If the snapshot buffer is not allocated, it will stop tracing,
622  * basically making a permanent snapshot.
623  */
624 void tracing_snapshot(void)
625 {
626         struct trace_array *tr = &global_trace;
627         struct tracer *tracer = tr->current_trace;
628         unsigned long flags;
629
630         if (in_nmi()) {
631                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
632                 internal_trace_puts("*** snapshot is being ignored        ***\n");
633                 return;
634         }
635
636         if (!tr->allocated_snapshot) {
637                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
638                 internal_trace_puts("*** stopping trace here!   ***\n");
639                 tracing_off();
640                 return;
641         }
642
643         /* Note, snapshot can not be used when the tracer uses it */
644         if (tracer->use_max_tr) {
645                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
646                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647                 return;
648         }
649
650         local_irq_save(flags);
651         update_max_tr(tr, current, smp_processor_id());
652         local_irq_restore(flags);
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot);
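/*
 * Illustrative use: allocate the spare buffer once, where sleeping is
 * allowed, then take snapshots from (almost) any context:
 *
 *	tracing_alloc_snapshot();	// setup, may sleep
 *	...
 *	tracing_snapshot();		// at the point of interest (not NMI)
 *
 * See tracing_alloc_snapshot() and tracing_snapshot_alloc() below.
 */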
655
656 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
657                                         struct trace_buffer *size_buf, int cpu_id);
658 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
659
660 static int alloc_snapshot(struct trace_array *tr)
661 {
662         int ret;
663
664         if (!tr->allocated_snapshot) {
665
666                 /* allocate spare buffer */
667                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
668                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
669                 if (ret < 0)
670                         return ret;
671
672                 tr->allocated_snapshot = true;
673         }
674
675         return 0;
676 }
677
678 static void free_snapshot(struct trace_array *tr)
679 {
680         /*
681          * We don't free the ring buffer; instead, we resize it, because
682          * the max_tr ring buffer has some state (e.g. ring->clock) that
683          * we want to preserve.
684          */
685         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
686         set_buffer_entries(&tr->max_buffer, 1);
687         tracing_reset_online_cpus(&tr->max_buffer);
688         tr->allocated_snapshot = false;
689 }
690
691 /**
692  * tracing_alloc_snapshot - allocate snapshot buffer.
693  *
694  * This only allocates the snapshot buffer if it isn't already
695  * allocated - it doesn't also take a snapshot.
696  *
697  * This is meant to be used in cases where the snapshot buffer needs
698  * to be set up for events that can't sleep but need to be able to
699  * trigger a snapshot.
700  */
701 int tracing_alloc_snapshot(void)
702 {
703         struct trace_array *tr = &global_trace;
704         int ret;
705
706         ret = alloc_snapshot(tr);
707         WARN_ON(ret < 0);
708
709         return ret;
710 }
711 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
712
713 /**
714  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
715  *
716  * This is similar to tracing_snapshot(), but it will allocate the
717  * snapshot buffer if it isn't already allocated. Use this only
718  * where it is safe to sleep, as the allocation may sleep.
719  *
720  * This causes a swap between the snapshot buffer and the current live
721  * tracing buffer. You can use this to take snapshots of the live
722  * trace when some condition is triggered, but continue to trace.
723  */
724 void tracing_snapshot_alloc(void)
725 {
726         int ret;
727
728         ret = tracing_alloc_snapshot();
729         if (ret < 0)
730                 return;
731
732         tracing_snapshot();
733 }
734 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
735 #else
736 void tracing_snapshot(void)
737 {
738         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
739 }
740 EXPORT_SYMBOL_GPL(tracing_snapshot);
741 int tracing_alloc_snapshot(void)
742 {
743         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
744         return -ENODEV;
745 }
746 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 void tracing_snapshot_alloc(void)
748 {
749         /* Give warning */
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #endif /* CONFIG_TRACER_SNAPSHOT */
754
755 static void tracer_tracing_off(struct trace_array *tr)
756 {
757         if (tr->trace_buffer.buffer)
758                 ring_buffer_record_off(tr->trace_buffer.buffer);
759         /*
760          * This flag is looked at when buffers haven't been allocated
761          * yet, or by some tracers (like irqsoff), that just want to
762          * know if the ring buffer has been disabled, but it can handle
763          * races where it gets disabled while we still do a record.
764          * As the check is in the fast path of the tracers, it is more
765          * important to be fast than accurate.
766          */
767         tr->buffer_disabled = 1;
768         /* Make the flag seen by readers */
769         smp_wmb();
770 }
771
772 /**
773  * tracing_off - turn off tracing buffers
774  *
775  * This function stops the tracing buffers from recording data.
776  * It does not disable any overhead the tracers themselves may
777  * be causing. This function simply causes all recording to
778  * the ring buffers to fail.
779  */
780 void tracing_off(void)
781 {
782         tracer_tracing_off(&global_trace);
783 }
784 EXPORT_SYMBOL_GPL(tracing_off);
785
786 void disable_trace_on_warning(void)
787 {
788         if (__disable_trace_on_warning)
789                 tracing_off();
790 }
791
792 /**
793  * tracer_tracing_is_on - show real state of ring buffer enabled
794  * @tr: the trace array to check whether its ring buffer is enabled
795  *
796  * Shows real state of the ring buffer if it is enabled or not.
797  */
798 static int tracer_tracing_is_on(struct trace_array *tr)
799 {
800         if (tr->trace_buffer.buffer)
801                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
802         return !tr->buffer_disabled;
803 }
804
805 /**
806  * tracing_is_on - show state of ring buffers enabled
807  */
808 int tracing_is_on(void)
809 {
810         return tracer_tracing_is_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_is_on);
813
814 static int __init set_buf_size(char *str)
815 {
816         unsigned long buf_size;
817
818         if (!str)
819                 return 0;
820         buf_size = memparse(str, &str);
821         /* nr_entries can not be zero */
822         if (buf_size == 0)
823                 return 0;
824         trace_buf_size = buf_size;
825         return 1;
826 }
827 __setup("trace_buf_size=", set_buf_size);
828
829 static int __init set_tracing_thresh(char *str)
830 {
831         unsigned long threshold;
832         int ret;
833
834         if (!str)
835                 return 0;
836         ret = kstrtoul(str, 0, &threshold);
837         if (ret < 0)
838                 return 0;
839         tracing_thresh = threshold * 1000;
840         return 1;
841 }
842 __setup("tracing_thresh=", set_tracing_thresh);
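/*
 * Note: the "tracing_thresh=" value is given in microseconds and stored in
 * tracing_thresh in nanoseconds (hence the multiply by 1000);
 * nsecs_to_usecs() below converts the other way.
 */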
843
844 unsigned long nsecs_to_usecs(unsigned long nsecs)
845 {
846         return nsecs / 1000;
847 }
848
849 /*
850  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
851  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
852  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
853  * of strings in the order that the enums were defined.
854  */
855 #undef C
856 #define C(a, b) b
857
858 /* These must match the bit positions in trace_iterator_flags */
859 static const char *trace_options[] = {
860         TRACE_FLAGS
861         NULL
862 };
863
864 static struct {
865         u64 (*func)(void);
866         const char *name;
867         int in_ns;              /* is this clock in nanoseconds? */
868 } trace_clocks[] = {
869         { trace_clock_local,            "local",        1 },
870         { trace_clock_global,           "global",       1 },
871         { trace_clock_counter,          "counter",      0 },
872         { trace_clock_jiffies,          "uptime",       0 },
873         { trace_clock,                  "perf",         1 },
874         { ktime_get_mono_fast_ns,       "mono",         1 },
875         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
876         ARCH_TRACE_CLOCKS
877 };
878
879 /*
880  * trace_parser_get_init - gets the buffer for trace parser
881  */
882 int trace_parser_get_init(struct trace_parser *parser, int size)
883 {
884         memset(parser, 0, sizeof(*parser));
885
886         parser->buffer = kmalloc(size, GFP_KERNEL);
887         if (!parser->buffer)
888                 return 1;
889
890         parser->size = size;
891         return 0;
892 }
893
894 /*
895  * trace_parser_put - frees the buffer for trace parser
896  */
897 void trace_parser_put(struct trace_parser *parser)
898 {
899         kfree(parser->buffer);
900 }
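/*
 * A minimal sketch of how the parser helpers fit together (hypothetical
 * caller, error handling abbreviated, buffer size only an example):
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, 128))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		do_something(parser.buffer);	// one NUL-terminated token
 *	trace_parser_put(&parser);
 */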
901
902 /*
903  * trace_get_user - reads the user input string separated by space
904  * (matched by isspace(ch))
905  *
906  * For each string found the 'struct trace_parser' is updated,
907  * and the function returns.
908  *
909  * Returns number of bytes read.
910  *
911  * See kernel/trace/trace.h for 'struct trace_parser' details.
912  */
913 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
914         size_t cnt, loff_t *ppos)
915 {
916         char ch;
917         size_t read = 0;
918         ssize_t ret;
919
920         if (!*ppos)
921                 trace_parser_clear(parser);
922
923         ret = get_user(ch, ubuf++);
924         if (ret)
925                 goto out;
926
927         read++;
928         cnt--;
929
930         /*
931          * If the parser did not finish with the last write, continue
932          * reading the user input without skipping spaces.
933          */
934         if (!parser->cont) {
935                 /* skip white space */
936                 while (cnt && isspace(ch)) {
937                         ret = get_user(ch, ubuf++);
938                         if (ret)
939                                 goto out;
940                         read++;
941                         cnt--;
942                 }
943
944                 /* only spaces were written */
945                 if (isspace(ch)) {
946                         *ppos += read;
947                         ret = read;
948                         goto out;
949                 }
950
951                 parser->idx = 0;
952         }
953
954         /* read the non-space input */
955         while (cnt && !isspace(ch)) {
956                 if (parser->idx < parser->size - 1)
957                         parser->buffer[parser->idx++] = ch;
958                 else {
959                         ret = -EINVAL;
960                         goto out;
961                 }
962                 ret = get_user(ch, ubuf++);
963                 if (ret)
964                         goto out;
965                 read++;
966                 cnt--;
967         }
968
969         /* We either got finished input or we have to wait for another call. */
970         if (isspace(ch)) {
971                 parser->buffer[parser->idx] = 0;
972                 parser->cont = false;
973         } else if (parser->idx < parser->size - 1) {
974                 parser->cont = true;
975                 parser->buffer[parser->idx++] = ch;
976         } else {
977                 ret = -EINVAL;
978                 goto out;
979         }
980
981         *ppos += read;
982         ret = read;
983
984 out:
985         return ret;
986 }
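/*
 * In other words, each call consumes at most one whitespace-delimited
 * token from the user buffer. If the token is cut off by the end of the
 * write, parser->cont is set so that the next call appends to the same
 * token instead of starting a new one; a token longer than the parser
 * buffer returns -EINVAL.
 */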
987
988 /* TODO add a seq_buf_to_buffer() */
989 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 {
991         int len;
992
993         if (trace_seq_used(s) <= s->seq.readpos)
994                 return -EBUSY;
995
996         len = trace_seq_used(s) - s->seq.readpos;
997         if (cnt > len)
998                 cnt = len;
999         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000
1001         s->seq.readpos += cnt;
1002         return cnt;
1003 }
1004
1005 unsigned long __read_mostly     tracing_thresh;
1006
1007 #ifdef CONFIG_TRACER_MAX_TRACE
1008 /*
1009  * Copy the new maximum trace into the separate maximum-trace
1010  * structure. (this way the maximum trace is permanently saved,
1011  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1012  */
1013 static void
1014 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1015 {
1016         struct trace_buffer *trace_buf = &tr->trace_buffer;
1017         struct trace_buffer *max_buf = &tr->max_buffer;
1018         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1019         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020
1021         max_buf->cpu = cpu;
1022         max_buf->time_start = data->preempt_timestamp;
1023
1024         max_data->saved_latency = tr->max_latency;
1025         max_data->critical_start = data->critical_start;
1026         max_data->critical_end = data->critical_end;
1027
1028         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029         max_data->pid = tsk->pid;
1030         /*
1031          * If tsk == current, then use current_uid(), as that does not use
1032          * RCU. The irq tracer can be called out of RCU scope.
1033          */
1034         if (tsk == current)
1035                 max_data->uid = current_uid();
1036         else
1037                 max_data->uid = task_uid(tsk);
1038
1039         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1040         max_data->policy = tsk->policy;
1041         max_data->rt_priority = tsk->rt_priority;
1042
1043         /* record this task's comm */
1044         tracing_record_cmdline(tsk);
1045 }
1046
1047 /**
1048  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1049  * @tr: tracer
1050  * @tsk: the task with the latency
1051  * @cpu: The cpu that initiated the trace.
1052  *
1053  * Flip the buffers between the @tr and the max_tr and record information
1054  * about which task was the cause of this latency.
1055  */
1056 void
1057 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1058 {
1059         struct ring_buffer *buf;
1060
1061         if (tr->stop_count)
1062                 return;
1063
1064         WARN_ON_ONCE(!irqs_disabled());
1065
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         buf = tr->trace_buffer.buffer;
1075         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1076         tr->max_buffer.buffer = buf;
1077
1078         __update_max_tr(tr, tsk, cpu);
1079         arch_spin_unlock(&tr->max_lock);
1080 }
1081
1082 /**
1083  * update_max_tr_single - only copy one trace over, and reset the rest
1084  * @tr: tracer
1085  * @tsk: task with the latency
1086  * @cpu: the cpu of the buffer to copy.
1087  *
1088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089  */
1090 void
1091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1092 {
1093         int ret;
1094
1095         if (tr->stop_count)
1096                 return;
1097
1098         WARN_ON_ONCE(!irqs_disabled());
1099         if (!tr->allocated_snapshot) {
1100                 /* Only the nop tracer should hit this when disabling */
1101                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102                 return;
1103         }
1104
1105         arch_spin_lock(&tr->max_lock);
1106
1107         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108
1109         if (ret == -EBUSY) {
1110                 /*
1111                  * We failed to swap the buffer due to a commit taking
1112                  * place on this CPU. We fail to record, but we reset
1113                  * the max trace buffer (no one writes directly to it)
1114                  * and flag that it failed.
1115                  */
1116                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117                         "Failed to swap buffers due to commit in progress\n");
1118         }
1119
1120         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121
1122         __update_max_tr(tr, tsk, cpu);
1123         arch_spin_unlock(&tr->max_lock);
1124 }
1125 #endif /* CONFIG_TRACER_MAX_TRACE */
1126
1127 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128 {
1129         /* Iterators are static, they should be filled or empty */
1130         if (trace_buffer_iter(iter, iter->cpu_file))
1131                 return 0;
1132
1133         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1134                                 full);
1135 }
1136
1137 #ifdef CONFIG_FTRACE_STARTUP_TEST
1138 static int run_tracer_selftest(struct tracer *type)
1139 {
1140         struct trace_array *tr = &global_trace;
1141         struct tracer *saved_tracer = tr->current_trace;
1142         int ret;
1143
1144         if (!type->selftest || tracing_selftest_disabled)
1145                 return 0;
1146
1147         /*
1148          * Run a selftest on this tracer.
1149          * Here we reset the trace buffer, and set the current
1150          * tracer to be this tracer. The tracer can then run some
1151          * internal tracing to verify that everything is in order.
1152          * If we fail, we do not register this tracer.
1153          */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156         tr->current_trace = type;
1157
1158 #ifdef CONFIG_TRACER_MAX_TRACE
1159         if (type->use_max_tr) {
1160                 /* If we expanded the buffers, make sure the max is expanded too */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1163                                            RING_BUFFER_ALL_CPUS);
1164                 tr->allocated_snapshot = true;
1165         }
1166 #endif
1167
1168         /* the test is responsible for initializing and enabling */
1169         pr_info("Testing tracer %s: ", type->name);
1170         ret = type->selftest(type, tr);
1171         /* the test is responsible for resetting too */
1172         tr->current_trace = saved_tracer;
1173         if (ret) {
1174                 printk(KERN_CONT "FAILED!\n");
1175                 /* Add the warning after printing 'FAILED' */
1176                 WARN_ON(1);
1177                 return -1;
1178         }
1179         /* Only reset on passing, to avoid touching corrupted buffers */
1180         tracing_reset_online_cpus(&tr->trace_buffer);
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 tr->allocated_snapshot = false;
1185
1186                 /* Shrink the max buffer again */
1187                 if (ring_buffer_expanded)
1188                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1189                                            RING_BUFFER_ALL_CPUS);
1190         }
1191 #endif
1192
1193         printk(KERN_CONT "PASSED\n");
1194         return 0;
1195 }
1196 #else
1197 static inline int run_tracer_selftest(struct tracer *type)
1198 {
1199         return 0;
1200 }
1201 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1202
1203 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1204
1205 static void __init apply_trace_boot_options(void);
1206
1207 /**
1208  * register_tracer - register a tracer with the ftrace system.
1209  * @type: the plugin for the tracer
1210  *
1211  * Register a new plugin tracer.
1212  */
1213 int __init register_tracer(struct tracer *type)
1214 {
1215         struct tracer *t;
1216         int ret = 0;
1217
1218         if (!type->name) {
1219                 pr_info("Tracer must have a name\n");
1220                 return -1;
1221         }
1222
1223         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1225                 return -1;
1226         }
1227
1228         mutex_lock(&trace_types_lock);
1229
1230         tracing_selftest_running = true;
1231
1232         for (t = trace_types; t; t = t->next) {
1233                 if (strcmp(type->name, t->name) == 0) {
1234                         /* already found */
1235                         pr_info("Tracer %s already registered\n",
1236                                 type->name);
1237                         ret = -1;
1238                         goto out;
1239                 }
1240         }
1241
1242         if (!type->set_flag)
1243                 type->set_flag = &dummy_set_flag;
1244         if (!type->flags) {
1245                 /* Allocate a dummy tracer_flags */
1246                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247                 if (!type->flags) {
1248                         ret = -ENOMEM;
1249                         goto out;
1250                 }
1251                 type->flags->val = 0;
1252                 type->flags->opts = dummy_tracer_opt;
1253         } else
1254                 if (!type->flags->opts)
1255                         type->flags->opts = dummy_tracer_opt;
1256
1257         /* store the tracer for __set_tracer_option */
1258         type->flags->trace = type;
1259
1260         ret = run_tracer_selftest(type);
1261         if (ret < 0)
1262                 goto out;
1263
1264         type->next = trace_types;
1265         trace_types = type;
1266         add_tracer_options(&global_trace, type);
1267
1268  out:
1269         tracing_selftest_running = false;
1270         mutex_unlock(&trace_types_lock);
1271
1272         if (ret || !default_bootup_tracer)
1273                 goto out_unlock;
1274
1275         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1276                 goto out_unlock;
1277
1278         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1279         /* Do we want this tracer to start on bootup? */
1280         tracing_set_tracer(&global_trace, type->name);
1281         default_bootup_tracer = NULL;
1282
1283         apply_trace_boot_options();
1284
1285         /* Disable other selftests, since this tracer will break them. */
1286         tracing_selftest_disabled = true;
1287 #ifdef CONFIG_FTRACE_STARTUP_TEST
1288         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1289                type->name);
1290 #endif
1291
1292  out_unlock:
1293         return ret;
1294 }
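/*
 * A minimal, hypothetical tracer registration looks roughly like this
 * (field names from struct tracer; "foo" is a made-up example):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 */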
1295
1296 void tracing_reset(struct trace_buffer *buf, int cpu)
1297 {
1298         struct ring_buffer *buffer = buf->buffer;
1299
1300         if (!buffer)
1301                 return;
1302
1303         ring_buffer_record_disable(buffer);
1304
1305         /* Make sure all commits have finished */
1306         synchronize_sched();
1307         ring_buffer_reset_cpu(buffer, cpu);
1308
1309         ring_buffer_record_enable(buffer);
1310 }
1311
1312 void tracing_reset_online_cpus(struct trace_buffer *buf)
1313 {
1314         struct ring_buffer *buffer = buf->buffer;
1315         int cpu;
1316
1317         if (!buffer)
1318                 return;
1319
1320         ring_buffer_record_disable(buffer);
1321
1322         /* Make sure all commits have finished */
1323         synchronize_sched();
1324
1325         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326
1327         for_each_online_cpu(cpu)
1328                 ring_buffer_reset_cpu(buffer, cpu);
1329
1330         ring_buffer_record_enable(buffer);
1331 }
1332
1333 /* Must have trace_types_lock held */
1334 void tracing_reset_all_online_cpus(void)
1335 {
1336         struct trace_array *tr;
1337
1338         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339                 tracing_reset_online_cpus(&tr->trace_buffer);
1340 #ifdef CONFIG_TRACER_MAX_TRACE
1341                 tracing_reset_online_cpus(&tr->max_buffer);
1342 #endif
1343         }
1344 }
1345
1346 #define SAVED_CMDLINES_DEFAULT 128
1347 #define NO_CMDLINE_MAP UINT_MAX
1348 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 struct saved_cmdlines_buffer {
1350         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1351         unsigned *map_cmdline_to_pid;
1352         unsigned cmdline_num;
1353         int cmdline_idx;
1354         char *saved_cmdlines;
1355 };
1356 static struct saved_cmdlines_buffer *savedcmd;
1357
1358 /* temporarily disable recording */
1359 static atomic_t trace_record_cmdline_disabled __read_mostly;
1360
1361 static inline char *get_saved_cmdlines(int idx)
1362 {
1363         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1364 }
1365
1366 static inline void set_cmdline(int idx, const char *cmdline)
1367 {
1368         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1369 }
1370
1371 static int allocate_cmdlines_buffer(unsigned int val,
1372                                     struct saved_cmdlines_buffer *s)
1373 {
1374         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1375                                         GFP_KERNEL);
1376         if (!s->map_cmdline_to_pid)
1377                 return -ENOMEM;
1378
1379         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1380         if (!s->saved_cmdlines) {
1381                 kfree(s->map_cmdline_to_pid);
1382                 return -ENOMEM;
1383         }
1384
1385         s->cmdline_idx = 0;
1386         s->cmdline_num = val;
1387         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1388                sizeof(s->map_pid_to_cmdline));
1389         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1390                val * sizeof(*s->map_cmdline_to_pid));
1391
1392         return 0;
1393 }
1394
1395 static int trace_create_savedcmd(void)
1396 {
1397         int ret;
1398
1399         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400         if (!savedcmd)
1401                 return -ENOMEM;
1402
1403         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1404         if (ret < 0) {
1405                 kfree(savedcmd);
1406                 savedcmd = NULL;
1407                 return -ENOMEM;
1408         }
1409
1410         return 0;
1411 }
1412
1413 int is_tracing_stopped(void)
1414 {
1415         return global_trace.stop_count;
1416 }
1417
1418 /**
1419  * tracing_start - quick start of the tracer
1420  *
1421  * If tracing is enabled but was stopped by tracing_stop,
1422  * this will start the tracer back up.
1423  */
1424 void tracing_start(void)
1425 {
1426         struct ring_buffer *buffer;
1427         unsigned long flags;
1428
1429         if (tracing_disabled)
1430                 return;
1431
1432         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1433         if (--global_trace.stop_count) {
1434                 if (global_trace.stop_count < 0) {
1435                         /* Someone screwed up their debugging */
1436                         WARN_ON_ONCE(1);
1437                         global_trace.stop_count = 0;
1438                 }
1439                 goto out;
1440         }
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_enable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_enable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_start_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         if (tracing_disabled)
1467                 return;
1468
1469         /* If global, we need to also start the max tracer */
1470         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1471                 return tracing_start();
1472
1473         raw_spin_lock_irqsave(&tr->start_lock, flags);
1474
1475         if (--tr->stop_count) {
1476                 if (tr->stop_count < 0) {
1477                         /* Someone screwed up their debugging */
1478                         WARN_ON_ONCE(1);
1479                         tr->stop_count = 0;
1480                 }
1481                 goto out;
1482         }
1483
1484         buffer = tr->trace_buffer.buffer;
1485         if (buffer)
1486                 ring_buffer_record_enable(buffer);
1487
1488  out:
1489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 }
1491
1492 /**
1493  * tracing_stop - quick stop of the tracer
1494  *
1495  * Light weight way to stop tracing. Use in conjunction with
1496  * tracing_start.
1497  */
1498 void tracing_stop(void)
1499 {
1500         struct ring_buffer *buffer;
1501         unsigned long flags;
1502
1503         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1504         if (global_trace.stop_count++)
1505                 goto out;
1506
1507         /* Prevent the buffers from switching */
1508         arch_spin_lock(&global_trace.max_lock);
1509
1510         buffer = global_trace.trace_buffer.buffer;
1511         if (buffer)
1512                 ring_buffer_record_disable(buffer);
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515         buffer = global_trace.max_buffer.buffer;
1516         if (buffer)
1517                 ring_buffer_record_disable(buffer);
1518 #endif
1519
1520         arch_spin_unlock(&global_trace.max_lock);
1521
1522  out:
1523         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1524 }
1525
1526 static void tracing_stop_tr(struct trace_array *tr)
1527 {
1528         struct ring_buffer *buffer;
1529         unsigned long flags;
1530
1531         /* If global, we need to also stop the max tracer */
1532         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1533                 return tracing_stop();
1534
1535         raw_spin_lock_irqsave(&tr->start_lock, flags);
1536         if (tr->stop_count++)
1537                 goto out;
1538
1539         buffer = tr->trace_buffer.buffer;
1540         if (buffer)
1541                 ring_buffer_record_disable(buffer);
1542
1543  out:
1544         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 }
1546
1547 void trace_stop_cmdline_recording(void);
1548
1549 static int trace_save_cmdline(struct task_struct *tsk)
1550 {
1551         unsigned pid, idx;
1552
1553         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554                 return 0;
1555
1556         /*
1557          * It's not the end of the world if we don't get
1558          * the lock, but we also don't want to spin
1559          * nor do we want to disable interrupts,
1560          * so if we miss here, then better luck next time.
1561          */
1562         if (!arch_spin_trylock(&trace_cmdline_lock))
1563                 return 0;
1564
1565         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566         if (idx == NO_CMDLINE_MAP) {
1567                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568
1569                 /*
1570                  * Check whether the cmdline buffer at idx has a pid
1571                  * mapped. We are going to overwrite that entry so we
1572                  * need to clear the map_pid_to_cmdline. Otherwise we
1573                  * would read the new comm for the old pid.
1574                  */
1575                 pid = savedcmd->map_cmdline_to_pid[idx];
1576                 if (pid != NO_CMDLINE_MAP)
1577                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578
1579                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1580                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581
1582                 savedcmd->cmdline_idx = idx;
1583         }
1584
1585         set_cmdline(idx, tsk->comm);
1586
1587         arch_spin_unlock(&trace_cmdline_lock);
1588
1589         return 1;
1590 }
1591
1592 static void __trace_find_cmdline(int pid, char comm[])
1593 {
1594         unsigned map;
1595
1596         if (!pid) {
1597                 strcpy(comm, "<idle>");
1598                 return;
1599         }
1600
1601         if (WARN_ON_ONCE(pid < 0)) {
1602                 strcpy(comm, "<XXX>");
1603                 return;
1604         }
1605
1606         if (pid > PID_MAX_DEFAULT) {
1607                 strcpy(comm, "<...>");
1608                 return;
1609         }
1610
1611         map = savedcmd->map_pid_to_cmdline[pid];
1612         if (map != NO_CMDLINE_MAP)
1613                 strcpy(comm, get_saved_cmdlines(map));
1614         else
1615                 strcpy(comm, "<...>");
1616 }
1617
1618 void trace_find_cmdline(int pid, char comm[])
1619 {
1620         preempt_disable();
1621         arch_spin_lock(&trace_cmdline_lock);
1622
1623         __trace_find_cmdline(pid, comm);
1624
1625         arch_spin_unlock(&trace_cmdline_lock);
1626         preempt_enable();
1627 }
1628
1629 void tracing_record_cmdline(struct task_struct *tsk)
1630 {
1631         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632                 return;
1633
1634         if (!__this_cpu_read(trace_cmdline_save))
1635                 return;
1636
1637         if (trace_save_cmdline(tsk))
1638                 __this_cpu_write(trace_cmdline_save, false);
1639 }
1640
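/*
 * tracing_generic_entry_update - fill in the common fields of an entry
 *
 * Records the current task's pid, the preempt count, and a set of context
 * flags (irqs off, hard/soft irq, NMI, need-resched) that the output code
 * later uses for the latency-format annotations.
 */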
1641 void
1642 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1643                              int pc)
1644 {
1645         struct task_struct *tsk = current;
1646
1647         entry->preempt_count            = pc & 0xff;
1648         entry->pid                      = (tsk) ? tsk->pid : 0;
1649         entry->flags =
1650 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 #else
1653                 TRACE_FLAG_IRQS_NOSUPPORT |
1654 #endif
1655                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 struct ring_buffer_event *
1664 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1665                           int type,
1666                           unsigned long len,
1667                           unsigned long flags, int pc)
1668 {
1669         struct ring_buffer_event *event;
1670
1671         event = ring_buffer_lock_reserve(buffer, len);
1672         if (event != NULL) {
1673                 struct trace_entry *ent = ring_buffer_event_data(event);
1674
1675                 tracing_generic_entry_update(ent, flags, pc);
1676                 ent->type = type;
1677         }
1678
1679         return event;
1680 }
1681
1682 void
1683 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1684 {
1685         __this_cpu_write(trace_cmdline_save, true);
1686         ring_buffer_unlock_commit(buffer, event);
1687 }
1688
1689 static struct ring_buffer *temp_buffer;
1690
1691 struct ring_buffer_event *
1692 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1693                           struct trace_event_file *trace_file,
1694                           int type, unsigned long len,
1695                           unsigned long flags, int pc)
1696 {
1697         struct ring_buffer_event *entry;
1698
1699         *current_rb = trace_file->tr->trace_buffer.buffer;
1700         entry = trace_buffer_lock_reserve(*current_rb,
1701                                          type, len, flags, pc);
1702         /*
1703          * If tracing is off, but we have triggers enabled
1704          * we still need to look at the event data. Use the temp_buffer
1705          * to store the trace event for the trigger to use. It's recursion
1706          * safe and will not be recorded anywhere.
1707          */
1708         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1709                 *current_rb = temp_buffer;
1710                 entry = trace_buffer_lock_reserve(*current_rb,
1711                                                   type, len, flags, pc);
1712         }
1713         return entry;
1714 }
1715 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1716
1717 struct ring_buffer_event *
1718 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1719                                   int type, unsigned long len,
1720                                   unsigned long flags, int pc)
1721 {
1722         *current_rb = global_trace.trace_buffer.buffer;
1723         return trace_buffer_lock_reserve(*current_rb,
1724                                          type, len, flags, pc);
1725 }
1726
1727 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1728                                      struct ring_buffer *buffer,
1729                                      struct ring_buffer_event *event,
1730                                      unsigned long flags, int pc,
1731                                      struct pt_regs *regs)
1732 {
1733         __buffer_unlock_commit(buffer, event);
1734
1735         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1736         ftrace_trace_userstack(buffer, flags, pc);
1737 }
1738
1739 void
1740 trace_function(struct trace_array *tr,
1741                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1742                int pc)
1743 {
1744         struct trace_event_call *call = &event_function;
1745         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1746         struct ring_buffer_event *event;
1747         struct ftrace_entry *entry;
1748
1749         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1750                                           flags, pc);
1751         if (!event)
1752                 return;
1753         entry   = ring_buffer_event_data(event);
1754         entry->ip                       = ip;
1755         entry->parent_ip                = parent_ip;
1756
1757         if (!call_filter_check_discard(call, entry, buffer, event))
1758                 __buffer_unlock_commit(buffer, event);
1759 }
1760
1761 #ifdef CONFIG_STACKTRACE
1762
1763 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1764 struct ftrace_stack {
1765         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1766 };
1767
1768 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1769 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1770
1771 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1772                                  unsigned long flags,
1773                                  int skip, int pc, struct pt_regs *regs)
1774 {
1775         struct trace_event_call *call = &event_kernel_stack;
1776         struct ring_buffer_event *event;
1777         struct stack_entry *entry;
1778         struct stack_trace trace;
1779         int use_stack;
1780         int size = FTRACE_STACK_ENTRIES;
1781
1782         trace.nr_entries        = 0;
1783         trace.skip              = skip;
1784
1785         /*
1786          * Since events can happen in NMIs there's no safe way to
1787          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1788          * or NMI comes in while we hold it, it will just have to use the
1789          * smaller default of FTRACE_STACK_ENTRIES stored in the event itself.
1790          */
1791         preempt_disable_notrace();
1792
1793         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1794         /*
1795          * We don't need any atomic variables, just a barrier.
1796          * If an interrupt comes in, we don't care, because it would
1797          * have exited and put the counter back to what we want.
1798          * We just need a barrier to keep gcc from moving things
1799          * around.
1800          */
1801         barrier();
1802         if (use_stack == 1) {
1803                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1804                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1805
1806                 if (regs)
1807                         save_stack_trace_regs(regs, &trace);
1808                 else
1809                         save_stack_trace(&trace);
1810
1811                 if (trace.nr_entries > size)
1812                         size = trace.nr_entries;
1813         } else
1814                 /* From now on, use_stack is a boolean */
1815                 use_stack = 0;
1816
1817         size *= sizeof(unsigned long);
1818
1819         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1820                                           sizeof(*entry) + size, flags, pc);
1821         if (!event)
1822                 goto out;
1823         entry = ring_buffer_event_data(event);
1824
1825         memset(&entry->caller, 0, size);
1826
1827         if (use_stack)
1828                 memcpy(&entry->caller, trace.entries,
1829                        trace.nr_entries * sizeof(unsigned long));
1830         else {
1831                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1832                 trace.entries           = entry->caller;
1833                 if (regs)
1834                         save_stack_trace_regs(regs, &trace);
1835                 else
1836                         save_stack_trace(&trace);
1837         }
1838
1839         entry->size = trace.nr_entries;
1840
1841         if (!call_filter_check_discard(call, entry, buffer, event))
1842                 __buffer_unlock_commit(buffer, event);
1843
1844  out:
1845         /* Again, don't let gcc optimize things here */
1846         barrier();
1847         __this_cpu_dec(ftrace_stack_reserve);
1848         preempt_enable_notrace();
1849
1850 }
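
/*
 * A stripped-down sketch of the reservation trick used above, with made-up
 * names: the first (outermost) user on this CPU gets the large static
 * per-cpu scratch area, while a nested interrupt or NMI that lands inside
 * the window sees a count greater than one and falls back to a smaller
 * caller-supplied area.
 */
#if 0
struct my_scratch {
        unsigned long   data[1024];
};

static DEFINE_PER_CPU(struct my_scratch, my_scratch);
static DEFINE_PER_CPU(int, my_scratch_reserve);

static void my_capture(unsigned long *small_buf, int small_max)
{
        unsigned long *buf = small_buf;
        int max = small_max;

        preempt_disable_notrace();
        if (__this_cpu_inc_return(my_scratch_reserve) == 1) {
                /* Outermost user on this CPU: the big scratch area is free */
                buf = this_cpu_ptr(my_scratch.data);
                max = 1024;
        }
        barrier();      /* keep the compiler from moving the fill around */

        /* ... fill buf[0..max) ... */

        barrier();
        __this_cpu_dec(my_scratch_reserve);
        preempt_enable_notrace();
}
#endif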
1851
1852 static inline void ftrace_trace_stack(struct trace_array *tr,
1853                                       struct ring_buffer *buffer,
1854                                       unsigned long flags,
1855                                       int skip, int pc, struct pt_regs *regs)
1856 {
1857         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1858                 return;
1859
1860         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1861 }
1862
1863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1864                    int pc)
1865 {
1866         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1867 }
1868
1869 /**
1870  * trace_dump_stack - record a stack back trace in the trace buffer
1871  * @skip: Number of functions to skip (helper handlers)
1872  */
1873 void trace_dump_stack(int skip)
1874 {
1875         unsigned long flags;
1876
1877         if (tracing_disabled || tracing_selftest_running)
1878                 return;
1879
1880         local_save_flags(flags);
1881
1882         /*
1883          * Skip 3 more; that seems to get us to the caller of
1884          * this function.
1885          */
1886         skip += 3;
1887         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1888                              flags, skip, preempt_count(), NULL);
1889 }
1890
1891 static DEFINE_PER_CPU(int, user_stack_count);
1892
1893 void
1894 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1895 {
1896         struct trace_event_call *call = &event_user_stack;
1897         struct ring_buffer_event *event;
1898         struct userstack_entry *entry;
1899         struct stack_trace trace;
1900
1901         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1902                 return;
1903
1904         /*
1905          * NMIs can not handle page faults, even with fixups.
1906          * Saving the user stack can (and often does) fault.
1907          */
1908         if (unlikely(in_nmi()))
1909                 return;
1910
1911         /*
1912          * prevent recursion, since the user stack tracing may
1913          * trigger other kernel events.
1914          */
1915         preempt_disable();
1916         if (__this_cpu_read(user_stack_count))
1917                 goto out;
1918
1919         __this_cpu_inc(user_stack_count);
1920
1921         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1922                                           sizeof(*entry), flags, pc);
1923         if (!event)
1924                 goto out_drop_count;
1925         entry   = ring_buffer_event_data(event);
1926
1927         entry->tgid             = current->tgid;
1928         memset(&entry->caller, 0, sizeof(entry->caller));
1929
1930         trace.nr_entries        = 0;
1931         trace.max_entries       = FTRACE_STACK_ENTRIES;
1932         trace.skip              = 0;
1933         trace.entries           = entry->caller;
1934
1935         save_stack_trace_user(&trace);
1936         if (!call_filter_check_discard(call, entry, buffer, event))
1937                 __buffer_unlock_commit(buffer, event);
1938
1939  out_drop_count:
1940         __this_cpu_dec(user_stack_count);
1941  out:
1942         preempt_enable();
1943 }
1944
1945 #ifdef UNUSED
1946 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1947 {
1948         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1949 }
1950 #endif /* UNUSED */
1951
1952 #endif /* CONFIG_STACKTRACE */
1953
1954 /* created for use with alloc_percpu */
1955 struct trace_buffer_struct {
1956         char buffer[TRACE_BUF_SIZE];
1957 };
1958
1959 static struct trace_buffer_struct *trace_percpu_buffer;
1960 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1961 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1962 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1963
1964 /*
1965  * The buffer used is dependent on the context. There is a per cpu
1966  * buffer for normal context, softirq context, hard irq context and
1967  * for NMI context. This allows for lockless recording.
1968  *
1969  * Note, if the buffers failed to be allocated, then this returns NULL.
1970  */
1971 static char *get_trace_buf(void)
1972 {
1973         struct trace_buffer_struct *percpu_buffer;
1974
1975         /*
1976          * If we have allocated per cpu buffers, then we do not
1977          * need to do any locking.
1978          */
1979         if (in_nmi())
1980                 percpu_buffer = trace_percpu_nmi_buffer;
1981         else if (in_irq())
1982                 percpu_buffer = trace_percpu_irq_buffer;
1983         else if (in_softirq())
1984                 percpu_buffer = trace_percpu_sirq_buffer;
1985         else
1986                 percpu_buffer = trace_percpu_buffer;
1987
1988         if (!percpu_buffer)
1989                 return NULL;
1990
1991         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1992 }
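
/*
 * A condensed view of how this is used (trace_vbprintk() below does the
 * real thing): because there is one buffer per context on each CPU, a
 * caller only has to keep preemption disabled while it owns the buffer;
 * no lock is needed. The function name here is made up.
 */
#if 0
static void my_scratch_vprintf(const char *fmt, va_list args)
{
        char *buf;

        preempt_disable_notrace();
        buf = get_trace_buf();
        if (buf) {
                vsnprintf(buf, TRACE_BUF_SIZE, fmt, args);
                /* ... copy buf into the ring buffer before enabling preemption ... */
        }
        preempt_enable_notrace();
}
#endif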
1993
1994 static int alloc_percpu_trace_buffer(void)
1995 {
1996         struct trace_buffer_struct *buffers;
1997         struct trace_buffer_struct *sirq_buffers;
1998         struct trace_buffer_struct *irq_buffers;
1999         struct trace_buffer_struct *nmi_buffers;
2000
2001         buffers = alloc_percpu(struct trace_buffer_struct);
2002         if (!buffers)
2003                 goto err_warn;
2004
2005         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2006         if (!sirq_buffers)
2007                 goto err_sirq;
2008
2009         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2010         if (!irq_buffers)
2011                 goto err_irq;
2012
2013         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2014         if (!nmi_buffers)
2015                 goto err_nmi;
2016
2017         trace_percpu_buffer = buffers;
2018         trace_percpu_sirq_buffer = sirq_buffers;
2019         trace_percpu_irq_buffer = irq_buffers;
2020         trace_percpu_nmi_buffer = nmi_buffers;
2021
2022         return 0;
2023
2024  err_nmi:
2025         free_percpu(irq_buffers);
2026  err_irq:
2027         free_percpu(sirq_buffers);
2028  err_sirq:
2029         free_percpu(buffers);
2030  err_warn:
2031         WARN(1, "Could not allocate percpu trace_printk buffer");
2032         return -ENOMEM;
2033 }
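
/*
 * The error path above is the usual kernel "unwind in reverse order" idiom:
 * each allocation gets a label, and a failure jumps to the label that frees
 * everything allocated so far. Reduced to two steps with made-up names:
 */
#if 0
static int my_alloc_pair(void **a, void **b)
{
        *a = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!*a)
                goto err_a;

        *b = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!*b)
                goto err_b;

        return 0;

 err_b:
        kfree(*a);
 err_a:
        return -ENOMEM;
}
#endif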
2034
2035 static int buffers_allocated;
2036
2037 void trace_printk_init_buffers(void)
2038 {
2039         if (buffers_allocated)
2040                 return;
2041
2042         if (alloc_percpu_trace_buffer())
2043                 return;
2044
2045         /* trace_printk() is for debug use only. Don't use it in production. */
2046
2047         pr_warn("\n");
2048         pr_warn("**********************************************************\n");
2049         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2050         pr_warn("**                                                      **\n");
2051         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2052         pr_warn("**                                                      **\n");
2053         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2054         pr_warn("** unsafe for production use.                           **\n");
2055         pr_warn("**                                                      **\n");
2056         pr_warn("** If you see this message and you are not debugging    **\n");
2057         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2058         pr_warn("**                                                      **\n");
2059         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2060         pr_warn("**********************************************************\n");
2061
2062         /* Expand the buffers to their configured size */
2063         tracing_update_buffers();
2064
2065         buffers_allocated = 1;
2066
2067         /*
2068          * trace_printk_init_buffers() can be called by modules.
2069          * If that happens, then we need to start cmdline recording
2070          * directly here. If the global_trace.buffer is already
2071          * allocated here, then this was called by module code.
2072          */
2073         if (global_trace.trace_buffer.buffer)
2074                 tracing_start_cmdline_record();
2075 }
2076
2077 void trace_printk_start_comm(void)
2078 {
2079         /* Start tracing comms if trace printk is set */
2080         if (!buffers_allocated)
2081                 return;
2082         tracing_start_cmdline_record();
2083 }
2084
2085 static void trace_printk_start_stop_comm(int enabled)
2086 {
2087         if (!buffers_allocated)
2088                 return;
2089
2090         if (enabled)
2091                 tracing_start_cmdline_record();
2092         else
2093                 tracing_stop_cmdline_record();
2094 }
2095
2096 /**
2097  * trace_vbprintk - write binary msg to tracing buffer
2098  * @ip: caller address; @fmt, @args: the format string and its arguments
2099  */
2100 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2101 {
2102         struct trace_event_call *call = &event_bprint;
2103         struct ring_buffer_event *event;
2104         struct ring_buffer *buffer;
2105         struct trace_array *tr = &global_trace;
2106         struct bprint_entry *entry;
2107         unsigned long flags;
2108         char *tbuffer;
2109         int len = 0, size, pc;
2110
2111         if (unlikely(tracing_selftest_running || tracing_disabled))
2112                 return 0;
2113
2114         /* Don't pollute graph traces with trace_vprintk internals */
2115         pause_graph_tracing();
2116
2117         pc = preempt_count();
2118         preempt_disable_notrace();
2119
2120         tbuffer = get_trace_buf();
2121         if (!tbuffer) {
2122                 len = 0;
2123                 goto out;
2124         }
2125
2126         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2127
2128         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2129                 goto out;
2130
2131         local_save_flags(flags);
2132         size = sizeof(*entry) + sizeof(u32) * len;
2133         buffer = tr->trace_buffer.buffer;
2134         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2135                                           flags, pc);
2136         if (!event)
2137                 goto out;
2138         entry = ring_buffer_event_data(event);
2139         entry->ip                       = ip;
2140         entry->fmt                      = fmt;
2141
2142         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2143         if (!call_filter_check_discard(call, entry, buffer, event)) {
2144                 __buffer_unlock_commit(buffer, event);
2145                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2146         }
2147
2148 out:
2149         preempt_enable_notrace();
2150         unpause_graph_tracing();
2151
2152         return len;
2153 }
2154 EXPORT_SYMBOL_GPL(trace_vbprintk);
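
/*
 * trace_vbprintk() is what the trace_printk() macro typically ends up
 * calling for constant format strings: only the format pointer and the
 * binary-encoded arguments are stored here, and the text is rendered when
 * the buffer is read. A typical debug-only call site (made-up function)
 * looks like:
 */
#if 0
static void my_driver_poll(int irq, unsigned long status)
{
        trace_printk("irq %d status %#lx\n", irq, status);
}
#endif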
2155
2156 static int
2157 __trace_array_vprintk(struct ring_buffer *buffer,
2158                       unsigned long ip, const char *fmt, va_list args)
2159 {
2160         struct trace_event_call *call = &event_print;
2161         struct ring_buffer_event *event;
2162         int len = 0, size, pc;
2163         struct print_entry *entry;
2164         unsigned long flags;
2165         char *tbuffer;
2166
2167         if (tracing_disabled || tracing_selftest_running)
2168                 return 0;
2169
2170         /* Don't pollute graph traces with trace_vprintk internals */
2171         pause_graph_tracing();
2172
2173         pc = preempt_count();
2174         preempt_disable_notrace();
2175
2176
2177         tbuffer = get_trace_buf();
2178         if (!tbuffer) {
2179                 len = 0;
2180                 goto out;
2181         }
2182
2183         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2184
2185         local_save_flags(flags);
2186         size = sizeof(*entry) + len + 1;
2187         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2188                                           flags, pc);
2189         if (!event)
2190                 goto out;
2191         entry = ring_buffer_event_data(event);
2192         entry->ip = ip;
2193
2194         memcpy(&entry->buf, tbuffer, len + 1);
2195         if (!call_filter_check_discard(call, entry, buffer, event)) {
2196                 __buffer_unlock_commit(buffer, event);
2197                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2198         }
2199  out:
2200         preempt_enable_notrace();
2201         unpause_graph_tracing();
2202
2203         return len;
2204 }
2205
2206 int trace_array_vprintk(struct trace_array *tr,
2207                         unsigned long ip, const char *fmt, va_list args)
2208 {
2209         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2210 }
2211
2212 int trace_array_printk(struct trace_array *tr,
2213                        unsigned long ip, const char *fmt, ...)
2214 {
2215         int ret;
2216         va_list ap;
2217
2218         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2219                 return 0;
2220
2221         va_start(ap, fmt);
2222         ret = trace_array_vprintk(tr, ip, fmt, ap);
2223         va_end(ap);
2224         return ret;
2225 }
2226
2227 int trace_array_printk_buf(struct ring_buffer *buffer,
2228                            unsigned long ip, const char *fmt, ...)
2229 {
2230         int ret;
2231         va_list ap;
2232
2233         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2234                 return 0;
2235
2236         va_start(ap, fmt);
2237         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2238         va_end(ap);
2239         return ret;
2240 }
2241
2242 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2243 {
2244         return trace_array_vprintk(&global_trace, ip, fmt, args);
2245 }
2246 EXPORT_SYMBOL_GPL(trace_vprintk);
2247
2248 static void trace_iterator_increment(struct trace_iterator *iter)
2249 {
2250         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2251
2252         iter->idx++;
2253         if (buf_iter)
2254                 ring_buffer_read(buf_iter, NULL);
2255 }
2256
2257 static struct trace_entry *
2258 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2259                 unsigned long *lost_events)
2260 {
2261         struct ring_buffer_event *event;
2262         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2263
2264         if (buf_iter)
2265                 event = ring_buffer_iter_peek(buf_iter, ts);
2266         else
2267                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2268                                          lost_events);
2269
2270         if (event) {
2271                 iter->ent_size = ring_buffer_event_length(event);
2272                 return ring_buffer_event_data(event);
2273         }
2274         iter->ent_size = 0;
2275         return NULL;
2276 }
2277
2278 static struct trace_entry *
2279 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2280                   unsigned long *missing_events, u64 *ent_ts)
2281 {
2282         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2283         struct trace_entry *ent, *next = NULL;
2284         unsigned long lost_events = 0, next_lost = 0;
2285         int cpu_file = iter->cpu_file;
2286         u64 next_ts = 0, ts;
2287         int next_cpu = -1;
2288         int next_size = 0;
2289         int cpu;
2290
2291         /*
2292          * If we are in a per_cpu trace file, don't bother iterating over
2293          * all CPUs; just peek at that one directly.
2294          */
2295         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2296                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2297                         return NULL;
2298                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2299                 if (ent_cpu)
2300                         *ent_cpu = cpu_file;
2301
2302                 return ent;
2303         }
2304
2305         for_each_tracing_cpu(cpu) {
2306
2307                 if (ring_buffer_empty_cpu(buffer, cpu))
2308                         continue;
2309
2310                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2311
2312                 /*
2313                  * Pick the entry with the smallest timestamp:
2314                  */
2315                 if (ent && (!next || ts < next_ts)) {
2316                         next = ent;
2317                         next_cpu = cpu;
2318                         next_ts = ts;
2319                         next_lost = lost_events;
2320                         next_size = iter->ent_size;
2321                 }
2322         }
2323
2324         iter->ent_size = next_size;
2325
2326         if (ent_cpu)
2327                 *ent_cpu = next_cpu;
2328
2329         if (ent_ts)
2330                 *ent_ts = next_ts;
2331
2332         if (missing_events)
2333                 *missing_events = next_lost;
2334
2335         return next;
2336 }
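
/*
 * The loop above is a k-way merge by timestamp: peek at the head of every
 * per-CPU stream and hand back the oldest entry. Reduced to its core with
 * plain arrays and made-up types, the selection step looks like this:
 */
#if 0
struct my_stream {
        u64     *ts;            /* timestamps, oldest first */
        int     head, len;
};

static int my_pick_oldest(struct my_stream *streams, int nr_streams)
{
        u64 next_ts = 0;
        int next = -1;
        int i;

        for (i = 0; i < nr_streams; i++) {
                struct my_stream *s = &streams[i];

                if (s->head >= s->len)
                        continue;       /* this stream is empty */
                if (next < 0 || s->ts[s->head] < next_ts) {
                        next = i;
                        next_ts = s->ts[s->head];
                }
        }
        return next;    /* stream holding the oldest entry, or -1 if all empty */
}
#endif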
2337
2338 /* Find the next real entry, without updating the iterator itself */
2339 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2340                                           int *ent_cpu, u64 *ent_ts)
2341 {
2342         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2343 }
2344
2345 /* Find the next real entry, and increment the iterator to the next entry */
2346 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2347 {
2348         iter->ent = __find_next_entry(iter, &iter->cpu,
2349                                       &iter->lost_events, &iter->ts);
2350
2351         if (iter->ent)
2352                 trace_iterator_increment(iter);
2353
2354         return iter->ent ? iter : NULL;
2355 }
2356
2357 static void trace_consume(struct trace_iterator *iter)
2358 {
2359         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2360                             &iter->lost_events);
2361 }
2362
2363 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2364 {
2365         struct trace_iterator *iter = m->private;
2366         int i = (int)*pos;
2367         void *ent;
2368
2369         WARN_ON_ONCE(iter->leftover);
2370
2371         (*pos)++;
2372
2373         /* can't go backwards */
2374         if (iter->idx > i)
2375                 return NULL;
2376
2377         if (iter->idx < 0)
2378                 ent = trace_find_next_entry_inc(iter);
2379         else
2380                 ent = iter;
2381
2382         while (ent && iter->idx < i)
2383                 ent = trace_find_next_entry_inc(iter);
2384
2385         iter->pos = *pos;
2386
2387         return ent;
2388 }
2389
2390 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2391 {
2392         struct ring_buffer_event *event;
2393         struct ring_buffer_iter *buf_iter;
2394         unsigned long entries = 0;
2395         u64 ts;
2396
2397         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2398
2399         buf_iter = trace_buffer_iter(iter, cpu);
2400         if (!buf_iter)
2401                 return;
2402
2403         ring_buffer_iter_reset(buf_iter);
2404
2405         /*
2406          * With the max latency tracers, it is possible that a reset
2407          * never took place on a cpu. This is evident when the
2408          * timestamp is before the start of the buffer.
2409          */
2410         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2411                 if (ts >= iter->trace_buffer->time_start)
2412                         break;
2413                 entries++;
2414                 ring_buffer_read(buf_iter, NULL);
2415         }
2416
2417         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2418 }
2419
2420 /*
2421  * The current tracer is copied to avoid using a global lock
2422  * all around.
2423  */
2424 static void *s_start(struct seq_file *m, loff_t *pos)
2425 {
2426         struct trace_iterator *iter = m->private;
2427         struct trace_array *tr = iter->tr;
2428         int cpu_file = iter->cpu_file;
2429         void *p = NULL;
2430         loff_t l = 0;
2431         int cpu;
2432
2433         /*
2434          * Copy the tracer to avoid using a global lock all around.
2435          * iter->trace is a copy of current_trace, so the name pointer
2436          * may be compared instead of using strcmp(), as iter->trace->name
2437          * will point to the same string as current_trace->name.
2438          */
2439         mutex_lock(&trace_types_lock);
2440         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2441                 *iter->trace = *tr->current_trace;
2442         mutex_unlock(&trace_types_lock);
2443
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445         if (iter->snapshot && iter->trace->use_max_tr)
2446                 return ERR_PTR(-EBUSY);
2447 #endif
2448
2449         if (!iter->snapshot)
2450                 atomic_inc(&trace_record_cmdline_disabled);
2451
2452         if (*pos != iter->pos) {
2453                 iter->ent = NULL;
2454                 iter->cpu = 0;
2455                 iter->idx = -1;
2456
2457                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2458                         for_each_tracing_cpu(cpu)
2459                                 tracing_iter_reset(iter, cpu);
2460                 } else
2461                         tracing_iter_reset(iter, cpu_file);
2462
2463                 iter->leftover = 0;
2464                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2465                         ;
2466
2467         } else {
2468                 /*
2469                  * If we overflowed the seq_file before, then we want
2470                  * to just reuse the trace_seq buffer again.
2471                  */
2472                 if (iter->leftover)
2473                         p = iter;
2474                 else {
2475                         l = *pos - 1;
2476                         p = s_next(m, p, &l);
2477                 }
2478         }
2479
2480         trace_event_read_lock();
2481         trace_access_lock(cpu_file);
2482         return p;
2483 }
2484
2485 static void s_stop(struct seq_file *m, void *p)
2486 {
2487         struct trace_iterator *iter = m->private;
2488
2489 #ifdef CONFIG_TRACER_MAX_TRACE
2490         if (iter->snapshot && iter->trace->use_max_tr)
2491                 return;
2492 #endif
2493
2494         if (!iter->snapshot)
2495                 atomic_dec(&trace_record_cmdline_disabled);
2496
2497         trace_access_unlock(iter->cpu_file);
2498         trace_event_read_unlock();
2499 }
2500
2501 static void
2502 get_total_entries(struct trace_buffer *buf,
2503                   unsigned long *total, unsigned long *entries)
2504 {
2505         unsigned long count;
2506         int cpu;
2507
2508         *total = 0;
2509         *entries = 0;
2510
2511         for_each_tracing_cpu(cpu) {
2512                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2513                 /*
2514                  * If this buffer has skipped entries, then we hold all
2515                  * entries for the trace and we need to ignore the
2516                  * ones before the time stamp.
2517                  */
2518                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2519                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2520                         /* total is the same as the entries */
2521                         *total += count;
2522                 } else
2523                         *total += count +
2524                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2525                 *entries += count;
2526         }
2527 }
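
/*
 * In other words: "entries" is what can still be read out of the buffer,
 * while "total" also counts events that were overwritten (the per-CPU
 * overrun). On CPUs where the iterator skipped old entries, the skipped
 * ones are subtracted and no overrun is added, so total equals entries
 * there. For example, 900 readable events plus 100 overwritten ones report
 * as entries=900, total=1000.
 */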
2528
2529 static void print_lat_help_header(struct seq_file *m)
2530 {
2531         seq_puts(m, "#                  _------=> CPU#            \n"
2532                     "#                 / _-----=> irqs-off        \n"
2533                     "#                | / _----=> need-resched    \n"
2534                     "#                || / _---=> hardirq/softirq \n"
2535                     "#                ||| / _--=> preempt-depth   \n"
2536                     "#                |||| /     delay            \n"
2537                     "#  cmd     pid   ||||| time  |   caller      \n"
2538                     "#     \\   /      |||||  \\    |   /         \n");
2539 }
2540
2541 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2542 {
2543         unsigned long total;
2544         unsigned long entries;
2545
2546         get_total_entries(buf, &total, &entries);
2547         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2548                    entries, total, num_online_cpus());
2549         seq_puts(m, "#\n");
2550 }
2551
2552 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2553 {
2554         print_event_info(buf, m);
2555         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2556                     "#              | |       |          |         |\n");
2557 }
2558
2559 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2560 {
2561         print_event_info(buf, m);
2562         seq_puts(m, "#                              _-----=> irqs-off\n"
2563                     "#                             / _----=> need-resched\n"
2564                     "#                            | / _---=> hardirq/softirq\n"
2565                     "#                            || / _--=> preempt-depth\n"
2566                     "#                            ||| /     delay\n"
2567                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2568                     "#              | |       |   ||||       |         |\n");
2569 }
2570
2571 void
2572 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2573 {
2574         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2575         struct trace_buffer *buf = iter->trace_buffer;
2576         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2577         struct tracer *type = iter->trace;
2578         unsigned long entries;
2579         unsigned long total;
2580         const char *name = "preemption";
2581
2582         name = type->name;
2583
2584         get_total_entries(buf, &total, &entries);
2585
2586         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2587                    name, UTS_RELEASE);
2588         seq_puts(m, "# -----------------------------------"
2589                  "---------------------------------\n");
2590         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2591                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2592                    nsecs_to_usecs(data->saved_latency),
2593                    entries,
2594                    total,
2595                    buf->cpu,
2596 #if defined(CONFIG_PREEMPT_NONE)
2597                    "server",
2598 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2599                    "desktop",
2600 #elif defined(CONFIG_PREEMPT)
2601                    "preempt",
2602 #else
2603                    "unknown",
2604 #endif
2605                    /* These are reserved for later use */
2606                    0, 0, 0, 0);
2607 #ifdef CONFIG_SMP
2608         seq_printf(m, " #P:%d)\n", num_online_cpus());
2609 #else
2610         seq_puts(m, ")\n");
2611 #endif
2612         seq_puts(m, "#    -----------------\n");
2613         seq_printf(m, "#    | task: %.16s-%d "
2614                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2615                    data->comm, data->pid,
2616                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2617                    data->policy, data->rt_priority);
2618         seq_puts(m, "#    -----------------\n");
2619
2620         if (data->critical_start) {
2621                 seq_puts(m, "#  => started at: ");
2622                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2623                 trace_print_seq(m, &iter->seq);
2624                 seq_puts(m, "\n#  => ended at:   ");
2625                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2626                 trace_print_seq(m, &iter->seq);
2627                 seq_puts(m, "\n#\n");
2628         }
2629
2630         seq_puts(m, "#\n");
2631 }
2632
2633 static void test_cpu_buff_start(struct trace_iterator *iter)
2634 {
2635         struct trace_seq *s = &iter->seq;
2636         struct trace_array *tr = iter->tr;
2637
2638         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2639                 return;
2640
2641         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2642                 return;
2643
2644         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2645                 return;
2646
2647         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2648                 return;
2649
2650         if (iter->started)
2651                 cpumask_set_cpu(iter->cpu, iter->started);
2652
2653         /* Don't print started cpu buffer for the first entry of the trace */
2654         if (iter->idx > 1)
2655                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2656                                 iter->cpu);
2657 }
2658
2659 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2660 {
2661         struct trace_array *tr = iter->tr;
2662         struct trace_seq *s = &iter->seq;
2663         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2664         struct trace_entry *entry;
2665         struct trace_event *event;
2666
2667         entry = iter->ent;
2668
2669         test_cpu_buff_start(iter);
2670
2671         event = ftrace_find_event(entry->type);
2672
2673         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2674                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2675                         trace_print_lat_context(iter);
2676                 else
2677                         trace_print_context(iter);
2678         }
2679
2680         if (trace_seq_has_overflowed(s))
2681                 return TRACE_TYPE_PARTIAL_LINE;
2682
2683         if (event)
2684                 return event->funcs->trace(iter, sym_flags, event);
2685
2686         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2687
2688         return trace_handle_return(s);
2689 }
2690
2691 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2692 {
2693         struct trace_array *tr = iter->tr;
2694         struct trace_seq *s = &iter->seq;
2695         struct trace_entry *entry;
2696         struct trace_event *event;
2697
2698         entry = iter->ent;
2699
2700         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2701                 trace_seq_printf(s, "%d %d %llu ",
2702                                  entry->pid, iter->cpu, iter->ts);
2703
2704         if (trace_seq_has_overflowed(s))
2705                 return TRACE_TYPE_PARTIAL_LINE;
2706
2707         event = ftrace_find_event(entry->type);
2708         if (event)
2709                 return event->funcs->raw(iter, 0, event);
2710
2711         trace_seq_printf(s, "%d ?\n", entry->type);
2712
2713         return trace_handle_return(s);
2714 }
2715
2716 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2717 {
2718         struct trace_array *tr = iter->tr;
2719         struct trace_seq *s = &iter->seq;
2720         unsigned char newline = '\n';
2721         struct trace_entry *entry;
2722         struct trace_event *event;
2723
2724         entry = iter->ent;
2725
2726         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2727                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2728                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2729                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2730                 if (trace_seq_has_overflowed(s))
2731                         return TRACE_TYPE_PARTIAL_LINE;
2732         }
2733
2734         event = ftrace_find_event(entry->type);
2735         if (event) {
2736                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2737                 if (ret != TRACE_TYPE_HANDLED)
2738                         return ret;
2739         }
2740
2741         SEQ_PUT_FIELD(s, newline);
2742
2743         return trace_handle_return(s);
2744 }
2745
2746 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2747 {
2748         struct trace_array *tr = iter->tr;
2749         struct trace_seq *s = &iter->seq;
2750         struct trace_entry *entry;
2751         struct trace_event *event;
2752
2753         entry = iter->ent;
2754
2755         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2756                 SEQ_PUT_FIELD(s, entry->pid);
2757                 SEQ_PUT_FIELD(s, iter->cpu);
2758                 SEQ_PUT_FIELD(s, iter->ts);
2759                 if (trace_seq_has_overflowed(s))
2760                         return TRACE_TYPE_PARTIAL_LINE;
2761         }
2762
2763         event = ftrace_find_event(entry->type);
2764         return event ? event->funcs->binary(iter, 0, event) :
2765                 TRACE_TYPE_HANDLED;
2766 }
2767
2768 int trace_empty(struct trace_iterator *iter)
2769 {
2770         struct ring_buffer_iter *buf_iter;
2771         int cpu;
2772
2773         /* If we are looking at one CPU buffer, only check that one */
2774         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2775                 cpu = iter->cpu_file;
2776                 buf_iter = trace_buffer_iter(iter, cpu);
2777                 if (buf_iter) {
2778                         if (!ring_buffer_iter_empty(buf_iter))
2779                                 return 0;
2780                 } else {
2781                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2782                                 return 0;
2783                 }
2784                 return 1;
2785         }
2786
2787         for_each_tracing_cpu(cpu) {
2788                 buf_iter = trace_buffer_iter(iter, cpu);
2789                 if (buf_iter) {
2790                         if (!ring_buffer_iter_empty(buf_iter))
2791                                 return 0;
2792                 } else {
2793                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2794                                 return 0;
2795                 }
2796         }
2797
2798         return 1;
2799 }
2800
2801 /*  Called with trace_event_read_lock() held. */
2802 enum print_line_t print_trace_line(struct trace_iterator *iter)
2803 {
2804         struct trace_array *tr = iter->tr;
2805         unsigned long trace_flags = tr->trace_flags;
2806         enum print_line_t ret;
2807
2808         if (iter->lost_events) {
2809                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2810                                  iter->cpu, iter->lost_events);
2811                 if (trace_seq_has_overflowed(&iter->seq))
2812                         return TRACE_TYPE_PARTIAL_LINE;
2813         }
2814
2815         if (iter->trace && iter->trace->print_line) {
2816                 ret = iter->trace->print_line(iter);
2817                 if (ret != TRACE_TYPE_UNHANDLED)
2818                         return ret;
2819         }
2820
2821         if (iter->ent->type == TRACE_BPUTS &&
2822                         trace_flags & TRACE_ITER_PRINTK &&
2823                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2824                 return trace_print_bputs_msg_only(iter);
2825
2826         if (iter->ent->type == TRACE_BPRINT &&
2827                         trace_flags & TRACE_ITER_PRINTK &&
2828                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2829                 return trace_print_bprintk_msg_only(iter);
2830
2831         if (iter->ent->type == TRACE_PRINT &&
2832                         trace_flags & TRACE_ITER_PRINTK &&
2833                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2834                 return trace_print_printk_msg_only(iter);
2835
2836         if (trace_flags & TRACE_ITER_BIN)
2837                 return print_bin_fmt(iter);
2838
2839         if (trace_flags & TRACE_ITER_HEX)
2840                 return print_hex_fmt(iter);
2841
2842         if (trace_flags & TRACE_ITER_RAW)
2843                 return print_raw_fmt(iter);
2844
2845         return print_trace_fmt(iter);
2846 }
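
/*
 * Summary of the dispatch order above: a tracer-specific ->print_line()
 * gets first refusal, then the printk "message only" shortcuts for bputs,
 * bprint and print events, then the binary, hex and raw output modes, and
 * finally the default formatted output.
 */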
2847
2848 void trace_latency_header(struct seq_file *m)
2849 {
2850         struct trace_iterator *iter = m->private;
2851         struct trace_array *tr = iter->tr;
2852
2853         /* print nothing if the buffers are empty */
2854         if (trace_empty(iter))
2855                 return;
2856
2857         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2858                 print_trace_header(m, iter);
2859
2860         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2861                 print_lat_help_header(m);
2862 }
2863
2864 void trace_default_header(struct seq_file *m)
2865 {
2866         struct trace_iterator *iter = m->private;
2867         struct trace_array *tr = iter->tr;
2868         unsigned long trace_flags = tr->trace_flags;
2869
2870         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2871                 return;
2872
2873         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2874                 /* print nothing if the buffers are empty */
2875                 if (trace_empty(iter))
2876                         return;
2877                 print_trace_header(m, iter);
2878                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2879                         print_lat_help_header(m);
2880         } else {
2881                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2882                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2883                                 print_func_help_header_irq(iter->trace_buffer, m);
2884                         else
2885                                 print_func_help_header(iter->trace_buffer, m);
2886                 }
2887         }
2888 }
2889
2890 static void test_ftrace_alive(struct seq_file *m)
2891 {
2892         if (!ftrace_is_dead())
2893                 return;
2894         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2895                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2896 }
2897
2898 #ifdef CONFIG_TRACER_MAX_TRACE
2899 static void show_snapshot_main_help(struct seq_file *m)
2900 {
2901         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2902                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2903                     "#                      Takes a snapshot of the main buffer.\n"
2904                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2905                     "#                      (Doesn't have to be '2'; works with any number that\n"
2906                     "#                       is not a '0' or '1')\n");
2907 }
2908
2909 static void show_snapshot_percpu_help(struct seq_file *m)
2910 {
2911         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2912 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2913         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2914                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2915 #else
2916         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2917                     "#                     Must use main snapshot file to allocate.\n");
2918 #endif
2919         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2920                     "#                      (Doesn't have to be '2'; works with any number that\n"
2921                     "#                       is not a '0' or '1')\n");
2922 }
2923
2924 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2925 {
2926         if (iter->tr->allocated_snapshot)
2927                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2928         else
2929                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2930
2931         seq_puts(m, "# Snapshot commands:\n");
2932         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2933                 show_snapshot_main_help(m);
2934         else
2935                 show_snapshot_percpu_help(m);
2936 }
2937 #else
2938 /* Should never be called */
2939 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2940 #endif
2941
2942 static int s_show(struct seq_file *m, void *v)
2943 {
2944         struct trace_iterator *iter = v;
2945         int ret;
2946
2947         if (iter->ent == NULL) {
2948                 if (iter->tr) {
2949                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2950                         seq_puts(m, "#\n");
2951                         test_ftrace_alive(m);
2952                 }
2953                 if (iter->snapshot && trace_empty(iter))
2954                         print_snapshot_help(m, iter);
2955                 else if (iter->trace && iter->trace->print_header)
2956                         iter->trace->print_header(m);
2957                 else
2958                         trace_default_header(m);
2959
2960         } else if (iter->leftover) {
2961                 /*
2962                  * If we filled the seq_file buffer earlier, we
2963                  * want to just show it now.
2964                  */
2965                 ret = trace_print_seq(m, &iter->seq);
2966
2967                 /* ret should this time be zero, but you never know */
2968                 iter->leftover = ret;
2969
2970         } else {
2971                 print_trace_line(iter);
2972                 ret = trace_print_seq(m, &iter->seq);
2973                 /*
2974                  * If we overflow the seq_file buffer, then it will
2975                  * ask us for this data again at start up.
2976                  * Use that instead.
2977                  *  ret is 0 if seq_file write succeeded.
2978                  *        -1 otherwise.
2979                  */
2980                 iter->leftover = ret;
2981         }
2982
2983         return 0;
2984 }
2985
2986 /*
2987  * Should be used after trace_array_get(), trace_types_lock
2988  * ensures that i_cdev was already initialized.
2989  */
2990 static inline int tracing_get_cpu(struct inode *inode)
2991 {
2992         if (inode->i_cdev) /* See trace_create_cpu_file() */
2993                 return (long)inode->i_cdev - 1;
2994         return RING_BUFFER_ALL_CPUS;
2995 }
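
/*
 * A sketch of the encode side that pairs with the "- 1" above; the real
 * counterpart lives in trace_create_cpu_file(). The CPU number is presumably
 * stored biased by one so that a NULL i_cdev can still mean "all CPUs".
 */
#if 0
static void my_set_cpu(struct inode *inode, long cpu)
{
        inode->i_cdev = (void *)(cpu + 1);      /* 0 stays reserved for "all CPUs" */
}
#endif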
2996
2997 static const struct seq_operations tracer_seq_ops = {
2998         .start          = s_start,
2999         .next           = s_next,
3000         .stop           = s_stop,
3001         .show           = s_show,
3002 };
3003
3004 static struct trace_iterator *
3005 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3006 {
3007         struct trace_array *tr = inode->i_private;
3008         struct trace_iterator *iter;
3009         int cpu;
3010
3011         if (tracing_disabled)
3012                 return ERR_PTR(-ENODEV);
3013
3014         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3015         if (!iter)
3016                 return ERR_PTR(-ENOMEM);
3017
3018         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3019                                     GFP_KERNEL);
3020         if (!iter->buffer_iter)
3021                 goto release;
3022
3023         /*
3024          * We make a copy of the current tracer to avoid concurrent
3025          * changes on it while we are reading.
3026          */
3027         mutex_lock(&trace_types_lock);
3028         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3029         if (!iter->trace)
3030                 goto fail;
3031
3032         *iter->trace = *tr->current_trace;
3033
3034         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3035                 goto fail;
3036
3037         iter->tr = tr;
3038
3039 #ifdef CONFIG_TRACER_MAX_TRACE
3040         /* Currently only the top directory has a snapshot */
3041         if (tr->current_trace->print_max || snapshot)
3042                 iter->trace_buffer = &tr->max_buffer;
3043         else
3044 #endif
3045                 iter->trace_buffer = &tr->trace_buffer;
3046         iter->snapshot = snapshot;
3047         iter->pos = -1;
3048         iter->cpu_file = tracing_get_cpu(inode);
3049         mutex_init(&iter->mutex);
3050
3051         /* Notify the tracer early; before we stop tracing. */
3052         if (iter->trace && iter->trace->open)
3053                 iter->trace->open(iter);
3054
3055         /* Annotate start of buffers if we had overruns */
3056         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3057                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3058
3059         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3060         if (trace_clocks[tr->clock_id].in_ns)
3061                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3062
3063         /* stop the trace while dumping if we are not opening "snapshot" */
3064         if (!iter->snapshot)
3065                 tracing_stop_tr(tr);
3066
3067         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3068                 for_each_tracing_cpu(cpu) {
3069                         iter->buffer_iter[cpu] =
3070                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3071                 }
3072                 ring_buffer_read_prepare_sync();
3073                 for_each_tracing_cpu(cpu) {
3074                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3075                         tracing_iter_reset(iter, cpu);
3076                 }
3077         } else {
3078                 cpu = iter->cpu_file;
3079                 iter->buffer_iter[cpu] =
3080                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3081                 ring_buffer_read_prepare_sync();
3082                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3083                 tracing_iter_reset(iter, cpu);
3084         }
3085
3086         mutex_unlock(&trace_types_lock);
3087
3088         return iter;
3089
3090  fail:
3091         mutex_unlock(&trace_types_lock);
3092         kfree(iter->trace);
3093         kfree(iter->buffer_iter);
3094 release:
3095         seq_release_private(inode, file);
3096         return ERR_PTR(-ENOMEM);
3097 }
3098
3099 int tracing_open_generic(struct inode *inode, struct file *filp)
3100 {
3101         if (tracing_disabled)
3102                 return -ENODEV;
3103
3104         filp->private_data = inode->i_private;
3105         return 0;
3106 }
3107
3108 bool tracing_is_disabled(void)
3109 {
3110         return (tracing_disabled) ? true : false;
3111 }
3112
3113 /*
3114  * Open and update trace_array ref count.
3115  * Must have the current trace_array passed to it.
3116  */
3117 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3118 {
3119         struct trace_array *tr = inode->i_private;
3120
3121         if (tracing_disabled)
3122                 return -ENODEV;
3123
3124         if (trace_array_get(tr) < 0)
3125                 return -ENODEV;
3126
3127         filp->private_data = inode->i_private;
3128
3129         return 0;
3130 }
3131
3132 static int tracing_release(struct inode *inode, struct file *file)
3133 {
3134         struct trace_array *tr = inode->i_private;
3135         struct seq_file *m = file->private_data;
3136         struct trace_iterator *iter;
3137         int cpu;
3138
3139         if (!(file->f_mode & FMODE_READ)) {
3140                 trace_array_put(tr);
3141                 return 0;
3142         }
3143
3144         /* Writes do not use seq_file */
3145         iter = m->private;
3146         mutex_lock(&trace_types_lock);
3147
3148         for_each_tracing_cpu(cpu) {
3149                 if (iter->buffer_iter[cpu])
3150                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3151         }
3152
3153         if (iter->trace && iter->trace->close)
3154                 iter->trace->close(iter);
3155
3156         if (!iter->snapshot)
3157                 /* reenable tracing if it was previously enabled */
3158                 tracing_start_tr(tr);
3159
3160         __trace_array_put(tr);
3161
3162         mutex_unlock(&trace_types_lock);
3163
3164         mutex_destroy(&iter->mutex);
3165         free_cpumask_var(iter->started);
3166         kfree(iter->trace);
3167         kfree(iter->buffer_iter);
3168         seq_release_private(inode, file);
3169
3170         return 0;
3171 }
3172
3173 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3174 {
3175         struct trace_array *tr = inode->i_private;
3176
3177         trace_array_put(tr);
3178         return 0;
3179 }
3180
3181 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3182 {
3183         struct trace_array *tr = inode->i_private;
3184
3185         trace_array_put(tr);
3186
3187         return single_release(inode, file);
3188 }
3189
3190 static int tracing_open(struct inode *inode, struct file *file)
3191 {
3192         struct trace_array *tr = inode->i_private;
3193         struct trace_iterator *iter;
3194         int ret = 0;
3195
3196         if (trace_array_get(tr) < 0)
3197                 return -ENODEV;
3198
3199         /* If this file was open for write, then erase contents */
3200         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3201                 int cpu = tracing_get_cpu(inode);
3202
3203                 if (cpu == RING_BUFFER_ALL_CPUS)
3204                         tracing_reset_online_cpus(&tr->trace_buffer);
3205                 else
3206                         tracing_reset(&tr->trace_buffer, cpu);
3207         }
3208
3209         if (file->f_mode & FMODE_READ) {
3210                 iter = __tracing_open(inode, file, false);
3211                 if (IS_ERR(iter))
3212                         ret = PTR_ERR(iter);
3213                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3214                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3215         }
3216
3217         if (ret < 0)
3218                 trace_array_put(tr);
3219
3220         return ret;
3221 }
3222
3223 /*
3224  * Some tracers are not suitable for instance buffers.
3225  * A tracer is always available for the global array (toplevel)
3226  * or if it explicitly states that it is.
3227  */
3228 static bool
3229 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3230 {
3231         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3232 }
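
/*
 * Illustrative sketch (not part of the original source): a tracer opts in
 * to instance buffers by setting ->allow_instances in its struct tracer,
 * for example:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name			= "my_tracer",
 *		.allow_instances	= true,
 *		...
 *	};
 *
 * Without that, the tracer is only offered for the global (toplevel) array.
 */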
3233
3234 /* Find the next tracer that this trace array may use */
3235 static struct tracer *
3236 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3237 {
3238         while (t && !trace_ok_for_array(t, tr))
3239                 t = t->next;
3240
3241         return t;
3242 }
3243
3244 static void *
3245 t_next(struct seq_file *m, void *v, loff_t *pos)
3246 {
3247         struct trace_array *tr = m->private;
3248         struct tracer *t = v;
3249
3250         (*pos)++;
3251
3252         if (t)
3253                 t = get_tracer_for_array(tr, t->next);
3254
3255         return t;
3256 }
3257
3258 static void *t_start(struct seq_file *m, loff_t *pos)
3259 {
3260         struct trace_array *tr = m->private;
3261         struct tracer *t;
3262         loff_t l = 0;
3263
3264         mutex_lock(&trace_types_lock);
3265
3266         t = get_tracer_for_array(tr, trace_types);
3267         for (; t && l < *pos; t = t_next(m, t, &l))
3268                 ;
3269
3270         return t;
3271 }
3272
3273 static void t_stop(struct seq_file *m, void *p)
3274 {
3275         mutex_unlock(&trace_types_lock);
3276 }
3277
3278 static int t_show(struct seq_file *m, void *v)
3279 {
3280         struct tracer *t = v;
3281
3282         if (!t)
3283                 return 0;
3284
3285         seq_puts(m, t->name);
3286         if (t->next)
3287                 seq_putc(m, ' ');
3288         else
3289                 seq_putc(m, '\n');
3290
3291         return 0;
3292 }
3293
3294 static const struct seq_operations show_traces_seq_ops = {
3295         .start          = t_start,
3296         .next           = t_next,
3297         .stop           = t_stop,
3298         .show           = t_show,
3299 };
3300
3301 static int show_traces_open(struct inode *inode, struct file *file)
3302 {
3303         struct trace_array *tr = inode->i_private;
3304         struct seq_file *m;
3305         int ret;
3306
3307         if (tracing_disabled)
3308                 return -ENODEV;
3309
3310         ret = seq_open(file, &show_traces_seq_ops);
3311         if (ret)
3312                 return ret;
3313
3314         m = file->private_data;
3315         m->private = tr;
3316
3317         return 0;
3318 }
3319
3320 static ssize_t
3321 tracing_write_stub(struct file *filp, const char __user *ubuf,
3322                    size_t count, loff_t *ppos)
3323 {
3324         return count;
3325 }
3326
3327 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3328 {
3329         int ret;
3330
3331         if (file->f_mode & FMODE_READ)
3332                 ret = seq_lseek(file, offset, whence);
3333         else
3334                 file->f_pos = ret = 0;
3335
3336         return ret;
3337 }
3338
3339 static const struct file_operations tracing_fops = {
3340         .open           = tracing_open,
3341         .read           = seq_read,
3342         .write          = tracing_write_stub,
3343         .llseek         = tracing_lseek,
3344         .release        = tracing_release,
3345 };
3346
3347 static const struct file_operations show_traces_fops = {
3348         .open           = show_traces_open,
3349         .read           = seq_read,
3350         .release        = seq_release,
3351         .llseek         = seq_lseek,
3352 };
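
/*
 * Illustrative example (not part of the original source): reading the
 * "available_tracers" file goes through show_traces_fops above and prints
 * the registered tracers that are valid for this instance on one line,
 * e.g. something like:
 *
 *	# cat available_tracers
 *	function_graph function nop
 */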
3353
3354 /*
3355  * The tracer itself will not take this lock, but still we want
3356  * to provide a consistent cpumask to user-space:
3357  */
3358 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3359
3360 /*
3361  * Temporary storage for the character representation of the
3362  * CPU bitmask (and one more byte for the newline):
3363  */
3364 static char mask_str[NR_CPUS + 1];
3365
3366 static ssize_t
3367 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3368                      size_t count, loff_t *ppos)
3369 {
3370         struct trace_array *tr = file_inode(filp)->i_private;
3371         int len;
3372
3373         mutex_lock(&tracing_cpumask_update_lock);
3374
3375         len = snprintf(mask_str, count, "%*pb\n",
3376                        cpumask_pr_args(tr->tracing_cpumask));
3377         if (len >= count) {
3378                 count = -EINVAL;
3379                 goto out_err;
3380         }
3381         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3382
3383 out_err:
3384         mutex_unlock(&tracing_cpumask_update_lock);
3385
3386         return count;
3387 }
3388
3389 static ssize_t
3390 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3391                       size_t count, loff_t *ppos)
3392 {
3393         struct trace_array *tr = file_inode(filp)->i_private;
3394         cpumask_var_t tracing_cpumask_new;
3395         int err, cpu;
3396
3397         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3398                 return -ENOMEM;
3399
3400         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3401         if (err)
3402                 goto err_unlock;
3403
3404         mutex_lock(&tracing_cpumask_update_lock);
3405
3406         local_irq_disable();
3407         arch_spin_lock(&tr->max_lock);
3408         for_each_tracing_cpu(cpu) {
3409                 /*
3410                  * Increase/decrease the disabled counter if we are
3411                  * about to flip a bit in the cpumask:
3412                  */
3413                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3414                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3415                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3416                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3417                 }
3418                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3419                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3420                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3421                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3422                 }
3423         }
3424         arch_spin_unlock(&tr->max_lock);
3425         local_irq_enable();
3426
3427         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3428
3429         mutex_unlock(&tracing_cpumask_update_lock);
3430         free_cpumask_var(tracing_cpumask_new);
3431
3432         return count;
3433
3434 err_unlock:
3435         free_cpumask_var(tracing_cpumask_new);
3436
3437         return err;
3438 }
3439
3440 static const struct file_operations tracing_cpumask_fops = {
3441         .open           = tracing_open_generic_tr,
3442         .read           = tracing_cpumask_read,
3443         .write          = tracing_cpumask_write,
3444         .release        = tracing_release_generic_tr,
3445         .llseek         = generic_file_llseek,
3446 };
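
/*
 * Illustrative example (not part of the original source): the
 * "tracing_cpumask" file takes a hex CPU mask, so
 *
 *	# echo 3 > tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1; tracing_cpumask_write() above disables
 * recording on the CPUs whose bits were cleared.
 */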
3447
3448 static int tracing_trace_options_show(struct seq_file *m, void *v)
3449 {
3450         struct tracer_opt *trace_opts;
3451         struct trace_array *tr = m->private;
3452         u32 tracer_flags;
3453         int i;
3454
3455         mutex_lock(&trace_types_lock);
3456         tracer_flags = tr->current_trace->flags->val;
3457         trace_opts = tr->current_trace->flags->opts;
3458
3459         for (i = 0; trace_options[i]; i++) {
3460                 if (tr->trace_flags & (1 << i))
3461                         seq_printf(m, "%s\n", trace_options[i]);
3462                 else
3463                         seq_printf(m, "no%s\n", trace_options[i]);
3464         }
3465
3466         for (i = 0; trace_opts[i].name; i++) {
3467                 if (tracer_flags & trace_opts[i].bit)
3468                         seq_printf(m, "%s\n", trace_opts[i].name);
3469                 else
3470                         seq_printf(m, "no%s\n", trace_opts[i].name);
3471         }
3472         mutex_unlock(&trace_types_lock);
3473
3474         return 0;
3475 }
3476
3477 static int __set_tracer_option(struct trace_array *tr,
3478                                struct tracer_flags *tracer_flags,
3479                                struct tracer_opt *opts, int neg)
3480 {
3481         struct tracer *trace = tracer_flags->trace;
3482         int ret;
3483
3484         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3485         if (ret)
3486                 return ret;
3487
3488         if (neg)
3489                 tracer_flags->val &= ~opts->bit;
3490         else
3491                 tracer_flags->val |= opts->bit;
3492         return 0;
3493 }
3494
3495 /* Try to assign a tracer specific option */
3496 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3497 {
3498         struct tracer *trace = tr->current_trace;
3499         struct tracer_flags *tracer_flags = trace->flags;
3500         struct tracer_opt *opts = NULL;
3501         int i;
3502
3503         for (i = 0; tracer_flags->opts[i].name; i++) {
3504                 opts = &tracer_flags->opts[i];
3505
3506                 if (strcmp(cmp, opts->name) == 0)
3507                         return __set_tracer_option(tr, trace->flags, opts, neg);
3508         }
3509
3510         return -EINVAL;
3511 }
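
/*
 * Illustrative example (not part of the original source): with the
 * function_graph tracer active,
 *
 *	# echo nofuncgraph-irqs > trace_options
 *
 * is routed here as set_tracer_option(tr, "funcgraph-irqs", 1), which
 * clears that tracer-specific flag via __set_tracer_option().
 */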
3512
3513 /* Some tracers require overwrite to stay enabled */
3514 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3515 {
3516         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3517                 return -1;
3518
3519         return 0;
3520 }
3521
3522 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3523 {
3524         /* do nothing if flag is already set */
3525         if (!!(tr->trace_flags & mask) == !!enabled)
3526                 return 0;
3527
3528         /* Give the tracer a chance to approve the change */
3529         if (tr->current_trace->flag_changed)
3530                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3531                         return -EINVAL;
3532
3533         if (enabled)
3534                 tr->trace_flags |= mask;
3535         else
3536                 tr->trace_flags &= ~mask;
3537
3538         if (mask == TRACE_ITER_RECORD_CMD)
3539                 trace_event_enable_cmd_record(enabled);
3540
3541         if (mask == TRACE_ITER_EVENT_FORK)
3542                 trace_event_follow_fork(tr, enabled);
3543
3544         if (mask == TRACE_ITER_OVERWRITE) {
3545                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3546 #ifdef CONFIG_TRACER_MAX_TRACE
3547                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3548 #endif
3549         }
3550
3551         if (mask == TRACE_ITER_PRINTK) {
3552                 trace_printk_start_stop_comm(enabled);
3553                 trace_printk_control(enabled);
3554         }
3555
3556         return 0;
3557 }
3558
3559 static int trace_set_options(struct trace_array *tr, char *option)
3560 {
3561         char *cmp;
3562         int neg = 0;
3563         int ret = -ENODEV;
3564         int i;
3565         size_t orig_len = strlen(option);
3566
3567         cmp = strstrip(option);
3568
3569         if (strncmp(cmp, "no", 2) == 0) {
3570                 neg = 1;
3571                 cmp += 2;
3572         }
3573
3574         mutex_lock(&trace_types_lock);
3575
3576         for (i = 0; trace_options[i]; i++) {
3577                 if (strcmp(cmp, trace_options[i]) == 0) {
3578                         ret = set_tracer_flag(tr, 1 << i, !neg);
3579                         break;
3580                 }
3581         }
3582
3583         /* If no option could be set, test the specific tracer options */
3584         if (!trace_options[i])
3585                 ret = set_tracer_option(tr, cmp, neg);
3586
3587         mutex_unlock(&trace_types_lock);
3588
3589         /*
3590          * If the first trailing whitespace is replaced with '\0' by strstrip,
3591          * turn it back into a space.
3592          */
3593         if (orig_len > strlen(option))
3594                 option[strlen(option)] = ' ';
3595
3596         return ret;
3597 }
3598
3599 static void __init apply_trace_boot_options(void)
3600 {
3601         char *buf = trace_boot_options_buf;
3602         char *option;
3603
3604         while (true) {
3605                 option = strsep(&buf, ",");
3606
3607                 if (!option)
3608                         break;
3609
3610                 if (*option)
3611                         trace_set_options(&global_trace, option);
3612
3613                 /* Put back the comma to allow this to be called again */
3614                 if (buf)
3615                         *(buf - 1) = ',';
3616         }
3617 }
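
/*
 * Illustrative example (not part of the original source): booting with
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * makes apply_trace_boot_options() call trace_set_options() once for
 * "sym-offset" and once for "noprint-parent".
 */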
3618
3619 static ssize_t
3620 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3621                         size_t cnt, loff_t *ppos)
3622 {
3623         struct seq_file *m = filp->private_data;
3624         struct trace_array *tr = m->private;
3625         char buf[64];
3626         int ret;
3627
3628         if (cnt >= sizeof(buf))
3629                 return -EINVAL;
3630
3631         if (copy_from_user(buf, ubuf, cnt))
3632                 return -EFAULT;
3633
3634         buf[cnt] = 0;
3635
3636         ret = trace_set_options(tr, buf);
3637         if (ret < 0)
3638                 return ret;
3639
3640         *ppos += cnt;
3641
3642         return cnt;
3643 }
3644
3645 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3646 {
3647         struct trace_array *tr = inode->i_private;
3648         int ret;
3649
3650         if (tracing_disabled)
3651                 return -ENODEV;
3652
3653         if (trace_array_get(tr) < 0)
3654                 return -ENODEV;
3655
3656         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3657         if (ret < 0)
3658                 trace_array_put(tr);
3659
3660         return ret;
3661 }
3662
3663 static const struct file_operations tracing_iter_fops = {
3664         .open           = tracing_trace_options_open,
3665         .read           = seq_read,
3666         .llseek         = seq_lseek,
3667         .release        = tracing_single_release_tr,
3668         .write          = tracing_trace_options_write,
3669 };
3670
3671 static const char readme_msg[] =
3672         "tracing mini-HOWTO:\n\n"
3673         "# echo 0 > tracing_on : quick way to disable tracing\n"
3674         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3675         " Important files:\n"
3676         "  trace\t\t\t- The static contents of the buffer\n"
3677         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3678         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3679         "  current_tracer\t- function and latency tracers\n"
3680         "  available_tracers\t- list of configured tracers for current_tracer\n"
3681         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3682         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3683         "  trace_clock\t\t- change the clock used to order events\n"
3684         "       local:   Per cpu clock but may not be synced across CPUs\n"
3685         "      global:   Synced across CPUs but slows tracing down.\n"
3686         "     counter:   Not a clock, but just an increment\n"
3687         "      uptime:   Jiffy counter from time of boot\n"
3688         "        perf:   Same clock that perf events use\n"
3689 #ifdef CONFIG_X86_64
3690         "     x86-tsc:   TSC cycle counter\n"
3691 #endif
3692         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3693         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3694         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3695         "\t\t\t  Remove sub-buffer with rmdir\n"
3696         "  trace_options\t\t- Set format or modify how tracing happens\n"
3697         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3698         "\t\t\t  option name\n"
3699         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3700 #ifdef CONFIG_DYNAMIC_FTRACE
3701         "\n  available_filter_functions - list of functions that can be filtered on\n"
3702         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3703         "\t\t\t  functions\n"
3704         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3705         "\t     modules: Can select a group via module\n"
3706         "\t      Format: :mod:<module-name>\n"
3707         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3708         "\t    triggers: a command to perform when function is hit\n"
3709         "\t      Format: <function>:<trigger>[:count]\n"
3710         "\t     trigger: traceon, traceoff\n"
3711         "\t\t      enable_event:<system>:<event>\n"
3712         "\t\t      disable_event:<system>:<event>\n"
3713 #ifdef CONFIG_STACKTRACE
3714         "\t\t      stacktrace\n"
3715 #endif
3716 #ifdef CONFIG_TRACER_SNAPSHOT
3717         "\t\t      snapshot\n"
3718 #endif
3719         "\t\t      dump\n"
3720         "\t\t      cpudump\n"
3721         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3722         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3723         "\t     The first one will disable tracing every time do_fault is hit\n"
3724         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3725         "\t       The first time do_trap is hit and it disables tracing, the\n"
3726         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3727         "\t       the counter will not decrement. It only decrements when the\n"
3728         "\t       trigger did work\n"
3729         "\t     To remove trigger without count:\n"
3730         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3731         "\t     To remove trigger with a count:\n"
3732         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3733         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3734         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3735         "\t    modules: Can select a group via module command :mod:\n"
3736         "\t    Does not accept triggers\n"
3737 #endif /* CONFIG_DYNAMIC_FTRACE */
3738 #ifdef CONFIG_FUNCTION_TRACER
3739         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3740         "\t\t    (function)\n"
3741 #endif
3742 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3743         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3744         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3745         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3746 #endif
3747 #ifdef CONFIG_TRACER_SNAPSHOT
3748         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3749         "\t\t\t  snapshot buffer. Read the contents for more\n"
3750         "\t\t\t  information\n"
3751 #endif
3752 #ifdef CONFIG_STACK_TRACER
3753         "  stack_trace\t\t- Shows the max stack trace when active\n"
3754         "  stack_max_size\t- Shows current max stack size that was traced\n"
3755         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3756         "\t\t\t  new trace)\n"
3757 #ifdef CONFIG_DYNAMIC_FTRACE
3758         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3759         "\t\t\t  traces\n"
3760 #endif
3761 #endif /* CONFIG_STACK_TRACER */
3762         "  events/\t\t- Directory containing all trace event subsystems:\n"
3763         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3764         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3765         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3766         "\t\t\t  events\n"
3767         "      filter\t\t- If set, only events passing filter are traced\n"
3768         "  events/<system>/<event>/\t- Directory containing control files for\n"
3769         "\t\t\t  <event>:\n"
3770         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3771         "      filter\t\t- If set, only events passing filter are traced\n"
3772         "      trigger\t\t- If set, a command to perform when event is hit\n"
3773         "\t    Format: <trigger>[:count][if <filter>]\n"
3774         "\t   trigger: traceon, traceoff\n"
3775         "\t            enable_event:<system>:<event>\n"
3776         "\t            disable_event:<system>:<event>\n"
3777 #ifdef CONFIG_HIST_TRIGGERS
3778         "\t            enable_hist:<system>:<event>\n"
3779         "\t            disable_hist:<system>:<event>\n"
3780 #endif
3781 #ifdef CONFIG_STACKTRACE
3782         "\t\t    stacktrace\n"
3783 #endif
3784 #ifdef CONFIG_TRACER_SNAPSHOT
3785         "\t\t    snapshot\n"
3786 #endif
3787 #ifdef CONFIG_HIST_TRIGGERS
3788         "\t\t    hist (see below)\n"
3789 #endif
3790         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3791         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3792         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3793         "\t                  events/block/block_unplug/trigger\n"
3794         "\t   The first disables tracing every time block_unplug is hit.\n"
3795         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3796         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3797         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
3798         "\t   Like function triggers, the counter is only decremented if it\n"
3799         "\t    enabled or disabled tracing.\n"
3800         "\t   To remove a trigger without a count:\n"
3801         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3802         "\t   To remove a trigger with a count:\n"
3803         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3804         "\t   Filters can be ignored when removing a trigger.\n"
3805 #ifdef CONFIG_HIST_TRIGGERS
3806         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3807         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3808         "\t            [:values=<field1[,field2,...]>]\n"
3809         "\t            [:sort=<field1[,field2,...]>]\n"
3810         "\t            [:size=#entries]\n"
3811         "\t            [:pause][:continue][:clear]\n"
3812         "\t            [:name=histname1]\n"
3813         "\t            [if <filter>]\n\n"
3814         "\t    When a matching event is hit, an entry is added to a hash\n"
3815         "\t    table using the key(s) and value(s) named, and the value of a\n"
3816         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3817         "\t    correspond to fields in the event's format description.  Keys\n"
3818         "\t    can be any field, or the special string 'stacktrace'.\n"
3819         "\t    Compound keys consisting of up to two fields can be specified\n"
3820         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3821         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3822         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3823         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3824         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3825         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3826         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3827         "\t    its histogram data will be shared with other triggers of the\n"
3828         "\t    same name, and trigger hits will update this common data.\n\n"
3829         "\t    Reading the 'hist' file for the event will dump the hash\n"
3830         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3831         "\t    triggers attached to an event, there will be a table for each\n"
3832         "\t    trigger in the output.  The table displayed for a named\n"
3833         "\t    trigger will be the same as any other instance having the\n"
3834         "\t    same name.  The default format used to display a given field\n"
3835         "\t    can be modified by appending any of the following modifiers\n"
3836         "\t    to the field name, as applicable:\n\n"
3837         "\t            .hex        display a number as a hex value\n"
3838         "\t            .sym        display an address as a symbol\n"
3839         "\t            .sym-offset display an address as a symbol and offset\n"
3840         "\t            .execname   display a common_pid as a program name\n"
3841         "\t            .syscall    display a syscall id as a syscall name\n\n"
3842         "\t            .log2       display log2 value rather than raw number\n\n"
3843         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3844         "\t    trigger or to start a hist trigger but not log any events\n"
3845         "\t    until told to do so.  'continue' can be used to start or\n"
3846         "\t    restart a paused hist trigger.\n\n"
3847         "\t    The 'clear' parameter will clear the contents of a running\n"
3848         "\t    hist trigger and leave its current paused/active state\n"
3849         "\t    unchanged.\n\n"
3850         "\t    The enable_hist and disable_hist triggers can be used to\n"
3851         "\t    have one event conditionally start and stop another event's\n"
3852         "\t    already-attached hist trigger.  The syntax is analogous to\n"
3853         "\t    the enable_event and disable_event triggers.\n"
3854 #endif
3855 ;
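
/*
 * The text above is exposed read-only through the "README" file in
 * tracefs (typically /sys/kernel/debug/tracing/README), served by
 * tracing_readme_fops below.
 */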
3856
3857 static ssize_t
3858 tracing_readme_read(struct file *filp, char __user *ubuf,
3859                        size_t cnt, loff_t *ppos)
3860 {
3861         return simple_read_from_buffer(ubuf, cnt, ppos,
3862                                         readme_msg, strlen(readme_msg));
3863 }
3864
3865 static const struct file_operations tracing_readme_fops = {
3866         .open           = tracing_open_generic,
3867         .read           = tracing_readme_read,
3868         .llseek         = generic_file_llseek,
3869 };
3870
3871 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3872 {
3873         unsigned int *ptr = v;
3874
3875         if (*pos || m->count)
3876                 ptr++;
3877
3878         (*pos)++;
3879
3880         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3881              ptr++) {
3882                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3883                         continue;
3884
3885                 return ptr;
3886         }
3887
3888         return NULL;
3889 }
3890
3891 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3892 {
3893         void *v;
3894         loff_t l = 0;
3895
3896         preempt_disable();
3897         arch_spin_lock(&trace_cmdline_lock);
3898
3899         v = &savedcmd->map_cmdline_to_pid[0];
3900         while (l <= *pos) {
3901                 v = saved_cmdlines_next(m, v, &l);
3902                 if (!v)
3903                         return NULL;
3904         }
3905
3906         return v;
3907 }
3908
3909 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3910 {
3911         arch_spin_unlock(&trace_cmdline_lock);
3912         preempt_enable();
3913 }
3914
3915 static int saved_cmdlines_show(struct seq_file *m, void *v)
3916 {
3917         char buf[TASK_COMM_LEN];
3918         unsigned int *pid = v;
3919
3920         __trace_find_cmdline(*pid, buf);
3921         seq_printf(m, "%d %s\n", *pid, buf);
3922         return 0;
3923 }
3924
3925 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3926         .start          = saved_cmdlines_start,
3927         .next           = saved_cmdlines_next,
3928         .stop           = saved_cmdlines_stop,
3929         .show           = saved_cmdlines_show,
3930 };
3931
3932 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3933 {
3934         if (tracing_disabled)
3935                 return -ENODEV;
3936
3937         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3938 }
3939
3940 static const struct file_operations tracing_saved_cmdlines_fops = {
3941         .open           = tracing_saved_cmdlines_open,
3942         .read           = seq_read,
3943         .llseek         = seq_lseek,
3944         .release        = seq_release,
3945 };
3946
3947 static ssize_t
3948 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3949                                  size_t cnt, loff_t *ppos)
3950 {
3951         char buf[64];
3952         int r;
3953
3954         arch_spin_lock(&trace_cmdline_lock);
3955         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3956         arch_spin_unlock(&trace_cmdline_lock);
3957
3958         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3959 }
3960
3961 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3962 {
3963         kfree(s->saved_cmdlines);
3964         kfree(s->map_cmdline_to_pid);
3965         kfree(s);
3966 }
3967
3968 static int tracing_resize_saved_cmdlines(unsigned int val)
3969 {
3970         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3971
3972         s = kmalloc(sizeof(*s), GFP_KERNEL);
3973         if (!s)
3974                 return -ENOMEM;
3975
3976         if (allocate_cmdlines_buffer(val, s) < 0) {
3977                 kfree(s);
3978                 return -ENOMEM;
3979         }
3980
3981         arch_spin_lock(&trace_cmdline_lock);
3982         savedcmd_temp = savedcmd;
3983         savedcmd = s;
3984         arch_spin_unlock(&trace_cmdline_lock);
3985         free_saved_cmdlines_buffer(savedcmd_temp);
3986
3987         return 0;
3988 }
3989
3990 static ssize_t
3991 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3992                                   size_t cnt, loff_t *ppos)
3993 {
3994         unsigned long val;
3995         int ret;
3996
3997         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3998         if (ret)
3999                 return ret;
4000
4001         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4002         if (!val || val > PID_MAX_DEFAULT)
4003                 return -EINVAL;
4004
4005         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4006         if (ret < 0)
4007                 return ret;
4008
4009         *ppos += cnt;
4010
4011         return cnt;
4012 }
4013
4014 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4015         .open           = tracing_open_generic,
4016         .read           = tracing_saved_cmdlines_size_read,
4017         .write          = tracing_saved_cmdlines_size_write,
4018 };
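
/*
 * Illustrative example (not part of the original source): enlarging the
 * cached comm list from user space,
 *
 *	# echo 1024 > saved_cmdlines_size
 *
 * ends up in tracing_resize_saved_cmdlines(1024) above; values outside
 * 1..PID_MAX_DEFAULT are rejected with -EINVAL.
 */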
4019
4020 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4021 static union trace_enum_map_item *
4022 update_enum_map(union trace_enum_map_item *ptr)
4023 {
4024         if (!ptr->map.enum_string) {
4025                 if (ptr->tail.next) {
4026                         ptr = ptr->tail.next;
4027                         /* Set ptr to the next real item (skip head) */
4028                         ptr++;
4029                 } else
4030                         return NULL;
4031         }
4032         return ptr;
4033 }
4034
4035 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4036 {
4037         union trace_enum_map_item *ptr = v;
4038
4039         /*
4040          * Paranoid! If ptr points to end, we don't want to increment past it.
4041          * This really should never happen.
4042          */
4043         ptr = update_enum_map(ptr);
4044         if (WARN_ON_ONCE(!ptr))
4045                 return NULL;
4046
4047         ptr++;
4048
4049         (*pos)++;
4050
4051         ptr = update_enum_map(ptr);
4052
4053         return ptr;
4054 }
4055
4056 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4057 {
4058         union trace_enum_map_item *v;
4059         loff_t l = 0;
4060
4061         mutex_lock(&trace_enum_mutex);
4062
4063         v = trace_enum_maps;
4064         if (v)
4065                 v++;
4066
4067         while (v && l < *pos) {
4068                 v = enum_map_next(m, v, &l);
4069         }
4070
4071         return v;
4072 }
4073
4074 static void enum_map_stop(struct seq_file *m, void *v)
4075 {
4076         mutex_unlock(&trace_enum_mutex);
4077 }
4078
4079 static int enum_map_show(struct seq_file *m, void *v)
4080 {
4081         union trace_enum_map_item *ptr = v;
4082
4083         seq_printf(m, "%s %ld (%s)\n",
4084                    ptr->map.enum_string, ptr->map.enum_value,
4085                    ptr->map.system);
4086
4087         return 0;
4088 }
4089
4090 static const struct seq_operations tracing_enum_map_seq_ops = {
4091         .start          = enum_map_start,
4092         .next           = enum_map_next,
4093         .stop           = enum_map_stop,
4094         .show           = enum_map_show,
4095 };
4096
4097 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4098 {
4099         if (tracing_disabled)
4100                 return -ENODEV;
4101
4102         return seq_open(filp, &tracing_enum_map_seq_ops);
4103 }
4104
4105 static const struct file_operations tracing_enum_map_fops = {
4106         .open           = tracing_enum_map_open,
4107         .read           = seq_read,
4108         .llseek         = seq_lseek,
4109         .release        = seq_release,
4110 };
4111
4112 static inline union trace_enum_map_item *
4113 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4114 {
4115         /* Return tail of array given the head */
4116         return ptr + ptr->head.length + 1;
4117 }
4118
4119 static void
4120 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4121                            int len)
4122 {
4123         struct trace_enum_map **stop;
4124         struct trace_enum_map **map;
4125         union trace_enum_map_item *map_array;
4126         union trace_enum_map_item *ptr;
4127
4128         stop = start + len;
4129
4130         /*
4131          * The trace_enum_maps contains the map plus a head and tail item,
4132          * where the head holds the module and length of array, and the
4133          * tail holds a pointer to the next list.
4134          */
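        /*
         * Illustrative layout for a module contributing N maps
         * (hence the "len + 2" allocation below):
         *
         *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
         */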
4135         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4136         if (!map_array) {
4137                 pr_warn("Unable to allocate trace enum mapping\n");
4138                 return;
4139         }
4140
4141         mutex_lock(&trace_enum_mutex);
4142
4143         if (!trace_enum_maps)
4144                 trace_enum_maps = map_array;
4145         else {
4146                 ptr = trace_enum_maps;
4147                 for (;;) {
4148                         ptr = trace_enum_jmp_to_tail(ptr);
4149                         if (!ptr->tail.next)
4150                                 break;
4151                         ptr = ptr->tail.next;
4152
4153                 }
4154                 ptr->tail.next = map_array;
4155         }
4156         map_array->head.mod = mod;
4157         map_array->head.length = len;
4158         map_array++;
4159
4160         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4161                 map_array->map = **map;
4162                 map_array++;
4163         }
4164         memset(map_array, 0, sizeof(*map_array));
4165
4166         mutex_unlock(&trace_enum_mutex);
4167 }
4168
4169 static void trace_create_enum_file(struct dentry *d_tracer)
4170 {
4171         trace_create_file("enum_map", 0444, d_tracer,
4172                           NULL, &tracing_enum_map_fops);
4173 }
4174
4175 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4176 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4177 static inline void trace_insert_enum_map_file(struct module *mod,
4178                               struct trace_enum_map **start, int len) { }
4179 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4180
4181 static void trace_insert_enum_map(struct module *mod,
4182                                   struct trace_enum_map **start, int len)
4183 {
4184         struct trace_enum_map **map;
4185
4186         if (len <= 0)
4187                 return;
4188
4189         map = start;
4190
4191         trace_event_enum_update(map, len);
4192
4193         trace_insert_enum_map_file(mod, start, len);
4194 }
4195
4196 static ssize_t
4197 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4198                        size_t cnt, loff_t *ppos)
4199 {
4200         struct trace_array *tr = filp->private_data;
4201         char buf[MAX_TRACER_SIZE+2];
4202         int r;
4203
4204         mutex_lock(&trace_types_lock);
4205         r = sprintf(buf, "%s\n", tr->current_trace->name);
4206         mutex_unlock(&trace_types_lock);
4207
4208         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4209 }
4210
4211 int tracer_init(struct tracer *t, struct trace_array *tr)
4212 {
4213         tracing_reset_online_cpus(&tr->trace_buffer);
4214         return t->init(tr);
4215 }
4216
4217 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4218 {
4219         int cpu;
4220
4221         for_each_tracing_cpu(cpu)
4222                 per_cpu_ptr(buf->data, cpu)->entries = val;
4223 }
4224
4225 #ifdef CONFIG_TRACER_MAX_TRACE
4226 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4227 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4228                                         struct trace_buffer *size_buf, int cpu_id)
4229 {
4230         int cpu, ret = 0;
4231
4232         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4233                 for_each_tracing_cpu(cpu) {
4234                         ret = ring_buffer_resize(trace_buf->buffer,
4235                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4236                         if (ret < 0)
4237                                 break;
4238                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4239                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4240                 }
4241         } else {
4242                 ret = ring_buffer_resize(trace_buf->buffer,
4243                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4244                 if (ret == 0)
4245                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4246                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4247         }
4248
4249         return ret;
4250 }
4251 #endif /* CONFIG_TRACER_MAX_TRACE */
4252
4253 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4254                                         unsigned long size, int cpu)
4255 {
4256         int ret;
4257
4258         /*
4259          * If kernel or user changes the size of the ring buffer
4260          * we use the size that was given, and we can forget about
4261          * expanding it later.
4262          */
4263         ring_buffer_expanded = true;
4264
4265         /* May be called before buffers are initialized */
4266         if (!tr->trace_buffer.buffer)
4267                 return 0;
4268
4269         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4270         if (ret < 0)
4271                 return ret;
4272
4273 #ifdef CONFIG_TRACER_MAX_TRACE
4274         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4275             !tr->current_trace->use_max_tr)
4276                 goto out;
4277
4278         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4279         if (ret < 0) {
4280                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4281                                                      &tr->trace_buffer, cpu);
4282                 if (r < 0) {
4283                         /*
4284                          * AARGH! We are left with different
4285                          * size max buffer!!!!
4286                          * The max buffer is our "snapshot" buffer.
4287                          * When a tracer needs a snapshot (one of the
4288                          * latency tracers), it swaps the max buffer
4289                          * with the saved snapshot. We succeeded in updating
4290                          * the size of the main buffer, but failed to update
4291                          * the size of the max buffer. Then, when we tried
4292                          * to reset the main buffer to its original size, we
4293                          * failed there too. This is very unlikely to
4294                          * happen, but if it does, warn and kill all
4295                          * tracing.
4296                          */
4297                         WARN_ON(1);
4298                         tracing_disabled = 1;
4299                 }
4300                 return ret;
4301         }
4302
4303         if (cpu == RING_BUFFER_ALL_CPUS)
4304                 set_buffer_entries(&tr->max_buffer, size);
4305         else
4306                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4307
4308  out:
4309 #endif /* CONFIG_TRACER_MAX_TRACE */
4310
4311         if (cpu == RING_BUFFER_ALL_CPUS)
4312                 set_buffer_entries(&tr->trace_buffer, size);
4313         else
4314                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4315
4316         return ret;
4317 }
4318
4319 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4320                                           unsigned long size, int cpu_id)
4321 {
4322         int ret = size;
4323
4324         mutex_lock(&trace_types_lock);
4325
4326         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4327                 /* make sure, this cpu is enabled in the mask */
4328                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4329                         ret = -EINVAL;
4330                         goto out;
4331                 }
4332         }
4333
4334         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4335         if (ret < 0)
4336                 ret = -ENOMEM;
4337
4338 out:
4339         mutex_unlock(&trace_types_lock);
4340
4341         return ret;
4342 }
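
/*
 * Illustrative example (not part of the original source): writing to the
 * "buffer_size_kb" file, e.g.
 *
 *	# echo 4096 > buffer_size_kb
 *
 * eventually reaches tracing_resize_ring_buffer() above (with
 * RING_BUFFER_ALL_CPUS when written at the top level rather than in a
 * per_cpu directory).
 */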
4343
4344
4345 /**
4346  * tracing_update_buffers - used by tracing facility to expand ring buffers
4347  *
4348  * To save memory when tracing is never used on a system that has it
4349  * configured in, the ring buffers are set to a minimum size. Once
4350  * a user starts to use the tracing facility, they need to grow
4351  * to their default size.
4352  *
4353  * This function is to be called when a tracer is about to be used.
4354  */
4355 int tracing_update_buffers(void)
4356 {
4357         int ret = 0;
4358
4359         mutex_lock(&trace_types_lock);
4360         if (!ring_buffer_expanded)
4361                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4362                                                 RING_BUFFER_ALL_CPUS);
4363         mutex_unlock(&trace_types_lock);
4364
4365         return ret;
4366 }
4367
4368 struct trace_option_dentry;
4369
4370 static void
4371 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4372
4373 /*
4374  * Used to clear out the tracer before deletion of an instance.
4375  * Must have trace_types_lock held.
4376  */
4377 static void tracing_set_nop(struct trace_array *tr)
4378 {
4379         if (tr->current_trace == &nop_trace)
4380                 return;
4381
4382         tr->current_trace->enabled--;
4383
4384         if (tr->current_trace->reset)
4385                 tr->current_trace->reset(tr);
4386
4387         tr->current_trace = &nop_trace;
4388 }
4389
4390 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4391 {
4392         /* Only enable if the directory has been created already. */
4393         if (!tr->dir)
4394                 return;
4395
4396         create_trace_option_files(tr, t);
4397 }
4398
4399 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4400 {
4401         struct tracer *t;
4402 #ifdef CONFIG_TRACER_MAX_TRACE
4403         bool had_max_tr;
4404 #endif
4405         int ret = 0;
4406
4407         mutex_lock(&trace_types_lock);
4408
4409         if (!ring_buffer_expanded) {
4410                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4411                                                 RING_BUFFER_ALL_CPUS);
4412                 if (ret < 0)
4413                         goto out;
4414                 ret = 0;
4415         }
4416
4417         for (t = trace_types; t; t = t->next) {
4418                 if (strcmp(t->name, buf) == 0)
4419                         break;
4420         }
4421         if (!t) {
4422                 ret = -EINVAL;
4423                 goto out;
4424         }
4425         if (t == tr->current_trace)
4426                 goto out;
4427
4428         /* Some tracers are only allowed for the top level buffer */
4429         if (!trace_ok_for_array(t, tr)) {
4430                 ret = -EINVAL;
4431                 goto out;
4432         }
4433
4434         /* If trace pipe files are being read, we can't change the tracer */
4435         if (tr->current_trace->ref) {
4436                 ret = -EBUSY;
4437                 goto out;
4438         }
4439
4440         trace_branch_disable();
4441
4442         tr->current_trace->enabled--;
4443
4444         if (tr->current_trace->reset)
4445                 tr->current_trace->reset(tr);
4446
4447         /* Current trace needs to be nop_trace before synchronize_sched */
4448         tr->current_trace = &nop_trace;
4449
4450 #ifdef CONFIG_TRACER_MAX_TRACE
4451         had_max_tr = tr->allocated_snapshot;
4452
4453         if (had_max_tr && !t->use_max_tr) {
4454                 /*
4455                  * We need to make sure that the update_max_tr sees that
4456                  * current_trace changed to nop_trace to keep it from
4457                  * swapping the buffers after we resize it.
4458                  * The update_max_tr is called from interrupts disabled
4459                  * The update_max_tr is called with interrupts disabled,
4460                  * so a synchronize_sched() is sufficient.
4461                 synchronize_sched();
4462                 free_snapshot(tr);
4463         }
4464 #endif
4465
4466 #ifdef CONFIG_TRACER_MAX_TRACE
4467         if (t->use_max_tr && !had_max_tr) {
4468                 ret = alloc_snapshot(tr);
4469                 if (ret < 0)
4470                         goto out;
4471         }
4472 #endif
4473
4474         if (t->init) {
4475                 ret = tracer_init(t, tr);
4476                 if (ret)
4477                         goto out;
4478         }
4479
4480         tr->current_trace = t;
4481         tr->current_trace->enabled++;
4482         trace_branch_enable(tr);
4483  out:
4484         mutex_unlock(&trace_types_lock);
4485
4486         return ret;
4487 }
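
/*
 * Illustrative example (not part of the original source):
 *
 *	# echo function_graph > current_tracer
 *
 * lands in tracing_set_tracer(tr, "function_graph") via
 * tracing_set_trace_write() below.
 */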
4488
4489 static ssize_t
4490 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4491                         size_t cnt, loff_t *ppos)
4492 {
4493         struct trace_array *tr = filp->private_data;
4494         char buf[MAX_TRACER_SIZE+1];
4495         int i;
4496         size_t ret;
4497         int err;
4498
4499         ret = cnt;
4500
4501         if (cnt > MAX_TRACER_SIZE)
4502                 cnt = MAX_TRACER_SIZE;
4503
4504         if (copy_from_user(buf, ubuf, cnt))
4505                 return -EFAULT;
4506
4507         buf[cnt] = 0;
4508
4509         /* strip trailing whitespace. */
4510         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4511                 buf[i] = 0;
4512
4513         err = tracing_set_tracer(tr, buf);
4514         if (err)
4515                 return err;
4516
4517         *ppos += ret;
4518
4519         return ret;
4520 }
4521
4522 static ssize_t
4523 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4524                    size_t cnt, loff_t *ppos)
4525 {
4526         char buf[64];
4527         int r;
4528
4529         r = snprintf(buf, sizeof(buf), "%ld\n",
4530                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4531         if (r > sizeof(buf))
4532                 r = sizeof(buf);
4533         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4534 }
4535
4536 static ssize_t
4537 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4538                     size_t cnt, loff_t *ppos)
4539 {
4540         unsigned long val;
4541         int ret;
4542
4543         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4544         if (ret)
4545                 return ret;
4546
4547         *ptr = val * 1000;
4548
4549         return cnt;
4550 }
4551
4552 static ssize_t
4553 tracing_thresh_read(struct file *filp, char __user *ubuf,
4554                     size_t cnt, loff_t *ppos)
4555 {
4556         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4557 }
4558
4559 static ssize_t
4560 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4561                      size_t cnt, loff_t *ppos)
4562 {
4563         struct trace_array *tr = filp->private_data;
4564         int ret;
4565
4566         mutex_lock(&trace_types_lock);
4567         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4568         if (ret < 0)
4569                 goto out;
4570
4571         if (tr->current_trace->update_thresh) {
4572                 ret = tr->current_trace->update_thresh(tr);
4573                 if (ret < 0)
4574                         goto out;
4575         }
4576
4577         ret = cnt;
4578 out:
4579         mutex_unlock(&trace_types_lock);
4580
4581         return ret;
4582 }
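
/*
 * Illustrative example (not part of the original source): setting a
 * 100 usec threshold for the latency tracers,
 *
 *	# echo 100 > tracing_thresh
 *
 * goes through tracing_nsecs_write(), which stores the value in
 * nanoseconds (val * 1000).
 */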
4583
4584 #ifdef CONFIG_TRACER_MAX_TRACE
4585
4586 static ssize_t
4587 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4588                      size_t cnt, loff_t *ppos)
4589 {
4590         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4591 }
4592
4593 static ssize_t
4594 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4595                       size_t cnt, loff_t *ppos)
4596 {
4597         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4598 }
4599
4600 #endif
4601
4602 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4603 {
4604         struct trace_array *tr = inode->i_private;
4605         struct trace_iterator *iter;
4606         int ret = 0;
4607
4608         if (tracing_disabled)
4609                 return -ENODEV;
4610
4611         if (trace_array_get(tr) < 0)
4612                 return -ENODEV;
4613
4614         mutex_lock(&trace_types_lock);
4615
4616         /* create a buffer to store the information to pass to userspace */
4617         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4618         if (!iter) {
4619                 ret = -ENOMEM;
4620                 __trace_array_put(tr);
4621                 goto out;
4622         }
4623
4624         trace_seq_init(&iter->seq);
4625         iter->trace = tr->current_trace;
4626
4627         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4628                 ret = -ENOMEM;
4629                 goto fail;
4630         }
4631
4632         /* trace pipe does not show start of buffer */
4633         cpumask_setall(iter->started);
4634
4635         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4636                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4637
4638         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4639         if (trace_clocks[tr->clock_id].in_ns)
4640                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4641
4642         iter->tr = tr;
4643         iter->trace_buffer = &tr->trace_buffer;
4644         iter->cpu_file = tracing_get_cpu(inode);
4645         mutex_init(&iter->mutex);
4646         filp->private_data = iter;
4647
4648         if (iter->trace->pipe_open)
4649                 iter->trace->pipe_open(iter);
4650
4651         nonseekable_open(inode, filp);
4652
4653         tr->current_trace->ref++;
4654 out:
4655         mutex_unlock(&trace_types_lock);
4656         return ret;
4657
4658 fail:
4659         kfree(iter->trace);
4660         kfree(iter);
4661         __trace_array_put(tr);
4662         mutex_unlock(&trace_types_lock);
4663         return ret;
4664 }
4665
4666 static int tracing_release_pipe(struct inode *inode, struct file *file)
4667 {
4668         struct trace_iterator *iter = file->private_data;
4669         struct trace_array *tr = inode->i_private;
4670
4671         mutex_lock(&trace_types_lock);
4672
4673         tr->current_trace->ref--;
4674
4675         if (iter->trace->pipe_close)
4676                 iter->trace->pipe_close(iter);
4677
4678         mutex_unlock(&trace_types_lock);
4679
4680         free_cpumask_var(iter->started);
4681         mutex_destroy(&iter->mutex);
4682         kfree(iter);
4683
4684         trace_array_put(tr);
4685
4686         return 0;
4687 }
4688
4689 static unsigned int
4690 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4691 {
4692         struct trace_array *tr = iter->tr;
4693
4694         /* Iterators are static, they should be filled or empty */
4695         if (trace_buffer_iter(iter, iter->cpu_file))
4696                 return POLLIN | POLLRDNORM;
4697
4698         if (tr->trace_flags & TRACE_ITER_BLOCK)
4699                 /*
4700                  * Always select as readable when in blocking mode
4701                  */
4702                 return POLLIN | POLLRDNORM;
4703         else
4704                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4705                                              filp, poll_table);
4706 }
4707
4708 static unsigned int
4709 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4710 {
4711         struct trace_iterator *iter = filp->private_data;
4712
4713         return trace_poll(iter, filp, poll_table);
4714 }
4715
4716 /* Must be called with iter->mutex held. */
4717 static int tracing_wait_pipe(struct file *filp)
4718 {
4719         struct trace_iterator *iter = filp->private_data;
4720         int ret;
4721
4722         while (trace_empty(iter)) {
4723
4724                 if ((filp->f_flags & O_NONBLOCK)) {
4725                         return -EAGAIN;
4726                 }
4727
4728                 /*
4729                  * We block until we read something and tracing is disabled.
4730                  * We still block if tracing is disabled, but we have never
4731                  * read anything. This allows a user to cat this file, and
4732                  * then enable tracing. But after we have read something,
4733                  * we give an EOF when tracing is again disabled.
4734                  *
4735                  * iter->pos will be 0 if we haven't read anything.
4736                  */
4737                 if (!tracing_is_on() && iter->pos)
4738                         break;
4739
4740                 mutex_unlock(&iter->mutex);
4741
4742                 ret = wait_on_pipe(iter, false);
4743
4744                 mutex_lock(&iter->mutex);
4745
4746                 if (ret)
4747                         return ret;
4748         }
4749
4750         return 1;
4751 }
4752
4753 /*
4754  * Consumer reader.
4755  */
4756 static ssize_t
4757 tracing_read_pipe(struct file *filp, char __user *ubuf,
4758                   size_t cnt, loff_t *ppos)
4759 {
4760         struct trace_iterator *iter = filp->private_data;
4761         ssize_t sret;
4762
4763         /* return any leftover data */
4764         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4765         if (sret != -EBUSY)
4766                 return sret;
4767
4768         trace_seq_init(&iter->seq);
4769
4770         /*
4771          * Avoid more than one consumer on a single file descriptor.
4772          * This is just a matter of trace output coherency; the ring
4773          * buffer itself is protected.
4774          */
4775         mutex_lock(&iter->mutex);
4776         if (iter->trace->read) {
4777                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4778                 if (sret)
4779                         goto out;
4780         }
4781
4782 waitagain:
4783         sret = tracing_wait_pipe(filp);
4784         if (sret <= 0)
4785                 goto out;
4786
4787         /* stop when tracing is finished */
4788         if (trace_empty(iter)) {
4789                 sret = 0;
4790                 goto out;
4791         }
4792
4793         if (cnt >= PAGE_SIZE)
4794                 cnt = PAGE_SIZE - 1;
4795
4796         /* reset all but tr, trace, and overruns */
4797         memset(&iter->seq, 0,
4798                sizeof(struct trace_iterator) -
4799                offsetof(struct trace_iterator, seq));
4800         cpumask_clear(iter->started);
4801         iter->pos = -1;
4802
4803         trace_event_read_lock();
4804         trace_access_lock(iter->cpu_file);
4805         while (trace_find_next_entry_inc(iter) != NULL) {
4806                 enum print_line_t ret;
4807                 int save_len = iter->seq.seq.len;
4808
4809                 ret = print_trace_line(iter);
4810                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4811                         /* don't print partial lines */
4812                         iter->seq.seq.len = save_len;
4813                         break;
4814                 }
4815                 if (ret != TRACE_TYPE_NO_CONSUME)
4816                         trace_consume(iter);
4817
4818                 if (trace_seq_used(&iter->seq) >= cnt)
4819                         break;
4820
4821                 /*
4822                  * The full flag being set means we hit the trace_seq buffer
4823                  * size and should have left via the partial-line check above.
4824                  * One of the trace_seq_* functions is not being used properly.
4825                  */
4826                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4827                           iter->ent->type);
4828         }
4829         trace_access_unlock(iter->cpu_file);
4830         trace_event_read_unlock();
4831
4832         /* Now copy what we have to the user */
4833         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4834         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4835                 trace_seq_init(&iter->seq);
4836
4837         /*
4838          * If there was nothing to send to user, in spite of consuming trace
4839          * entries, go back to wait for more entries.
4840          */
4841         if (sret == -EBUSY)
4842                 goto waitagain;
4843
4844 out:
4845         mutex_unlock(&iter->mutex);
4846
4847         return sret;
4848 }
4849
4850 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4851                                      unsigned int idx)
4852 {
4853         __free_page(spd->pages[idx]);
4854 }
4855
4856 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4857         .can_merge              = 0,
4858         .confirm                = generic_pipe_buf_confirm,
4859         .release                = generic_pipe_buf_release,
4860         .steal                  = generic_pipe_buf_steal,
4861         .get                    = generic_pipe_buf_get,
4862 };
4863
4864 static size_t
4865 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4866 {
4867         size_t count;
4868         int save_len;
4869         int ret;
4870
4871         /* Seq buffer is page-sized, exactly what we need. */
4872         for (;;) {
4873                 save_len = iter->seq.seq.len;
4874                 ret = print_trace_line(iter);
4875
4876                 if (trace_seq_has_overflowed(&iter->seq)) {
4877                         iter->seq.seq.len = save_len;
4878                         break;
4879                 }
4880
4881                 /*
4882                  * This should not be hit, because a partial line should
4883                  * only be returned if iter->seq overflowed. But check it
4884                  * anyway to be safe.
4885                  */
4886                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4887                         iter->seq.seq.len = save_len;
4888                         break;
4889                 }
4890
4891                 count = trace_seq_used(&iter->seq) - save_len;
4892                 if (rem < count) {
4893                         rem = 0;
4894                         iter->seq.seq.len = save_len;
4895                         break;
4896                 }
4897
4898                 if (ret != TRACE_TYPE_NO_CONSUME)
4899                         trace_consume(iter);
4900                 rem -= count;
4901                 if (!trace_find_next_entry_inc(iter))   {
4902                         rem = 0;
4903                         iter->ent = NULL;
4904                         break;
4905                 }
4906         }
4907
4908         return rem;
4909 }
4910
4911 static ssize_t tracing_splice_read_pipe(struct file *filp,
4912                                         loff_t *ppos,
4913                                         struct pipe_inode_info *pipe,
4914                                         size_t len,
4915                                         unsigned int flags)
4916 {
4917         struct page *pages_def[PIPE_DEF_BUFFERS];
4918         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4919         struct trace_iterator *iter = filp->private_data;
4920         struct splice_pipe_desc spd = {
4921                 .pages          = pages_def,
4922                 .partial        = partial_def,
4923                 .nr_pages       = 0, /* This gets updated below. */
4924                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4925                 .flags          = flags,
4926                 .ops            = &tracing_pipe_buf_ops,
4927                 .spd_release    = tracing_spd_release_pipe,
4928         };
4929         ssize_t ret;
4930         size_t rem;
4931         unsigned int i;
4932
4933         if (splice_grow_spd(pipe, &spd))
4934                 return -ENOMEM;
4935
4936         mutex_lock(&iter->mutex);
4937
4938         if (iter->trace->splice_read) {
4939                 ret = iter->trace->splice_read(iter, filp,
4940                                                ppos, pipe, len, flags);
4941                 if (ret)
4942                         goto out_err;
4943         }
4944
4945         ret = tracing_wait_pipe(filp);
4946         if (ret <= 0)
4947                 goto out_err;
4948
4949         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4950                 ret = -EFAULT;
4951                 goto out_err;
4952         }
4953
4954         trace_event_read_lock();
4955         trace_access_lock(iter->cpu_file);
4956
4957         /* Fill as many pages as possible. */
4958         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4959                 spd.pages[i] = alloc_page(GFP_KERNEL);
4960                 if (!spd.pages[i])
4961                         break;
4962
4963                 rem = tracing_fill_pipe_page(rem, iter);
4964
4965                 /* Copy the data into the page, so we can start over. */
4966                 ret = trace_seq_to_buffer(&iter->seq,
4967                                           page_address(spd.pages[i]),
4968                                           trace_seq_used(&iter->seq));
4969                 if (ret < 0) {
4970                         __free_page(spd.pages[i]);
4971                         break;
4972                 }
4973                 spd.partial[i].offset = 0;
4974                 spd.partial[i].len = trace_seq_used(&iter->seq);
4975
4976                 trace_seq_init(&iter->seq);
4977         }
4978
4979         trace_access_unlock(iter->cpu_file);
4980         trace_event_read_unlock();
4981         mutex_unlock(&iter->mutex);
4982
4983         spd.nr_pages = i;
4984
4985         if (i)
4986                 ret = splice_to_pipe(pipe, &spd);
4987         else
4988                 ret = 0;
4989 out:
4990         splice_shrink_spd(&spd);
4991         return ret;
4992
4993 out_err:
4994         mutex_unlock(&iter->mutex);
4995         goto out;
4996 }
4997
4998 static ssize_t
4999 tracing_entries_read(struct file *filp, char __user *ubuf,
5000                      size_t cnt, loff_t *ppos)
5001 {
5002         struct inode *inode = file_inode(filp);
5003         struct trace_array *tr = inode->i_private;
5004         int cpu = tracing_get_cpu(inode);
5005         char buf[64];
5006         int r = 0;
5007         ssize_t ret;
5008
5009         mutex_lock(&trace_types_lock);
5010
5011         if (cpu == RING_BUFFER_ALL_CPUS) {
5012                 int cpu, buf_size_same;
5013                 unsigned long size;
5014
5015                 size = 0;
5016                 buf_size_same = 1;
5017                 /* check if all cpu sizes are same */
5018                 for_each_tracing_cpu(cpu) {
5019                         /* fill in the size from first enabled cpu */
5020                         if (size == 0)
5021                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5022                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5023                                 buf_size_same = 0;
5024                                 break;
5025                         }
5026                 }
5027
5028                 if (buf_size_same) {
5029                         if (!ring_buffer_expanded)
5030                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5031                                             size >> 10,
5032                                             trace_buf_size >> 10);
5033                         else
5034                                 r = sprintf(buf, "%lu\n", size >> 10);
5035                 } else
5036                         r = sprintf(buf, "X\n");
5037         } else
5038                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5039
5040         mutex_unlock(&trace_types_lock);
5041
5042         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5043         return ret;
5044 }
5045
5046 static ssize_t
5047 tracing_entries_write(struct file *filp, const char __user *ubuf,
5048                       size_t cnt, loff_t *ppos)
5049 {
5050         struct inode *inode = file_inode(filp);
5051         struct trace_array *tr = inode->i_private;
5052         unsigned long val;
5053         int ret;
5054
5055         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5056         if (ret)
5057                 return ret;
5058
5059         /* must have at least 1 entry */
5060         if (!val)
5061                 return -EINVAL;
5062
5063         /* value is in KB */
5064         val <<= 10;
5065         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5066         if (ret < 0)
5067                 return ret;
5068
5069         *ppos += cnt;
5070
5071         return cnt;
5072 }
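
/*
 * Illustrative note (not part of the original source): the value written to
 * buffer_size_kb above is taken in KiB and shifted into bytes before the
 * resize, so a write of "1024" becomes 1024 << 10 = 1048576 bytes for the
 * selected CPU buffer (or for every CPU via the top-level file). Paths below
 * are relative to the tracefs mount point, commonly /sys/kernel/tracing or
 * /sys/kernel/debug/tracing:
 *
 *   # echo 1024 > buffer_size_kb                  (1 MiB per CPU)
 *   # echo 1024 > per_cpu/cpu0/buffer_size_kb     (resize only CPU 0)
 */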
5073
5074 static ssize_t
5075 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5076                                 size_t cnt, loff_t *ppos)
5077 {
5078         struct trace_array *tr = filp->private_data;
5079         char buf[64];
5080         int r, cpu;
5081         unsigned long size = 0, expanded_size = 0;
5082
5083         mutex_lock(&trace_types_lock);
5084         for_each_tracing_cpu(cpu) {
5085                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5086                 if (!ring_buffer_expanded)
5087                         expanded_size += trace_buf_size >> 10;
5088         }
5089         if (ring_buffer_expanded)
5090                 r = sprintf(buf, "%lu\n", size);
5091         else
5092                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5093         mutex_unlock(&trace_types_lock);
5094
5095         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5096 }
5097
5098 static ssize_t
5099 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5100                           size_t cnt, loff_t *ppos)
5101 {
5102         /*
5103          * There is no need to read what the user has written; this function
5104          * only exists so that using "echo" on this file does not error out.
5105          */
5106
5107         *ppos += cnt;
5108
5109         return cnt;
5110 }
5111
5112 static int
5113 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5114 {
5115         struct trace_array *tr = inode->i_private;
5116
5117         /* disable tracing? */
5118         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5119                 tracer_tracing_off(tr);
5120         /* resize the ring buffer to 0 */
5121         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5122
5123         trace_array_put(tr);
5124
5125         return 0;
5126 }
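
/*
 * Illustrative note (not part of the original source): free_buffer is driven
 * by closing the file; the write handler above only keeps "echo" from
 * reporting an error. On release the ring buffer is resized to zero and, if
 * the TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off first:
 *
 *   # echo > free_buffer          (the close shrinks the ring buffer to 0)
 */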
5127
5128 static ssize_t
5129 tracing_mark_write(struct file *filp, const char __user *ubuf,
5130                                         size_t cnt, loff_t *fpos)
5131 {
5132         unsigned long addr = (unsigned long)ubuf;
5133         struct trace_array *tr = filp->private_data;
5134         struct ring_buffer_event *event;
5135         struct ring_buffer *buffer;
5136         struct print_entry *entry;
5137         unsigned long irq_flags;
5138         struct page *pages[2];
5139         void *map_page[2];
5140         int nr_pages = 1;
5141         ssize_t written;
5142         int offset;
5143         int size;
5144         int len;
5145         int ret;
5146         int i;
5147
5148         if (tracing_disabled)
5149                 return -EINVAL;
5150
5151         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5152                 return -EINVAL;
5153
5154         if (cnt > TRACE_BUF_SIZE)
5155                 cnt = TRACE_BUF_SIZE;
5156
5157         /*
5158          * Userspace is injecting traces into the kernel trace buffer.
5159          * We want to be as non-intrusive as possible.
5160          * To do so, we do not want to allocate any special buffers
5161          * or take any locks, but instead write the userspace data
5162          * straight into the ring buffer.
5163          *
5164          * First we need to pin the userspace buffer into memory. It most
5165          * likely already is, because userspace just referenced it, but
5166          * there is no guarantee of that. By using get_user_pages_fast()
5167          * and kmap_atomic()/kunmap_atomic() we can access the pages
5168          * directly and write the data straight into the ring buffer.
5169          * (An illustrative userspace usage sketch follows this function.)
5170          */
5171         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5172
5173         /* check if we cross pages */
5174         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5175                 nr_pages = 2;
5176
5177         offset = addr & (PAGE_SIZE - 1);
5178         addr &= PAGE_MASK;
5179
5180         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5181         if (ret < nr_pages) {
5182                 while (--ret >= 0)
5183                         put_page(pages[ret]);
5184                 written = -EFAULT;
5185                 goto out;
5186         }
5187
5188         for (i = 0; i < nr_pages; i++)
5189                 map_page[i] = kmap_atomic(pages[i]);
5190
5191         local_save_flags(irq_flags);
5192         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5193         buffer = tr->trace_buffer.buffer;
5194         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5195                                           irq_flags, preempt_count());
5196         if (!event) {
5197                 /* Ring buffer disabled, return as if not open for write */
5198                 written = -EBADF;
5199                 goto out_unlock;
5200         }
5201
5202         entry = ring_buffer_event_data(event);
5203         entry->ip = _THIS_IP_;
5204
5205         if (nr_pages == 2) {
5206                 len = PAGE_SIZE - offset;
5207                 memcpy(&entry->buf, map_page[0] + offset, len);
5208                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5209         } else
5210                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5211
5212         if (entry->buf[cnt - 1] != '\n') {
5213                 entry->buf[cnt] = '\n';
5214                 entry->buf[cnt + 1] = '\0';
5215         } else
5216                 entry->buf[cnt] = '\0';
5217
5218         __buffer_unlock_commit(buffer, event);
5219
5220         written = cnt;
5221
5222         *fpos += written;
5223
5224  out_unlock:
5225         for (i = nr_pages - 1; i >= 0; i--) {
5226                 kunmap_atomic(map_page[i]);
5227                 put_page(pages[i]);
5228         }
5229  out:
5230         return written;
5231 }
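
/*
 * Illustrative userspace sketch (not part of the original source), assuming
 * tracefs is mounted at /sys/kernel/tracing; this is how the trace_marker
 * file handled above is typically driven:
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *   if (fd >= 0)
 *           write(fd, "hello from userspace", 20);
 *
 * The message is stored as a TRACE_PRINT entry, and a trailing newline is
 * appended by the kernel side when the buffer does not already end in one.
 */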
5232
5233 static int tracing_clock_show(struct seq_file *m, void *v)
5234 {
5235         struct trace_array *tr = m->private;
5236         int i;
5237
5238         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5239                 seq_printf(m,
5240                         "%s%s%s%s", i ? " " : "",
5241                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5242                         i == tr->clock_id ? "]" : "");
5243         seq_putc(m, '\n');
5244
5245         return 0;
5246 }
5247
5248 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5249 {
5250         int i;
5251
5252         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5253                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5254                         break;
5255         }
5256         if (i == ARRAY_SIZE(trace_clocks))
5257                 return -EINVAL;
5258
5259         mutex_lock(&trace_types_lock);
5260
5261         tr->clock_id = i;
5262
5263         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5264
5265         /*
5266          * New clock may not be consistent with the previous clock.
5267          * Reset the buffer so that it doesn't have incomparable timestamps.
5268          */
5269         tracing_reset_online_cpus(&tr->trace_buffer);
5270
5271 #ifdef CONFIG_TRACER_MAX_TRACE
5272         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5273                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5274         tracing_reset_online_cpus(&tr->max_buffer);
5275 #endif
5276
5277         mutex_unlock(&trace_types_lock);
5278
5279         return 0;
5280 }
5281
5282 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5283                                    size_t cnt, loff_t *fpos)
5284 {
5285         struct seq_file *m = filp->private_data;
5286         struct trace_array *tr = m->private;
5287         char buf[64];
5288         const char *clockstr;
5289         int ret;
5290
5291         if (cnt >= sizeof(buf))
5292                 return -EINVAL;
5293
5294         if (copy_from_user(buf, ubuf, cnt))
5295                 return -EFAULT;
5296
5297         buf[cnt] = 0;
5298
5299         clockstr = strstrip(buf);
5300
5301         ret = tracing_set_clock(tr, clockstr);
5302         if (ret)
5303                 return ret;
5304
5305         *fpos += cnt;
5306
5307         return cnt;
5308 }
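
/*
 * Illustrative usage (not part of the original source): reading trace_clock
 * lists the available clocks with the current one in brackets, and writing a
 * listed name switches to it and resets the buffers, since timestamps from
 * different clocks are not comparable. The exact names depend on the
 * trace_clocks[] table:
 *
 *   # cat trace_clock
 *   [local] global counter ...
 *   # echo global > trace_clock
 */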
5309
5310 static int tracing_clock_open(struct inode *inode, struct file *file)
5311 {
5312         struct trace_array *tr = inode->i_private;
5313         int ret;
5314
5315         if (tracing_disabled)
5316                 return -ENODEV;
5317
5318         if (trace_array_get(tr))
5319                 return -ENODEV;
5320
5321         ret = single_open(file, tracing_clock_show, inode->i_private);
5322         if (ret < 0)
5323                 trace_array_put(tr);
5324
5325         return ret;
5326 }
5327
5328 struct ftrace_buffer_info {
5329         struct trace_iterator   iter;
5330         void                    *spare;
5331         unsigned int            read;
5332 };
5333
5334 #ifdef CONFIG_TRACER_SNAPSHOT
5335 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5336 {
5337         struct trace_array *tr = inode->i_private;
5338         struct trace_iterator *iter;
5339         struct seq_file *m;
5340         int ret = 0;
5341
5342         if (trace_array_get(tr) < 0)
5343                 return -ENODEV;
5344
5345         if (file->f_mode & FMODE_READ) {
5346                 iter = __tracing_open(inode, file, true);
5347                 if (IS_ERR(iter))
5348                         ret = PTR_ERR(iter);
5349         } else {
5350                 /* Writes still need the seq_file to hold the private data */
5351                 ret = -ENOMEM;
5352                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5353                 if (!m)
5354                         goto out;
5355                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5356                 if (!iter) {
5357                         kfree(m);
5358                         goto out;
5359                 }
5360                 ret = 0;
5361
5362                 iter->tr = tr;
5363                 iter->trace_buffer = &tr->max_buffer;
5364                 iter->cpu_file = tracing_get_cpu(inode);
5365                 m->private = iter;
5366                 file->private_data = m;
5367         }
5368 out:
5369         if (ret < 0)
5370                 trace_array_put(tr);
5371
5372         return ret;
5373 }
5374
5375 static ssize_t
5376 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5377                        loff_t *ppos)
5378 {
5379         struct seq_file *m = filp->private_data;
5380         struct trace_iterator *iter = m->private;
5381         struct trace_array *tr = iter->tr;
5382         unsigned long val;
5383         int ret;
5384
5385         ret = tracing_update_buffers();
5386         if (ret < 0)
5387                 return ret;
5388
5389         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5390         if (ret)
5391                 return ret;
5392
5393         mutex_lock(&trace_types_lock);
5394
5395         if (tr->current_trace->use_max_tr) {
5396                 ret = -EBUSY;
5397                 goto out;
5398         }
5399
5400         switch (val) {
5401         case 0:
5402                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5403                         ret = -EINVAL;
5404                         break;
5405                 }
5406                 if (tr->allocated_snapshot)
5407                         free_snapshot(tr);
5408                 break;
5409         case 1:
5410 /* Only allow per-cpu swap if the ring buffer supports it */
5411 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5412                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5413                         ret = -EINVAL;
5414                         break;
5415                 }
5416 #endif
5417                 if (!tr->allocated_snapshot) {
5418                         ret = alloc_snapshot(tr);
5419                         if (ret < 0)
5420                                 break;
5421                 }
5422                 local_irq_disable();
5423                 /* Now, we're going to swap */
5424                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5425                         update_max_tr(tr, current, smp_processor_id());
5426                 else
5427                         update_max_tr_single(tr, current, iter->cpu_file);
5428                 local_irq_enable();
5429                 break;
5430         default:
5431                 if (tr->allocated_snapshot) {
5432                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5433                                 tracing_reset_online_cpus(&tr->max_buffer);
5434                         else
5435                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5436                 }
5437                 break;
5438         }
5439
5440         if (ret >= 0) {
5441                 *ppos += cnt;
5442                 ret = cnt;
5443         }
5444 out:
5445         mutex_unlock(&trace_types_lock);
5446         return ret;
5447 }
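
/*
 * Illustrative usage (not part of the original source), matching the switch
 * statement above: "1" allocates the max buffer if needed and swaps it with
 * the live buffer, "0" frees it (only for the all-CPU file), and any other
 * value simply clears the snapshot contents:
 *
 *   # echo 1 > snapshot     (take a snapshot of the current trace)
 *   # cat snapshot          (read the snapshot back)
 *   # echo 0 > snapshot     (free the snapshot buffer)
 */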
5448
5449 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5450 {
5451         struct seq_file *m = file->private_data;
5452         int ret;
5453
5454         ret = tracing_release(inode, file);
5455
5456         if (file->f_mode & FMODE_READ)
5457                 return ret;
5458
5459         /* If write only, the seq_file is just a stub */
5460         if (m)
5461                 kfree(m->private);
5462         kfree(m);
5463
5464         return 0;
5465 }
5466
5467 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5468 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5469                                     size_t count, loff_t *ppos);
5470 static int tracing_buffers_release(struct inode *inode, struct file *file);
5471 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5472                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5473
5474 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5475 {
5476         struct ftrace_buffer_info *info;
5477         int ret;
5478
5479         ret = tracing_buffers_open(inode, filp);
5480         if (ret < 0)
5481                 return ret;
5482
5483         info = filp->private_data;
5484
5485         if (info->iter.trace->use_max_tr) {
5486                 tracing_buffers_release(inode, filp);
5487                 return -EBUSY;
5488         }
5489
5490         info->iter.snapshot = true;
5491         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5492
5493         return ret;
5494 }
5495
5496 #endif /* CONFIG_TRACER_SNAPSHOT */
5497
5498
5499 static const struct file_operations tracing_thresh_fops = {
5500         .open           = tracing_open_generic,
5501         .read           = tracing_thresh_read,
5502         .write          = tracing_thresh_write,
5503         .llseek         = generic_file_llseek,
5504 };
5505
5506 #ifdef CONFIG_TRACER_MAX_TRACE
5507 static const struct file_operations tracing_max_lat_fops = {
5508         .open           = tracing_open_generic,
5509         .read           = tracing_max_lat_read,
5510         .write          = tracing_max_lat_write,
5511         .llseek         = generic_file_llseek,
5512 };
5513 #endif
5514
5515 static const struct file_operations set_tracer_fops = {
5516         .open           = tracing_open_generic,
5517         .read           = tracing_set_trace_read,
5518         .write          = tracing_set_trace_write,
5519         .llseek         = generic_file_llseek,
5520 };
5521
5522 static const struct file_operations tracing_pipe_fops = {
5523         .open           = tracing_open_pipe,
5524         .poll           = tracing_poll_pipe,
5525         .read           = tracing_read_pipe,
5526         .splice_read    = tracing_splice_read_pipe,
5527         .release        = tracing_release_pipe,
5528         .llseek         = no_llseek,
5529 };
5530
5531 static const struct file_operations tracing_entries_fops = {
5532         .open           = tracing_open_generic_tr,
5533         .read           = tracing_entries_read,
5534         .write          = tracing_entries_write,
5535         .llseek         = generic_file_llseek,
5536         .release        = tracing_release_generic_tr,
5537 };
5538
5539 static const struct file_operations tracing_total_entries_fops = {
5540         .open           = tracing_open_generic_tr,
5541         .read           = tracing_total_entries_read,
5542         .llseek         = generic_file_llseek,
5543         .release        = tracing_release_generic_tr,
5544 };
5545
5546 static const struct file_operations tracing_free_buffer_fops = {
5547         .open           = tracing_open_generic_tr,
5548         .write          = tracing_free_buffer_write,
5549         .release        = tracing_free_buffer_release,
5550 };
5551
5552 static const struct file_operations tracing_mark_fops = {
5553         .open           = tracing_open_generic_tr,
5554         .write          = tracing_mark_write,
5555         .llseek         = generic_file_llseek,
5556         .release        = tracing_release_generic_tr,
5557 };
5558
5559 static const struct file_operations trace_clock_fops = {
5560         .open           = tracing_clock_open,
5561         .read           = seq_read,
5562         .llseek         = seq_lseek,
5563         .release        = tracing_single_release_tr,
5564         .write          = tracing_clock_write,
5565 };
5566
5567 #ifdef CONFIG_TRACER_SNAPSHOT
5568 static const struct file_operations snapshot_fops = {
5569         .open           = tracing_snapshot_open,
5570         .read           = seq_read,
5571         .write          = tracing_snapshot_write,
5572         .llseek         = tracing_lseek,
5573         .release        = tracing_snapshot_release,
5574 };
5575
5576 static const struct file_operations snapshot_raw_fops = {
5577         .open           = snapshot_raw_open,
5578         .read           = tracing_buffers_read,
5579         .release        = tracing_buffers_release,
5580         .splice_read    = tracing_buffers_splice_read,
5581         .llseek         = no_llseek,
5582 };
5583
5584 #endif /* CONFIG_TRACER_SNAPSHOT */
5585
5586 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5587 {
5588         struct trace_array *tr = inode->i_private;
5589         struct ftrace_buffer_info *info;
5590         int ret;
5591
5592         if (tracing_disabled)
5593                 return -ENODEV;
5594
5595         if (trace_array_get(tr) < 0)
5596                 return -ENODEV;
5597
5598         info = kzalloc(sizeof(*info), GFP_KERNEL);
5599         if (!info) {
5600                 trace_array_put(tr);
5601                 return -ENOMEM;
5602         }
5603
5604         mutex_lock(&trace_types_lock);
5605
5606         info->iter.tr           = tr;
5607         info->iter.cpu_file     = tracing_get_cpu(inode);
5608         info->iter.trace        = tr->current_trace;
5609         info->iter.trace_buffer = &tr->trace_buffer;
5610         info->spare             = NULL;
5611         /* Force reading ring buffer for first read */
5612         info->read              = (unsigned int)-1;
5613
5614         filp->private_data = info;
5615
5616         tr->current_trace->ref++;
5617
5618         mutex_unlock(&trace_types_lock);
5619
5620         ret = nonseekable_open(inode, filp);
5621         if (ret < 0)
5622                 trace_array_put(tr);
5623
5624         return ret;
5625 }
5626
5627 static unsigned int
5628 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5629 {
5630         struct ftrace_buffer_info *info = filp->private_data;
5631         struct trace_iterator *iter = &info->iter;
5632
5633         return trace_poll(iter, filp, poll_table);
5634 }
5635
5636 static ssize_t
5637 tracing_buffers_read(struct file *filp, char __user *ubuf,
5638                      size_t count, loff_t *ppos)
5639 {
5640         struct ftrace_buffer_info *info = filp->private_data;
5641         struct trace_iterator *iter = &info->iter;
5642         ssize_t ret;
5643         ssize_t size;
5644
5645         if (!count)
5646                 return 0;
5647
5648 #ifdef CONFIG_TRACER_MAX_TRACE
5649         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5650                 return -EBUSY;
5651 #endif
5652
5653         if (!info->spare)
5654                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5655                                                           iter->cpu_file);
5656         if (!info->spare)
5657                 return -ENOMEM;
5658
5659         /* Do we have previous read data to read? */
5660         if (info->read < PAGE_SIZE)
5661                 goto read;
5662
5663  again:
5664         trace_access_lock(iter->cpu_file);
5665         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5666                                     &info->spare,
5667                                     count,
5668                                     iter->cpu_file, 0);
5669         trace_access_unlock(iter->cpu_file);
5670
5671         if (ret < 0) {
5672                 if (trace_empty(iter)) {
5673                         if ((filp->f_flags & O_NONBLOCK))
5674                                 return -EAGAIN;
5675
5676                         ret = wait_on_pipe(iter, false);
5677                         if (ret)
5678                                 return ret;
5679
5680                         goto again;
5681                 }
5682                 return 0;
5683         }
5684
5685         info->read = 0;
5686  read:
5687         size = PAGE_SIZE - info->read;
5688         if (size > count)
5689                 size = count;
5690
5691         ret = copy_to_user(ubuf, info->spare + info->read, size);
5692         if (ret == size)
5693                 return -EFAULT;
5694
5695         size -= ret;
5696
5697         *ppos += size;
5698         info->read += size;
5699
5700         return size;
5701 }
5702
5703 static int tracing_buffers_release(struct inode *inode, struct file *file)
5704 {
5705         struct ftrace_buffer_info *info = file->private_data;
5706         struct trace_iterator *iter = &info->iter;
5707
5708         mutex_lock(&trace_types_lock);
5709
5710         iter->tr->current_trace->ref--;
5711
5712         __trace_array_put(iter->tr);
5713
5714         if (info->spare)
5715                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5716         kfree(info);
5717
5718         mutex_unlock(&trace_types_lock);
5719
5720         return 0;
5721 }
5722
5723 struct buffer_ref {
5724         struct ring_buffer      *buffer;
5725         void                    *page;
5726         int                     ref;
5727 };
5728
5729 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5730                                     struct pipe_buffer *buf)
5731 {
5732         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5733
5734         if (--ref->ref)
5735                 return;
5736
5737         ring_buffer_free_read_page(ref->buffer, ref->page);
5738         kfree(ref);
5739         buf->private = 0;
5740 }
5741
5742 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5743                                 struct pipe_buffer *buf)
5744 {
5745         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5746
5747         ref->ref++;
5748 }
5749
5750 /* Pipe buffer operations for a buffer. */
5751 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5752         .can_merge              = 0,
5753         .confirm                = generic_pipe_buf_confirm,
5754         .release                = buffer_pipe_buf_release,
5755         .steal                  = generic_pipe_buf_steal,
5756         .get                    = buffer_pipe_buf_get,
5757 };
5758
5759 /*
5760  * Callback from splice_to_pipe(); releases any pages left in the spd
5761  * in case we errored out while filling the pipe.
5762  */
5763 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5764 {
5765         struct buffer_ref *ref =
5766                 (struct buffer_ref *)spd->partial[i].private;
5767
5768         if (--ref->ref)
5769                 return;
5770
5771         ring_buffer_free_read_page(ref->buffer, ref->page);
5772         kfree(ref);
5773         spd->partial[i].private = 0;
5774 }
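
/*
 * Illustrative note (not part of the original source): a buffer_ref starts
 * with ref == 1 when tracing_buffers_splice_read() below attaches a ring
 * buffer page to the pipe. buffer_pipe_buf_get() bumps the count when the
 * pipe buffer gains another reference (e.g. via tee), while
 * buffer_pipe_buf_release() and buffer_spd_release() drop it; the final put
 * returns the page to the ring buffer and frees the ref.
 */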
5775
5776 static ssize_t
5777 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5778                             struct pipe_inode_info *pipe, size_t len,
5779                             unsigned int flags)
5780 {
5781         struct ftrace_buffer_info *info = file->private_data;
5782         struct trace_iterator *iter = &info->iter;
5783         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5784         struct page *pages_def[PIPE_DEF_BUFFERS];
5785         struct splice_pipe_desc spd = {
5786                 .pages          = pages_def,
5787                 .partial        = partial_def,
5788                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5789                 .flags          = flags,
5790                 .ops            = &buffer_pipe_buf_ops,
5791                 .spd_release    = buffer_spd_release,
5792         };
5793         struct buffer_ref *ref;
5794         int entries, size, i;
5795         ssize_t ret = 0;
5796
5797 #ifdef CONFIG_TRACER_MAX_TRACE
5798         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5799                 return -EBUSY;
5800 #endif
5801
5802         if (*ppos & (PAGE_SIZE - 1))
5803                 return -EINVAL;
5804
5805         if (len & (PAGE_SIZE - 1)) {
5806                 if (len < PAGE_SIZE)
5807                         return -EINVAL;
5808                 len &= PAGE_MASK;
5809         }
5810
5811         if (splice_grow_spd(pipe, &spd))
5812                 return -ENOMEM;
5813
5814  again:
5815         trace_access_lock(iter->cpu_file);
5816         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5817
5818         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5819                 struct page *page;
5820                 int r;
5821
5822                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5823                 if (!ref) {
5824                         ret = -ENOMEM;
5825                         break;
5826                 }
5827
5828                 ref->ref = 1;
5829                 ref->buffer = iter->trace_buffer->buffer;
5830                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5831                 if (!ref->page) {
5832                         ret = -ENOMEM;
5833                         kfree(ref);
5834                         break;
5835                 }
5836
5837                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5838                                           len, iter->cpu_file, 1);
5839                 if (r < 0) {
5840                         ring_buffer_free_read_page(ref->buffer, ref->page);
5841                         kfree(ref);
5842                         break;
5843                 }
5844
5845                 /*
5846                  * Zero out any leftover data; this page is going
5847                  * to user land.
5848                  */
5849                 size = ring_buffer_page_len(ref->page);
5850                 if (size < PAGE_SIZE)
5851                         memset(ref->page + size, 0, PAGE_SIZE - size);
5852
5853                 page = virt_to_page(ref->page);
5854
5855                 spd.pages[i] = page;
5856                 spd.partial[i].len = PAGE_SIZE;
5857                 spd.partial[i].offset = 0;
5858                 spd.partial[i].private = (unsigned long)ref;
5859                 spd.nr_pages++;
5860                 *ppos += PAGE_SIZE;
5861
5862                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5863         }
5864
5865         trace_access_unlock(iter->cpu_file);
5866         spd.nr_pages = i;
5867
5868         /* did we read anything? */
5869         if (!spd.nr_pages) {
5870                 if (ret)
5871                         goto out;
5872
5873                 ret = -EAGAIN;
5874                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5875                         goto out;
5876                 ret = wait_on_pipe(iter, true);
5877                 if (ret)
5878                         goto out;
5879
5880                 goto again;
5881         }
5882
5883         ret = splice_to_pipe(pipe, &spd);
5884 out:
5885         splice_shrink_spd(&spd);
5886         return ret;
5887 }
5888
5889 static const struct file_operations tracing_buffers_fops = {
5890         .open           = tracing_buffers_open,
5891         .read           = tracing_buffers_read,
5892         .poll           = tracing_buffers_poll,
5893         .release        = tracing_buffers_release,
5894         .splice_read    = tracing_buffers_splice_read,
5895         .llseek         = no_llseek,
5896 };
5897
5898 static ssize_t
5899 tracing_stats_read(struct file *filp, char __user *ubuf,
5900                    size_t count, loff_t *ppos)
5901 {
5902         struct inode *inode = file_inode(filp);
5903         struct trace_array *tr = inode->i_private;
5904         struct trace_buffer *trace_buf = &tr->trace_buffer;
5905         int cpu = tracing_get_cpu(inode);
5906         struct trace_seq *s;
5907         unsigned long cnt;
5908         unsigned long long t;
5909         unsigned long usec_rem;
5910
5911         s = kmalloc(sizeof(*s), GFP_KERNEL);
5912         if (!s)
5913                 return -ENOMEM;
5914
5915         trace_seq_init(s);
5916
5917         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5918         trace_seq_printf(s, "entries: %ld\n", cnt);
5919
5920         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5921         trace_seq_printf(s, "overrun: %ld\n", cnt);
5922
5923         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5924         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5925
5926         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5927         trace_seq_printf(s, "bytes: %ld\n", cnt);
5928
5929         if (trace_clocks[tr->clock_id].in_ns) {
5930                 /* local or global for trace_clock */
5931                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5932                 usec_rem = do_div(t, USEC_PER_SEC);
5933                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5934                                                                 t, usec_rem);
5935
5936                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5937                 usec_rem = do_div(t, USEC_PER_SEC);
5938                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5939         } else {
5940                 /* counter or tsc mode for trace_clock */
5941                 trace_seq_printf(s, "oldest event ts: %llu\n",
5942                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5943
5944                 trace_seq_printf(s, "now ts: %llu\n",
5945                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5946         }
5947
5948         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5949         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5950
5951         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5952         trace_seq_printf(s, "read events: %ld\n", cnt);
5953
5954         count = simple_read_from_buffer(ubuf, count, ppos,
5955                                         s->buffer, trace_seq_used(s));
5956
5957         kfree(s);
5958
5959         return count;
5960 }
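
/*
 * Illustrative output (not part of the original source): the per_cpu/cpuN/stats
 * file built above looks roughly like the following (numbers are made up, and
 * the two "ts" lines become plain counter values when the selected trace
 * clock is not in nanoseconds):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts: 12345.678901
 *   now ts: 12346.000000
 *   dropped events: 0
 *   read events: 1024
 */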
5961
5962 static const struct file_operations tracing_stats_fops = {
5963         .open           = tracing_open_generic_tr,
5964         .read           = tracing_stats_read,
5965         .llseek         = generic_file_llseek,
5966         .release        = tracing_release_generic_tr,
5967 };
5968
5969 #ifdef CONFIG_DYNAMIC_FTRACE
5970
5971 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5972 {
5973         return 0;
5974 }
5975
5976 static ssize_t
5977 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5978                   size_t cnt, loff_t *ppos)
5979 {
5980         static char ftrace_dyn_info_buffer[1024];
5981         static DEFINE_MUTEX(dyn_info_mutex);
5982         unsigned long *p = filp->private_data;
5983         char *buf = ftrace_dyn_info_buffer;
5984         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5985         int r;
5986
5987         mutex_lock(&dyn_info_mutex);
5988         r = sprintf(buf, "%ld ", *p);
5989
5990         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5991         buf[r++] = '\n';
5992
5993         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5994
5995         mutex_unlock(&dyn_info_mutex);
5996
5997         return r;
5998 }
5999
6000 static const struct file_operations tracing_dyn_info_fops = {
6001         .open           = tracing_open_generic,
6002         .read           = tracing_read_dyn_info,
6003         .llseek         = generic_file_llseek,
6004 };
6005 #endif /* CONFIG_DYNAMIC_FTRACE */
6006
6007 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6008 static void
6009 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6010 {
6011         tracing_snapshot();
6012 }
6013
6014 static void
6015 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6016 {
6017         unsigned long *count = (long *)data;
6018
6019         if (!*count)
6020                 return;
6021
6022         if (*count != -1)
6023                 (*count)--;
6024
6025         tracing_snapshot();
6026 }
6027
6028 static int
6029 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6030                       struct ftrace_probe_ops *ops, void *data)
6031 {
6032         long count = (long)data;
6033
6034         seq_printf(m, "%ps:", (void *)ip);
6035
6036         seq_puts(m, "snapshot");
6037
6038         if (count == -1)
6039                 seq_puts(m, ":unlimited\n");
6040         else
6041                 seq_printf(m, ":count=%ld\n", count);
6042
6043         return 0;
6044 }
6045
6046 static struct ftrace_probe_ops snapshot_probe_ops = {
6047         .func                   = ftrace_snapshot,
6048         .print                  = ftrace_snapshot_print,
6049 };
6050
6051 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6052         .func                   = ftrace_count_snapshot,
6053         .print                  = ftrace_snapshot_print,
6054 };
6055
6056 static int
6057 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6058                                char *glob, char *cmd, char *param, int enable)
6059 {
6060         struct ftrace_probe_ops *ops;
6061         void *count = (void *)-1;
6062         char *number;
6063         int ret;
6064
6065         /* hash funcs only work with set_ftrace_filter */
6066         if (!enable)
6067                 return -EINVAL;
6068
6069         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6070
6071         if (glob[0] == '!') {
6072                 unregister_ftrace_function_probe_func(glob+1, ops);
6073                 return 0;
6074         }
6075
6076         if (!param)
6077                 goto out_reg;
6078
6079         number = strsep(&param, ":");
6080
6081         if (!strlen(number))
6082                 goto out_reg;
6083
6084         /*
6085          * We use the callback data field (which is a pointer)
6086          * as our counter.
6087          */
6088         ret = kstrtoul(number, 0, (unsigned long *)&count);
6089         if (ret)
6090                 return ret;
6091
6092  out_reg:
6093         ret = register_ftrace_function_probe(glob, ops, count);
6094
6095         if (ret >= 0)
6096                 alloc_snapshot(&global_trace);
6097
6098         return ret < 0 ? ret : 0;
6099 }
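
/*
 * Illustrative usage (not part of the original source): the "snapshot"
 * command handled above is written to set_ftrace_filter as
 * <function>:snapshot[:count], and a '!' prefix removes the probe; the
 * function name here is only an example:
 *
 *   # echo 'schedule:snapshot:1' > set_ftrace_filter
 *   # echo '!schedule:snapshot' > set_ftrace_filter
 */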
6100
6101 static struct ftrace_func_command ftrace_snapshot_cmd = {
6102         .name                   = "snapshot",
6103         .func                   = ftrace_trace_snapshot_callback,
6104 };
6105
6106 static __init int register_snapshot_cmd(void)
6107 {
6108         return register_ftrace_command(&ftrace_snapshot_cmd);
6109 }
6110 #else
6111 static inline __init int register_snapshot_cmd(void) { return 0; }
6112 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6113
6114 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6115 {
6116         if (WARN_ON(!tr->dir))
6117                 return ERR_PTR(-ENODEV);
6118
6119         /* Top directory uses NULL as the parent */
6120         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6121                 return NULL;
6122
6123         /* All sub buffers have a descriptor */
6124         return tr->dir;
6125 }
6126
6127 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6128 {
6129         struct dentry *d_tracer;
6130
6131         if (tr->percpu_dir)
6132                 return tr->percpu_dir;
6133
6134         d_tracer = tracing_get_dentry(tr);
6135         if (IS_ERR(d_tracer))
6136                 return NULL;
6137
6138         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6139
6140         WARN_ONCE(!tr->percpu_dir,
6141                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6142
6143         return tr->percpu_dir;
6144 }
6145
6146 static struct dentry *
6147 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6148                       void *data, long cpu, const struct file_operations *fops)
6149 {
6150         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6151
6152         if (ret) /* See tracing_get_cpu() */
6153                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6154         return ret;
6155 }
6156
6157 static void
6158 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6159 {
6160         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6161         struct dentry *d_cpu;
6162         char cpu_dir[30]; /* 30 characters should be more than enough */
6163
6164         if (!d_percpu)
6165                 return;
6166
6167         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6168         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6169         if (!d_cpu) {
6170                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6171                 return;
6172         }
6173
6174         /* per cpu trace_pipe */
6175         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6176                                 tr, cpu, &tracing_pipe_fops);
6177
6178         /* per cpu trace */
6179         trace_create_cpu_file("trace", 0644, d_cpu,
6180                                 tr, cpu, &tracing_fops);
6181
6182         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6183                                 tr, cpu, &tracing_buffers_fops);
6184
6185         trace_create_cpu_file("stats", 0444, d_cpu,
6186                                 tr, cpu, &tracing_stats_fops);
6187
6188         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6189                                 tr, cpu, &tracing_entries_fops);
6190
6191 #ifdef CONFIG_TRACER_SNAPSHOT
6192         trace_create_cpu_file("snapshot", 0644, d_cpu,
6193                                 tr, cpu, &snapshot_fops);
6194
6195         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6196                                 tr, cpu, &snapshot_raw_fops);
6197 #endif
6198 }
6199
6200 #ifdef CONFIG_FTRACE_SELFTEST
6201 /* Let selftest have access to static functions in this file */
6202 #include "trace_selftest.c"
6203 #endif
6204
6205 static ssize_t
6206 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6207                         loff_t *ppos)
6208 {
6209         struct trace_option_dentry *topt = filp->private_data;
6210         char *buf;
6211
6212         if (topt->flags->val & topt->opt->bit)
6213                 buf = "1\n";
6214         else
6215                 buf = "0\n";
6216
6217         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6218 }
6219
6220 static ssize_t
6221 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6222                          loff_t *ppos)
6223 {
6224         struct trace_option_dentry *topt = filp->private_data;
6225         unsigned long val;
6226         int ret;
6227
6228         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6229         if (ret)
6230                 return ret;
6231
6232         if (val != 0 && val != 1)
6233                 return -EINVAL;
6234
6235         if (!!(topt->flags->val & topt->opt->bit) != val) {
6236                 mutex_lock(&trace_types_lock);
6237                 ret = __set_tracer_option(topt->tr, topt->flags,
6238                                           topt->opt, !val);
6239                 mutex_unlock(&trace_types_lock);
6240                 if (ret)
6241                         return ret;
6242         }
6243
6244         *ppos += cnt;
6245
6246         return cnt;
6247 }
6248
6249
6250 static const struct file_operations trace_options_fops = {
6251         .open = tracing_open_generic,
6252         .read = trace_options_read,
6253         .write = trace_options_write,
6254         .llseek = generic_file_llseek,
6255 };
6256
6257 /*
6258  * In order to pass in both the trace_array descriptor as well as the index
6259  * to the flag that the trace option file represents, the trace_array
6260  * has a character array of trace_flags_index[], which holds the index
6261  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6262  * The address of this character array is passed to the flag option file
6263  * read/write callbacks.
6264  *
6265  * In order to extract both the index and the trace_array descriptor,
6266  * get_tr_index() uses the following algorithm.
6267  *
6268  *   idx = *ptr;
6269  *
6270  * This works because ptr holds the address of one slot of the index
6271  * array, and that slot's value is the index itself (index[1] == 1).
6272  *
6273  * Then, to get the trace_array descriptor, subtract that index from
6274  * ptr to reach the start of the index array:
6275  *
6276  *   ptr - idx == &index[0]
6277  *
6278  * Then a simple container_of() from that pointer gets us to the
6279  * trace_array descriptor. A short worked example follows get_tr_index().
6280  */
6281 static void get_tr_index(void *data, struct trace_array **ptr,
6282                          unsigned int *pindex)
6283 {
6284         *pindex = *(unsigned char *)data;
6285
6286         *ptr = container_of(data - *pindex, struct trace_array,
6287                             trace_flags_index);
6288 }
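
/*
 * Illustrative walk-through (not part of the original source): if the file's
 * private data points at tr->trace_flags_index[3], the byte it points to is
 * 3, so data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address over the trace_flags_index member recovers the enclosing
 * trace_array.
 */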
6289
6290 static ssize_t
6291 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6292                         loff_t *ppos)
6293 {
6294         void *tr_index = filp->private_data;
6295         struct trace_array *tr;
6296         unsigned int index;
6297         char *buf;
6298
6299         get_tr_index(tr_index, &tr, &index);
6300
6301         if (tr->trace_flags & (1 << index))
6302                 buf = "1\n";
6303         else
6304                 buf = "0\n";
6305
6306         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6307 }
6308
6309 static ssize_t
6310 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6311                          loff_t *ppos)
6312 {
6313         void *tr_index = filp->private_data;
6314         struct trace_array *tr;
6315         unsigned int index;
6316         unsigned long val;
6317         int ret;
6318
6319         get_tr_index(tr_index, &tr, &index);
6320
6321         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6322         if (ret)
6323                 return ret;
6324
6325         if (val != 0 && val != 1)
6326                 return -EINVAL;
6327
6328         mutex_lock(&trace_types_lock);
6329         ret = set_tracer_flag(tr, 1 << index, val);
6330         mutex_unlock(&trace_types_lock);
6331
6332         if (ret < 0)
6333                 return ret;
6334
6335         *ppos += cnt;
6336
6337         return cnt;
6338 }
6339
6340 static const struct file_operations trace_options_core_fops = {
6341         .open = tracing_open_generic,
6342         .read = trace_options_core_read,
6343         .write = trace_options_core_write,
6344         .llseek = generic_file_llseek,
6345 };
6346
6347 struct dentry *trace_create_file(const char *name,
6348                                  umode_t mode,
6349                                  struct dentry *parent,
6350                                  void *data,
6351                                  const struct file_operations *fops)
6352 {
6353         struct dentry *ret;
6354
6355         ret = tracefs_create_file(name, mode, parent, data, fops);
6356         if (!ret)
6357                 pr_warn("Could not create tracefs '%s' entry\n", name);
6358
6359         return ret;
6360 }
6361
6362
6363 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6364 {
6365         struct dentry *d_tracer;
6366
6367         if (tr->options)
6368                 return tr->options;
6369
6370         d_tracer = tracing_get_dentry(tr);
6371         if (IS_ERR(d_tracer))
6372                 return NULL;
6373
6374         tr->options = tracefs_create_dir("options", d_tracer);
6375         if (!tr->options) {
6376                 pr_warn("Could not create tracefs directory 'options'\n");
6377                 return NULL;
6378         }
6379
6380         return tr->options;
6381 }
6382
6383 static void
6384 create_trace_option_file(struct trace_array *tr,
6385                          struct trace_option_dentry *topt,
6386                          struct tracer_flags *flags,
6387                          struct tracer_opt *opt)
6388 {
6389         struct dentry *t_options;
6390
6391         t_options = trace_options_init_dentry(tr);
6392         if (!t_options)
6393                 return;
6394
6395         topt->flags = flags;
6396         topt->opt = opt;
6397         topt->tr = tr;
6398
6399         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6400                                     &trace_options_fops);
6401
6402 }
6403
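     /*
      * Create one file under "options" for each flag of the given tracer
      * and record the set in tr->topts so instance_rmdir() can free it.
      */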
6404 static void
6405 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6406 {
6407         struct trace_option_dentry *topts;
6408         struct trace_options *tr_topts;
6409         struct tracer_flags *flags;
6410         struct tracer_opt *opts;
6411         int cnt;
6412         int i;
6413
6414         if (!tracer)
6415                 return;
6416
6417         flags = tracer->flags;
6418
6419         if (!flags || !flags->opts)
6420                 return;
6421
6422         /*
6423          * If this is an instance, only create flags for tracers
6424          * the instance may have.
6425          */
6426         if (!trace_ok_for_array(tracer, tr))
6427                 return;
6428
6429         for (i = 0; i < tr->nr_topts; i++) {
6430                 /* Make sure there are no duplicate flags. */
6431                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6432                         return;
6433         }
6434
6435         opts = flags->opts;
6436
6437         for (cnt = 0; opts[cnt].name; cnt++)
6438                 ;
6439
6440         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6441         if (!topts)
6442                 return;
6443
6444         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6445                             GFP_KERNEL);
6446         if (!tr_topts) {
6447                 kfree(topts);
6448                 return;
6449         }
6450
6451         tr->topts = tr_topts;
6452         tr->topts[tr->nr_topts].tracer = tracer;
6453         tr->topts[tr->nr_topts].topts = topts;
6454         tr->nr_topts++;
6455
6456         for (cnt = 0; opts[cnt].name; cnt++) {
6457                 create_trace_option_file(tr, &topts[cnt], flags,
6458                                          &opts[cnt]);
6459                 WARN_ONCE(topts[cnt].entry == NULL,
6460                           "Failed to create trace option: %s",
6461                           opts[cnt].name);
6462         }
6463 }
6464
6465 static struct dentry *
6466 create_trace_option_core_file(struct trace_array *tr,
6467                               const char *option, long index)
6468 {
6469         struct dentry *t_options;
6470
6471         t_options = trace_options_init_dentry(tr);
6472         if (!t_options)
6473                 return NULL;
6474
6475         return trace_create_file(option, 0644, t_options,
6476                                  (void *)&tr->trace_flags_index[index],
6477                                  &trace_options_core_fops);
6478 }
6479
6480 static void create_trace_options_dir(struct trace_array *tr)
6481 {
6482         struct dentry *t_options;
6483         bool top_level = tr == &global_trace;
6484         int i;
6485
6486         t_options = trace_options_init_dentry(tr);
6487         if (!t_options)
6488                 return;
6489
6490         for (i = 0; trace_options[i]; i++) {
6491                 if (top_level ||
6492                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6493                         create_trace_option_core_file(tr, trace_options[i], i);
6494         }
6495 }
6496
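     /*
      * Read/write handlers backing the per-instance "tracing_on" file:
      * reading reports whether the ring buffer is recording; writing 0/1
      * turns recording off/on and calls the current tracer's stop/start
      * callbacks.
      */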
6497 static ssize_t
6498 rb_simple_read(struct file *filp, char __user *ubuf,
6499                size_t cnt, loff_t *ppos)
6500 {
6501         struct trace_array *tr = filp->private_data;
6502         char buf[64];
6503         int r;
6504
6505         r = tracer_tracing_is_on(tr);
6506         r = sprintf(buf, "%d\n", r);
6507
6508         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6509 }
6510
6511 static ssize_t
6512 rb_simple_write(struct file *filp, const char __user *ubuf,
6513                 size_t cnt, loff_t *ppos)
6514 {
6515         struct trace_array *tr = filp->private_data;
6516         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6517         unsigned long val;
6518         int ret;
6519
6520         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6521         if (ret)
6522                 return ret;
6523
6524         if (buffer) {
6525                 mutex_lock(&trace_types_lock);
6526                 if (val) {
6527                         tracer_tracing_on(tr);
6528                         if (tr->current_trace->start)
6529                                 tr->current_trace->start(tr);
6530                 } else {
6531                         tracer_tracing_off(tr);
6532                         if (tr->current_trace->stop)
6533                                 tr->current_trace->stop(tr);
6534                 }
6535                 mutex_unlock(&trace_types_lock);
6536         }
6537
6538         (*ppos)++;
6539
6540         return cnt;
6541 }
6542
6543 static const struct file_operations rb_simple_fops = {
6544         .open           = tracing_open_generic_tr,
6545         .read           = rb_simple_read,
6546         .write          = rb_simple_write,
6547         .release        = tracing_release_generic_tr,
6548         .llseek         = default_llseek,
6549 };
6550
6551 struct dentry *trace_instance_dir;
6552
6553 static void
6554 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6555
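     /*
      * Allocate the ring buffer and the per-cpu data for one trace_buffer,
      * honoring the TRACE_ITER_OVERWRITE flag for the ring buffer mode.
      */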
6556 static int
6557 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6558 {
6559         enum ring_buffer_flags rb_flags;
6560
6561         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6562
6563         buf->tr = tr;
6564
6565         buf->buffer = ring_buffer_alloc(size, rb_flags);
6566         if (!buf->buffer)
6567                 return -ENOMEM;
6568
6569         buf->data = alloc_percpu(struct trace_array_cpu);
6570         if (!buf->data) {
6571                 ring_buffer_free(buf->buffer);
6572                 return -ENOMEM;
6573         }
6574
6575         /* Allocate the first page for all buffers */
6576         set_buffer_entries(&tr->trace_buffer,
6577                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6578
6579         return 0;
6580 }
6581
6582 static int allocate_trace_buffers(struct trace_array *tr, int size)
6583 {
6584         int ret;
6585
6586         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6587         if (ret)
6588                 return ret;
6589
6590 #ifdef CONFIG_TRACER_MAX_TRACE
6591         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6592                                     allocate_snapshot ? size : 1);
6593         if (WARN_ON(ret)) {
6594                 ring_buffer_free(tr->trace_buffer.buffer);
6595                 free_percpu(tr->trace_buffer.data);
6596                 return -ENOMEM;
6597         }
6598         tr->allocated_snapshot = allocate_snapshot;
6599
6600         /*
6601          * Only the top level trace array gets its snapshot allocated
6602          * from the kernel command line.
6603          */
6604         allocate_snapshot = false;
6605 #endif
6606         return 0;
6607 }
6608
6609 static void free_trace_buffer(struct trace_buffer *buf)
6610 {
6611         if (buf->buffer) {
6612                 ring_buffer_free(buf->buffer);
6613                 buf->buffer = NULL;
6614                 free_percpu(buf->data);
6615                 buf->data = NULL;
6616         }
6617 }
6618
6619 static void free_trace_buffers(struct trace_array *tr)
6620 {
6621         if (!tr)
6622                 return;
6623
6624         free_trace_buffer(&tr->trace_buffer);
6625
6626 #ifdef CONFIG_TRACER_MAX_TRACE
6627         free_trace_buffer(&tr->max_buffer);
6628 #endif
6629 }
6630
6631 static void init_trace_flags_index(struct trace_array *tr)
6632 {
6633         int i;
6634
6635         /* Used by the trace options files */
6636         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6637                 tr->trace_flags_index[i] = i;
6638 }
6639
6640 static void __update_tracer_options(struct trace_array *tr)
6641 {
6642         struct tracer *t;
6643
6644         for (t = trace_types; t; t = t->next)
6645                 add_tracer_options(tr, t);
6646 }
6647
6648 static void update_tracer_options(struct trace_array *tr)
6649 {
6650         mutex_lock(&trace_types_lock);
6651         __update_tracer_options(tr);
6652         mutex_unlock(&trace_types_lock);
6653 }
6654
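     /*
      * Called when a directory is created under the tracefs "instances"
      * directory: allocate a new trace_array, give it its own buffers,
      * events and tracefs files, and add it to ftrace_trace_arrays.
      */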
6655 static int instance_mkdir(const char *name)
6656 {
6657         struct trace_array *tr;
6658         int ret;
6659
6660         mutex_lock(&trace_types_lock);
6661
6662         ret = -EEXIST;
6663         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6664                 if (tr->name && strcmp(tr->name, name) == 0)
6665                         goto out_unlock;
6666         }
6667
6668         ret = -ENOMEM;
6669         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6670         if (!tr)
6671                 goto out_unlock;
6672
6673         tr->name = kstrdup(name, GFP_KERNEL);
6674         if (!tr->name)
6675                 goto out_free_tr;
6676
6677         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6678                 goto out_free_tr;
6679
6680         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6681
6682         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6683
6684         raw_spin_lock_init(&tr->start_lock);
6685
6686         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6687
6688         tr->current_trace = &nop_trace;
6689
6690         INIT_LIST_HEAD(&tr->systems);
6691         INIT_LIST_HEAD(&tr->events);
6692
6693         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6694                 goto out_free_tr;
6695
6696         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6697         if (!tr->dir)
6698                 goto out_free_tr;
6699
6700         ret = event_trace_add_tracer(tr->dir, tr);
6701         if (ret) {
6702                 tracefs_remove_recursive(tr->dir);
6703                 goto out_free_tr;
6704         }
6705
6706         init_tracer_tracefs(tr, tr->dir);
6707         init_trace_flags_index(tr);
6708         __update_tracer_options(tr);
6709
6710         list_add(&tr->list, &ftrace_trace_arrays);
6711
6712         mutex_unlock(&trace_types_lock);
6713
6714         return 0;
6715
6716  out_free_tr:
6717         free_trace_buffers(tr);
6718         free_cpumask_var(tr->tracing_cpumask);
6719         kfree(tr->name);
6720         kfree(tr);
6721
6722  out_unlock:
6723         mutex_unlock(&trace_types_lock);
6724
6725         return ret;
6726
6727 }
6728
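     /*
      * Called when an instance directory is removed: refuse if the instance
      * is still referenced, otherwise unhook it from ftrace_trace_arrays and
      * free its events, buffers and option files.
      */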
6729 static int instance_rmdir(const char *name)
6730 {
6731         struct trace_array *tr;
6732         int found = 0;
6733         int ret;
6734         int i;
6735
6736         mutex_lock(&trace_types_lock);
6737
6738         ret = -ENODEV;
6739         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6740                 if (tr->name && strcmp(tr->name, name) == 0) {
6741                         found = 1;
6742                         break;
6743                 }
6744         }
6745         if (!found)
6746                 goto out_unlock;
6747
6748         ret = -EBUSY;
6749         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6750                 goto out_unlock;
6751
6752         list_del(&tr->list);
6753
6754         /* Disable all the flags that were enabled coming in */
6755         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6756                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6757                         set_tracer_flag(tr, 1 << i, 0);
6758         }
6759
6760         tracing_set_nop(tr);
6761         event_trace_del_tracer(tr);
6762         ftrace_destroy_function_files(tr);
6763         tracefs_remove_recursive(tr->dir);
6764         free_trace_buffers(tr);
6765
6766         for (i = 0; i < tr->nr_topts; i++) {
6767                 kfree(tr->topts[i].topts);
6768         }
6769         kfree(tr->topts);
6770
6771         kfree(tr->name);
6772         kfree(tr);
6773
6774         ret = 0;
6775
6776  out_unlock:
6777         mutex_unlock(&trace_types_lock);
6778
6779         return ret;
6780 }
6781
6782 static __init void create_trace_instances(struct dentry *d_tracer)
6783 {
6784         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6785                                                          instance_mkdir,
6786                                                          instance_rmdir);
6787         if (WARN_ON(!trace_instance_dir))
6788                 return;
6789 }
6790
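     /*
      * Create the standard set of tracefs control files (current_tracer,
      * trace, trace_pipe, tracing_on, etc.) for a trace_array under
      * @d_tracer, plus the per-cpu directories.
      */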
6791 static void
6792 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6793 {
6794         int cpu;
6795
6796         trace_create_file("available_tracers", 0444, d_tracer,
6797                         tr, &show_traces_fops);
6798
6799         trace_create_file("current_tracer", 0644, d_tracer,
6800                         tr, &set_tracer_fops);
6801
6802         trace_create_file("tracing_cpumask", 0644, d_tracer,
6803                           tr, &tracing_cpumask_fops);
6804
6805         trace_create_file("trace_options", 0644, d_tracer,
6806                           tr, &tracing_iter_fops);
6807
6808         trace_create_file("trace", 0644, d_tracer,
6809                           tr, &tracing_fops);
6810
6811         trace_create_file("trace_pipe", 0444, d_tracer,
6812                           tr, &tracing_pipe_fops);
6813
6814         trace_create_file("buffer_size_kb", 0644, d_tracer,
6815                           tr, &tracing_entries_fops);
6816
6817         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6818                           tr, &tracing_total_entries_fops);
6819
6820         trace_create_file("free_buffer", 0200, d_tracer,
6821                           tr, &tracing_free_buffer_fops);
6822
6823         trace_create_file("trace_marker", 0220, d_tracer,
6824                           tr, &tracing_mark_fops);
6825
6826         trace_create_file("trace_clock", 0644, d_tracer, tr,
6827                           &trace_clock_fops);
6828
6829         trace_create_file("tracing_on", 0644, d_tracer,
6830                           tr, &rb_simple_fops);
6831
6832         create_trace_options_dir(tr);
6833
6834 #ifdef CONFIG_TRACER_MAX_TRACE
6835         trace_create_file("tracing_max_latency", 0644, d_tracer,
6836                         &tr->max_latency, &tracing_max_lat_fops);
6837 #endif
6838
6839         if (ftrace_create_function_files(tr, d_tracer))
6840                 WARN(1, "Could not allocate function filter files");
6841
6842 #ifdef CONFIG_TRACER_SNAPSHOT
6843         trace_create_file("snapshot", 0644, d_tracer,
6844                           tr, &snapshot_fops);
6845 #endif
6846
6847         for_each_tracing_cpu(cpu)
6848                 tracing_init_tracefs_percpu(tr, cpu);
6849
6850 }
6851
6852 static struct vfsmount *trace_automount(void *ignore)
6853 {
6854         struct vfsmount *mnt;
6855         struct file_system_type *type;
6856
6857         /*
6858          * To maintain backward compatibility for tools that mount
6859          * debugfs to get to the tracing facility, tracefs is automatically
6860          * mounted to the debugfs/tracing directory.
6861          */
6862         type = get_fs_type("tracefs");
6863         if (!type)
6864                 return NULL;
6865         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6866         put_filesystem(type);
6867         if (IS_ERR(mnt))
6868                 return NULL;
6869         mntget(mnt);
6870
6871         return mnt;
6872 }
6873
6874 /**
6875  * tracing_init_dentry - initialize top level trace array
6876  *
6877  * This is called when creating files or directories in the tracing
6878  * directory. It is called via fs_initcall() by the boot up code and
6879  * is expected to return the dentry of the top level tracing directory.
6880  */
6881 struct dentry *tracing_init_dentry(void)
6882 {
6883         struct trace_array *tr = &global_trace;
6884
6885         /* The top level trace array uses NULL as its parent */
6886         if (tr->dir)
6887                 return NULL;
6888
6889         if (WARN_ON(!tracefs_initialized()) ||
6890                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6891                  WARN_ON(!debugfs_initialized())))
6892                 return ERR_PTR(-ENODEV);
6893
6894         /*
6895          * As there may still be users that expect the tracing
6896          * files to exist in debugfs/tracing, we must automount
6897          * the tracefs file system there, so older tools still
6898          * work with the newer kernel.
6899          */
6900         tr->dir = debugfs_create_automount("tracing", NULL,
6901                                            trace_automount, NULL);
6902         if (!tr->dir) {
6903                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6904                 return ERR_PTR(-ENOMEM);
6905         }
6906
6907         return NULL;
6908 }
6909
6910 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6911 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6912
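     /*
      * Register the built-in enum maps that the linker collected between
      * __start_ftrace_enum_maps and __stop_ftrace_enum_maps.
      */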
6913 static void __init trace_enum_init(void)
6914 {
6915         int len;
6916
6917         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6918         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6919 }
6920
6921 #ifdef CONFIG_MODULES
6922 static void trace_module_add_enums(struct module *mod)
6923 {
6924         if (!mod->num_trace_enums)
6925                 return;
6926
6927         /*
6928          * Modules with bad taint do not have events created;
6929          * do not bother with their enums either.
6930          */
6931         if (trace_module_has_bad_taint(mod))
6932                 return;
6933
6934         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6935 }
6936
6937 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6938 static void trace_module_remove_enums(struct module *mod)
6939 {
6940         union trace_enum_map_item *map;
6941         union trace_enum_map_item **last = &trace_enum_maps;
6942
6943         if (!mod->num_trace_enums)
6944                 return;
6945
6946         mutex_lock(&trace_enum_mutex);
6947
6948         map = trace_enum_maps;
6949
6950         while (map) {
6951                 if (map->head.mod == mod)
6952                         break;
6953                 map = trace_enum_jmp_to_tail(map);
6954                 last = &map->tail.next;
6955                 map = map->tail.next;
6956         }
6957         if (!map)
6958                 goto out;
6959
6960         *last = trace_enum_jmp_to_tail(map)->tail.next;
6961         kfree(map);
6962  out:
6963         mutex_unlock(&trace_enum_mutex);
6964 }
6965 #else
6966 static inline void trace_module_remove_enums(struct module *mod) { }
6967 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6968
6969 static int trace_module_notify(struct notifier_block *self,
6970                                unsigned long val, void *data)
6971 {
6972         struct module *mod = data;
6973
6974         switch (val) {
6975         case MODULE_STATE_COMING:
6976                 trace_module_add_enums(mod);
6977                 break;
6978         case MODULE_STATE_GOING:
6979                 trace_module_remove_enums(mod);
6980                 break;
6981         }
6982
6983         return 0;
6984 }
6985
6986 static struct notifier_block trace_module_nb = {
6987         .notifier_call = trace_module_notify,
6988         .priority = 0,
6989 };
6990 #endif /* CONFIG_MODULES */
6991
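     /*
      * fs_initcall() that populates the top level tracing directory:
      * the global control files, the saved_cmdlines files, the enum map
      * file and the "instances" directory.
      */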
6992 static __init int tracer_init_tracefs(void)
6993 {
6994         struct dentry *d_tracer;
6995
6996         trace_access_lock_init();
6997
6998         d_tracer = tracing_init_dentry();
6999         if (IS_ERR(d_tracer))
7000                 return 0;
7001
7002         init_tracer_tracefs(&global_trace, d_tracer);
7003
7004         trace_create_file("tracing_thresh", 0644, d_tracer,
7005                         &global_trace, &tracing_thresh_fops);
7006
7007         trace_create_file("README", 0444, d_tracer,
7008                         NULL, &tracing_readme_fops);
7009
7010         trace_create_file("saved_cmdlines", 0444, d_tracer,
7011                         NULL, &tracing_saved_cmdlines_fops);
7012
7013         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7014                           NULL, &tracing_saved_cmdlines_size_fops);
7015
7016         trace_enum_init();
7017
7018         trace_create_enum_file(d_tracer);
7019
7020 #ifdef CONFIG_MODULES
7021         register_module_notifier(&trace_module_nb);
7022 #endif
7023
7024 #ifdef CONFIG_DYNAMIC_FTRACE
7025         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7026                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7027 #endif
7028
7029         create_trace_instances(d_tracer);
7030
7031         update_tracer_options(&global_trace);
7032
7033         return 0;
7034 }
7035
7036 static int trace_panic_handler(struct notifier_block *this,
7037                                unsigned long event, void *unused)
7038 {
7039         if (ftrace_dump_on_oops)
7040                 ftrace_dump(ftrace_dump_on_oops);
7041         return NOTIFY_OK;
7042 }
7043
7044 static struct notifier_block trace_panic_notifier = {
7045         .notifier_call  = trace_panic_handler,
7046         .next           = NULL,
7047         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7048 };
7049
7050 static int trace_die_handler(struct notifier_block *self,
7051                              unsigned long val,
7052                              void *data)
7053 {
7054         switch (val) {
7055         case DIE_OOPS:
7056                 if (ftrace_dump_on_oops)
7057                         ftrace_dump(ftrace_dump_on_oops);
7058                 break;
7059         default:
7060                 break;
7061         }
7062         return NOTIFY_OK;
7063 }
7064
7065 static struct notifier_block trace_die_notifier = {
7066         .notifier_call = trace_die_handler,
7067         .priority = 200
7068 };
7069
7070 /*
7071  * printk allows a maximum of 1024 bytes; we really don't need it that
7072  * big. Nothing should be printing 1000 characters anyway.
7073  */
7074 #define TRACE_MAX_PRINT         1000
7075
7076 /*
7077  * Define here KERN_TRACE so that we have one place to modify
7078  * it if we decide to change what log level the ftrace dump
7079  * should be at.
7080  */
7081 #define KERN_TRACE              KERN_EMERG
7082
7083 void
7084 trace_printk_seq(struct trace_seq *s)
7085 {
7086         /* Probably should print a warning here. */
7087         if (s->seq.len >= TRACE_MAX_PRINT)
7088                 s->seq.len = TRACE_MAX_PRINT;
7089
7090         /*
7091          * More paranoid code. Although the buffer size is set to
7092          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7093          * an extra layer of protection.
7094          */
7095         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7096                 s->seq.len = s->seq.size - 1;
7097
7098         /* The buffer should be nul terminated, but we are paranoid. */
7099         s->buffer[s->seq.len] = 0;
7100
7101         printk(KERN_TRACE "%s", s->buffer);
7102
7103         trace_seq_init(s);
7104 }
7105
7106 void trace_init_global_iter(struct trace_iterator *iter)
7107 {
7108         iter->tr = &global_trace;
7109         iter->trace = iter->tr->current_trace;
7110         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7111         iter->trace_buffer = &global_trace.trace_buffer;
7112
7113         if (iter->trace && iter->trace->open)
7114                 iter->trace->open(iter);
7115
7116         /* Annotate start of buffers if we had overruns */
7117         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7118                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7119
7120         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7121         if (trace_clocks[iter->tr->clock_id].in_ns)
7122                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7123 }
7124
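     /*
      * Dump the contents of the ftrace ring buffer(s) to the console.
      * Used by the panic/die notifiers and sysrq-z; tracing is turned off
      * and per-cpu recording is disabled for the duration of the dump.
      */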
7125 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7126 {
7127         /* use static because iter can be a bit big for the stack */
7128         static struct trace_iterator iter;
7129         static atomic_t dump_running;
7130         struct trace_array *tr = &global_trace;
7131         unsigned int old_userobj;
7132         unsigned long flags;
7133         int cnt = 0, cpu;
7134
7135         /* Only allow one dump user at a time. */
7136         if (atomic_inc_return(&dump_running) != 1) {
7137                 atomic_dec(&dump_running);
7138                 return;
7139         }
7140
7141         /*
7142          * Always turn off tracing when we dump.
7143          * We don't need to show trace output of what happens
7144          * between multiple crashes.
7145          *
7146          * If the user does a sysrq-z, then they can re-enable
7147          * tracing with echo 1 > tracing_on.
7148          */
7149         tracing_off();
7150
7151         local_irq_save(flags);
7152
7153         /* Simulate the iterator */
7154         trace_init_global_iter(&iter);
7155
7156         for_each_tracing_cpu(cpu) {
7157                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7158         }
7159
7160         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7161
7162         /* don't look at user memory in panic mode */
7163         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7164
7165         switch (oops_dump_mode) {
7166         case DUMP_ALL:
7167                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7168                 break;
7169         case DUMP_ORIG:
7170                 iter.cpu_file = raw_smp_processor_id();
7171                 break;
7172         case DUMP_NONE:
7173                 goto out_enable;
7174         default:
7175                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7176                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7177         }
7178
7179         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7180
7181         /* Did function tracer already get disabled? */
7182         if (ftrace_is_dead()) {
7183                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7184                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7185         }
7186
7187         /*
7188          * We need to stop all tracing on all CPUs to read
7189          * the next buffer. This is a bit expensive, but it is
7190          * not done often. We read out everything we can,
7191          * and then release the locks again.
7192          */
7193
7194         while (!trace_empty(&iter)) {
7195
7196                 if (!cnt)
7197                         printk(KERN_TRACE "---------------------------------\n");
7198
7199                 cnt++;
7200
7201                 /* reset all but tr, trace, and overruns */
7202                 memset(&iter.seq, 0,
7203                        sizeof(struct trace_iterator) -
7204                        offsetof(struct trace_iterator, seq));
7205                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7206                 iter.pos = -1;
7207
7208                 if (trace_find_next_entry_inc(&iter) != NULL) {
7209                         int ret;
7210
7211                         ret = print_trace_line(&iter);
7212                         if (ret != TRACE_TYPE_NO_CONSUME)
7213                                 trace_consume(&iter);
7214                 }
7215                 touch_nmi_watchdog();
7216
7217                 trace_printk_seq(&iter.seq);
7218         }
7219
7220         if (!cnt)
7221                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7222         else
7223                 printk(KERN_TRACE "---------------------------------\n");
7224
7225  out_enable:
7226         tr->trace_flags |= old_userobj;
7227
7228         for_each_tracing_cpu(cpu) {
7229                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7230         }
7231         atomic_dec(&dump_running);
7232         local_irq_restore(flags);
7233 }
7234 EXPORT_SYMBOL_GPL(ftrace_dump);
7235
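     /*
      * Early boot setup called from trace_init(): allocate the cpumasks
      * and the global trace buffers, register the nop tracer and hook the
      * panic/die notifiers into place.
      */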
7236 __init static int tracer_alloc_buffers(void)
7237 {
7238         int ring_buf_size;
7239         int ret = -ENOMEM;
7240
7241         /*
7242          * Make sure we don't accidentally add more trace options
7243          * than we have bits for.
7244          */
7245         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7246
7247         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7248                 goto out;
7249
7250         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7251                 goto out_free_buffer_mask;
7252
7253         /* Only allocate trace_printk buffers if a trace_printk exists */
7254         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7255                 /* Must be called before global_trace.buffer is allocated */
7256                 trace_printk_init_buffers();
7257
7258         /* To save memory, keep the ring buffer size to its minimum */
7259         if (ring_buffer_expanded)
7260                 ring_buf_size = trace_buf_size;
7261         else
7262                 ring_buf_size = 1;
7263
7264         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7265         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7266
7267         raw_spin_lock_init(&global_trace.start_lock);
7268
7269         /* Used for event triggers */
7270         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7271         if (!temp_buffer)
7272                 goto out_free_cpumask;
7273
7274         if (trace_create_savedcmd() < 0)
7275                 goto out_free_temp_buffer;
7276
7277         /* TODO: make the number of buffers hot pluggable with CPUs */
7278         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7279                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7280                 WARN_ON(1);
7281                 goto out_free_savedcmd;
7282         }
7283
7284         if (global_trace.buffer_disabled)
7285                 tracing_off();
7286
7287         if (trace_boot_clock) {
7288                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7289                 if (ret < 0)
7290                         pr_warn("Trace clock %s not defined, going back to default\n",
7291                                 trace_boot_clock);
7292         }
7293
7294         /*
7295          * register_tracer() might reference current_trace, so it
7296          * needs to be set before we register anything. This is
7297          * just a bootstrap of current_trace anyway.
7298          */
7299         global_trace.current_trace = &nop_trace;
7300
7301         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7302
7303         ftrace_init_global_array_ops(&global_trace);
7304
7305         init_trace_flags_index(&global_trace);
7306
7307         register_tracer(&nop_trace);
7308
7309         /* All seems OK, enable tracing */
7310         tracing_disabled = 0;
7311
7312         atomic_notifier_chain_register(&panic_notifier_list,
7313                                        &trace_panic_notifier);
7314
7315         register_die_notifier(&trace_die_notifier);
7316
7317         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7318
7319         INIT_LIST_HEAD(&global_trace.systems);
7320         INIT_LIST_HEAD(&global_trace.events);
7321         list_add(&global_trace.list, &ftrace_trace_arrays);
7322
7323         apply_trace_boot_options();
7324
7325         register_snapshot_cmd();
7326
7327         return 0;
7328
7329 out_free_savedcmd:
7330         free_saved_cmdlines_buffer(savedcmd);
7331 out_free_temp_buffer:
7332         ring_buffer_free(temp_buffer);
7333 out_free_cpumask:
7334         free_cpumask_var(global_trace.tracing_cpumask);
7335 out_free_buffer_mask:
7336         free_cpumask_var(tracing_buffer_mask);
7337 out:
7338         return ret;
7339 }
7340
7341 void __init trace_init(void)
7342 {
7343         if (tracepoint_printk) {
7344                 tracepoint_print_iter =
7345                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7346                 if (WARN_ON(!tracepoint_print_iter))
7347                         tracepoint_printk = 0;
7348         }
7349         tracer_alloc_buffers();
7350         trace_event_init();
7351 }
7352
7353 __init static int clear_boot_tracer(void)
7354 {
7355         /*
7356          * The buffer holding the default bootup tracer name is in an
7357          * init section that is freed after boot. This function is called
7358          * at late_initcall time; if the boot tracer was never found and
7359          * registered, clear the pointer out to prevent a later
7360          * registration from accessing the buffer that is about to be freed.
7361          */
7362         if (!default_bootup_tracer)
7363                 return 0;
7364
7365         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7366                default_bootup_tracer);
7367         default_bootup_tracer = NULL;
7368
7369         return 0;
7370 }
7371
7372 fs_initcall(tracer_init_tracefs);
7373 late_initcall(clear_boot_tracer);