1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will peek into the ring buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78         .val = 0,
79         .opts = dummy_tracer_opt
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 and is set back to zero only if the
98  * initialization of the tracer is successful. That is the only place
99  * that sets it back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
104
105 cpumask_var_t __read_mostly     tracing_buffer_mask;
106
107 /*
108  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
109  *
110  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
111  * is set, then ftrace_dump is called. This will output the contents
112  * of the ftrace buffers to the console.  This is very useful for
113  * capturing traces that lead to crashes and outputting them to a
114  * serial console.
115  *
116  * It is off by default, but you can enable it either by specifying
117  * "ftrace_dump_on_oops" on the kernel command line, or by setting
118  * /proc/sys/kernel/ftrace_dump_on_oops.
119  * Set it to 1 to dump the buffers of all CPUs.
120  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
121  */
122
123 enum ftrace_dump_mode ftrace_dump_on_oops;
124
125 /* When set, tracing will stop when a WARN*() is hit */
126 int __disable_trace_on_warning;
127
128 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
129 /* Map of enums to their values, for "enum_map" file */
130 struct trace_enum_map_head {
131         struct module                   *mod;
132         unsigned long                   length;
133 };
134
135 union trace_enum_map_item;
136
137 struct trace_enum_map_tail {
138         /*
139          * "end" is first and points to NULL as it must be different
140          * than "mod" or "enum_string"
141          */
142         union trace_enum_map_item       *next;
143         const char                      *end;   /* points to NULL */
144 };
145
146 static DEFINE_MUTEX(trace_enum_mutex);
147
148 /*
149  * The trace_enum_maps are saved in an array with two extra elements,
150  * one at the beginning, and one at the end. The beginning item contains
151  * the count of the saved maps (head.length), and the module they
152  * belong to if not built in (head.mod). The ending item contains a
153  * pointer to the next array of saved enum_map items.
154  */
155 union trace_enum_map_item {
156         struct trace_enum_map           map;
157         struct trace_enum_map_head      head;
158         struct trace_enum_map_tail      tail;
159 };
160
161 static union trace_enum_map_item *trace_enum_maps;
162 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
163
164 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
165
166 #define MAX_TRACER_SIZE         100
167 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168 static char *default_bootup_tracer;
169
170 static bool allocate_snapshot;
171
172 static int __init set_cmdline_ftrace(char *str)
173 {
174         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175         default_bootup_tracer = bootup_tracer_buf;
176         /* We are using ftrace early, expand it */
177         ring_buffer_expanded = true;
178         return 1;
179 }
180 __setup("ftrace=", set_cmdline_ftrace);
181
182 static int __init set_ftrace_dump_on_oops(char *str)
183 {
184         if (*str++ != '=' || !*str) {
185                 ftrace_dump_on_oops = DUMP_ALL;
186                 return 1;
187         }
188
189         if (!strcmp("orig_cpu", str)) {
190                 ftrace_dump_on_oops = DUMP_ORIG;
191                 return 1;
192         }
193
194         return 0;
195 }
196 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
197
198 static int __init stop_trace_on_warning(char *str)
199 {
200         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201                 __disable_trace_on_warning = 1;
202         return 1;
203 }
204 __setup("traceoff_on_warning", stop_trace_on_warning);
205
206 static int __init boot_alloc_snapshot(char *str)
207 {
208         allocate_snapshot = true;
209         /* We also need the main ring buffer expanded */
210         ring_buffer_expanded = true;
211         return 1;
212 }
213 __setup("alloc_snapshot", boot_alloc_snapshot);
214
215
216 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217 static char *trace_boot_options __initdata;
218
219 static int __init set_trace_boot_options(char *str)
220 {
221         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
222         trace_boot_options = trace_boot_options_buf;
223         return 0;
224 }
225 __setup("trace_options=", set_trace_boot_options);
226
227 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228 static char *trace_boot_clock __initdata;
229
230 static int __init set_trace_boot_clock(char *str)
231 {
232         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233         trace_boot_clock = trace_boot_clock_buf;
234         return 0;
235 }
236 __setup("trace_clock=", set_trace_boot_clock);
237
238 static int __init set_tracepoint_printk(char *str)
239 {
240         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241                 tracepoint_printk = 1;
242         return 1;
243 }
244 __setup("tp_printk", set_tracepoint_printk);
245
246 unsigned long long ns2usecs(cycle_t nsec)
247 {
248         nsec += 500;
249         do_div(nsec, 1000);
250         return nsec;
251 }
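/*
 * A small worked example of the rounding above (illustrative, not from the
 * original source): ns2usecs(1499) returns 1 and ns2usecs(1500) returns 2,
 * because adding 500 before the divide rounds to the nearest microsecond.
 */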
252
253 /*
254  * The global_trace is the descriptor that holds the tracing
255  * buffers for the live tracing. For each CPU, it contains
256  * a linked list of pages that will store trace entries. The
257  * page descriptor of the pages in memory is used to hold
258  * the linked list by linking the lru item in the page descriptor
259  * to each of the pages in the buffer per CPU.
260  *
261  * For each active CPU there is a data field that holds the
262  * pages for the buffer for that CPU. Each CPU has the same number
263  * of pages allocated for its buffer.
264  */
265 static struct trace_array       global_trace;
266
267 LIST_HEAD(ftrace_trace_arrays);
268
269 int trace_array_get(struct trace_array *this_tr)
270 {
271         struct trace_array *tr;
272         int ret = -ENODEV;
273
274         mutex_lock(&trace_types_lock);
275         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
276                 if (tr == this_tr) {
277                         tr->ref++;
278                         ret = 0;
279                         break;
280                 }
281         }
282         mutex_unlock(&trace_types_lock);
283
284         return ret;
285 }
286
287 static void __trace_array_put(struct trace_array *this_tr)
288 {
289         WARN_ON(!this_tr->ref);
290         this_tr->ref--;
291 }
292
293 void trace_array_put(struct trace_array *this_tr)
294 {
295         mutex_lock(&trace_types_lock);
296         __trace_array_put(this_tr);
297         mutex_unlock(&trace_types_lock);
298 }
299
300 int filter_check_discard(struct trace_event_file *file, void *rec,
301                          struct ring_buffer *buffer,
302                          struct ring_buffer_event *event)
303 {
304         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
305             !filter_match_preds(file->filter, rec)) {
306                 ring_buffer_discard_commit(buffer, event);
307                 return 1;
308         }
309
310         return 0;
311 }
312 EXPORT_SYMBOL_GPL(filter_check_discard);
313
314 int call_filter_check_discard(struct trace_event_call *call, void *rec,
315                               struct ring_buffer *buffer,
316                               struct ring_buffer_event *event)
317 {
318         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
319             !filter_match_preds(call->filter, rec)) {
320                 ring_buffer_discard_commit(buffer, event);
321                 return 1;
322         }
323
324         return 0;
325 }
326 EXPORT_SYMBOL_GPL(call_filter_check_discard);
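/*
 * A minimal sketch of how event code uses the two helpers above together
 * with the reserve/commit primitives (the same pattern trace_function()
 * below follows; "buffer", "call", "flags" and "pc" stand for whatever the
 * caller already has):
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */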
327
328 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
329 {
330         u64 ts;
331
332         /* Early boot up does not have a buffer yet */
333         if (!buf->buffer)
334                 return trace_clock_local();
335
336         ts = ring_buffer_time_stamp(buf->buffer, cpu);
337         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
338
339         return ts;
340 }
341
342 cycle_t ftrace_now(int cpu)
343 {
344         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
345 }
346
347 /**
348  * tracing_is_enabled - Show if global_trace has been enabled
349  *
350  * Shows whether the global trace has been enabled or not. It uses the
351  * mirror flag "buffer_disabled" so it can be checked in fast paths such
352  * as the irqsoff tracer. But it may be inaccurate due to races. If you
353  * need to know the accurate state, use tracing_is_on(), which is a little
354  * slower but accurate.
355  */
356 int tracing_is_enabled(void)
357 {
358         /*
359          * For quick access (irqsoff uses this in fast path), just
360          * return the mirror variable of the state of the ring buffer.
361          * It's a little racy, but we don't really care.
362          */
363         smp_rmb();
364         return !global_trace.buffer_disabled;
365 }
366
367 /*
368  * trace_buf_size is the size in bytes that is allocated
369  * for a buffer. Note, the number of bytes is always rounded
370  * to page size.
371  *
372  * This number is purposely set to a low number of 16384.
373  * If a dump on oops happens, it will be much appreciated
374  * not to have to wait for all that output. In any case, this is
375  * configurable at both boot time and run time.
376  */
377 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
378
379 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
380
381 /* trace_types holds a linked list of available tracers. */
382 static struct tracer            *trace_types __read_mostly;
383
384 /*
385  * trace_types_lock is used to protect the trace_types list.
386  */
387 DEFINE_MUTEX(trace_types_lock);
388
389 /*
390  * Serialize access to the ring buffer
391  *
392  * The ring buffer serializes readers, but that is only low-level protection.
393  * The validity of the events (which are returned by ring_buffer_peek() etc.)
394  * is not protected by the ring buffer.
395  *
396  * The content of events may become garbage if we allow another process to
397  * consume these events concurrently:
398  *   A) the page of the consumed events may become a normal page
399  *      (not a reader page) in the ring buffer, and this page will be
400  *      rewritten by the event producer.
401  *   B) The page of the consumed events may become a page for splice_read,
402  *      and this page will be returned to the system.
403  *
404  * These primitives allow multiple processes to access different per-CPU
405  * ring buffers concurrently.
406  *
407  * These primitives don't distinguish read-only and read-consume access.
408  * Multiple read-only accesses are also serialized.
409  */
410
411 #ifdef CONFIG_SMP
412 static DECLARE_RWSEM(all_cpu_access_lock);
413 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
414
415 static inline void trace_access_lock(int cpu)
416 {
417         if (cpu == RING_BUFFER_ALL_CPUS) {
418                 /* gain it for accessing the whole ring buffer. */
419                 down_write(&all_cpu_access_lock);
420         } else {
421                 /* gain it for accessing a cpu ring buffer. */
422
423                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
424                 down_read(&all_cpu_access_lock);
425
426                 /* Secondly block other access to this @cpu ring buffer. */
427                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
428         }
429 }
430
431 static inline void trace_access_unlock(int cpu)
432 {
433         if (cpu == RING_BUFFER_ALL_CPUS) {
434                 up_write(&all_cpu_access_lock);
435         } else {
436                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
437                 up_read(&all_cpu_access_lock);
438         }
439 }
440
441 static inline void trace_access_lock_init(void)
442 {
443         int cpu;
444
445         for_each_possible_cpu(cpu)
446                 mutex_init(&per_cpu(cpu_access_lock, cpu));
447 }
448
449 #else
450
451 static DEFINE_MUTEX(access_lock);
452
453 static inline void trace_access_lock(int cpu)
454 {
455         (void)cpu;
456         mutex_lock(&access_lock);
457 }
458
459 static inline void trace_access_unlock(int cpu)
460 {
461         (void)cpu;
462         mutex_unlock(&access_lock);
463 }
464
465 static inline void trace_access_lock_init(void)
466 {
467 }
468
469 #endif
470
471 #ifdef CONFIG_STACKTRACE
472 static void __ftrace_trace_stack(struct ring_buffer *buffer,
473                                  unsigned long flags,
474                                  int skip, int pc, struct pt_regs *regs);
475 static inline void ftrace_trace_stack(struct ring_buffer *buffer,
476                                       unsigned long flags,
477                                       int skip, int pc, struct pt_regs *regs);
478
479 #else
480 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
481                                         unsigned long flags,
482                                         int skip, int pc, struct pt_regs *regs)
483 {
484 }
485 static inline void ftrace_trace_stack(struct ring_buffer *buffer,
486                                       unsigned long flags,
487                                       int skip, int pc, struct pt_regs *regs)
488 {
489 }
490
491 #endif
492
493 /* trace_flags holds trace_options default values */
494 unsigned long trace_flags =
495         FUNCTION_DEFAULT_FLAGS | FUNCTION_GRAPH_DEFAULT_FLAGS |
496         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
497         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |
498         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
499         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS
500         ;
501
502 static void tracer_tracing_on(struct trace_array *tr)
503 {
504         if (tr->trace_buffer.buffer)
505                 ring_buffer_record_on(tr->trace_buffer.buffer);
506         /*
507          * This flag is looked at when buffers haven't been allocated
508          * yet, or by some tracers (like irqsoff), that just want to
509          * know if the ring buffer has been disabled, but it can handle
510          * races of where it gets disabled but we still do a record.
511          * As the check is in the fast path of the tracers, it is more
512          * important to be fast than accurate.
513          */
514         tr->buffer_disabled = 0;
515         /* Make the flag seen by readers */
516         smp_wmb();
517 }
518
519 /**
520  * tracing_on - enable tracing buffers
521  *
522  * This function enables tracing buffers that may have been
523  * disabled with tracing_off.
524  */
525 void tracing_on(void)
526 {
527         tracer_tracing_on(&global_trace);
528 }
529 EXPORT_SYMBOL_GPL(tracing_on);
530
531 /**
532  * __trace_puts - write a constant string into the trace buffer.
533  * @ip:    The address of the caller
534  * @str:   The constant string to write
535  * @size:  The size of the string.
536  */
537 int __trace_puts(unsigned long ip, const char *str, int size)
538 {
539         struct ring_buffer_event *event;
540         struct ring_buffer *buffer;
541         struct print_entry *entry;
542         unsigned long irq_flags;
543         int alloc;
544         int pc;
545
546         if (!(trace_flags & TRACE_ITER_PRINTK))
547                 return 0;
548
549         pc = preempt_count();
550
551         if (unlikely(tracing_selftest_running || tracing_disabled))
552                 return 0;
553
554         alloc = sizeof(*entry) + size + 2; /* possible \n added */
555
556         local_save_flags(irq_flags);
557         buffer = global_trace.trace_buffer.buffer;
558         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
559                                           irq_flags, pc);
560         if (!event)
561                 return 0;
562
563         entry = ring_buffer_event_data(event);
564         entry->ip = ip;
565
566         memcpy(&entry->buf, str, size);
567
568         /* Add a newline if necessary */
569         if (entry->buf[size - 1] != '\n') {
570                 entry->buf[size] = '\n';
571                 entry->buf[size + 1] = '\0';
572         } else
573                 entry->buf[size] = '\0';
574
575         __buffer_unlock_commit(buffer, event);
576         ftrace_trace_stack(buffer, irq_flags, 4, pc, NULL);
577
578         return size;
579 }
580 EXPORT_SYMBOL_GPL(__trace_puts);
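/*
 * A minimal usage sketch (illustrative, not part of this file): callers
 * normally go through the trace_puts() macro from <linux/kernel.h>, which
 * dispatches to __trace_puts() or __trace_bputs() depending on whether the
 * string is a build-time constant:
 *
 *	trace_puts("hit the slow path\n");
 */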
581
582 /**
583  * __trace_bputs - write the pointer to a constant string into trace buffer
584  * @ip:    The address of the caller
585  * @str:   The constant string to write to the buffer
586  */
587 int __trace_bputs(unsigned long ip, const char *str)
588 {
589         struct ring_buffer_event *event;
590         struct ring_buffer *buffer;
591         struct bputs_entry *entry;
592         unsigned long irq_flags;
593         int size = sizeof(struct bputs_entry);
594         int pc;
595
596         if (!(trace_flags & TRACE_ITER_PRINTK))
597                 return 0;
598
599         pc = preempt_count();
600
601         if (unlikely(tracing_selftest_running || tracing_disabled))
602                 return 0;
603
604         local_save_flags(irq_flags);
605         buffer = global_trace.trace_buffer.buffer;
606         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
607                                           irq_flags, pc);
608         if (!event)
609                 return 0;
610
611         entry = ring_buffer_event_data(event);
612         entry->ip                       = ip;
613         entry->str                      = str;
614
615         __buffer_unlock_commit(buffer, event);
616         ftrace_trace_stack(buffer, irq_flags, 4, pc, NULL);
617
618         return 1;
619 }
620 EXPORT_SYMBOL_GPL(__trace_bputs);
621
622 #ifdef CONFIG_TRACER_SNAPSHOT
623 /**
624  * tracing_snapshot - take a snapshot of the current buffer.
625  *
626  * This causes a swap between the snapshot buffer and the current live
627  * tracing buffer. You can use this to take snapshots of the live
628  * trace when some condition is triggered, but continue to trace.
629  *
630  * Note, make sure to allocate the snapshot either with
631  * tracing_snapshot_alloc(), or by doing it manually
632  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
633  *
634  * If the snapshot buffer is not allocated, this will stop tracing,
635  * basically making a permanent snapshot.
636  */
637 void tracing_snapshot(void)
638 {
639         struct trace_array *tr = &global_trace;
640         struct tracer *tracer = tr->current_trace;
641         unsigned long flags;
642
643         if (in_nmi()) {
644                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
645                 internal_trace_puts("*** snapshot is being ignored        ***\n");
646                 return;
647         }
648
649         if (!tr->allocated_snapshot) {
650                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
651                 internal_trace_puts("*** stopping trace here!   ***\n");
652                 tracing_off();
653                 return;
654         }
655
656         /* Note, snapshot can not be used when the tracer uses it */
657         if (tracer->use_max_tr) {
658                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
659                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
660                 return;
661         }
662
663         local_irq_save(flags);
664         update_max_tr(tr, current, smp_processor_id());
665         local_irq_restore(flags);
666 }
667 EXPORT_SYMBOL_GPL(tracing_snapshot);
668
669 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
670                                         struct trace_buffer *size_buf, int cpu_id);
671 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
672
673 static int alloc_snapshot(struct trace_array *tr)
674 {
675         int ret;
676
677         if (!tr->allocated_snapshot) {
678
679                 /* allocate spare buffer */
680                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
681                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
682                 if (ret < 0)
683                         return ret;
684
685                 tr->allocated_snapshot = true;
686         }
687
688         return 0;
689 }
690
691 static void free_snapshot(struct trace_array *tr)
692 {
693          * We don't free the ring buffer; instead, we resize it because
694          * the max_tr ring buffer has some state (e.g. ring->clock) and
695          * we want to preserve it.
696          * we want preserve it.
697          */
698         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
699         set_buffer_entries(&tr->max_buffer, 1);
700         tracing_reset_online_cpus(&tr->max_buffer);
701         tr->allocated_snapshot = false;
702 }
703
704 /**
705  * tracing_alloc_snapshot - allocate snapshot buffer.
706  *
707  * This only allocates the snapshot buffer if it isn't already
708  * allocated - it doesn't also take a snapshot.
709  *
710  * This is meant to be used in cases where the snapshot buffer needs
711  * to be set up for events that can't sleep but need to be able to
712  * trigger a snapshot.
713  */
714 int tracing_alloc_snapshot(void)
715 {
716         struct trace_array *tr = &global_trace;
717         int ret;
718
719         ret = alloc_snapshot(tr);
720         WARN_ON(ret < 0);
721
722         return ret;
723 }
724 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
725
726 /**
727  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
728  *
729  * This is similar to tracing_snapshot(), but it will allocate the
730  * snapshot buffer if it isn't already allocated. Use this only
731  * where it is safe to sleep, as the allocation may sleep.
732  *
733  * This causes a swap between the snapshot buffer and the current live
734  * tracing buffer. You can use this to take snapshots of the live
735  * trace when some condition is triggered, but continue to trace.
736  */
737 void tracing_snapshot_alloc(void)
738 {
739         int ret;
740
741         ret = tracing_alloc_snapshot();
742         if (ret < 0)
743                 return;
744
745         tracing_snapshot();
746 }
747 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
748 #else
749 void tracing_snapshot(void)
750 {
751         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
752 }
753 EXPORT_SYMBOL_GPL(tracing_snapshot);
754 int tracing_alloc_snapshot(void)
755 {
756         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
757         return -ENODEV;
758 }
759 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
760 void tracing_snapshot_alloc(void)
761 {
762         /* Give warning */
763         tracing_snapshot();
764 }
765 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
766 #endif /* CONFIG_TRACER_SNAPSHOT */
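/*
 * A minimal usage sketch, assuming a hypothetical module that wants to
 * capture the trace leading up to a problem: allocate the snapshot buffer
 * once from a context that may sleep, then take snapshots from the error
 * path (any context except NMI):
 *
 *	static int __init my_mod_init(void)	// hypothetical module init
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void my_error_handler(void)	// hypothetical error path
 *	{
 *		tracing_snapshot();	// swap live and snapshot buffers
 *	}
 */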
767
768 static void tracer_tracing_off(struct trace_array *tr)
769 {
770         if (tr->trace_buffer.buffer)
771                 ring_buffer_record_off(tr->trace_buffer.buffer);
772         /*
773          * This flag is looked at when buffers haven't been allocated
774          * yet, or by some tracers (like irqsoff), that just want to
775          * know if the ring buffer has been disabled, but it can handle
776          * races of where it gets disabled but we still do a record.
777          * As the check is in the fast path of the tracers, it is more
778          * important to be fast than accurate.
779          */
780         tr->buffer_disabled = 1;
781         /* Make the flag seen by readers */
782         smp_wmb();
783 }
784
785 /**
786  * tracing_off - turn off tracing buffers
787  *
788  * This function stops the tracing buffers from recording data.
789  * It does not disable any overhead the tracers themselves may
790  * be causing. This function simply causes all recording to
791  * the ring buffers to fail.
792  */
793 void tracing_off(void)
794 {
795         tracer_tracing_off(&global_trace);
796 }
797 EXPORT_SYMBOL_GPL(tracing_off);
798
799 void disable_trace_on_warning(void)
800 {
801         if (__disable_trace_on_warning)
802                 tracing_off();
803 }
804
805 /**
806  * tracer_tracing_is_on - show the real state of the ring buffer
807  * @tr : the trace array whose ring buffer state is being queried
808  *
809  * Shows the real state of the ring buffer: whether it is enabled or not.
810  */
811 static int tracer_tracing_is_on(struct trace_array *tr)
812 {
813         if (tr->trace_buffer.buffer)
814                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
815         return !tr->buffer_disabled;
816 }
817
818 /**
819  * tracing_is_on - show whether the ring buffers are enabled
820  */
821 int tracing_is_on(void)
822 {
823         return tracer_tracing_is_on(&global_trace);
824 }
825 EXPORT_SYMBOL_GPL(tracing_is_on);
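/*
 * A minimal usage sketch (hypothetical debugging aid, not from the original
 * source): stop the ring buffers as soon as an anomaly is detected so the
 * events leading up to it are preserved, inspect, then turn them back on:
 *
 *	if (detected_anomaly) {			// hypothetical condition
 *		tracing_off();
 *		pr_info("tracing is now %s\n",
 *			tracing_is_on() ? "on" : "off");
 *		// ... dump or inspect state ...
 *		tracing_on();
 *	}
 */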
826
827 static int __init set_buf_size(char *str)
828 {
829         unsigned long buf_size;
830
831         if (!str)
832                 return 0;
833         buf_size = memparse(str, &str);
834         /* nr_entries can not be zero */
835         if (buf_size == 0)
836                 return 0;
837         trace_buf_size = buf_size;
838         return 1;
839 }
840 __setup("trace_buf_size=", set_buf_size);
841
842 static int __init set_tracing_thresh(char *str)
843 {
844         unsigned long threshold;
845         int ret;
846
847         if (!str)
848                 return 0;
849         ret = kstrtoul(str, 0, &threshold);
850         if (ret < 0)
851                 return 0;
852         tracing_thresh = threshold * 1000;
853         return 1;
854 }
855 __setup("tracing_thresh=", set_tracing_thresh);
856
857 unsigned long nsecs_to_usecs(unsigned long nsecs)
858 {
859         return nsecs / 1000;
860 }
861
862 /*
863  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
864  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
865  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
866  * of strings in the order that the enums were defined.
867  */
868 #undef C
869 #define C(a, b) b
870
871 /* These must match the bit positions in trace_iterator_flags */
872 static const char *trace_options[] = {
873         TRACE_FLAGS
874         NULL
875 };
876
877 static struct {
878         u64 (*func)(void);
879         const char *name;
880         int in_ns;              /* is this clock in nanoseconds? */
881 } trace_clocks[] = {
882         { trace_clock_local,            "local",        1 },
883         { trace_clock_global,           "global",       1 },
884         { trace_clock_counter,          "counter",      0 },
885         { trace_clock_jiffies,          "uptime",       0 },
886         { trace_clock,                  "perf",         1 },
887         { ktime_get_mono_fast_ns,       "mono",         1 },
888         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
889         ARCH_TRACE_CLOCKS
890 };
891
892 /*
893  * trace_parser_get_init - gets the buffer for trace parser
894  */
895 int trace_parser_get_init(struct trace_parser *parser, int size)
896 {
897         memset(parser, 0, sizeof(*parser));
898
899         parser->buffer = kmalloc(size, GFP_KERNEL);
900         if (!parser->buffer)
901                 return 1;
902
903         parser->size = size;
904         return 0;
905 }
906
907 /*
908  * trace_parser_put - frees the buffer for trace parser
909  */
910 void trace_parser_put(struct trace_parser *parser)
911 {
912         kfree(parser->buffer);
913 }
914
915 /*
916  * trace_get_user - reads the user input string separated by space
917  * (matched by isspace(ch))
918  *
919  * For each string found the 'struct trace_parser' is updated,
920  * and the function returns.
921  *
922  * Returns number of bytes read.
923  *
924  * See kernel/trace/trace.h for 'struct trace_parser' details.
925  */
926 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
927         size_t cnt, loff_t *ppos)
928 {
929         char ch;
930         size_t read = 0;
931         ssize_t ret;
932
933         if (!*ppos)
934                 trace_parser_clear(parser);
935
936         ret = get_user(ch, ubuf++);
937         if (ret)
938                 goto out;
939
940         read++;
941         cnt--;
942
943         /*
944          * The parser is not finished with the last write,
945          * continue reading the user input without skipping spaces.
946          */
947         if (!parser->cont) {
948                 /* skip white space */
949                 while (cnt && isspace(ch)) {
950                         ret = get_user(ch, ubuf++);
951                         if (ret)
952                                 goto out;
953                         read++;
954                         cnt--;
955                 }
956
957                 /* only spaces were written */
958                 if (isspace(ch)) {
959                         *ppos += read;
960                         ret = read;
961                         goto out;
962                 }
963
964                 parser->idx = 0;
965         }
966
967         /* read the non-space input */
968         while (cnt && !isspace(ch)) {
969                 if (parser->idx < parser->size - 1)
970                         parser->buffer[parser->idx++] = ch;
971                 else {
972                         ret = -EINVAL;
973                         goto out;
974                 }
975                 ret = get_user(ch, ubuf++);
976                 if (ret)
977                         goto out;
978                 read++;
979                 cnt--;
980         }
981
982         /* We either got finished input or we have to wait for another call. */
983         if (isspace(ch)) {
984                 parser->buffer[parser->idx] = 0;
985                 parser->cont = false;
986         } else if (parser->idx < parser->size - 1) {
987                 parser->cont = true;
988                 parser->buffer[parser->idx++] = ch;
989         } else {
990                 ret = -EINVAL;
991                 goto out;
992         }
993
994         *ppos += read;
995         ret = read;
996
997 out:
998         return ret;
999 }
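/*
 * A sketch of the typical caller, modelled on the ftrace filter write
 * handlers (the .write method of a tracefs file); handle_one_word() is a
 * hypothetical helper standing in for whatever the file actually does:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		handle_one_word(parser.buffer);
 *	trace_parser_put(&parser);
 *	return read;
 */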
1000
1001 /* TODO add a seq_buf_to_buffer() */
1002 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1003 {
1004         int len;
1005
1006         if (trace_seq_used(s) <= s->seq.readpos)
1007                 return -EBUSY;
1008
1009         len = trace_seq_used(s) - s->seq.readpos;
1010         if (cnt > len)
1011                 cnt = len;
1012         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1013
1014         s->seq.readpos += cnt;
1015         return cnt;
1016 }
1017
1018 unsigned long __read_mostly     tracing_thresh;
1019
1020 #ifdef CONFIG_TRACER_MAX_TRACE
1021 /*
1022  * Copy the new maximum trace into the separate maximum-trace
1023  * structure. (this way the maximum trace is permanently saved,
1024  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1025  */
1026 static void
1027 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1028 {
1029         struct trace_buffer *trace_buf = &tr->trace_buffer;
1030         struct trace_buffer *max_buf = &tr->max_buffer;
1031         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1032         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1033
1034         max_buf->cpu = cpu;
1035         max_buf->time_start = data->preempt_timestamp;
1036
1037         max_data->saved_latency = tr->max_latency;
1038         max_data->critical_start = data->critical_start;
1039         max_data->critical_end = data->critical_end;
1040
1041         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1042         max_data->pid = tsk->pid;
1043         /*
1044          * If tsk == current, then use current_uid(), as that does not use
1045          * RCU. The irq tracer can be called out of RCU scope.
1046          */
1047         if (tsk == current)
1048                 max_data->uid = current_uid();
1049         else
1050                 max_data->uid = task_uid(tsk);
1051
1052         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1053         max_data->policy = tsk->policy;
1054         max_data->rt_priority = tsk->rt_priority;
1055
1056         /* record this task's comm */
1057         tracing_record_cmdline(tsk);
1058 }
1059
1060 /**
1061  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1062  * @tr: tracer
1063  * @tsk: the task with the latency
1064  * @cpu: The cpu that initiated the trace.
1065  *
1066  * Flip the buffers between the @tr and the max_tr and record information
1067  * about which task was the cause of this latency.
1068  */
1069 void
1070 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1071 {
1072         struct ring_buffer *buf;
1073
1074         if (tr->stop_count)
1075                 return;
1076
1077         WARN_ON_ONCE(!irqs_disabled());
1078
1079         if (!tr->allocated_snapshot) {
1080                 /* Only the nop tracer should hit this when disabling */
1081                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1082                 return;
1083         }
1084
1085         arch_spin_lock(&tr->max_lock);
1086
1087         buf = tr->trace_buffer.buffer;
1088         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1089         tr->max_buffer.buffer = buf;
1090
1091         __update_max_tr(tr, tsk, cpu);
1092         arch_spin_unlock(&tr->max_lock);
1093 }
1094
1095 /**
1096  * update_max_tr_single - only copy one trace over, and reset the rest
1097  * @tr - tracer
1098  * @tsk - task with the latency
1099  * @cpu - the cpu of the buffer to copy.
1100  *
1101  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1102  */
1103 void
1104 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1105 {
1106         int ret;
1107
1108         if (tr->stop_count)
1109                 return;
1110
1111         WARN_ON_ONCE(!irqs_disabled());
1112         if (!tr->allocated_snapshot) {
1113                 /* Only the nop tracer should hit this when disabling */
1114                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1115                 return;
1116         }
1117
1118         arch_spin_lock(&tr->max_lock);
1119
1120         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1121
1122         if (ret == -EBUSY) {
1123                 /*
1124                  * We failed to swap the buffer due to a commit taking
1125                  * place on this CPU. We fail to record, but we reset
1126                  * the max trace buffer (no one writes directly to it)
1127                  * and flag that it failed.
1128                  */
1129                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1130                         "Failed to swap buffers due to commit in progress\n");
1131         }
1132
1133         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1134
1135         __update_max_tr(tr, tsk, cpu);
1136         arch_spin_unlock(&tr->max_lock);
1137 }
1138 #endif /* CONFIG_TRACER_MAX_TRACE */
1139
1140 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1141 {
1142         /* Iterators are static, they should be filled or empty */
1143         if (trace_buffer_iter(iter, iter->cpu_file))
1144                 return 0;
1145
1146         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1147                                 full);
1148 }
1149
1150 #ifdef CONFIG_FTRACE_STARTUP_TEST
1151 static int run_tracer_selftest(struct tracer *type)
1152 {
1153         struct trace_array *tr = &global_trace;
1154         struct tracer *saved_tracer = tr->current_trace;
1155         int ret;
1156
1157         if (!type->selftest || tracing_selftest_disabled)
1158                 return 0;
1159
1160         /*
1161          * Run a selftest on this tracer.
1162          * Here we reset the trace buffer, and set the current
1163          * tracer to be this tracer. The tracer can then run some
1164          * internal tracing to verify that everything is in order.
1165          * If we fail, we do not register this tracer.
1166          */
1167         tracing_reset_online_cpus(&tr->trace_buffer);
1168
1169         tr->current_trace = type;
1170
1171 #ifdef CONFIG_TRACER_MAX_TRACE
1172         if (type->use_max_tr) {
1173                 /* If we expanded the buffers, make sure the max is expanded too */
1174                 if (ring_buffer_expanded)
1175                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1176                                            RING_BUFFER_ALL_CPUS);
1177                 tr->allocated_snapshot = true;
1178         }
1179 #endif
1180
1181         /* the test is responsible for initializing and enabling */
1182         pr_info("Testing tracer %s: ", type->name);
1183         ret = type->selftest(type, tr);
1184         /* the test is responsible for resetting too */
1185         tr->current_trace = saved_tracer;
1186         if (ret) {
1187                 printk(KERN_CONT "FAILED!\n");
1188                 /* Add the warning after printing 'FAILED' */
1189                 WARN_ON(1);
1190                 return -1;
1191         }
1192         /* Only reset on passing, to avoid touching corrupted buffers */
1193         tracing_reset_online_cpus(&tr->trace_buffer);
1194
1195 #ifdef CONFIG_TRACER_MAX_TRACE
1196         if (type->use_max_tr) {
1197                 tr->allocated_snapshot = false;
1198
1199                 /* Shrink the max buffer again */
1200                 if (ring_buffer_expanded)
1201                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1202                                            RING_BUFFER_ALL_CPUS);
1203         }
1204 #endif
1205
1206         printk(KERN_CONT "PASSED\n");
1207         return 0;
1208 }
1209 #else
1210 static inline int run_tracer_selftest(struct tracer *type)
1211 {
1212         return 0;
1213 }
1214 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1215
1216 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1217
1218 /**
1219  * register_tracer - register a tracer with the ftrace system.
1220  * @type - the plugin for the tracer
1221  *
1222  * Register a new plugin tracer.
1223  */
1224 int register_tracer(struct tracer *type)
1225 {
1226         struct tracer *t;
1227         int ret = 0;
1228
1229         if (!type->name) {
1230                 pr_info("Tracer must have a name\n");
1231                 return -1;
1232         }
1233
1234         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1235                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1236                 return -1;
1237         }
1238
1239         mutex_lock(&trace_types_lock);
1240
1241         tracing_selftest_running = true;
1242
1243         for (t = trace_types; t; t = t->next) {
1244                 if (strcmp(type->name, t->name) == 0) {
1245                         /* already found */
1246                         pr_info("Tracer %s already registered\n",
1247                                 type->name);
1248                         ret = -1;
1249                         goto out;
1250                 }
1251         }
1252
1253         if (!type->set_flag)
1254                 type->set_flag = &dummy_set_flag;
1255         if (!type->flags)
1256                 type->flags = &dummy_tracer_flags;
1257         else
1258                 if (!type->flags->opts)
1259                         type->flags->opts = dummy_tracer_opt;
1260
1261         ret = run_tracer_selftest(type);
1262         if (ret < 0)
1263                 goto out;
1264
1265         type->next = trace_types;
1266         trace_types = type;
1267         add_tracer_options(&global_trace, type);
1268
1269  out:
1270         tracing_selftest_running = false;
1271         mutex_unlock(&trace_types_lock);
1272
1273         if (ret || !default_bootup_tracer)
1274                 goto out_unlock;
1275
1276         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1277                 goto out_unlock;
1278
1279         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1280         /* Do we want this tracer to start on bootup? */
1281         tracing_set_tracer(&global_trace, type->name);
1282         default_bootup_tracer = NULL;
1283         /* disable other selftests, since this will break it. */
1284         tracing_selftest_disabled = true;
1285 #ifdef CONFIG_FTRACE_STARTUP_TEST
1286         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1287                type->name);
1288 #endif
1289
1290  out_unlock:
1291         return ret;
1292 }
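/*
 * A minimal registration sketch (hypothetical tracer, not part of this
 * file): a tracer only needs a name and an init callback, everything else
 * in struct tracer is optional:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static int __init my_tracer_initcall(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_initcall);
 */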
1293
1294 void tracing_reset(struct trace_buffer *buf, int cpu)
1295 {
1296         struct ring_buffer *buffer = buf->buffer;
1297
1298         if (!buffer)
1299                 return;
1300
1301         ring_buffer_record_disable(buffer);
1302
1303         /* Make sure all commits have finished */
1304         synchronize_sched();
1305         ring_buffer_reset_cpu(buffer, cpu);
1306
1307         ring_buffer_record_enable(buffer);
1308 }
1309
1310 void tracing_reset_online_cpus(struct trace_buffer *buf)
1311 {
1312         struct ring_buffer *buffer = buf->buffer;
1313         int cpu;
1314
1315         if (!buffer)
1316                 return;
1317
1318         ring_buffer_record_disable(buffer);
1319
1320         /* Make sure all commits have finished */
1321         synchronize_sched();
1322
1323         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1324
1325         for_each_online_cpu(cpu)
1326                 ring_buffer_reset_cpu(buffer, cpu);
1327
1328         ring_buffer_record_enable(buffer);
1329 }
1330
1331 /* Must have trace_types_lock held */
1332 void tracing_reset_all_online_cpus(void)
1333 {
1334         struct trace_array *tr;
1335
1336         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1337                 tracing_reset_online_cpus(&tr->trace_buffer);
1338 #ifdef CONFIG_TRACER_MAX_TRACE
1339                 tracing_reset_online_cpus(&tr->max_buffer);
1340 #endif
1341         }
1342 }
1343
1344 #define SAVED_CMDLINES_DEFAULT 128
1345 #define NO_CMDLINE_MAP UINT_MAX
1346 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1347 struct saved_cmdlines_buffer {
1348         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1349         unsigned *map_cmdline_to_pid;
1350         unsigned cmdline_num;
1351         int cmdline_idx;
1352         char *saved_cmdlines;
1353 };
1354 static struct saved_cmdlines_buffer *savedcmd;
1355
1356 /* temporarily disable recording */
1357 static atomic_t trace_record_cmdline_disabled __read_mostly;
1358
1359 static inline char *get_saved_cmdlines(int idx)
1360 {
1361         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1362 }
1363
1364 static inline void set_cmdline(int idx, const char *cmdline)
1365 {
1366         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1367 }
1368
1369 static int allocate_cmdlines_buffer(unsigned int val,
1370                                     struct saved_cmdlines_buffer *s)
1371 {
1372         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1373                                         GFP_KERNEL);
1374         if (!s->map_cmdline_to_pid)
1375                 return -ENOMEM;
1376
1377         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1378         if (!s->saved_cmdlines) {
1379                 kfree(s->map_cmdline_to_pid);
1380                 return -ENOMEM;
1381         }
1382
1383         s->cmdline_idx = 0;
1384         s->cmdline_num = val;
1385         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1386                sizeof(s->map_pid_to_cmdline));
1387         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1388                val * sizeof(*s->map_cmdline_to_pid));
1389
1390         return 0;
1391 }
1392
1393 static int trace_create_savedcmd(void)
1394 {
1395         int ret;
1396
1397         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1398         if (!savedcmd)
1399                 return -ENOMEM;
1400
1401         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1402         if (ret < 0) {
1403                 kfree(savedcmd);
1404                 savedcmd = NULL;
1405                 return -ENOMEM;
1406         }
1407
1408         return 0;
1409 }
1410
1411 int is_tracing_stopped(void)
1412 {
1413         return global_trace.stop_count;
1414 }
1415
1416 /**
1417  * tracing_start - quick start of the tracer
1418  *
1419  * If tracing is enabled but was stopped by tracing_stop,
1420  * this will start the tracer back up.
1421  */
1422 void tracing_start(void)
1423 {
1424         struct ring_buffer *buffer;
1425         unsigned long flags;
1426
1427         if (tracing_disabled)
1428                 return;
1429
1430         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1431         if (--global_trace.stop_count) {
1432                 if (global_trace.stop_count < 0) {
1433                         /* Someone screwed up their debugging */
1434                         WARN_ON_ONCE(1);
1435                         global_trace.stop_count = 0;
1436                 }
1437                 goto out;
1438         }
1439
1440         /* Prevent the buffers from switching */
1441         arch_spin_lock(&global_trace.max_lock);
1442
1443         buffer = global_trace.trace_buffer.buffer;
1444         if (buffer)
1445                 ring_buffer_record_enable(buffer);
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         buffer = global_trace.max_buffer.buffer;
1449         if (buffer)
1450                 ring_buffer_record_enable(buffer);
1451 #endif
1452
1453         arch_spin_unlock(&global_trace.max_lock);
1454
1455  out:
1456         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1457 }
1458
1459 static void tracing_start_tr(struct trace_array *tr)
1460 {
1461         struct ring_buffer *buffer;
1462         unsigned long flags;
1463
1464         if (tracing_disabled)
1465                 return;
1466
1467         /* If global, we need to also start the max tracer */
1468         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1469                 return tracing_start();
1470
1471         raw_spin_lock_irqsave(&tr->start_lock, flags);
1472
1473         if (--tr->stop_count) {
1474                 if (tr->stop_count < 0) {
1475                         /* Someone screwed up their debugging */
1476                         WARN_ON_ONCE(1);
1477                         tr->stop_count = 0;
1478                 }
1479                 goto out;
1480         }
1481
1482         buffer = tr->trace_buffer.buffer;
1483         if (buffer)
1484                 ring_buffer_record_enable(buffer);
1485
1486  out:
1487         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1488 }
1489
1490 /**
1491  * tracing_stop - quick stop of the tracer
1492  *
1493  * Light weight way to stop tracing. Use in conjunction with
1494  * tracing_start.
1495  */
1496 void tracing_stop(void)
1497 {
1498         struct ring_buffer *buffer;
1499         unsigned long flags;
1500
1501         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1502         if (global_trace.stop_count++)
1503                 goto out;
1504
1505         /* Prevent the buffers from switching */
1506         arch_spin_lock(&global_trace.max_lock);
1507
1508         buffer = global_trace.trace_buffer.buffer;
1509         if (buffer)
1510                 ring_buffer_record_disable(buffer);
1511
1512 #ifdef CONFIG_TRACER_MAX_TRACE
1513         buffer = global_trace.max_buffer.buffer;
1514         if (buffer)
1515                 ring_buffer_record_disable(buffer);
1516 #endif
1517
1518         arch_spin_unlock(&global_trace.max_lock);
1519
1520  out:
1521         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1522 }
1523
1524 static void tracing_stop_tr(struct trace_array *tr)
1525 {
1526         struct ring_buffer *buffer;
1527         unsigned long flags;
1528
1529         /* If global, we need to also stop the max tracer */
1530         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1531                 return tracing_stop();
1532
1533         raw_spin_lock_irqsave(&tr->start_lock, flags);
1534         if (tr->stop_count++)
1535                 goto out;
1536
1537         buffer = tr->trace_buffer.buffer;
1538         if (buffer)
1539                 ring_buffer_record_disable(buffer);
1540
1541  out:
1542         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1543 }
1544
1545 void trace_stop_cmdline_recording(void);
1546
1547 static int trace_save_cmdline(struct task_struct *tsk)
1548 {
1549         unsigned pid, idx;
1550
1551         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1552                 return 0;
1553
1554         /*
1555          * It's not the end of the world if we don't get
1556          * the lock, but we also don't want to spin
1557          * nor do we want to disable interrupts,
1558          * so if we miss here, then better luck next time.
1559          */
1560         if (!arch_spin_trylock(&trace_cmdline_lock))
1561                 return 0;
1562
1563         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1564         if (idx == NO_CMDLINE_MAP) {
1565                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1566
1567                 /*
1568                  * Check whether the cmdline buffer at idx has a pid
1569                  * mapped. We are going to overwrite that entry so we
1570                  * need to clear the map_pid_to_cmdline. Otherwise we
1571                  * would read the new comm for the old pid.
1572                  */
1573                 pid = savedcmd->map_cmdline_to_pid[idx];
1574                 if (pid != NO_CMDLINE_MAP)
1575                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1576
1577                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1578                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1579
1580                 savedcmd->cmdline_idx = idx;
1581         }
1582
1583         set_cmdline(idx, tsk->comm);
1584
1585         arch_spin_unlock(&trace_cmdline_lock);
1586
1587         return 1;
1588 }
1589
1590 static void __trace_find_cmdline(int pid, char comm[])
1591 {
1592         unsigned map;
1593
1594         if (!pid) {
1595                 strcpy(comm, "<idle>");
1596                 return;
1597         }
1598
1599         if (WARN_ON_ONCE(pid < 0)) {
1600                 strcpy(comm, "<XXX>");
1601                 return;
1602         }
1603
1604         if (pid > PID_MAX_DEFAULT) {
1605                 strcpy(comm, "<...>");
1606                 return;
1607         }
1608
1609         map = savedcmd->map_pid_to_cmdline[pid];
1610         if (map != NO_CMDLINE_MAP)
1611                 strcpy(comm, get_saved_cmdlines(map));
1612         else
1613                 strcpy(comm, "<...>");
1614 }
1615
1616 void trace_find_cmdline(int pid, char comm[])
1617 {
1618         preempt_disable();
1619         arch_spin_lock(&trace_cmdline_lock);
1620
1621         __trace_find_cmdline(pid, comm);
1622
1623         arch_spin_unlock(&trace_cmdline_lock);
1624         preempt_enable();
1625 }
1626
1627 void tracing_record_cmdline(struct task_struct *tsk)
1628 {
1629         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1630                 return;
1631
1632         if (!__this_cpu_read(trace_cmdline_save))
1633                 return;
1634
1635         if (trace_save_cmdline(tsk))
1636                 __this_cpu_write(trace_cmdline_save, false);
1637 }
1638
1639 void
1640 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1641                              int pc)
1642 {
1643         struct task_struct *tsk = current;
1644
1645         entry->preempt_count            = pc & 0xff;
1646         entry->pid                      = (tsk) ? tsk->pid : 0;
1647         entry->flags =
1648 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1649                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1650 #else
1651                 TRACE_FLAG_IRQS_NOSUPPORT |
1652 #endif
1653                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1654                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1655                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1656                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1657 }
1658 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1659
1660 struct ring_buffer_event *
1661 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1662                           int type,
1663                           unsigned long len,
1664                           unsigned long flags, int pc)
1665 {
1666         struct ring_buffer_event *event;
1667
1668         event = ring_buffer_lock_reserve(buffer, len);
1669         if (event != NULL) {
1670                 struct trace_entry *ent = ring_buffer_event_data(event);
1671
1672                 tracing_generic_entry_update(ent, flags, pc);
1673                 ent->type = type;
1674         }
1675
1676         return event;
1677 }
1678
1679 void
1680 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1681 {
1682         __this_cpu_write(trace_cmdline_save, true);
1683         ring_buffer_unlock_commit(buffer, event);
1684 }
1685
1686 void trace_buffer_unlock_commit(struct trace_array *tr,
1687                                 struct ring_buffer *buffer,
1688                                 struct ring_buffer_event *event,
1689                                 unsigned long flags, int pc)
1690 {
1691         __buffer_unlock_commit(buffer, event);
1692
1693         ftrace_trace_stack(buffer, flags, 6, pc, NULL);
1694         ftrace_trace_userstack(buffer, flags, pc);
1695 }
1696 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1697
1698 static struct ring_buffer *temp_buffer;
1699
1700 struct ring_buffer_event *
1701 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1702                           struct trace_event_file *trace_file,
1703                           int type, unsigned long len,
1704                           unsigned long flags, int pc)
1705 {
1706         struct ring_buffer_event *entry;
1707
1708         *current_rb = trace_file->tr->trace_buffer.buffer;
1709         entry = trace_buffer_lock_reserve(*current_rb,
1710                                          type, len, flags, pc);
1711         /*
1712          * If tracing is off, but we have triggers enabled,
1713          * we still need to look at the event data. Use the temp_buffer
1714          * to store the trace event for the trigger to use. It's recursion
1715          * safe and will not be recorded anywhere.
1716          */
1717         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1718                 *current_rb = temp_buffer;
1719                 entry = trace_buffer_lock_reserve(*current_rb,
1720                                                   type, len, flags, pc);
1721         }
1722         return entry;
1723 }
1724 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1725
1726 struct ring_buffer_event *
1727 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1728                                   int type, unsigned long len,
1729                                   unsigned long flags, int pc)
1730 {
1731         *current_rb = global_trace.trace_buffer.buffer;
1732         return trace_buffer_lock_reserve(*current_rb,
1733                                          type, len, flags, pc);
1734 }
1735 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1736
1737 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1738                                      struct ring_buffer *buffer,
1739                                      struct ring_buffer_event *event,
1740                                      unsigned long flags, int pc,
1741                                      struct pt_regs *regs)
1742 {
1743         __buffer_unlock_commit(buffer, event);
1744
1745         ftrace_trace_stack(buffer, flags, 6, pc, regs);
1746         ftrace_trace_userstack(buffer, flags, pc);
1747 }
1748 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1749
1750 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1751                                          struct ring_buffer_event *event)
1752 {
1753         ring_buffer_discard_commit(buffer, event);
1754 }
1755 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1756
1757 void
1758 trace_function(struct trace_array *tr,
1759                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1760                int pc)
1761 {
1762         struct trace_event_call *call = &event_function;
1763         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1764         struct ring_buffer_event *event;
1765         struct ftrace_entry *entry;
1766
1767         /* If we are reading the ring buffer, don't trace */
1768         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1769                 return;
1770
1771         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1772                                           flags, pc);
1773         if (!event)
1774                 return;
1775         entry   = ring_buffer_event_data(event);
1776         entry->ip                       = ip;
1777         entry->parent_ip                = parent_ip;
1778
1779         if (!call_filter_check_discard(call, entry, buffer, event))
1780                 __buffer_unlock_commit(buffer, event);
1781 }
1782
1783 #ifdef CONFIG_STACKTRACE
1784
1785 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1786 struct ftrace_stack {
1787         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1788 };
1789
1790 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1791 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1792
1793 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1794                                  unsigned long flags,
1795                                  int skip, int pc, struct pt_regs *regs)
1796 {
1797         struct trace_event_call *call = &event_kernel_stack;
1798         struct ring_buffer_event *event;
1799         struct stack_entry *entry;
1800         struct stack_trace trace;
1801         int use_stack;
1802         int size = FTRACE_STACK_ENTRIES;
1803
1804         trace.nr_entries        = 0;
1805         trace.skip              = skip;
1806
1807         /*
1808          * Since events can happen in NMIs, there's no safe way to
1809          * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt
1810          * or NMI comes in, it will just have to use the default
1811          * FTRACE_STACK_SIZE.
1812          */
1813         preempt_disable_notrace();
1814
1815         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1816         /*
1817          * We don't need any atomic variables, just a barrier.
1818          * If an interrupt comes in, we don't care, because it would
1819          * have exited and put the counter back to what we want.
1820          * We just need a barrier to keep gcc from moving things
1821          * around.
1822          */
1823         barrier();
1824         if (use_stack == 1) {
1825                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1826                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1827
1828                 if (regs)
1829                         save_stack_trace_regs(regs, &trace);
1830                 else
1831                         save_stack_trace(&trace);
1832
1833                 if (trace.nr_entries > size)
1834                         size = trace.nr_entries;
1835         } else
1836                 /* From now on, use_stack is a boolean */
1837                 use_stack = 0;
1838
1839         size *= sizeof(unsigned long);
1840
1841         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1842                                           sizeof(*entry) + size, flags, pc);
1843         if (!event)
1844                 goto out;
1845         entry = ring_buffer_event_data(event);
1846
1847         memset(&entry->caller, 0, size);
1848
1849         if (use_stack)
1850                 memcpy(&entry->caller, trace.entries,
1851                        trace.nr_entries * sizeof(unsigned long));
1852         else {
1853                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1854                 trace.entries           = entry->caller;
1855                 if (regs)
1856                         save_stack_trace_regs(regs, &trace);
1857                 else
1858                         save_stack_trace(&trace);
1859         }
1860
1861         entry->size = trace.nr_entries;
1862
1863         if (!call_filter_check_discard(call, entry, buffer, event))
1864                 __buffer_unlock_commit(buffer, event);
1865
1866  out:
1867         /* Again, don't let gcc optimize things here */
1868         barrier();
1869         __this_cpu_dec(ftrace_stack_reserve);
1870         preempt_enable_notrace();
1871
1872 }
1873
1874 static inline void ftrace_trace_stack(struct ring_buffer *buffer,
1875                                       unsigned long flags,
1876                                       int skip, int pc, struct pt_regs *regs)
1877 {
1878         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1879                 return;
1880
1881         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1882 }
1883
1884 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1885                    int pc)
1886 {
1887         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1888 }
1889
1890 /**
1891  * trace_dump_stack - record a stack back trace in the trace buffer
1892  * @skip: Number of functions to skip (helper handlers)
1893  */
1894 void trace_dump_stack(int skip)
1895 {
1896         unsigned long flags;
1897
1898         if (tracing_disabled || tracing_selftest_running)
1899                 return;
1900
1901         local_save_flags(flags);
1902
1903         /*
1904          * Skip 3 more frames, which seems to get us to the caller of
1905          * this function.
1906          */
1907         skip += 3;
1908         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1909                              flags, skip, preempt_count(), NULL);
1910 }
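     /*
      * For example, kernel code chasing an unexpected code path can record
      * its current stack into the trace buffer with
      *
      *      trace_dump_stack(0);
      *
      * where 0 means no frames are skipped beyond the internal helpers
      * already accounted for above.
      */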
1911
1912 static DEFINE_PER_CPU(int, user_stack_count);
1913
1914 void
1915 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1916 {
1917         struct trace_event_call *call = &event_user_stack;
1918         struct ring_buffer_event *event;
1919         struct userstack_entry *entry;
1920         struct stack_trace trace;
1921
1922         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1923                 return;
1924
1925         /*
1926          * NMIs cannot handle page faults, even with fixups.
1927          * Saving the user stack can (and often does) fault.
1928          */
1929         if (unlikely(in_nmi()))
1930                 return;
1931
1932         /*
1933          * prevent recursion, since the user stack tracing may
1934          * trigger other kernel events.
1935          */
1936         preempt_disable();
1937         if (__this_cpu_read(user_stack_count))
1938                 goto out;
1939
1940         __this_cpu_inc(user_stack_count);
1941
1942         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1943                                           sizeof(*entry), flags, pc);
1944         if (!event)
1945                 goto out_drop_count;
1946         entry   = ring_buffer_event_data(event);
1947
1948         entry->tgid             = current->tgid;
1949         memset(&entry->caller, 0, sizeof(entry->caller));
1950
1951         trace.nr_entries        = 0;
1952         trace.max_entries       = FTRACE_STACK_ENTRIES;
1953         trace.skip              = 0;
1954         trace.entries           = entry->caller;
1955
1956         save_stack_trace_user(&trace);
1957         if (!call_filter_check_discard(call, entry, buffer, event))
1958                 __buffer_unlock_commit(buffer, event);
1959
1960  out_drop_count:
1961         __this_cpu_dec(user_stack_count);
1962  out:
1963         preempt_enable();
1964 }
1965
1966 #ifdef UNUSED
1967 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1968 {
1969         ftrace_trace_userstack(tr, flags, preempt_count());
1970 }
1971 #endif /* UNUSED */
1972
1973 #endif /* CONFIG_STACKTRACE */
1974
1975 /* created for use with alloc_percpu */
1976 struct trace_buffer_struct {
1977         char buffer[TRACE_BUF_SIZE];
1978 };
1979
1980 static struct trace_buffer_struct *trace_percpu_buffer;
1981 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1982 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1983 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1984
1985 /*
1986  * The buffer used is dependent on the context. There is a per cpu
1987  * buffer for normal context, softirq context, hard irq context and
1988  * for NMI context. This allows for lockless recording.
1989  *
1990  * Note: if the buffers failed to be allocated, then this returns NULL.
1991  */
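     /*
      * For example, if trace_printk() fires in a softirq, is interrupted by
      * a hard irq that also traces, and an NMI then hits on top of that,
      * each context is handed a different per-cpu buffer below, so the
      * nested users never clobber each other and no locking is needed.
      */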
1992 static char *get_trace_buf(void)
1993 {
1994         struct trace_buffer_struct *percpu_buffer;
1995
1996         /*
1997          * If we have allocated per cpu buffers, then we do not
1998          * need to do any locking.
1999          */
2000         if (in_nmi())
2001                 percpu_buffer = trace_percpu_nmi_buffer;
2002         else if (in_irq())
2003                 percpu_buffer = trace_percpu_irq_buffer;
2004         else if (in_softirq())
2005                 percpu_buffer = trace_percpu_sirq_buffer;
2006         else
2007                 percpu_buffer = trace_percpu_buffer;
2008
2009         if (!percpu_buffer)
2010                 return NULL;
2011
2012         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2013 }
2014
2015 static int alloc_percpu_trace_buffer(void)
2016 {
2017         struct trace_buffer_struct *buffers;
2018         struct trace_buffer_struct *sirq_buffers;
2019         struct trace_buffer_struct *irq_buffers;
2020         struct trace_buffer_struct *nmi_buffers;
2021
2022         buffers = alloc_percpu(struct trace_buffer_struct);
2023         if (!buffers)
2024                 goto err_warn;
2025
2026         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2027         if (!sirq_buffers)
2028                 goto err_sirq;
2029
2030         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2031         if (!irq_buffers)
2032                 goto err_irq;
2033
2034         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2035         if (!nmi_buffers)
2036                 goto err_nmi;
2037
2038         trace_percpu_buffer = buffers;
2039         trace_percpu_sirq_buffer = sirq_buffers;
2040         trace_percpu_irq_buffer = irq_buffers;
2041         trace_percpu_nmi_buffer = nmi_buffers;
2042
2043         return 0;
2044
2045  err_nmi:
2046         free_percpu(irq_buffers);
2047  err_irq:
2048         free_percpu(sirq_buffers);
2049  err_sirq:
2050         free_percpu(buffers);
2051  err_warn:
2052         WARN(1, "Could not allocate percpu trace_printk buffer");
2053         return -ENOMEM;
2054 }
2055
2056 static int buffers_allocated;
2057
2058 void trace_printk_init_buffers(void)
2059 {
2060         if (buffers_allocated)
2061                 return;
2062
2063         if (alloc_percpu_trace_buffer())
2064                 return;
2065
2066         /* trace_printk() is for debug use only. Don't use it in production. */
2067
2068         pr_warning("\n");
2069         pr_warning("**********************************************************\n");
2070         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2071         pr_warning("**                                                      **\n");
2072         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2073         pr_warning("**                                                      **\n");
2074         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2075         pr_warning("** unsafe for production use.                           **\n");
2076         pr_warning("**                                                      **\n");
2077         pr_warning("** If you see this message and you are not debugging    **\n");
2078         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2079         pr_warning("**                                                      **\n");
2080         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2081         pr_warning("**********************************************************\n");
2082
2083         /* Expand the buffers to set size */
2084         tracing_update_buffers();
2085
2086         buffers_allocated = 1;
2087
2088         /*
2089          * trace_printk_init_buffers() can be called by modules.
2090          * If that happens, then we need to start cmdline recording
2091          * directly here. If the global_trace.buffer is already
2092          * allocated here, then this was called by module code.
2093          */
2094         if (global_trace.trace_buffer.buffer)
2095                 tracing_start_cmdline_record();
2096 }
2097
2098 void trace_printk_start_comm(void)
2099 {
2100         /* Start tracing comms if trace printk is set */
2101         if (!buffers_allocated)
2102                 return;
2103         tracing_start_cmdline_record();
2104 }
2105
2106 static void trace_printk_start_stop_comm(int enabled)
2107 {
2108         if (!buffers_allocated)
2109                 return;
2110
2111         if (enabled)
2112                 tracing_start_cmdline_record();
2113         else
2114                 tracing_stop_cmdline_record();
2115 }
2116
2117 /**
2118  * trace_vbprintk - write binary msg to tracing buffer
2119  * @ip:   The address of the caller
      * @fmt:  The string format to write in the buffer
      * @args: Arguments for @fmt
2120  */
2121 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2122 {
2123         struct trace_event_call *call = &event_bprint;
2124         struct ring_buffer_event *event;
2125         struct ring_buffer *buffer;
2126         struct trace_array *tr = &global_trace;
2127         struct bprint_entry *entry;
2128         unsigned long flags;
2129         char *tbuffer;
2130         int len = 0, size, pc;
2131
2132         if (unlikely(tracing_selftest_running || tracing_disabled))
2133                 return 0;
2134
2135         /* Don't pollute graph traces with trace_vprintk internals */
2136         pause_graph_tracing();
2137
2138         pc = preempt_count();
2139         preempt_disable_notrace();
2140
2141         tbuffer = get_trace_buf();
2142         if (!tbuffer) {
2143                 len = 0;
2144                 goto out;
2145         }
2146
2147         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2148
2149         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2150                 goto out;
2151
2152         local_save_flags(flags);
2153         size = sizeof(*entry) + sizeof(u32) * len;
2154         buffer = tr->trace_buffer.buffer;
2155         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2156                                           flags, pc);
2157         if (!event)
2158                 goto out;
2159         entry = ring_buffer_event_data(event);
2160         entry->ip                       = ip;
2161         entry->fmt                      = fmt;
2162
2163         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2164         if (!call_filter_check_discard(call, entry, buffer, event)) {
2165                 __buffer_unlock_commit(buffer, event);
2166                 ftrace_trace_stack(buffer, flags, 6, pc, NULL);
2167         }
2168
2169 out:
2170         preempt_enable_notrace();
2171         unpause_graph_tracing();
2172
2173         return len;
2174 }
2175 EXPORT_SYMBOL_GPL(trace_vbprintk);
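     /*
      * Illustrative sketch: trace_vbprintk() is normally reached through the
      * trace_printk() macro rather than called directly, e.g.
      *
      *      trace_printk("queued %d packets on ring %d\n", count, ring);
      *
      * which records the constant format pointer plus the binary arguments
      * and defers the string formatting until the buffer is read.
      */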
2176
2177 static int
2178 __trace_array_vprintk(struct ring_buffer *buffer,
2179                       unsigned long ip, const char *fmt, va_list args)
2180 {
2181         struct trace_event_call *call = &event_print;
2182         struct ring_buffer_event *event;
2183         int len = 0, size, pc;
2184         struct print_entry *entry;
2185         unsigned long flags;
2186         char *tbuffer;
2187
2188         if (tracing_disabled || tracing_selftest_running)
2189                 return 0;
2190
2191         /* Don't pollute graph traces with trace_vprintk internals */
2192         pause_graph_tracing();
2193
2194         pc = preempt_count();
2195         preempt_disable_notrace();
2196
2198         tbuffer = get_trace_buf();
2199         if (!tbuffer) {
2200                 len = 0;
2201                 goto out;
2202         }
2203
2204         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2205
2206         local_save_flags(flags);
2207         size = sizeof(*entry) + len + 1;
2208         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2209                                           flags, pc);
2210         if (!event)
2211                 goto out;
2212         entry = ring_buffer_event_data(event);
2213         entry->ip = ip;
2214
2215         memcpy(&entry->buf, tbuffer, len + 1);
2216         if (!call_filter_check_discard(call, entry, buffer, event)) {
2217                 __buffer_unlock_commit(buffer, event);
2218                 ftrace_trace_stack(buffer, flags, 6, pc, NULL);
2219         }
2220  out:
2221         preempt_enable_notrace();
2222         unpause_graph_tracing();
2223
2224         return len;
2225 }
2226
2227 int trace_array_vprintk(struct trace_array *tr,
2228                         unsigned long ip, const char *fmt, va_list args)
2229 {
2230         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2231 }
2232
2233 int trace_array_printk(struct trace_array *tr,
2234                        unsigned long ip, const char *fmt, ...)
2235 {
2236         int ret;
2237         va_list ap;
2238
2239         if (!(trace_flags & TRACE_ITER_PRINTK))
2240                 return 0;
2241
2242         va_start(ap, fmt);
2243         ret = trace_array_vprintk(tr, ip, fmt, ap);
2244         va_end(ap);
2245         return ret;
2246 }
2247
2248 int trace_array_printk_buf(struct ring_buffer *buffer,
2249                            unsigned long ip, const char *fmt, ...)
2250 {
2251         int ret;
2252         va_list ap;
2253
2254         if (!(trace_flags & TRACE_ITER_PRINTK))
2255                 return 0;
2256
2257         va_start(ap, fmt);
2258         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2259         va_end(ap);
2260         return ret;
2261 }
2262
2263 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2264 {
2265         return trace_array_vprintk(&global_trace, ip, fmt, args);
2266 }
2267 EXPORT_SYMBOL_GPL(trace_vprintk);
2268
2269 static void trace_iterator_increment(struct trace_iterator *iter)
2270 {
2271         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2272
2273         iter->idx++;
2274         if (buf_iter)
2275                 ring_buffer_read(buf_iter, NULL);
2276 }
2277
2278 static struct trace_entry *
2279 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2280                 unsigned long *lost_events)
2281 {
2282         struct ring_buffer_event *event;
2283         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2284
2285         if (buf_iter)
2286                 event = ring_buffer_iter_peek(buf_iter, ts);
2287         else
2288                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2289                                          lost_events);
2290
2291         if (event) {
2292                 iter->ent_size = ring_buffer_event_length(event);
2293                 return ring_buffer_event_data(event);
2294         }
2295         iter->ent_size = 0;
2296         return NULL;
2297 }
2298
2299 static struct trace_entry *
2300 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2301                   unsigned long *missing_events, u64 *ent_ts)
2302 {
2303         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2304         struct trace_entry *ent, *next = NULL;
2305         unsigned long lost_events = 0, next_lost = 0;
2306         int cpu_file = iter->cpu_file;
2307         u64 next_ts = 0, ts;
2308         int next_cpu = -1;
2309         int next_size = 0;
2310         int cpu;
2311
2312         /*
2313          * If we are in a per_cpu trace file, don't bother iterating over
2314          * all the cpus; just peek at that one cpu directly.
2315          */
2316         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2317                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2318                         return NULL;
2319                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2320                 if (ent_cpu)
2321                         *ent_cpu = cpu_file;
2322
2323                 return ent;
2324         }
2325
2326         for_each_tracing_cpu(cpu) {
2327
2328                 if (ring_buffer_empty_cpu(buffer, cpu))
2329                         continue;
2330
2331                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2332
2333                 /*
2334                  * Pick the entry with the smallest timestamp:
2335                  */
2336                 if (ent && (!next || ts < next_ts)) {
2337                         next = ent;
2338                         next_cpu = cpu;
2339                         next_ts = ts;
2340                         next_lost = lost_events;
2341                         next_size = iter->ent_size;
2342                 }
2343         }
2344
2345         iter->ent_size = next_size;
2346
2347         if (ent_cpu)
2348                 *ent_cpu = next_cpu;
2349
2350         if (ent_ts)
2351                 *ent_ts = next_ts;
2352
2353         if (missing_events)
2354                 *missing_events = next_lost;
2355
2356         return next;
2357 }
2358
2359 /* Find the next real entry, without updating the iterator itself */
2360 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2361                                           int *ent_cpu, u64 *ent_ts)
2362 {
2363         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2364 }
2365
2366 /* Find the next real entry, and increment the iterator to the next entry */
2367 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2368 {
2369         iter->ent = __find_next_entry(iter, &iter->cpu,
2370                                       &iter->lost_events, &iter->ts);
2371
2372         if (iter->ent)
2373                 trace_iterator_increment(iter);
2374
2375         return iter->ent ? iter : NULL;
2376 }
2377
2378 static void trace_consume(struct trace_iterator *iter)
2379 {
2380         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2381                             &iter->lost_events);
2382 }
2383
2384 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2385 {
2386         struct trace_iterator *iter = m->private;
2387         int i = (int)*pos;
2388         void *ent;
2389
2390         WARN_ON_ONCE(iter->leftover);
2391
2392         (*pos)++;
2393
2394         /* can't go backwards */
2395         if (iter->idx > i)
2396                 return NULL;
2397
2398         if (iter->idx < 0)
2399                 ent = trace_find_next_entry_inc(iter);
2400         else
2401                 ent = iter;
2402
2403         while (ent && iter->idx < i)
2404                 ent = trace_find_next_entry_inc(iter);
2405
2406         iter->pos = *pos;
2407
2408         return ent;
2409 }
2410
2411 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2412 {
2413         struct ring_buffer_event *event;
2414         struct ring_buffer_iter *buf_iter;
2415         unsigned long entries = 0;
2416         u64 ts;
2417
2418         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2419
2420         buf_iter = trace_buffer_iter(iter, cpu);
2421         if (!buf_iter)
2422                 return;
2423
2424         ring_buffer_iter_reset(buf_iter);
2425
2426         /*
2427          * With the max latency tracers, it is possible that a reset
2428          * never took place on a cpu. This shows up as timestamps that
2429          * come before the start of the buffer.
2430          */
2431         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2432                 if (ts >= iter->trace_buffer->time_start)
2433                         break;
2434                 entries++;
2435                 ring_buffer_read(buf_iter, NULL);
2436         }
2437
2438         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2439 }
2440
2441 /*
2442  * The current tracer is copied to avoid taking a global lock
2443  * all around.
2444  */
2445 static void *s_start(struct seq_file *m, loff_t *pos)
2446 {
2447         struct trace_iterator *iter = m->private;
2448         struct trace_array *tr = iter->tr;
2449         int cpu_file = iter->cpu_file;
2450         void *p = NULL;
2451         loff_t l = 0;
2452         int cpu;
2453
2454         /*
2455          * copy the tracer to avoid using a global lock all around.
2456          * iter->trace is a copy of current_trace, the pointer to the
2457          * name may be used instead of a strcmp(), as iter->trace->name
2458          * will point to the same string as current_trace->name.
2459          */
2460         mutex_lock(&trace_types_lock);
2461         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2462                 *iter->trace = *tr->current_trace;
2463         mutex_unlock(&trace_types_lock);
2464
2465 #ifdef CONFIG_TRACER_MAX_TRACE
2466         if (iter->snapshot && iter->trace->use_max_tr)
2467                 return ERR_PTR(-EBUSY);
2468 #endif
2469
2470         if (!iter->snapshot)
2471                 atomic_inc(&trace_record_cmdline_disabled);
2472
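             /*
              * If the requested position does not match where the iterator
              * stopped, the seq_file is restarting from scratch: reset the
              * per-cpu iterators and walk forward to *pos. Otherwise resume
              * where we left off (possibly re-emitting a leftover line that
              * overflowed the seq_file buffer).
              */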
2473         if (*pos != iter->pos) {
2474                 iter->ent = NULL;
2475                 iter->cpu = 0;
2476                 iter->idx = -1;
2477
2478                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2479                         for_each_tracing_cpu(cpu)
2480                                 tracing_iter_reset(iter, cpu);
2481                 } else
2482                         tracing_iter_reset(iter, cpu_file);
2483
2484                 iter->leftover = 0;
2485                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2486                         ;
2487
2488         } else {
2489                 /*
2490                  * If we overflowed the seq_file before, then we want
2491                  * to just reuse the trace_seq buffer again.
2492                  */
2493                 if (iter->leftover)
2494                         p = iter;
2495                 else {
2496                         l = *pos - 1;
2497                         p = s_next(m, p, &l);
2498                 }
2499         }
2500
2501         trace_event_read_lock();
2502         trace_access_lock(cpu_file);
2503         return p;
2504 }
2505
2506 static void s_stop(struct seq_file *m, void *p)
2507 {
2508         struct trace_iterator *iter = m->private;
2509
2510 #ifdef CONFIG_TRACER_MAX_TRACE
2511         if (iter->snapshot && iter->trace->use_max_tr)
2512                 return;
2513 #endif
2514
2515         if (!iter->snapshot)
2516                 atomic_dec(&trace_record_cmdline_disabled);
2517
2518         trace_access_unlock(iter->cpu_file);
2519         trace_event_read_unlock();
2520 }
2521
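     /*
      * Sum the per-cpu counts: *entries counts the events that will be
      * shown (entries skipped by a latency reset are ignored), while
      * *total also counts events lost to ring buffer overruns (see the
      * skipped_entries special case below).
      */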
2522 static void
2523 get_total_entries(struct trace_buffer *buf,
2524                   unsigned long *total, unsigned long *entries)
2525 {
2526         unsigned long count;
2527         int cpu;
2528
2529         *total = 0;
2530         *entries = 0;
2531
2532         for_each_tracing_cpu(cpu) {
2533                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2534                 /*
2535                  * If this buffer has skipped entries, then we hold all
2536                  * entries for the trace and we need to ignore the
2537                  * ones before the time stamp.
2538                  */
2539                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2540                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2541                         /* total is the same as the entries */
2542                         *total += count;
2543                 } else
2544                         *total += count +
2545                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2546                 *entries += count;
2547         }
2548 }
2549
2550 static void print_lat_help_header(struct seq_file *m)
2551 {
2552         seq_puts(m, "#                  _------=> CPU#            \n"
2553                     "#                 / _-----=> irqs-off        \n"
2554                     "#                | / _----=> need-resched    \n"
2555                     "#                || / _---=> hardirq/softirq \n"
2556                     "#                ||| / _--=> preempt-depth   \n"
2557                     "#                |||| /     delay            \n"
2558                     "#  cmd     pid   ||||| time  |   caller      \n"
2559                     "#     \\   /      |||||  \\    |   /         \n");
2560 }
2561
2562 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2563 {
2564         unsigned long total;
2565         unsigned long entries;
2566
2567         get_total_entries(buf, &total, &entries);
2568         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2569                    entries, total, num_online_cpus());
2570         seq_puts(m, "#\n");
2571 }
2572
2573 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2574 {
2575         print_event_info(buf, m);
2576         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2577                     "#              | |       |          |         |\n");
2578 }
2579
2580 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2581 {
2582         print_event_info(buf, m);
2583         seq_puts(m, "#                              _-----=> irqs-off\n"
2584                     "#                             / _----=> need-resched\n"
2585                     "#                            | / _---=> hardirq/softirq\n"
2586                     "#                            || / _--=> preempt-depth\n"
2587                     "#                            ||| /     delay\n"
2588                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2589                     "#              | |       |   ||||       |         |\n");
2590 }
2591
2592 void
2593 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2594 {
2595         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2596         struct trace_buffer *buf = iter->trace_buffer;
2597         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2598         struct tracer *type = iter->trace;
2599         unsigned long entries;
2600         unsigned long total;
2601         const char *name = "preemption";
2602
2603         name = type->name;
2604
2605         get_total_entries(buf, &total, &entries);
2606
2607         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2608                    name, UTS_RELEASE);
2609         seq_puts(m, "# -----------------------------------"
2610                  "---------------------------------\n");
2611         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2612                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2613                    nsecs_to_usecs(data->saved_latency),
2614                    entries,
2615                    total,
2616                    buf->cpu,
2617 #if defined(CONFIG_PREEMPT_NONE)
2618                    "server",
2619 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2620                    "desktop",
2621 #elif defined(CONFIG_PREEMPT)
2622                    "preempt",
2623 #else
2624                    "unknown",
2625 #endif
2626                    /* These are reserved for later use */
2627                    0, 0, 0, 0);
2628 #ifdef CONFIG_SMP
2629         seq_printf(m, " #P:%d)\n", num_online_cpus());
2630 #else
2631         seq_puts(m, ")\n");
2632 #endif
2633         seq_puts(m, "#    -----------------\n");
2634         seq_printf(m, "#    | task: %.16s-%d "
2635                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2636                    data->comm, data->pid,
2637                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2638                    data->policy, data->rt_priority);
2639         seq_puts(m, "#    -----------------\n");
2640
2641         if (data->critical_start) {
2642                 seq_puts(m, "#  => started at: ");
2643                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2644                 trace_print_seq(m, &iter->seq);
2645                 seq_puts(m, "\n#  => ended at:   ");
2646                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2647                 trace_print_seq(m, &iter->seq);
2648                 seq_puts(m, "\n#\n");
2649         }
2650
2651         seq_puts(m, "#\n");
2652 }
2653
2654 static void test_cpu_buff_start(struct trace_iterator *iter)
2655 {
2656         struct trace_seq *s = &iter->seq;
2657
2658         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2659                 return;
2660
2661         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2662                 return;
2663
2664         if (cpumask_test_cpu(iter->cpu, iter->started))
2665                 return;
2666
2667         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2668                 return;
2669
2670         cpumask_set_cpu(iter->cpu, iter->started);
2671
2672         /* Don't print started cpu buffer for the first entry of the trace */
2673         if (iter->idx > 1)
2674                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2675                                 iter->cpu);
2676 }
2677
2678 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2679 {
2680         struct trace_seq *s = &iter->seq;
2681         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2682         struct trace_entry *entry;
2683         struct trace_event *event;
2684
2685         entry = iter->ent;
2686
2687         test_cpu_buff_start(iter);
2688
2689         event = ftrace_find_event(entry->type);
2690
2691         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2692                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2693                         trace_print_lat_context(iter);
2694                 else
2695                         trace_print_context(iter);
2696         }
2697
2698         if (trace_seq_has_overflowed(s))
2699                 return TRACE_TYPE_PARTIAL_LINE;
2700
2701         if (event)
2702                 return event->funcs->trace(iter, sym_flags, event);
2703
2704         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2705
2706         return trace_handle_return(s);
2707 }
2708
2709 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2710 {
2711         struct trace_seq *s = &iter->seq;
2712         struct trace_entry *entry;
2713         struct trace_event *event;
2714
2715         entry = iter->ent;
2716
2717         if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2718                 trace_seq_printf(s, "%d %d %llu ",
2719                                  entry->pid, iter->cpu, iter->ts);
2720
2721         if (trace_seq_has_overflowed(s))
2722                 return TRACE_TYPE_PARTIAL_LINE;
2723
2724         event = ftrace_find_event(entry->type);
2725         if (event)
2726                 return event->funcs->raw(iter, 0, event);
2727
2728         trace_seq_printf(s, "%d ?\n", entry->type);
2729
2730         return trace_handle_return(s);
2731 }
2732
2733 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2734 {
2735         struct trace_seq *s = &iter->seq;
2736         unsigned char newline = '\n';
2737         struct trace_entry *entry;
2738         struct trace_event *event;
2739
2740         entry = iter->ent;
2741
2742         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2743                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2744                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2745                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2746                 if (trace_seq_has_overflowed(s))
2747                         return TRACE_TYPE_PARTIAL_LINE;
2748         }
2749
2750         event = ftrace_find_event(entry->type);
2751         if (event) {
2752                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2753                 if (ret != TRACE_TYPE_HANDLED)
2754                         return ret;
2755         }
2756
2757         SEQ_PUT_FIELD(s, newline);
2758
2759         return trace_handle_return(s);
2760 }
2761
2762 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2763 {
2764         struct trace_seq *s = &iter->seq;
2765         struct trace_entry *entry;
2766         struct trace_event *event;
2767
2768         entry = iter->ent;
2769
2770         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2771                 SEQ_PUT_FIELD(s, entry->pid);
2772                 SEQ_PUT_FIELD(s, iter->cpu);
2773                 SEQ_PUT_FIELD(s, iter->ts);
2774                 if (trace_seq_has_overflowed(s))
2775                         return TRACE_TYPE_PARTIAL_LINE;
2776         }
2777
2778         event = ftrace_find_event(entry->type);
2779         return event ? event->funcs->binary(iter, 0, event) :
2780                 TRACE_TYPE_HANDLED;
2781 }
2782
2783 int trace_empty(struct trace_iterator *iter)
2784 {
2785         struct ring_buffer_iter *buf_iter;
2786         int cpu;
2787
2788         /* If we are looking at one CPU buffer, only check that one */
2789         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2790                 cpu = iter->cpu_file;
2791                 buf_iter = trace_buffer_iter(iter, cpu);
2792                 if (buf_iter) {
2793                         if (!ring_buffer_iter_empty(buf_iter))
2794                                 return 0;
2795                 } else {
2796                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2797                                 return 0;
2798                 }
2799                 return 1;
2800         }
2801
2802         for_each_tracing_cpu(cpu) {
2803                 buf_iter = trace_buffer_iter(iter, cpu);
2804                 if (buf_iter) {
2805                         if (!ring_buffer_iter_empty(buf_iter))
2806                                 return 0;
2807                 } else {
2808                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2809                                 return 0;
2810                 }
2811         }
2812
2813         return 1;
2814 }
2815
2816 /*  Called with trace_event_read_lock() held. */
2817 enum print_line_t print_trace_line(struct trace_iterator *iter)
2818 {
2819         enum print_line_t ret;
2820
2821         if (iter->lost_events) {
2822                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2823                                  iter->cpu, iter->lost_events);
2824                 if (trace_seq_has_overflowed(&iter->seq))
2825                         return TRACE_TYPE_PARTIAL_LINE;
2826         }
2827
2828         if (iter->trace && iter->trace->print_line) {
2829                 ret = iter->trace->print_line(iter);
2830                 if (ret != TRACE_TYPE_UNHANDLED)
2831                         return ret;
2832         }
2833
2834         if (iter->ent->type == TRACE_BPUTS &&
2835                         trace_flags & TRACE_ITER_PRINTK &&
2836                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2837                 return trace_print_bputs_msg_only(iter);
2838
2839         if (iter->ent->type == TRACE_BPRINT &&
2840                         trace_flags & TRACE_ITER_PRINTK &&
2841                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2842                 return trace_print_bprintk_msg_only(iter);
2843
2844         if (iter->ent->type == TRACE_PRINT &&
2845                         trace_flags & TRACE_ITER_PRINTK &&
2846                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2847                 return trace_print_printk_msg_only(iter);
2848
2849         if (trace_flags & TRACE_ITER_BIN)
2850                 return print_bin_fmt(iter);
2851
2852         if (trace_flags & TRACE_ITER_HEX)
2853                 return print_hex_fmt(iter);
2854
2855         if (trace_flags & TRACE_ITER_RAW)
2856                 return print_raw_fmt(iter);
2857
2858         return print_trace_fmt(iter);
2859 }
2860
2861 void trace_latency_header(struct seq_file *m)
2862 {
2863         struct trace_iterator *iter = m->private;
2864
2865         /* print nothing if the buffers are empty */
2866         if (trace_empty(iter))
2867                 return;
2868
2869         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2870                 print_trace_header(m, iter);
2871
2872         if (!(trace_flags & TRACE_ITER_VERBOSE))
2873                 print_lat_help_header(m);
2874 }
2875
2876 void trace_default_header(struct seq_file *m)
2877 {
2878         struct trace_iterator *iter = m->private;
2879
2880         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2881                 return;
2882
2883         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2884                 /* print nothing if the buffers are empty */
2885                 if (trace_empty(iter))
2886                         return;
2887                 print_trace_header(m, iter);
2888                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2889                         print_lat_help_header(m);
2890         } else {
2891                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2892                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2893                                 print_func_help_header_irq(iter->trace_buffer, m);
2894                         else
2895                                 print_func_help_header(iter->trace_buffer, m);
2896                 }
2897         }
2898 }
2899
2900 static void test_ftrace_alive(struct seq_file *m)
2901 {
2902         if (!ftrace_is_dead())
2903                 return;
2904         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2905                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2906 }
2907
2908 #ifdef CONFIG_TRACER_MAX_TRACE
2909 static void show_snapshot_main_help(struct seq_file *m)
2910 {
2911         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2912                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2913                     "#                      Takes a snapshot of the main buffer.\n"
2914                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2915                     "#                      (Doesn't have to be '2', works with any number that\n"
2916                     "#                       is not a '0' or '1')\n");
2917 }
2918
2919 static void show_snapshot_percpu_help(struct seq_file *m)
2920 {
2921         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2922 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2923         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2924                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2925 #else
2926         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2927                     "#                     Must use main snapshot file to allocate.\n");
2928 #endif
2929         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2930                     "#                      (Doesn't have to be '2', works with any number that\n"
2931                     "#                       is not a '0' or '1')\n");
2932 }
2933
2934 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2935 {
2936         if (iter->tr->allocated_snapshot)
2937                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2938         else
2939                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2940
2941         seq_puts(m, "# Snapshot commands:\n");
2942         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2943                 show_snapshot_main_help(m);
2944         else
2945                 show_snapshot_percpu_help(m);
2946 }
2947 #else
2948 /* Should never be called */
2949 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2950 #endif
2951
2952 static int s_show(struct seq_file *m, void *v)
2953 {
2954         struct trace_iterator *iter = v;
2955         int ret;
2956
2957         if (iter->ent == NULL) {
2958                 if (iter->tr) {
2959                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2960                         seq_puts(m, "#\n");
2961                         test_ftrace_alive(m);
2962                 }
2963                 if (iter->snapshot && trace_empty(iter))
2964                         print_snapshot_help(m, iter);
2965                 else if (iter->trace && iter->trace->print_header)
2966                         iter->trace->print_header(m);
2967                 else
2968                         trace_default_header(m);
2969
2970         } else if (iter->leftover) {
2971                 /*
2972                  * If we filled the seq_file buffer earlier, we
2973                  * want to just show it now.
2974                  */
2975                 ret = trace_print_seq(m, &iter->seq);
2976
2977                 /* ret should this time be zero, but you never know */
2978                 iter->leftover = ret;
2979
2980         } else {
2981                 print_trace_line(iter);
2982                 ret = trace_print_seq(m, &iter->seq);
2983                 /*
2984                  * If we overflow the seq_file buffer, then it will
2985                  * ask us for this data again at start up.
2986                  * Use that instead.
2987                  *  ret is 0 if seq_file write succeeded.
2988                  *        -1 otherwise.
2989                  */
2990                 iter->leftover = ret;
2991         }
2992
2993         return 0;
2994 }
2995
2996 /*
2997  * Should be used after trace_array_get(); trace_types_lock
2998  * ensures that i_cdev was already initialized.
2999  */
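     /*
      * The per-cpu files store cpu + 1 in i_cdev (see
      * trace_create_cpu_file()), so e.g. the file for CPU 2 holds 3 and
      * this returns 2, while the top level files leave i_cdev NULL and
      * get RING_BUFFER_ALL_CPUS.
      */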
3000 static inline int tracing_get_cpu(struct inode *inode)
3001 {
3002         if (inode->i_cdev) /* See trace_create_cpu_file() */
3003                 return (long)inode->i_cdev - 1;
3004         return RING_BUFFER_ALL_CPUS;
3005 }
3006
3007 static const struct seq_operations tracer_seq_ops = {
3008         .start          = s_start,
3009         .next           = s_next,
3010         .stop           = s_stop,
3011         .show           = s_show,
3012 };
3013
3014 static struct trace_iterator *
3015 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3016 {
3017         struct trace_array *tr = inode->i_private;
3018         struct trace_iterator *iter;
3019         int cpu;
3020
3021         if (tracing_disabled)
3022                 return ERR_PTR(-ENODEV);
3023
3024         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3025         if (!iter)
3026                 return ERR_PTR(-ENOMEM);
3027
3028         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3029                                     GFP_KERNEL);
3030         if (!iter->buffer_iter)
3031                 goto release;
3032
3033         /*
3034          * We make a copy of the current tracer to avoid concurrent
3035          * changes on it while we are reading.
3036          */
3037         mutex_lock(&trace_types_lock);
3038         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3039         if (!iter->trace)
3040                 goto fail;
3041
3042         *iter->trace = *tr->current_trace;
3043
3044         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3045                 goto fail;
3046
3047         iter->tr = tr;
3048
3049 #ifdef CONFIG_TRACER_MAX_TRACE
3050         /* Currently only the top directory has a snapshot */
3051         if (tr->current_trace->print_max || snapshot)
3052                 iter->trace_buffer = &tr->max_buffer;
3053         else
3054 #endif
3055                 iter->trace_buffer = &tr->trace_buffer;
3056         iter->snapshot = snapshot;
3057         iter->pos = -1;
3058         iter->cpu_file = tracing_get_cpu(inode);
3059         mutex_init(&iter->mutex);
3060
3061         /* Notify the tracer early; before we stop tracing. */
3062         if (iter->trace && iter->trace->open)
3063                 iter->trace->open(iter);
3064
3065         /* Annotate start of buffers if we had overruns */
3066         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3067                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3068
3069         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3070         if (trace_clocks[tr->clock_id].in_ns)
3071                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3072
3073         /* stop the trace while dumping if we are not opening "snapshot" */
3074         if (!iter->snapshot)
3075                 tracing_stop_tr(tr);
3076
3077         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3078                 for_each_tracing_cpu(cpu) {
3079                         iter->buffer_iter[cpu] =
3080                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3081                 }
3082                 ring_buffer_read_prepare_sync();
3083                 for_each_tracing_cpu(cpu) {
3084                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3085                         tracing_iter_reset(iter, cpu);
3086                 }
3087         } else {
3088                 cpu = iter->cpu_file;
3089                 iter->buffer_iter[cpu] =
3090                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3091                 ring_buffer_read_prepare_sync();
3092                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3093                 tracing_iter_reset(iter, cpu);
3094         }
3095
3096         mutex_unlock(&trace_types_lock);
3097
3098         return iter;
3099
3100  fail:
3101         mutex_unlock(&trace_types_lock);
3102         kfree(iter->trace);
3103         kfree(iter->buffer_iter);
3104 release:
3105         seq_release_private(inode, file);
3106         return ERR_PTR(-ENOMEM);
3107 }
3108
3109 int tracing_open_generic(struct inode *inode, struct file *filp)
3110 {
3111         if (tracing_disabled)
3112                 return -ENODEV;
3113
3114         filp->private_data = inode->i_private;
3115         return 0;
3116 }
3117
3118 bool tracing_is_disabled(void)
3119 {
3120         return (tracing_disabled) ? true : false;
3121 }
3122
3123 /*
3124  * Open and update trace_array ref count.
3125  * Must have the current trace_array passed to it.
3126  */
3127 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3128 {
3129         struct trace_array *tr = inode->i_private;
3130
3131         if (tracing_disabled)
3132                 return -ENODEV;
3133
3134         if (trace_array_get(tr) < 0)
3135                 return -ENODEV;
3136
3137         filp->private_data = inode->i_private;
3138
3139         return 0;
3140 }
3141
3142 static int tracing_release(struct inode *inode, struct file *file)
3143 {
3144         struct trace_array *tr = inode->i_private;
3145         struct seq_file *m = file->private_data;
3146         struct trace_iterator *iter;
3147         int cpu;
3148
3149         if (!(file->f_mode & FMODE_READ)) {
3150                 trace_array_put(tr);
3151                 return 0;
3152         }
3153
3154         /* Writes do not use seq_file */
3155         iter = m->private;
3156         mutex_lock(&trace_types_lock);
3157
3158         for_each_tracing_cpu(cpu) {
3159                 if (iter->buffer_iter[cpu])
3160                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3161         }
3162
3163         if (iter->trace && iter->trace->close)
3164                 iter->trace->close(iter);
3165
3166         if (!iter->snapshot)
3167                 /* reenable tracing if it was previously enabled */
3168                 tracing_start_tr(tr);
3169
3170         __trace_array_put(tr);
3171
3172         mutex_unlock(&trace_types_lock);
3173
3174         mutex_destroy(&iter->mutex);
3175         free_cpumask_var(iter->started);
3176         kfree(iter->trace);
3177         kfree(iter->buffer_iter);
3178         seq_release_private(inode, file);
3179
3180         return 0;
3181 }
3182
3183 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3184 {
3185         struct trace_array *tr = inode->i_private;
3186
3187         trace_array_put(tr);
3188         return 0;
3189 }
3190
3191 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3192 {
3193         struct trace_array *tr = inode->i_private;
3194
3195         trace_array_put(tr);
3196
3197         return single_release(inode, file);
3198 }
3199
3200 static int tracing_open(struct inode *inode, struct file *file)
3201 {
3202         struct trace_array *tr = inode->i_private;
3203         struct trace_iterator *iter;
3204         int ret = 0;
3205
3206         if (trace_array_get(tr) < 0)
3207                 return -ENODEV;
3208
3209         /* If this file was open for write, then erase contents */
3210         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3211                 int cpu = tracing_get_cpu(inode);
3212
3213                 if (cpu == RING_BUFFER_ALL_CPUS)
3214                         tracing_reset_online_cpus(&tr->trace_buffer);
3215                 else
3216                         tracing_reset(&tr->trace_buffer, cpu);
3217         }
3218
3219         if (file->f_mode & FMODE_READ) {
3220                 iter = __tracing_open(inode, file, false);
3221                 if (IS_ERR(iter))
3222                         ret = PTR_ERR(iter);
3223                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3224                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3225         }
3226
3227         if (ret < 0)
3228                 trace_array_put(tr);
3229
3230         return ret;
3231 }
3232
3233 /*
3234  * Some tracers are not suitable for instance buffers.
3235  * A tracer is always available for the global array (toplevel)
3236  * or if it explicitly states that it is.
3237  */
3238 static bool
3239 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3240 {
3241         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3242 }
3243
3244 /* Find the next tracer that this trace array may use */
3245 static struct tracer *
3246 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3247 {
3248         while (t && !trace_ok_for_array(t, tr))
3249                 t = t->next;
3250
3251         return t;
3252 }
3253
3254 static void *
3255 t_next(struct seq_file *m, void *v, loff_t *pos)
3256 {
3257         struct trace_array *tr = m->private;
3258         struct tracer *t = v;
3259
3260         (*pos)++;
3261
3262         if (t)
3263                 t = get_tracer_for_array(tr, t->next);
3264
3265         return t;
3266 }
3267
3268 static void *t_start(struct seq_file *m, loff_t *pos)
3269 {
3270         struct trace_array *tr = m->private;
3271         struct tracer *t;
3272         loff_t l = 0;
3273
3274         mutex_lock(&trace_types_lock);
3275
3276         t = get_tracer_for_array(tr, trace_types);
3277         for (; t && l < *pos; t = t_next(m, t, &l))
3278                 ;
3279
3280         return t;
3281 }
3282
3283 static void t_stop(struct seq_file *m, void *p)
3284 {
3285         mutex_unlock(&trace_types_lock);
3286 }
3287
3288 static int t_show(struct seq_file *m, void *v)
3289 {
3290         struct tracer *t = v;
3291
3292         if (!t)
3293                 return 0;
3294
3295         seq_puts(m, t->name);
3296         if (t->next)
3297                 seq_putc(m, ' ');
3298         else
3299                 seq_putc(m, '\n');
3300
3301         return 0;
3302 }
3303
3304 static const struct seq_operations show_traces_seq_ops = {
3305         .start          = t_start,
3306         .next           = t_next,
3307         .stop           = t_stop,
3308         .show           = t_show,
3309 };
3310
3311 static int show_traces_open(struct inode *inode, struct file *file)
3312 {
3313         struct trace_array *tr = inode->i_private;
3314         struct seq_file *m;
3315         int ret;
3316
3317         if (tracing_disabled)
3318                 return -ENODEV;
3319
3320         ret = seq_open(file, &show_traces_seq_ops);
3321         if (ret)
3322                 return ret;
3323
3324         m = file->private_data;
3325         m->private = tr;
3326
3327         return 0;
3328 }
3329
3330 static ssize_t
3331 tracing_write_stub(struct file *filp, const char __user *ubuf,
3332                    size_t count, loff_t *ppos)
3333 {
3334         return count;
3335 }
3336
3337 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3338 {
3339         int ret;
3340
3341         if (file->f_mode & FMODE_READ)
3342                 ret = seq_lseek(file, offset, whence);
3343         else
3344                 file->f_pos = ret = 0;
3345
3346         return ret;
3347 }
3348
3349 static const struct file_operations tracing_fops = {
3350         .open           = tracing_open,
3351         .read           = seq_read,
3352         .write          = tracing_write_stub,
3353         .llseek         = tracing_lseek,
3354         .release        = tracing_release,
3355 };
3356
3357 static const struct file_operations show_traces_fops = {
3358         .open           = show_traces_open,
3359         .read           = seq_read,
3360         .release        = seq_release,
3361         .llseek         = seq_lseek,
3362 };
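
/*
 * Editorial example, not part of the original source: tracing_fops and
 * show_traces_fops above back the "trace" and "available_tracers" files in
 * the tracing directory (e.g. /sys/kernel/debug/tracing).  Assuming the
 * function tracers are built in, a session might look like:
 *
 *   # cat available_tracers
 *   function_graph function nop
 *   # echo > trace              # O_TRUNC path above clears the buffer
 *   # cat trace                 # static view of the buffer contents
 */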
3363
3364 /*
3365  * The tracer itself will not take this lock, but we still want
3366  * to provide a consistent cpumask to user-space:
3367  */
3368 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3369
3370 /*
3371  * Temporary storage for the character representation of the
3372  * CPU bitmask (and one more byte for the newline):
3373  */
3374 static char mask_str[NR_CPUS + 1];
3375
3376 static ssize_t
3377 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3378                      size_t count, loff_t *ppos)
3379 {
3380         struct trace_array *tr = file_inode(filp)->i_private;
3381         int len;
3382
3383         mutex_lock(&tracing_cpumask_update_lock);
3384
3385         len = snprintf(mask_str, NR_CPUS + 1, "%*pb\n",
3386                        cpumask_pr_args(tr->tracing_cpumask));
3387         if (len >= count) {
3388                 count = -EINVAL;
3389                 goto out_err;
3390         }
3391         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3392
3393 out_err:
3394         mutex_unlock(&tracing_cpumask_update_lock);
3395
3396         return count;
3397 }
3398
3399 static ssize_t
3400 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3401                       size_t count, loff_t *ppos)
3402 {
3403         struct trace_array *tr = file_inode(filp)->i_private;
3404         cpumask_var_t tracing_cpumask_new;
3405         int err, cpu;
3406
3407         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3408                 return -ENOMEM;
3409
3410         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3411         if (err)
3412                 goto err_unlock;
3413
3414         mutex_lock(&tracing_cpumask_update_lock);
3415
3416         local_irq_disable();
3417         arch_spin_lock(&tr->max_lock);
3418         for_each_tracing_cpu(cpu) {
3419                 /*
3420                  * Increase/decrease the disabled counter if we are
3421                  * about to flip a bit in the cpumask:
3422                  */
3423                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3424                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3425                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3426                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3427                 }
3428                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3429                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3430                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3431                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3432                 }
3433         }
3434         arch_spin_unlock(&tr->max_lock);
3435         local_irq_enable();
3436
3437         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3438
3439         mutex_unlock(&tracing_cpumask_update_lock);
3440         free_cpumask_var(tracing_cpumask_new);
3441
3442         return count;
3443
3444 err_unlock:
3445         free_cpumask_var(tracing_cpumask_new);
3446
3447         return err;
3448 }
3449
3450 static const struct file_operations tracing_cpumask_fops = {
3451         .open           = tracing_open_generic_tr,
3452         .read           = tracing_cpumask_read,
3453         .write          = tracing_cpumask_write,
3454         .release        = tracing_release_generic_tr,
3455         .llseek         = generic_file_llseek,
3456 };
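
/*
 * Editorial example, not part of the original source: tracing_cpumask takes
 * a hex CPU mask.  On a hypothetical 4-CPU box, restricting tracing to
 * CPUs 0 and 1 might look like:
 *
 *   # cat tracing_cpumask
 *   f
 *   # echo 3 > tracing_cpumask
 *
 * Clearing a bit lands in the loop above that calls
 * ring_buffer_record_disable_cpu() for that CPU.
 */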
3457
3458 static int tracing_trace_options_show(struct seq_file *m, void *v)
3459 {
3460         struct tracer_opt *trace_opts;
3461         struct trace_array *tr = m->private;
3462         u32 tracer_flags;
3463         int i;
3464
3465         mutex_lock(&trace_types_lock);
3466         tracer_flags = tr->current_trace->flags->val;
3467         trace_opts = tr->current_trace->flags->opts;
3468
3469         for (i = 0; trace_options[i]; i++) {
3470                 if (trace_flags & (1 << i))
3471                         seq_printf(m, "%s\n", trace_options[i]);
3472                 else
3473                         seq_printf(m, "no%s\n", trace_options[i]);
3474         }
3475
3476         for (i = 0; trace_opts[i].name; i++) {
3477                 if (tracer_flags & trace_opts[i].bit)
3478                         seq_printf(m, "%s\n", trace_opts[i].name);
3479                 else
3480                         seq_printf(m, "no%s\n", trace_opts[i].name);
3481         }
3482         mutex_unlock(&trace_types_lock);
3483
3484         return 0;
3485 }
3486
3487 static int __set_tracer_option(struct trace_array *tr,
3488                                struct tracer_flags *tracer_flags,
3489                                struct tracer_opt *opts, int neg)
3490 {
3491         struct tracer *trace = tr->current_trace;
3492         int ret;
3493
3494         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3495         if (ret)
3496                 return ret;
3497
3498         if (neg)
3499                 tracer_flags->val &= ~opts->bit;
3500         else
3501                 tracer_flags->val |= opts->bit;
3502         return 0;
3503 }
3504
3505 /* Try to assign a tracer specific option */
3506 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3507 {
3508         struct tracer *trace = tr->current_trace;
3509         struct tracer_flags *tracer_flags = trace->flags;
3510         struct tracer_opt *opts = NULL;
3511         int i;
3512
3513         for (i = 0; tracer_flags->opts[i].name; i++) {
3514                 opts = &tracer_flags->opts[i];
3515
3516                 if (strcmp(cmp, opts->name) == 0)
3517                         return __set_tracer_option(tr, trace->flags, opts, neg);
3518         }
3519
3520         return -EINVAL;
3521 }
3522
3523 /* Some tracers require overwrite to stay enabled */
3524 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3525 {
3526         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3527                 return -1;
3528
3529         return 0;
3530 }
3531
3532 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3533 {
3534         /* Do nothing if the flag already matches the requested state */
3535         if (!!(trace_flags & mask) == !!enabled)
3536                 return 0;
3537
3538         /* Give the tracer a chance to approve the change */
3539         if (tr->current_trace->flag_changed)
3540                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3541                         return -EINVAL;
3542
3543         if (enabled)
3544                 trace_flags |= mask;
3545         else
3546                 trace_flags &= ~mask;
3547
3548         if (mask == TRACE_ITER_RECORD_CMD)
3549                 trace_event_enable_cmd_record(enabled);
3550
3551         if (mask == TRACE_ITER_OVERWRITE) {
3552                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3553 #ifdef CONFIG_TRACER_MAX_TRACE
3554                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3555 #endif
3556         }
3557
3558         if (mask == TRACE_ITER_PRINTK) {
3559                 trace_printk_start_stop_comm(enabled);
3560                 trace_printk_control(enabled);
3561         }
3562
3563         return 0;
3564 }
3565
3566 static int trace_set_options(struct trace_array *tr, char *option)
3567 {
3568         char *cmp;
3569         int neg = 0;
3570         int ret = -ENODEV;
3571         int i;
3572
3573         cmp = strstrip(option);
3574
3575         if (strncmp(cmp, "no", 2) == 0) {
3576                 neg = 1;
3577                 cmp += 2;
3578         }
3579
3580         mutex_lock(&trace_types_lock);
3581
3582         for (i = 0; trace_options[i]; i++) {
3583                 if (strcmp(cmp, trace_options[i]) == 0) {
3584                         ret = set_tracer_flag(tr, 1 << i, !neg);
3585                         break;
3586                 }
3587         }
3588
3589         /* If no option could be set, test the specific tracer options */
3590         if (!trace_options[i])
3591                 ret = set_tracer_option(tr, cmp, neg);
3592
3593         mutex_unlock(&trace_types_lock);
3594
3595         return ret;
3596 }
3597
3598 static ssize_t
3599 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3600                         size_t cnt, loff_t *ppos)
3601 {
3602         struct seq_file *m = filp->private_data;
3603         struct trace_array *tr = m->private;
3604         char buf[64];
3605         int ret;
3606
3607         if (cnt >= sizeof(buf))
3608                 return -EINVAL;
3609
3610         if (copy_from_user(&buf, ubuf, cnt))
3611                 return -EFAULT;
3612
3613         buf[cnt] = 0;
3614
3615         ret = trace_set_options(tr, buf);
3616         if (ret < 0)
3617                 return ret;
3618
3619         *ppos += cnt;
3620
3621         return cnt;
3622 }
3623
3624 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3625 {
3626         struct trace_array *tr = inode->i_private;
3627         int ret;
3628
3629         if (tracing_disabled)
3630                 return -ENODEV;
3631
3632         if (trace_array_get(tr) < 0)
3633                 return -ENODEV;
3634
3635         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3636         if (ret < 0)
3637                 trace_array_put(tr);
3638
3639         return ret;
3640 }
3641
3642 static const struct file_operations tracing_iter_fops = {
3643         .open           = tracing_trace_options_open,
3644         .read           = seq_read,
3645         .llseek         = seq_lseek,
3646         .release        = tracing_single_release_tr,
3647         .write          = tracing_trace_options_write,
3648 };
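
/*
 * Editorial example, not part of the original source: trace_options is read
 * and written through the operations above.  Each option is listed by name,
 * prefixed with "no" when it is off, and writing a name toggles it.  The
 * exact list depends on the configuration and current tracer:
 *
 *   # grep sym trace_options
 *   nosym-offset
 *   nosym-addr
 *   # echo sym-offset > trace_options
 *   # echo nosym-offset > trace_options
 */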
3649
3650 static const char readme_msg[] =
3651         "tracing mini-HOWTO:\n\n"
3652         "# echo 0 > tracing_on : quick way to disable tracing\n"
3653         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3654         " Important files:\n"
3655         "  trace\t\t\t- The static contents of the buffer\n"
3656         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3657         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3658         "  current_tracer\t- function and latency tracers\n"
3659         "  available_tracers\t- list of configured tracers for current_tracer\n"
3660         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3661         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3662         "  trace_clock\t\t-change the clock used to order events\n"
3663         "       local:   Per cpu clock but may not be synced across CPUs\n"
3664         "      global:   Synced across CPUs but slows tracing down.\n"
3665         "     counter:   Not a clock, but just an increment\n"
3666         "      uptime:   Jiffy counter from time of boot\n"
3667         "        perf:   Same clock that perf events use\n"
3668 #ifdef CONFIG_X86_64
3669         "     x86-tsc:   TSC cycle counter\n"
3670 #endif
3671         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3672         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3673         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3674         "\t\t\t  Remove sub-buffer with rmdir\n"
3675         "  trace_options\t\t- Set format or modify how tracing happens\n"
3676         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3677         "\t\t\t  option name\n"
3678         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3679 #ifdef CONFIG_DYNAMIC_FTRACE
3680         "\n  available_filter_functions - list of functions that can be filtered on\n"
3681         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3682         "\t\t\t  functions\n"
3683         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3684         "\t     modules: Can select a group via module\n"
3685         "\t      Format: :mod:<module-name>\n"
3686         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3687         "\t    triggers: a command to perform when function is hit\n"
3688         "\t      Format: <function>:<trigger>[:count]\n"
3689         "\t     trigger: traceon, traceoff\n"
3690         "\t\t      enable_event:<system>:<event>\n"
3691         "\t\t      disable_event:<system>:<event>\n"
3692 #ifdef CONFIG_STACKTRACE
3693         "\t\t      stacktrace\n"
3694 #endif
3695 #ifdef CONFIG_TRACER_SNAPSHOT
3696         "\t\t      snapshot\n"
3697 #endif
3698         "\t\t      dump\n"
3699         "\t\t      cpudump\n"
3700         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3701         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3702         "\t     The first one will disable tracing every time do_fault is hit\n"
3703         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3704         "\t       The first time do trap is hit and it disables tracing, the\n"
3705         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3706         "\t       the counter will not decrement. It only decrements when the\n"
3707         "\t       trigger did work\n"
3708         "\t     To remove trigger without count:\n"
3709         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3710         "\t     To remove trigger with a count:\n"
3711         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3712         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3713         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3714         "\t    modules: Can select a group via module command :mod:\n"
3715         "\t    Does not accept triggers\n"
3716 #endif /* CONFIG_DYNAMIC_FTRACE */
3717 #ifdef CONFIG_FUNCTION_TRACER
3718         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3719         "\t\t    (function)\n"
3720 #endif
3721 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3722         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3723         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3724         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3725 #endif
3726 #ifdef CONFIG_TRACER_SNAPSHOT
3727         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3728         "\t\t\t  snapshot buffer. Read the contents for more\n"
3729         "\t\t\t  information\n"
3730 #endif
3731 #ifdef CONFIG_STACK_TRACER
3732         "  stack_trace\t\t- Shows the max stack trace when active\n"
3733         "  stack_max_size\t- Shows current max stack size that was traced\n"
3734         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3735         "\t\t\t  new trace)\n"
3736 #ifdef CONFIG_DYNAMIC_FTRACE
3737         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3738         "\t\t\t  traces\n"
3739 #endif
3740 #endif /* CONFIG_STACK_TRACER */
3741         "  events/\t\t- Directory containing all trace event subsystems:\n"
3742         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3743         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3744         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3745         "\t\t\t  events\n"
3746         "      filter\t\t- If set, only events passing filter are traced\n"
3747         "  events/<system>/<event>/\t- Directory containing control files for\n"
3748         "\t\t\t  <event>:\n"
3749         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3750         "      filter\t\t- If set, only events passing filter are traced\n"
3751         "      trigger\t\t- If set, a command to perform when event is hit\n"
3752         "\t    Format: <trigger>[:count][if <filter>]\n"
3753         "\t   trigger: traceon, traceoff\n"
3754         "\t            enable_event:<system>:<event>\n"
3755         "\t            disable_event:<system>:<event>\n"
3756 #ifdef CONFIG_STACKTRACE
3757         "\t\t    stacktrace\n"
3758 #endif
3759 #ifdef CONFIG_TRACER_SNAPSHOT
3760         "\t\t    snapshot\n"
3761 #endif
3762         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3763         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3764         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3765         "\t                  events/block/block_unplug/trigger\n"
3766         "\t   The first disables tracing every time block_unplug is hit.\n"
3767         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3768         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3769         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3770         "\t   Like function triggers, the counter is only decremented if it\n"
3771         "\t    enabled or disabled tracing.\n"
3772         "\t   To remove a trigger without a count:\n"
3773         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3774         "\t   To remove a trigger with a count:\n"
3775         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3776         "\t   Filters can be ignored when removing a trigger.\n"
3777 ;
3778
3779 static ssize_t
3780 tracing_readme_read(struct file *filp, char __user *ubuf,
3781                        size_t cnt, loff_t *ppos)
3782 {
3783         return simple_read_from_buffer(ubuf, cnt, ppos,
3784                                         readme_msg, strlen(readme_msg));
3785 }
3786
3787 static const struct file_operations tracing_readme_fops = {
3788         .open           = tracing_open_generic,
3789         .read           = tracing_readme_read,
3790         .llseek         = generic_file_llseek,
3791 };
3792
3793 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3794 {
3795         unsigned int *ptr = v;
3796
3797         if (*pos || m->count)
3798                 ptr++;
3799
3800         (*pos)++;
3801
3802         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3803              ptr++) {
3804                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3805                         continue;
3806
3807                 return ptr;
3808         }
3809
3810         return NULL;
3811 }
3812
3813 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3814 {
3815         void *v;
3816         loff_t l = 0;
3817
3818         preempt_disable();
3819         arch_spin_lock(&trace_cmdline_lock);
3820
3821         v = &savedcmd->map_cmdline_to_pid[0];
3822         while (l <= *pos) {
3823                 v = saved_cmdlines_next(m, v, &l);
3824                 if (!v)
3825                         return NULL;
3826         }
3827
3828         return v;
3829 }
3830
3831 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3832 {
3833         arch_spin_unlock(&trace_cmdline_lock);
3834         preempt_enable();
3835 }
3836
3837 static int saved_cmdlines_show(struct seq_file *m, void *v)
3838 {
3839         char buf[TASK_COMM_LEN];
3840         unsigned int *pid = v;
3841
3842         __trace_find_cmdline(*pid, buf);
3843         seq_printf(m, "%d %s\n", *pid, buf);
3844         return 0;
3845 }
3846
3847 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3848         .start          = saved_cmdlines_start,
3849         .next           = saved_cmdlines_next,
3850         .stop           = saved_cmdlines_stop,
3851         .show           = saved_cmdlines_show,
3852 };
3853
3854 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3855 {
3856         if (tracing_disabled)
3857                 return -ENODEV;
3858
3859         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3860 }
3861
3862 static const struct file_operations tracing_saved_cmdlines_fops = {
3863         .open           = tracing_saved_cmdlines_open,
3864         .read           = seq_read,
3865         .llseek         = seq_lseek,
3866         .release        = seq_release,
3867 };
3868
3869 static ssize_t
3870 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3871                                  size_t cnt, loff_t *ppos)
3872 {
3873         char buf[64];
3874         int r;
3875
3876         arch_spin_lock(&trace_cmdline_lock);
3877         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3878         arch_spin_unlock(&trace_cmdline_lock);
3879
3880         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3881 }
3882
3883 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3884 {
3885         kfree(s->saved_cmdlines);
3886         kfree(s->map_cmdline_to_pid);
3887         kfree(s);
3888 }
3889
3890 static int tracing_resize_saved_cmdlines(unsigned int val)
3891 {
3892         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3893
3894         s = kmalloc(sizeof(*s), GFP_KERNEL);
3895         if (!s)
3896                 return -ENOMEM;
3897
3898         if (allocate_cmdlines_buffer(val, s) < 0) {
3899                 kfree(s);
3900                 return -ENOMEM;
3901         }
3902
3903         arch_spin_lock(&trace_cmdline_lock);
3904         savedcmd_temp = savedcmd;
3905         savedcmd = s;
3906         arch_spin_unlock(&trace_cmdline_lock);
3907         free_saved_cmdlines_buffer(savedcmd_temp);
3908
3909         return 0;
3910 }
3911
3912 static ssize_t
3913 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3914                                   size_t cnt, loff_t *ppos)
3915 {
3916         unsigned long val;
3917         int ret;
3918
3919         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3920         if (ret)
3921                 return ret;
3922
3923         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3924         if (!val || val > PID_MAX_DEFAULT)
3925                 return -EINVAL;
3926
3927         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3928         if (ret < 0)
3929                 return ret;
3930
3931         *ppos += cnt;
3932
3933         return cnt;
3934 }
3935
3936 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3937         .open           = tracing_open_generic,
3938         .read           = tracing_saved_cmdlines_size_read,
3939         .write          = tracing_saved_cmdlines_size_write,
3940 };
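
/*
 * Editorial example, not part of the original source: saved_cmdlines_size
 * reports and resizes the pid->comm cache used to resolve task names in the
 * trace output.  Assuming the default of 128 entries:
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 */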
3941
3942 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
3943 static union trace_enum_map_item *
3944 update_enum_map(union trace_enum_map_item *ptr)
3945 {
3946         if (!ptr->map.enum_string) {
3947                 if (ptr->tail.next) {
3948                         ptr = ptr->tail.next;
3949                         /* Set ptr to the next real item (skip head) */
3950                         ptr++;
3951                 } else
3952                         return NULL;
3953         }
3954         return ptr;
3955 }
3956
3957 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
3958 {
3959         union trace_enum_map_item *ptr = v;
3960
3961         /*
3962          * Paranoid! If ptr points to end, we don't want to increment past it.
3963          * This really should never happen.
3964          */
3965         ptr = update_enum_map(ptr);
3966         if (WARN_ON_ONCE(!ptr))
3967                 return NULL;
3968
3969         ptr++;
3970
3971         (*pos)++;
3972
3973         ptr = update_enum_map(ptr);
3974
3975         return ptr;
3976 }
3977
3978 static void *enum_map_start(struct seq_file *m, loff_t *pos)
3979 {
3980         union trace_enum_map_item *v;
3981         loff_t l = 0;
3982
3983         mutex_lock(&trace_enum_mutex);
3984
3985         v = trace_enum_maps;
3986         if (v)
3987                 v++;
3988
3989         while (v && l < *pos) {
3990                 v = enum_map_next(m, v, &l);
3991         }
3992
3993         return v;
3994 }
3995
3996 static void enum_map_stop(struct seq_file *m, void *v)
3997 {
3998         mutex_unlock(&trace_enum_mutex);
3999 }
4000
4001 static int enum_map_show(struct seq_file *m, void *v)
4002 {
4003         union trace_enum_map_item *ptr = v;
4004
4005         seq_printf(m, "%s %ld (%s)\n",
4006                    ptr->map.enum_string, ptr->map.enum_value,
4007                    ptr->map.system);
4008
4009         return 0;
4010 }
4011
4012 static const struct seq_operations tracing_enum_map_seq_ops = {
4013         .start          = enum_map_start,
4014         .next           = enum_map_next,
4015         .stop           = enum_map_stop,
4016         .show           = enum_map_show,
4017 };
4018
4019 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4020 {
4021         if (tracing_disabled)
4022                 return -ENODEV;
4023
4024         return seq_open(filp, &tracing_enum_map_seq_ops);
4025 }
4026
4027 static const struct file_operations tracing_enum_map_fops = {
4028         .open           = tracing_enum_map_open,
4029         .read           = seq_read,
4030         .llseek         = seq_lseek,
4031         .release        = seq_release,
4032 };
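
/*
 * Editorial example, not part of the original source: each line of the
 * enum_map file is produced by enum_map_show() above in the form
 * "<enum-name> <value> (<system>)", e.g. (hypothetical sample entries):
 *
 *   # cat enum_map
 *   HI_SOFTIRQ 0 (irq)
 *   TIMER_SOFTIRQ 1 (irq)
 */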
4033
4034 static inline union trace_enum_map_item *
4035 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4036 {
4037         /* Return tail of array given the head */
4038         return ptr + ptr->head.length + 1;
4039 }
4040
4041 static void
4042 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4043                            int len)
4044 {
4045         struct trace_enum_map **stop;
4046         struct trace_enum_map **map;
4047         union trace_enum_map_item *map_array;
4048         union trace_enum_map_item *ptr;
4049
4050         stop = start + len;
4051
4052         /*
4053          * The trace_enum_maps contains the map plus a head and tail item,
4054          * where the head holds the module and length of array, and the
4055          * tail holds a pointer to the next list.
4056          */
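        /*
         * Editorial sketch, not part of the original source: a saved block
         * for a module with N maps is laid out as
         *
         *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
         *
         * trace_enum_jmp_to_tail() steps from the head across the N map
         * entries to reach the tail, whose ->next chains to the next block.
         */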
4057         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4058         if (!map_array) {
4059                 pr_warning("Unable to allocate trace enum mapping\n");
4060                 return;
4061         }
4062
4063         mutex_lock(&trace_enum_mutex);
4064
4065         if (!trace_enum_maps)
4066                 trace_enum_maps = map_array;
4067         else {
4068                 ptr = trace_enum_maps;
4069                 for (;;) {
4070                         ptr = trace_enum_jmp_to_tail(ptr);
4071                         if (!ptr->tail.next)
4072                                 break;
4073                         ptr = ptr->tail.next;
4074
4075                 }
4076                 ptr->tail.next = map_array;
4077         }
4078         map_array->head.mod = mod;
4079         map_array->head.length = len;
4080         map_array++;
4081
4082         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4083                 map_array->map = **map;
4084                 map_array++;
4085         }
4086         memset(map_array, 0, sizeof(*map_array));
4087
4088         mutex_unlock(&trace_enum_mutex);
4089 }
4090
4091 static void trace_create_enum_file(struct dentry *d_tracer)
4092 {
4093         trace_create_file("enum_map", 0444, d_tracer,
4094                           NULL, &tracing_enum_map_fops);
4095 }
4096
4097 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4098 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4099 static inline void trace_insert_enum_map_file(struct module *mod,
4100                               struct trace_enum_map **start, int len) { }
4101 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4102
4103 static void trace_insert_enum_map(struct module *mod,
4104                                   struct trace_enum_map **start, int len)
4105 {
4106         struct trace_enum_map **map;
4107
4108         if (len <= 0)
4109                 return;
4110
4111         map = start;
4112
4113         trace_event_enum_update(map, len);
4114
4115         trace_insert_enum_map_file(mod, start, len);
4116 }
4117
4118 static ssize_t
4119 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4120                        size_t cnt, loff_t *ppos)
4121 {
4122         struct trace_array *tr = filp->private_data;
4123         char buf[MAX_TRACER_SIZE+2];
4124         int r;
4125
4126         mutex_lock(&trace_types_lock);
4127         r = sprintf(buf, "%s\n", tr->current_trace->name);
4128         mutex_unlock(&trace_types_lock);
4129
4130         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4131 }
4132
4133 int tracer_init(struct tracer *t, struct trace_array *tr)
4134 {
4135         tracing_reset_online_cpus(&tr->trace_buffer);
4136         return t->init(tr);
4137 }
4138
4139 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4140 {
4141         int cpu;
4142
4143         for_each_tracing_cpu(cpu)
4144                 per_cpu_ptr(buf->data, cpu)->entries = val;
4145 }
4146
4147 #ifdef CONFIG_TRACER_MAX_TRACE
4148 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4149 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4150                                         struct trace_buffer *size_buf, int cpu_id)
4151 {
4152         int cpu, ret = 0;
4153
4154         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4155                 for_each_tracing_cpu(cpu) {
4156                         ret = ring_buffer_resize(trace_buf->buffer,
4157                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4158                         if (ret < 0)
4159                                 break;
4160                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4161                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4162                 }
4163         } else {
4164                 ret = ring_buffer_resize(trace_buf->buffer,
4165                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4166                 if (ret == 0)
4167                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4168                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4169         }
4170
4171         return ret;
4172 }
4173 #endif /* CONFIG_TRACER_MAX_TRACE */
4174
4175 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4176                                         unsigned long size, int cpu)
4177 {
4178         int ret;
4179
4180         /*
4181          * If kernel or user changes the size of the ring buffer
4182          * we use the size that was given, and we can forget about
4183          * expanding it later.
4184          */
4185         ring_buffer_expanded = true;
4186
4187         /* May be called before buffers are initialized */
4188         if (!tr->trace_buffer.buffer)
4189                 return 0;
4190
4191         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4192         if (ret < 0)
4193                 return ret;
4194
4195 #ifdef CONFIG_TRACER_MAX_TRACE
4196         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4197             !tr->current_trace->use_max_tr)
4198                 goto out;
4199
4200         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4201         if (ret < 0) {
4202                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4203                                                      &tr->trace_buffer, cpu);
4204                 if (r < 0) {
4205                         /*
4206                          * AARGH! We are left with different
4207                          * size max buffer!!!!
4208                          * The max buffer is our "snapshot" buffer.
4209                          * When a tracer needs a snapshot (one of the
4210                          * latency tracers), it swaps the max buffer
4211                          * with the saved snapshot. We succeeded in
4212                          * updating the size of the main buffer, but failed to
4213                          * update the size of the max buffer. But when we tried
4214                          * to reset the main buffer to the original size, we
4215                          * failed there too. This is very unlikely to
4216                          * happen, but if it does, warn and kill all
4217                          * tracing.
4218                          */
4219                         WARN_ON(1);
4220                         tracing_disabled = 1;
4221                 }
4222                 return ret;
4223         }
4224
4225         if (cpu == RING_BUFFER_ALL_CPUS)
4226                 set_buffer_entries(&tr->max_buffer, size);
4227         else
4228                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4229
4230  out:
4231 #endif /* CONFIG_TRACER_MAX_TRACE */
4232
4233         if (cpu == RING_BUFFER_ALL_CPUS)
4234                 set_buffer_entries(&tr->trace_buffer, size);
4235         else
4236                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4237
4238         return ret;
4239 }
4240
4241 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4242                                           unsigned long size, int cpu_id)
4243 {
4244         int ret = size;
4245
4246         mutex_lock(&trace_types_lock);
4247
4248         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4249                 /* make sure this cpu is enabled in the mask */
4250                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4251                         ret = -EINVAL;
4252                         goto out;
4253                 }
4254         }
4255
4256         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4257         if (ret < 0)
4258                 ret = -ENOMEM;
4259
4260 out:
4261         mutex_unlock(&trace_types_lock);
4262
4263         return ret;
4264 }
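
/*
 * Editorial note, not part of the original source: this resize path is
 * reached (elsewhere in this file) from the buffer_size_kb write handlers,
 * so a user-space interaction might look like:
 *
 *   # echo 4096 > buffer_size_kb                # resize all CPUs
 *   # echo 1024 > per_cpu/cpu0/buffer_size_kb   # resize just CPU 0
 */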
4265
4266
4267 /**
4268  * tracing_update_buffers - used by tracing facility to expand ring buffers
4269  *
4270  * To save memory when tracing is configured in but never used, the
4271  * ring buffers are initially set to a minimum size. But once a user
4272  * starts to use the tracing facility, the buffers need to grow to
4273  * their default size.
4274  *
4275  * This function is to be called when a tracer is about to be used.
4276  */
4277 int tracing_update_buffers(void)
4278 {
4279         int ret = 0;
4280
4281         mutex_lock(&trace_types_lock);
4282         if (!ring_buffer_expanded)
4283                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4284                                                 RING_BUFFER_ALL_CPUS);
4285         mutex_unlock(&trace_types_lock);
4286
4287         return ret;
4288 }
4289
4290 struct trace_option_dentry;
4291
4292 static struct trace_option_dentry *
4293 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4294
4295 /*
4296  * Used to clear out the tracer before deletion of an instance.
4297  * Must have trace_types_lock held.
4298  */
4299 static void tracing_set_nop(struct trace_array *tr)
4300 {
4301         if (tr->current_trace == &nop_trace)
4302                 return;
4303
4304         tr->current_trace->enabled--;
4305
4306         if (tr->current_trace->reset)
4307                 tr->current_trace->reset(tr);
4308
4309         tr->current_trace = &nop_trace;
4310 }
4311
4312 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4313 {
4314         /* Only enable if the directory has been created already. */
4315         if (!tr->dir)
4316                 return;
4317
4318         /* Currently, only the top instance has options */
4319         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
4320                 return;
4321
4322         /* Ignore if they were already created */
4323         if (t->topts)
4324                 return;
4325
4326         t->topts = create_trace_option_files(tr, t);
4327 }
4328
4329 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4330 {
4331         struct tracer *t;
4332 #ifdef CONFIG_TRACER_MAX_TRACE
4333         bool had_max_tr;
4334 #endif
4335         int ret = 0;
4336
4337         mutex_lock(&trace_types_lock);
4338
4339         if (!ring_buffer_expanded) {
4340                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4341                                                 RING_BUFFER_ALL_CPUS);
4342                 if (ret < 0)
4343                         goto out;
4344                 ret = 0;
4345         }
4346
4347         for (t = trace_types; t; t = t->next) {
4348                 if (strcmp(t->name, buf) == 0)
4349                         break;
4350         }
4351         if (!t) {
4352                 ret = -EINVAL;
4353                 goto out;
4354         }
4355         if (t == tr->current_trace)
4356                 goto out;
4357
4358         /* Some tracers are only allowed for the top level buffer */
4359         if (!trace_ok_for_array(t, tr)) {
4360                 ret = -EINVAL;
4361                 goto out;
4362         }
4363
4364         /* If trace pipe files are being read, we can't change the tracer */
4365         if (tr->current_trace->ref) {
4366                 ret = -EBUSY;
4367                 goto out;
4368         }
4369
4370         trace_branch_disable();
4371
4372         tr->current_trace->enabled--;
4373
4374         if (tr->current_trace->reset)
4375                 tr->current_trace->reset(tr);
4376
4377         /* Current trace needs to be nop_trace before synchronize_sched */
4378         tr->current_trace = &nop_trace;
4379
4380 #ifdef CONFIG_TRACER_MAX_TRACE
4381         had_max_tr = tr->allocated_snapshot;
4382
4383         if (had_max_tr && !t->use_max_tr) {
4384                 /*
4385                  * We need to make sure that the update_max_tr sees that
4386                  * current_trace changed to nop_trace to keep it from
4387                  * swapping the buffers after we resize it.
4388                  * The update_max_tr is called with interrupts disabled,
4389                  * so a synchronize_sched() is sufficient.
4390                  */
4391                 synchronize_sched();
4392                 free_snapshot(tr);
4393         }
4394 #endif
4395
4396 #ifdef CONFIG_TRACER_MAX_TRACE
4397         if (t->use_max_tr && !had_max_tr) {
4398                 ret = alloc_snapshot(tr);
4399                 if (ret < 0)
4400                         goto out;
4401         }
4402 #endif
4403
4404         if (t->init) {
4405                 ret = tracer_init(t, tr);
4406                 if (ret)
4407                         goto out;
4408         }
4409
4410         tr->current_trace = t;
4411         tr->current_trace->enabled++;
4412         trace_branch_enable(tr);
4413  out:
4414         mutex_unlock(&trace_types_lock);
4415
4416         return ret;
4417 }
4418
4419 static ssize_t
4420 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4421                         size_t cnt, loff_t *ppos)
4422 {
4423         struct trace_array *tr = filp->private_data;
4424         char buf[MAX_TRACER_SIZE+1];
4425         int i;
4426         size_t ret;
4427         int err;
4428
4429         ret = cnt;
4430
4431         if (cnt > MAX_TRACER_SIZE)
4432                 cnt = MAX_TRACER_SIZE;
4433
4434         if (copy_from_user(&buf, ubuf, cnt))
4435                 return -EFAULT;
4436
4437         buf[cnt] = 0;
4438
4439         /* strip trailing whitespace */
4440         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4441                 buf[i] = 0;
4442
4443         err = tracing_set_tracer(tr, buf);
4444         if (err)
4445                 return err;
4446
4447         *ppos += ret;
4448
4449         return ret;
4450 }
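
/*
 * Editorial example, not part of the original source: current_tracer is
 * switched through the write handler above, e.g.:
 *
 *   # echo function > current_tracer
 *   # cat current_tracer
 *   function
 *   # echo nop > current_tracer
 *
 * Unknown tracer names get -EINVAL from tracing_set_tracer(), and the switch
 * is refused with -EBUSY while trace_pipe readers hold a reference.
 */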
4451
4452 static ssize_t
4453 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4454                    size_t cnt, loff_t *ppos)
4455 {
4456         char buf[64];
4457         int r;
4458
4459         r = snprintf(buf, sizeof(buf), "%ld\n",
4460                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4461         if (r > sizeof(buf))
4462                 r = sizeof(buf);
4463         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4464 }
4465
4466 static ssize_t
4467 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4468                     size_t cnt, loff_t *ppos)
4469 {
4470         unsigned long val;
4471         int ret;
4472
4473         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4474         if (ret)
4475                 return ret;
4476
4477         *ptr = val * 1000;
4478
4479         return cnt;
4480 }
4481
4482 static ssize_t
4483 tracing_thresh_read(struct file *filp, char __user *ubuf,
4484                     size_t cnt, loff_t *ppos)
4485 {
4486         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4487 }
4488
4489 static ssize_t
4490 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4491                      size_t cnt, loff_t *ppos)
4492 {
4493         struct trace_array *tr = filp->private_data;
4494         int ret;
4495
4496         mutex_lock(&trace_types_lock);
4497         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4498         if (ret < 0)
4499                 goto out;
4500
4501         if (tr->current_trace->update_thresh) {
4502                 ret = tr->current_trace->update_thresh(tr);
4503                 if (ret < 0)
4504                         goto out;
4505         }
4506
4507         ret = cnt;
4508 out:
4509         mutex_unlock(&trace_types_lock);
4510
4511         return ret;
4512 }
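
/*
 * Editorial example, not part of the original source: tracing_thresh is
 * exposed in microseconds; the helpers above convert to and from the
 * nanoseconds stored internally, so for instance:
 *
 *   # echo 100 > tracing_thresh      # report latencies above 100 usecs
 *   # cat tracing_thresh
 *   100
 */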
4513
4514 static ssize_t
4515 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4516                      size_t cnt, loff_t *ppos)
4517 {
4518         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4519 }
4520
4521 static ssize_t
4522 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4523                       size_t cnt, loff_t *ppos)
4524 {
4525         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4526 }
4527
4528 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4529 {
4530         struct trace_array *tr = inode->i_private;
4531         struct trace_iterator *iter;
4532         int ret = 0;
4533
4534         if (tracing_disabled)
4535                 return -ENODEV;
4536
4537         if (trace_array_get(tr) < 0)
4538                 return -ENODEV;
4539
4540         mutex_lock(&trace_types_lock);
4541
4542         /* create a buffer to store the information to pass to userspace */
4543         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4544         if (!iter) {
4545                 ret = -ENOMEM;
4546                 __trace_array_put(tr);
4547                 goto out;
4548         }
4549
4550         trace_seq_init(&iter->seq);
4551         iter->trace = tr->current_trace;
4552
4553         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4554                 ret = -ENOMEM;
4555                 goto fail;
4556         }
4557
4558         /* trace pipe does not show start of buffer */
4559         cpumask_setall(iter->started);
4560
4561         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4562                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4563
4564         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4565         if (trace_clocks[tr->clock_id].in_ns)
4566                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4567
4568         iter->tr = tr;
4569         iter->trace_buffer = &tr->trace_buffer;
4570         iter->cpu_file = tracing_get_cpu(inode);
4571         mutex_init(&iter->mutex);
4572         filp->private_data = iter;
4573
4574         if (iter->trace->pipe_open)
4575                 iter->trace->pipe_open(iter);
4576
4577         nonseekable_open(inode, filp);
4578
4579         tr->current_trace->ref++;
4580 out:
4581         mutex_unlock(&trace_types_lock);
4582         return ret;
4583
4584 fail:
4585         kfree(iter->trace);
4586         kfree(iter);
4587         __trace_array_put(tr);
4588         mutex_unlock(&trace_types_lock);
4589         return ret;
4590 }
4591
4592 static int tracing_release_pipe(struct inode *inode, struct file *file)
4593 {
4594         struct trace_iterator *iter = file->private_data;
4595         struct trace_array *tr = inode->i_private;
4596
4597         mutex_lock(&trace_types_lock);
4598
4599         tr->current_trace->ref--;
4600
4601         if (iter->trace->pipe_close)
4602                 iter->trace->pipe_close(iter);
4603
4604         mutex_unlock(&trace_types_lock);
4605
4606         free_cpumask_var(iter->started);
4607         mutex_destroy(&iter->mutex);
4608         kfree(iter);
4609
4610         trace_array_put(tr);
4611
4612         return 0;
4613 }
4614
4615 static unsigned int
4616 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4617 {
4618         /* Iterators are static, they should be filled or empty */
4619         if (trace_buffer_iter(iter, iter->cpu_file))
4620                 return POLLIN | POLLRDNORM;
4621
4622         if (trace_flags & TRACE_ITER_BLOCK)
4623                 /*
4624                  * Always select as readable when in blocking mode
4625                  */
4626                 return POLLIN | POLLRDNORM;
4627         else
4628                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4629                                              filp, poll_table);
4630 }
4631
4632 static unsigned int
4633 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4634 {
4635         struct trace_iterator *iter = filp->private_data;
4636
4637         return trace_poll(iter, filp, poll_table);
4638 }
4639
4640 /* Must be called with iter->mutex held. */
4641 static int tracing_wait_pipe(struct file *filp)
4642 {
4643         struct trace_iterator *iter = filp->private_data;
4644         int ret;
4645
4646         while (trace_empty(iter)) {
4647
4648                 if ((filp->f_flags & O_NONBLOCK)) {
4649                         return -EAGAIN;
4650                 }
4651
4652                 /*
4653                  * We block while the buffer is empty, and only stop blocking
4654                  * once we have read something and tracing is disabled. If tracing
4655                  * is disabled but nothing has been read yet, we keep blocking; this
4656                  * allows a user to cat this file and then enable tracing. Once we
4657                  * have read something, we give an EOF when tracing is disabled again.
4658                  *
4659                  * iter->pos will be 0 if we haven't read anything.
4660                  */
4661                 if (!tracing_is_on() && iter->pos)
4662                         break;
4663
4664                 mutex_unlock(&iter->mutex);
4665
4666                 ret = wait_on_pipe(iter, false);
4667
4668                 mutex_lock(&iter->mutex);
4669
4670                 if (ret)
4671                         return ret;
4672         }
4673
4674         return 1;
4675 }
4676
4677 /*
4678  * Consumer reader.
4679  */
4680 static ssize_t
4681 tracing_read_pipe(struct file *filp, char __user *ubuf,
4682                   size_t cnt, loff_t *ppos)
4683 {
4684         struct trace_iterator *iter = filp->private_data;
4685         ssize_t sret;
4686
4687         /* return any leftover data */
4688         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4689         if (sret != -EBUSY)
4690                 return sret;
4691
4692         trace_seq_init(&iter->seq);
4693
4694         /*
4695          * Avoid more than one consumer on a single file descriptor.
4696          * This is just a matter of trace coherency; the ring buffer itself
4697          * is protected.
4698          */
4699         mutex_lock(&iter->mutex);
4700         if (iter->trace->read) {
4701                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4702                 if (sret)
4703                         goto out;
4704         }
4705
4706 waitagain:
4707         sret = tracing_wait_pipe(filp);
4708         if (sret <= 0)
4709                 goto out;
4710
4711         /* stop when tracing is finished */
4712         if (trace_empty(iter)) {
4713                 sret = 0;
4714                 goto out;
4715         }
4716
4717         if (cnt >= PAGE_SIZE)
4718                 cnt = PAGE_SIZE - 1;
4719
4720         /* reset all but tr, trace, and overruns */
4721         memset(&iter->seq, 0,
4722                sizeof(struct trace_iterator) -
4723                offsetof(struct trace_iterator, seq));
4724         cpumask_clear(iter->started);
4725         iter->pos = -1;
4726
4727         trace_event_read_lock();
4728         trace_access_lock(iter->cpu_file);
4729         while (trace_find_next_entry_inc(iter) != NULL) {
4730                 enum print_line_t ret;
4731                 int save_len = iter->seq.seq.len;
4732
4733                 ret = print_trace_line(iter);
4734                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4735                         /* don't print partial lines */
4736                         iter->seq.seq.len = save_len;
4737                         break;
4738                 }
4739                 if (ret != TRACE_TYPE_NO_CONSUME)
4740                         trace_consume(iter);
4741
4742                 if (trace_seq_used(&iter->seq) >= cnt)
4743                         break;
4744
4745                 /*
4746                  * Setting the full flag means we reached the trace_seq buffer
4747                  * size and we should have left via the partial-line condition above.
4748                  * One of the trace_seq_* functions is not used properly.
4749                  */
4750                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4751                           iter->ent->type);
4752         }
4753         trace_access_unlock(iter->cpu_file);
4754         trace_event_read_unlock();
4755
4756         /* Now copy what we have to the user */
4757         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4758         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4759                 trace_seq_init(&iter->seq);
4760
4761         /*
4762          * If there was nothing to send to user, in spite of consuming trace
4763          * entries, go back to wait for more entries.
4764          */
4765         if (sret == -EBUSY)
4766                 goto waitagain;
4767
4768 out:
4769         mutex_unlock(&iter->mutex);
4770
4771         return sret;
4772 }
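
/*
 * Editorial example, not part of the original source: trace_pipe is the
 * consuming counterpart of "trace".  A reader blocks until data is available
 * and each entry is reported only once, so (assuming a tracer or events are
 * enabled) a session might look like:
 *
 *   # cat trace_pipe &
 *   # echo 1 > tracing_on        # entries now stream to the cat above
 *
 * With O_NONBLOCK the read returns -EAGAIN instead of blocking (see
 * tracing_wait_pipe() above).
 */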
4773
4774 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4775                                      unsigned int idx)
4776 {
4777         __free_page(spd->pages[idx]);
4778 }
4779
4780 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4781         .can_merge              = 0,
4782         .confirm                = generic_pipe_buf_confirm,
4783         .release                = generic_pipe_buf_release,
4784         .steal                  = generic_pipe_buf_steal,
4785         .get                    = generic_pipe_buf_get,
4786 };
4787
4788 static size_t
4789 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4790 {
4791         size_t count;
4792         int save_len;
4793         int ret;
4794
4795         /* Seq buffer is page-sized, exactly what we need. */
4796         for (;;) {
4797                 save_len = iter->seq.seq.len;
4798                 ret = print_trace_line(iter);
4799
4800                 if (trace_seq_has_overflowed(&iter->seq)) {
4801                         iter->seq.seq.len = save_len;
4802                         break;
4803                 }
4804
4805                 /*
4806                  * This should not be hit, because it should only
4807                  * be set if the iter->seq overflowed. But check it
4808                  * anyway to be safe.
4809                  */
4810                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4811                         iter->seq.seq.len = save_len;
4812                         break;
4813                 }
4814
4815                 count = trace_seq_used(&iter->seq) - save_len;
4816                 if (rem < count) {
4817                         rem = 0;
4818                         iter->seq.seq.len = save_len;
4819                         break;
4820                 }
4821
4822                 if (ret != TRACE_TYPE_NO_CONSUME)
4823                         trace_consume(iter);
4824                 rem -= count;
4825                 if (!trace_find_next_entry_inc(iter))   {
4826                         rem = 0;
4827                         iter->ent = NULL;
4828                         break;
4829                 }
4830         }
4831
4832         return rem;
4833 }
4834
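/*
 * splice_read() for trace_pipe: fill freshly allocated pages with
 * formatted trace output and hand them to the pipe via splice_to_pipe().
 */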
4835 static ssize_t tracing_splice_read_pipe(struct file *filp,
4836                                         loff_t *ppos,
4837                                         struct pipe_inode_info *pipe,
4838                                         size_t len,
4839                                         unsigned int flags)
4840 {
4841         struct page *pages_def[PIPE_DEF_BUFFERS];
4842         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4843         struct trace_iterator *iter = filp->private_data;
4844         struct splice_pipe_desc spd = {
4845                 .pages          = pages_def,
4846                 .partial        = partial_def,
4847                 .nr_pages       = 0, /* This gets updated below. */
4848                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4849                 .flags          = flags,
4850                 .ops            = &tracing_pipe_buf_ops,
4851                 .spd_release    = tracing_spd_release_pipe,
4852         };
4853         ssize_t ret;
4854         size_t rem;
4855         unsigned int i;
4856
4857         if (splice_grow_spd(pipe, &spd))
4858                 return -ENOMEM;
4859
4860         mutex_lock(&iter->mutex);
4861
4862         if (iter->trace->splice_read) {
4863                 ret = iter->trace->splice_read(iter, filp,
4864                                                ppos, pipe, len, flags);
4865                 if (ret)
4866                         goto out_err;
4867         }
4868
4869         ret = tracing_wait_pipe(filp);
4870         if (ret <= 0)
4871                 goto out_err;
4872
4873         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4874                 ret = -EFAULT;
4875                 goto out_err;
4876         }
4877
4878         trace_event_read_lock();
4879         trace_access_lock(iter->cpu_file);
4880
4881         /* Fill as many pages as possible. */
4882         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4883                 spd.pages[i] = alloc_page(GFP_KERNEL);
4884                 if (!spd.pages[i])
4885                         break;
4886
4887                 rem = tracing_fill_pipe_page(rem, iter);
4888
4889                 /* Copy the data into the page, so we can start over. */
4890                 ret = trace_seq_to_buffer(&iter->seq,
4891                                           page_address(spd.pages[i]),
4892                                           trace_seq_used(&iter->seq));
4893                 if (ret < 0) {
4894                         __free_page(spd.pages[i]);
4895                         break;
4896                 }
4897                 spd.partial[i].offset = 0;
4898                 spd.partial[i].len = trace_seq_used(&iter->seq);
4899
4900                 trace_seq_init(&iter->seq);
4901         }
4902
4903         trace_access_unlock(iter->cpu_file);
4904         trace_event_read_unlock();
4905         mutex_unlock(&iter->mutex);
4906
4907         spd.nr_pages = i;
4908
4909         ret = splice_to_pipe(pipe, &spd);
4910 out:
4911         splice_shrink_spd(&spd);
4912         return ret;
4913
4914 out_err:
4915         mutex_unlock(&iter->mutex);
4916         goto out;
4917 }
4918
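/*
 * Read side of buffer_size_kb: report the ring buffer size in KB for one
 * CPU, or, for the all-CPUs file, a single value if every per-cpu buffer
 * is the same size and "X" otherwise.
 */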
4919 static ssize_t
4920 tracing_entries_read(struct file *filp, char __user *ubuf,
4921                      size_t cnt, loff_t *ppos)
4922 {
4923         struct inode *inode = file_inode(filp);
4924         struct trace_array *tr = inode->i_private;
4925         int cpu = tracing_get_cpu(inode);
4926         char buf[64];
4927         int r = 0;
4928         ssize_t ret;
4929
4930         mutex_lock(&trace_types_lock);
4931
4932         if (cpu == RING_BUFFER_ALL_CPUS) {
4933                 int cpu, buf_size_same;
4934                 unsigned long size;
4935
4936                 size = 0;
4937                 buf_size_same = 1;
4938                 /* check if all cpu buffer sizes are the same */
4939                 for_each_tracing_cpu(cpu) {
4940                         /* fill in the size from the first enabled cpu */
4941                         if (size == 0)
4942                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4943                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4944                                 buf_size_same = 0;
4945                                 break;
4946                         }
4947                 }
4948
4949                 if (buf_size_same) {
4950                         if (!ring_buffer_expanded)
4951                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4952                                             size >> 10,
4953                                             trace_buf_size >> 10);
4954                         else
4955                                 r = sprintf(buf, "%lu\n", size >> 10);
4956                 } else
4957                         r = sprintf(buf, "X\n");
4958         } else
4959                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4960
4961         mutex_unlock(&trace_types_lock);
4962
4963         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4964         return ret;
4965 }
4966
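/*
 * Write side of buffer_size_kb: the value is taken in KB and the ring
 * buffer is resized for the CPU this file represents (or for all CPUs).
 */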
4967 static ssize_t
4968 tracing_entries_write(struct file *filp, const char __user *ubuf,
4969                       size_t cnt, loff_t *ppos)
4970 {
4971         struct inode *inode = file_inode(filp);
4972         struct trace_array *tr = inode->i_private;
4973         unsigned long val;
4974         int ret;
4975
4976         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4977         if (ret)
4978                 return ret;
4979
4980         /* must have at least 1 entry */
4981         if (!val)
4982                 return -EINVAL;
4983
4984         /* value is in KB */
4985         val <<= 10;
4986         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4987         if (ret < 0)
4988                 return ret;
4989
4990         *ppos += cnt;
4991
4992         return cnt;
4993 }
4994
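/*
 * buffer_total_size_kb: sum of all per-cpu ring buffer sizes in KB.  If
 * the buffers have not been expanded yet, the would-be expanded total is
 * shown as well.
 */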
4995 static ssize_t
4996 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4997                                 size_t cnt, loff_t *ppos)
4998 {
4999         struct trace_array *tr = filp->private_data;
5000         char buf[64];
5001         int r, cpu;
5002         unsigned long size = 0, expanded_size = 0;
5003
5004         mutex_lock(&trace_types_lock);
5005         for_each_tracing_cpu(cpu) {
5006                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5007                 if (!ring_buffer_expanded)
5008                         expanded_size += trace_buf_size >> 10;
5009         }
5010         if (ring_buffer_expanded)
5011                 r = sprintf(buf, "%lu\n", size);
5012         else
5013                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5014         mutex_unlock(&trace_types_lock);
5015
5016         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5017 }
5018
5019 static ssize_t
5020 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5021                           size_t cnt, loff_t *ppos)
5022 {
5023         /*
5024          * There is no need to read what the user has written; this function
5025          * exists only so that using "echo" on the file does not return an error.
5026          */
5027
5028         *ppos += cnt;
5029
5030         return cnt;
5031 }
5032
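/*
 * Closing the free_buffer file shrinks the ring buffer to zero, after
 * optionally stopping tracing if the stop-on-free option is set.
 */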
5033 static int
5034 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5035 {
5036         struct trace_array *tr = inode->i_private;
5037
5038         /* Disable tracing? */
5039         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
5040                 tracer_tracing_off(tr);
5041         /* resize the ring buffer to 0 */
5042         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5043
5044         trace_array_put(tr);
5045
5046         return 0;
5047 }
5048
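/*
 * trace_marker: inject a string written from user space into the ring
 * buffer as a TRACE_PRINT event (e.g. "echo hello > trace_marker" from
 * the tracefs mount point).
 */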
5049 static ssize_t
5050 tracing_mark_write(struct file *filp, const char __user *ubuf,
5051                                         size_t cnt, loff_t *fpos)
5052 {
5053         unsigned long addr = (unsigned long)ubuf;
5054         struct trace_array *tr = filp->private_data;
5055         struct ring_buffer_event *event;
5056         struct ring_buffer *buffer;
5057         struct print_entry *entry;
5058         unsigned long irq_flags;
5059         struct page *pages[2];
5060         void *map_page[2];
5061         int nr_pages = 1;
5062         ssize_t written;
5063         int offset;
5064         int size;
5065         int len;
5066         int ret;
5067         int i;
5068
5069         if (tracing_disabled)
5070                 return -EINVAL;
5071
5072         if (!(trace_flags & TRACE_ITER_MARKERS))
5073                 return -EINVAL;
5074
5075         if (cnt > TRACE_BUF_SIZE)
5076                 cnt = TRACE_BUF_SIZE;
5077
5078         /*
5079          * Userspace is injecting traces into the kernel trace buffer.
5080          * We want to be as non-intrusive as possible.
5081          * To do so, we do not want to allocate any special buffers
5082          * or take any locks, but instead write the userspace data
5083          * straight into the ring buffer.
5084          *
5085          * First we need to pin the userspace buffer into memory.
5086          * Most likely it already is resident, because userspace just
5087          * referenced it, but there is no guarantee. By using
5088          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we
5089          * can access the pages directly and write the data straight
5090          * into the ring buffer.
5091          */
5092         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5093
5094         /* check if we cross pages */
5095         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5096                 nr_pages = 2;
5097
5098         offset = addr & (PAGE_SIZE - 1);
5099         addr &= PAGE_MASK;
5100
5101         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5102         if (ret < nr_pages) {
5103                 while (--ret >= 0)
5104                         put_page(pages[ret]);
5105                 written = -EFAULT;
5106                 goto out;
5107         }
5108
5109         for (i = 0; i < nr_pages; i++)
5110                 map_page[i] = kmap_atomic(pages[i]);
5111
5112         local_save_flags(irq_flags);
5113         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5114         buffer = tr->trace_buffer.buffer;
5115         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5116                                           irq_flags, preempt_count());
5117         if (!event) {
5118                 /* Ring buffer disabled, return as if not open for write */
5119                 written = -EBADF;
5120                 goto out_unlock;
5121         }
5122
5123         entry = ring_buffer_event_data(event);
5124         entry->ip = _THIS_IP_;
5125
5126         if (nr_pages == 2) {
5127                 len = PAGE_SIZE - offset;
5128                 memcpy(&entry->buf, map_page[0] + offset, len);
5129                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5130         } else
5131                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5132
5133         if (entry->buf[cnt - 1] != '\n') {
5134                 entry->buf[cnt] = '\n';
5135                 entry->buf[cnt + 1] = '\0';
5136         } else
5137                 entry->buf[cnt] = '\0';
5138
5139         __buffer_unlock_commit(buffer, event);
5140
5141         written = cnt;
5142
5143         *fpos += written;
5144
5145  out_unlock:
5146         for (i = nr_pages - 1; i >= 0; i--) {
5147                 kunmap_atomic(map_page[i]);
5148                 put_page(pages[i]);
5149         }
5150  out:
5151         return written;
5152 }
5153
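/*
 * trace_clock read side: list the available clocks with the currently
 * selected one wrapped in brackets.
 */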
5154 static int tracing_clock_show(struct seq_file *m, void *v)
5155 {
5156         struct trace_array *tr = m->private;
5157         int i;
5158
5159         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5160                 seq_printf(m,
5161                         "%s%s%s%s", i ? " " : "",
5162                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5163                         i == tr->clock_id ? "]" : "");
5164         seq_putc(m, '\n');
5165
5166         return 0;
5167 }
5168
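/*
 * Select a trace clock by name and reset the buffers, since timestamps
 * taken with different clocks are not comparable.
 */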
5169 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5170 {
5171         int i;
5172
5173         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5174                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5175                         break;
5176         }
5177         if (i == ARRAY_SIZE(trace_clocks))
5178                 return -EINVAL;
5179
5180         mutex_lock(&trace_types_lock);
5181
5182         tr->clock_id = i;
5183
5184         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5185
5186         /*
5187          * New clock may not be consistent with the previous clock.
5188          * Reset the buffer so that it doesn't have incomparable timestamps.
5189          */
5190         tracing_reset_online_cpus(&tr->trace_buffer);
5191
5192 #ifdef CONFIG_TRACER_MAX_TRACE
5193         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5194                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5195         tracing_reset_online_cpus(&tr->max_buffer);
5196 #endif
5197
5198         mutex_unlock(&trace_types_lock);
5199
5200         return 0;
5201 }
5202
5203 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5204                                    size_t cnt, loff_t *fpos)
5205 {
5206         struct seq_file *m = filp->private_data;
5207         struct trace_array *tr = m->private;
5208         char buf[64];
5209         const char *clockstr;
5210         int ret;
5211
5212         if (cnt >= sizeof(buf))
5213                 return -EINVAL;
5214
5215         if (copy_from_user(&buf, ubuf, cnt))
5216                 return -EFAULT;
5217
5218         buf[cnt] = 0;
5219
5220         clockstr = strstrip(buf);
5221
5222         ret = tracing_set_clock(tr, clockstr);
5223         if (ret)
5224                 return ret;
5225
5226         *fpos += cnt;
5227
5228         return cnt;
5229 }
5230
5231 static int tracing_clock_open(struct inode *inode, struct file *file)
5232 {
5233         struct trace_array *tr = inode->i_private;
5234         int ret;
5235
5236         if (tracing_disabled)
5237                 return -ENODEV;
5238
5239         if (trace_array_get(tr))
5240                 return -ENODEV;
5241
5242         ret = single_open(file, tracing_clock_show, inode->i_private);
5243         if (ret < 0)
5244                 trace_array_put(tr);
5245
5246         return ret;
5247 }
5248
5249 struct ftrace_buffer_info {
5250         struct trace_iterator   iter;
5251         void                    *spare;
5252         unsigned int            read;
5253 };
5254
5255 #ifdef CONFIG_TRACER_SNAPSHOT
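/*
 * Open the snapshot file.  Readers get a full trace iterator over the
 * snapshot (max) buffer; writers only get a stub seq_file carrying the
 * iterator as private data.
 */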
5256 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5257 {
5258         struct trace_array *tr = inode->i_private;
5259         struct trace_iterator *iter;
5260         struct seq_file *m;
5261         int ret = 0;
5262
5263         if (trace_array_get(tr) < 0)
5264                 return -ENODEV;
5265
5266         if (file->f_mode & FMODE_READ) {
5267                 iter = __tracing_open(inode, file, true);
5268                 if (IS_ERR(iter))
5269                         ret = PTR_ERR(iter);
5270         } else {
5271                 /* Writes still need the seq_file to hold the private data */
5272                 ret = -ENOMEM;
5273                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5274                 if (!m)
5275                         goto out;
5276                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5277                 if (!iter) {
5278                         kfree(m);
5279                         goto out;
5280                 }
5281                 ret = 0;
5282
5283                 iter->tr = tr;
5284                 iter->trace_buffer = &tr->max_buffer;
5285                 iter->cpu_file = tracing_get_cpu(inode);
5286                 m->private = iter;
5287                 file->private_data = m;
5288         }
5289 out:
5290         if (ret < 0)
5291                 trace_array_put(tr);
5292
5293         return ret;
5294 }
5295
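/*
 * Writing to the snapshot file controls the snapshot buffer:
 *   0      - free the snapshot buffer (top-level, all-CPUs file only)
 *   1      - allocate the buffer if needed and take a snapshot (swap)
 *   other  - clear the snapshot buffer without freeing it
 */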
5296 static ssize_t
5297 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5298                        loff_t *ppos)
5299 {
5300         struct seq_file *m = filp->private_data;
5301         struct trace_iterator *iter = m->private;
5302         struct trace_array *tr = iter->tr;
5303         unsigned long val;
5304         int ret;
5305
5306         ret = tracing_update_buffers();
5307         if (ret < 0)
5308                 return ret;
5309
5310         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5311         if (ret)
5312                 return ret;
5313
5314         mutex_lock(&trace_types_lock);
5315
5316         if (tr->current_trace->use_max_tr) {
5317                 ret = -EBUSY;
5318                 goto out;
5319         }
5320
5321         switch (val) {
5322         case 0:
5323                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5324                         ret = -EINVAL;
5325                         break;
5326                 }
5327                 if (tr->allocated_snapshot)
5328                         free_snapshot(tr);
5329                 break;
5330         case 1:
5331 /* Only allow per-cpu swap if the ring buffer supports it */
5332 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5333                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5334                         ret = -EINVAL;
5335                         break;
5336                 }
5337 #endif
5338                 if (!tr->allocated_snapshot) {
5339                         ret = alloc_snapshot(tr);
5340                         if (ret < 0)
5341                                 break;
5342                 }
5343                 local_irq_disable();
5344                 /* Now, we're going to swap */
5345                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5346                         update_max_tr(tr, current, smp_processor_id());
5347                 else
5348                         update_max_tr_single(tr, current, iter->cpu_file);
5349                 local_irq_enable();
5350                 break;
5351         default:
5352                 if (tr->allocated_snapshot) {
5353                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5354                                 tracing_reset_online_cpus(&tr->max_buffer);
5355                         else
5356                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5357                 }
5358                 break;
5359         }
5360
5361         if (ret >= 0) {
5362                 *ppos += cnt;
5363                 ret = cnt;
5364         }
5365 out:
5366         mutex_unlock(&trace_types_lock);
5367         return ret;
5368 }
5369
5370 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5371 {
5372         struct seq_file *m = file->private_data;
5373         int ret;
5374
5375         ret = tracing_release(inode, file);
5376
5377         if (file->f_mode & FMODE_READ)
5378                 return ret;
5379
5380         /* If write only, the seq_file is just a stub */
5381         if (m)
5382                 kfree(m->private);
5383         kfree(m);
5384
5385         return 0;
5386 }
5387
5388 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5389 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5390                                     size_t count, loff_t *ppos);
5391 static int tracing_buffers_release(struct inode *inode, struct file *file);
5392 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5393                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5394
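/*
 * snapshot_raw: binary (trace_pipe_raw style) access to the snapshot
 * buffer.  Reuses the buffer file operations but points the iterator at
 * the max buffer.
 */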
5395 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5396 {
5397         struct ftrace_buffer_info *info;
5398         int ret;
5399
5400         ret = tracing_buffers_open(inode, filp);
5401         if (ret < 0)
5402                 return ret;
5403
5404         info = filp->private_data;
5405
5406         if (info->iter.trace->use_max_tr) {
5407                 tracing_buffers_release(inode, filp);
5408                 return -EBUSY;
5409         }
5410
5411         info->iter.snapshot = true;
5412         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5413
5414         return ret;
5415 }
5416
5417 #endif /* CONFIG_TRACER_SNAPSHOT */
5418
5419
5420 static const struct file_operations tracing_thresh_fops = {
5421         .open           = tracing_open_generic,
5422         .read           = tracing_thresh_read,
5423         .write          = tracing_thresh_write,
5424         .llseek         = generic_file_llseek,
5425 };
5426
5427 static const struct file_operations tracing_max_lat_fops = {
5428         .open           = tracing_open_generic,
5429         .read           = tracing_max_lat_read,
5430         .write          = tracing_max_lat_write,
5431         .llseek         = generic_file_llseek,
5432 };
5433
5434 static const struct file_operations set_tracer_fops = {
5435         .open           = tracing_open_generic,
5436         .read           = tracing_set_trace_read,
5437         .write          = tracing_set_trace_write,
5438         .llseek         = generic_file_llseek,
5439 };
5440
5441 static const struct file_operations tracing_pipe_fops = {
5442         .open           = tracing_open_pipe,
5443         .poll           = tracing_poll_pipe,
5444         .read           = tracing_read_pipe,
5445         .splice_read    = tracing_splice_read_pipe,
5446         .release        = tracing_release_pipe,
5447         .llseek         = no_llseek,
5448 };
5449
5450 static const struct file_operations tracing_entries_fops = {
5451         .open           = tracing_open_generic_tr,
5452         .read           = tracing_entries_read,
5453         .write          = tracing_entries_write,
5454         .llseek         = generic_file_llseek,
5455         .release        = tracing_release_generic_tr,
5456 };
5457
5458 static const struct file_operations tracing_total_entries_fops = {
5459         .open           = tracing_open_generic_tr,
5460         .read           = tracing_total_entries_read,
5461         .llseek         = generic_file_llseek,
5462         .release        = tracing_release_generic_tr,
5463 };
5464
5465 static const struct file_operations tracing_free_buffer_fops = {
5466         .open           = tracing_open_generic_tr,
5467         .write          = tracing_free_buffer_write,
5468         .release        = tracing_free_buffer_release,
5469 };
5470
5471 static const struct file_operations tracing_mark_fops = {
5472         .open           = tracing_open_generic_tr,
5473         .write          = tracing_mark_write,
5474         .llseek         = generic_file_llseek,
5475         .release        = tracing_release_generic_tr,
5476 };
5477
5478 static const struct file_operations trace_clock_fops = {
5479         .open           = tracing_clock_open,
5480         .read           = seq_read,
5481         .llseek         = seq_lseek,
5482         .release        = tracing_single_release_tr,
5483         .write          = tracing_clock_write,
5484 };
5485
5486 #ifdef CONFIG_TRACER_SNAPSHOT
5487 static const struct file_operations snapshot_fops = {
5488         .open           = tracing_snapshot_open,
5489         .read           = seq_read,
5490         .write          = tracing_snapshot_write,
5491         .llseek         = tracing_lseek,
5492         .release        = tracing_snapshot_release,
5493 };
5494
5495 static const struct file_operations snapshot_raw_fops = {
5496         .open           = snapshot_raw_open,
5497         .read           = tracing_buffers_read,
5498         .release        = tracing_buffers_release,
5499         .splice_read    = tracing_buffers_splice_read,
5500         .llseek         = no_llseek,
5501 };
5502
5503 #endif /* CONFIG_TRACER_SNAPSHOT */
5504
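/*
 * Open trace_pipe_raw: set up an iterator over raw ring buffer pages and
 * pin both the trace array and the current tracer so they cannot go away
 * while the file is open.
 */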
5505 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5506 {
5507         struct trace_array *tr = inode->i_private;
5508         struct ftrace_buffer_info *info;
5509         int ret;
5510
5511         if (tracing_disabled)
5512                 return -ENODEV;
5513
5514         if (trace_array_get(tr) < 0)
5515                 return -ENODEV;
5516
5517         info = kzalloc(sizeof(*info), GFP_KERNEL);
5518         if (!info) {
5519                 trace_array_put(tr);
5520                 return -ENOMEM;
5521         }
5522
5523         mutex_lock(&trace_types_lock);
5524
5525         info->iter.tr           = tr;
5526         info->iter.cpu_file     = tracing_get_cpu(inode);
5527         info->iter.trace        = tr->current_trace;
5528         info->iter.trace_buffer = &tr->trace_buffer;
5529         info->spare             = NULL;
5530         /* Force reading ring buffer for first read */
5531         info->read              = (unsigned int)-1;
5532
5533         filp->private_data = info;
5534
5535         tr->current_trace->ref++;
5536
5537         mutex_unlock(&trace_types_lock);
5538
5539         ret = nonseekable_open(inode, filp);
5540         if (ret < 0)
5541                 trace_array_put(tr);
5542
5543         return ret;
5544 }
5545
5546 static unsigned int
5547 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5548 {
5549         struct ftrace_buffer_info *info = filp->private_data;
5550         struct trace_iterator *iter = &info->iter;
5551
5552         return trace_poll(iter, filp, poll_table);
5553 }
5554
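/*
 * Read raw ring buffer pages: a spare page is filled via
 * ring_buffer_read_page() and then copied out to user space, possibly
 * spanning several read() calls.
 */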
5555 static ssize_t
5556 tracing_buffers_read(struct file *filp, char __user *ubuf,
5557                      size_t count, loff_t *ppos)
5558 {
5559         struct ftrace_buffer_info *info = filp->private_data;
5560         struct trace_iterator *iter = &info->iter;
5561         ssize_t ret;
5562         ssize_t size;
5563
5564         if (!count)
5565                 return 0;
5566
5567 #ifdef CONFIG_TRACER_MAX_TRACE
5568         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5569                 return -EBUSY;
5570 #endif
5571
5572         if (!info->spare)
5573                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5574                                                           iter->cpu_file);
5575         if (!info->spare)
5576                 return -ENOMEM;
5577
5578         /* Do we have previous read data to read? */
5579         if (info->read < PAGE_SIZE)
5580                 goto read;
5581
5582  again:
5583         trace_access_lock(iter->cpu_file);
5584         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5585                                     &info->spare,
5586                                     count,
5587                                     iter->cpu_file, 0);
5588         trace_access_unlock(iter->cpu_file);
5589
5590         if (ret < 0) {
5591                 if (trace_empty(iter)) {
5592                         if ((filp->f_flags & O_NONBLOCK))
5593                                 return -EAGAIN;
5594
5595                         ret = wait_on_pipe(iter, false);
5596                         if (ret)
5597                                 return ret;
5598
5599                         goto again;
5600                 }
5601                 return 0;
5602         }
5603
5604         info->read = 0;
5605  read:
5606         size = PAGE_SIZE - info->read;
5607         if (size > count)
5608                 size = count;
5609
5610         ret = copy_to_user(ubuf, info->spare + info->read, size);
5611         if (ret == size)
5612                 return -EFAULT;
5613
5614         size -= ret;
5615
5616         *ppos += size;
5617         info->read += size;
5618
5619         return size;
5620 }
5621
5622 static int tracing_buffers_release(struct inode *inode, struct file *file)
5623 {
5624         struct ftrace_buffer_info *info = file->private_data;
5625         struct trace_iterator *iter = &info->iter;
5626
5627         mutex_lock(&trace_types_lock);
5628
5629         iter->tr->current_trace->ref--;
5630
5631         __trace_array_put(iter->tr);
5632
5633         if (info->spare)
5634                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5635         kfree(info);
5636
5637         mutex_unlock(&trace_types_lock);
5638
5639         return 0;
5640 }
5641
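/*
 * A reference-counted ring buffer page handed out through splice.  The
 * page is returned to the ring buffer once the last user drops its
 * reference.
 */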
5642 struct buffer_ref {
5643         struct ring_buffer      *buffer;
5644         void                    *page;
5645         int                     ref;
5646 };
5647
5648 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5649                                     struct pipe_buffer *buf)
5650 {
5651         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5652
5653         if (--ref->ref)
5654                 return;
5655
5656         ring_buffer_free_read_page(ref->buffer, ref->page);
5657         kfree(ref);
5658         buf->private = 0;
5659 }
5660
5661 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5662                                 struct pipe_buffer *buf)
5663 {
5664         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5665
5666         ref->ref++;
5667 }
5668
5669 /* Pipe buffer operations for spliced ring buffer pages. */
5670 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5671         .can_merge              = 0,
5672         .confirm                = generic_pipe_buf_confirm,
5673         .release                = buffer_pipe_buf_release,
5674         .steal                  = generic_pipe_buf_steal,
5675         .get                    = buffer_pipe_buf_get,
5676 };
5677
5678 /*
5679  * Callback from splice_to_pipe(), used to release any remaining pages
5680  * in the spd if we errored out while filling the pipe.
5681  */
5682 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5683 {
5684         struct buffer_ref *ref =
5685                 (struct buffer_ref *)spd->partial[i].private;
5686
5687         if (--ref->ref)
5688                 return;
5689
5690         ring_buffer_free_read_page(ref->buffer, ref->page);
5691         kfree(ref);
5692         spd->partial[i].private = 0;
5693 }
5694
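/*
 * Splice raw ring buffer pages into a pipe without copying: each page is
 * wrapped in a buffer_ref and given back to the ring buffer when the
 * pipe side releases it.
 */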
5695 static ssize_t
5696 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5697                             struct pipe_inode_info *pipe, size_t len,
5698                             unsigned int flags)
5699 {
5700         struct ftrace_buffer_info *info = file->private_data;
5701         struct trace_iterator *iter = &info->iter;
5702         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5703         struct page *pages_def[PIPE_DEF_BUFFERS];
5704         struct splice_pipe_desc spd = {
5705                 .pages          = pages_def,
5706                 .partial        = partial_def,
5707                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5708                 .flags          = flags,
5709                 .ops            = &buffer_pipe_buf_ops,
5710                 .spd_release    = buffer_spd_release,
5711         };
5712         struct buffer_ref *ref;
5713         int entries, size, i;
5714         ssize_t ret = 0;
5715
5716 #ifdef CONFIG_TRACER_MAX_TRACE
5717         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5718                 return -EBUSY;
5719 #endif
5720
5721         if (splice_grow_spd(pipe, &spd))
5722                 return -ENOMEM;
5723
5724         if (*ppos & (PAGE_SIZE - 1))
5725                 return -EINVAL;
5726
5727         if (len & (PAGE_SIZE - 1)) {
5728                 if (len < PAGE_SIZE)
5729                         return -EINVAL;
5730                 len &= PAGE_MASK;
5731         }
5732
5733  again:
5734         trace_access_lock(iter->cpu_file);
5735         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5736
5737         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5738                 struct page *page;
5739                 int r;
5740
5741                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5742                 if (!ref) {
5743                         ret = -ENOMEM;
5744                         break;
5745                 }
5746
5747                 ref->ref = 1;
5748                 ref->buffer = iter->trace_buffer->buffer;
5749                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5750                 if (!ref->page) {
5751                         ret = -ENOMEM;
5752                         kfree(ref);
5753                         break;
5754                 }
5755
5756                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5757                                           len, iter->cpu_file, 1);
5758                 if (r < 0) {
5759                         ring_buffer_free_read_page(ref->buffer, ref->page);
5760                         kfree(ref);
5761                         break;
5762                 }
5763
5764                 /*
5765                  * Zero out any leftover data; this page is
5766                  * going to user land.
5767                  */
5768                 size = ring_buffer_page_len(ref->page);
5769                 if (size < PAGE_SIZE)
5770                         memset(ref->page + size, 0, PAGE_SIZE - size);
5771
5772                 page = virt_to_page(ref->page);
5773
5774                 spd.pages[i] = page;
5775                 spd.partial[i].len = PAGE_SIZE;
5776                 spd.partial[i].offset = 0;
5777                 spd.partial[i].private = (unsigned long)ref;
5778                 spd.nr_pages++;
5779                 *ppos += PAGE_SIZE;
5780
5781                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5782         }
5783
5784         trace_access_unlock(iter->cpu_file);
5785         spd.nr_pages = i;
5786
5787         /* did we read anything? */
5788         if (!spd.nr_pages) {
5789                 if (ret)
5790                         return ret;
5791
5792                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5793                         return -EAGAIN;
5794
5795                 ret = wait_on_pipe(iter, true);
5796                 if (ret)
5797                         return ret;
5798
5799                 goto again;
5800         }
5801
5802         ret = splice_to_pipe(pipe, &spd);
5803         splice_shrink_spd(&spd);
5804
5805         return ret;
5806 }
5807
5808 static const struct file_operations tracing_buffers_fops = {
5809         .open           = tracing_buffers_open,
5810         .read           = tracing_buffers_read,
5811         .poll           = tracing_buffers_poll,
5812         .release        = tracing_buffers_release,
5813         .splice_read    = tracing_buffers_splice_read,
5814         .llseek         = no_llseek,
5815 };
5816
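/*
 * per_cpu/cpuN/stats: dump per-cpu ring buffer statistics (entries,
 * overruns, bytes, timestamps, dropped and read events) as text.
 */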
5817 static ssize_t
5818 tracing_stats_read(struct file *filp, char __user *ubuf,
5819                    size_t count, loff_t *ppos)
5820 {
5821         struct inode *inode = file_inode(filp);
5822         struct trace_array *tr = inode->i_private;
5823         struct trace_buffer *trace_buf = &tr->trace_buffer;
5824         int cpu = tracing_get_cpu(inode);
5825         struct trace_seq *s;
5826         unsigned long cnt;
5827         unsigned long long t;
5828         unsigned long usec_rem;
5829
5830         s = kmalloc(sizeof(*s), GFP_KERNEL);
5831         if (!s)
5832                 return -ENOMEM;
5833
5834         trace_seq_init(s);
5835
5836         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5837         trace_seq_printf(s, "entries: %ld\n", cnt);
5838
5839         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5840         trace_seq_printf(s, "overrun: %ld\n", cnt);
5841
5842         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5843         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5844
5845         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5846         trace_seq_printf(s, "bytes: %ld\n", cnt);
5847
5848         if (trace_clocks[tr->clock_id].in_ns) {
5849                 /* local or global for trace_clock */
5850                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5851                 usec_rem = do_div(t, USEC_PER_SEC);
5852                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5853                                                                 t, usec_rem);
5854
5855                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5856                 usec_rem = do_div(t, USEC_PER_SEC);
5857                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5858         } else {
5859                 /* counter or tsc mode for trace_clock */
5860                 trace_seq_printf(s, "oldest event ts: %llu\n",
5861                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5862
5863                 trace_seq_printf(s, "now ts: %llu\n",
5864                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5865         }
5866
5867         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5868         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5869
5870         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5871         trace_seq_printf(s, "read events: %ld\n", cnt);
5872
5873         count = simple_read_from_buffer(ubuf, count, ppos,
5874                                         s->buffer, trace_seq_used(s));
5875
5876         kfree(s);
5877
5878         return count;
5879 }
5880
5881 static const struct file_operations tracing_stats_fops = {
5882         .open           = tracing_open_generic_tr,
5883         .read           = tracing_stats_read,
5884         .llseek         = generic_file_llseek,
5885         .release        = tracing_release_generic_tr,
5886 };
5887
5888 #ifdef CONFIG_DYNAMIC_FTRACE
5889
5890 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5891 {
5892         return 0;
5893 }
5894
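/*
 * Print the dynamic ftrace counter passed in as the file's private data
 * (presumably the total number of patched functions), followed by any
 * architecture specific dyn info.
 */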
5895 static ssize_t
5896 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5897                   size_t cnt, loff_t *ppos)
5898 {
5899         static char ftrace_dyn_info_buffer[1024];
5900         static DEFINE_MUTEX(dyn_info_mutex);
5901         unsigned long *p = filp->private_data;
5902         char *buf = ftrace_dyn_info_buffer;
5903         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5904         int r;
5905
5906         mutex_lock(&dyn_info_mutex);
5907         r = sprintf(buf, "%ld ", *p);
5908
5909         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5910         buf[r++] = '\n';
5911
5912         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5913
5914         mutex_unlock(&dyn_info_mutex);
5915
5916         return r;
5917 }
5918
5919 static const struct file_operations tracing_dyn_info_fops = {
5920         .open           = tracing_open_generic,
5921         .read           = tracing_read_dyn_info,
5922         .llseek         = generic_file_llseek,
5923 };
5924 #endif /* CONFIG_DYNAMIC_FTRACE */
5925
5926 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
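/*
 * "snapshot" function probes for set_ftrace_filter: take a snapshot each
 * time the probed function is hit, either unconditionally or a limited
 * number of times when a count is given.
 */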
5927 static void
5928 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5929 {
5930         tracing_snapshot();
5931 }
5932
5933 static void
5934 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5935 {
5936         unsigned long *count = (long *)data;
5937
5938         if (!*count)
5939                 return;
5940
5941         if (*count != -1)
5942                 (*count)--;
5943
5944         tracing_snapshot();
5945 }
5946
5947 static int
5948 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5949                       struct ftrace_probe_ops *ops, void *data)
5950 {
5951         long count = (long)data;
5952
5953         seq_printf(m, "%ps:", (void *)ip);
5954
5955         seq_puts(m, "snapshot");
5956
5957         if (count == -1)
5958                 seq_puts(m, ":unlimited\n");
5959         else
5960                 seq_printf(m, ":count=%ld\n", count);
5961
5962         return 0;
5963 }
5964
5965 static struct ftrace_probe_ops snapshot_probe_ops = {
5966         .func                   = ftrace_snapshot,
5967         .print                  = ftrace_snapshot_print,
5968 };
5969
5970 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5971         .func                   = ftrace_count_snapshot,
5972         .print                  = ftrace_snapshot_print,
5973 };
5974
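/*
 * Parse "func:snapshot[:count]" from set_ftrace_filter and register the
 * matching probe (or unregister it when the glob is prefixed with '!').
 */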
5975 static int
5976 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5977                                char *glob, char *cmd, char *param, int enable)
5978 {
5979         struct ftrace_probe_ops *ops;
5980         void *count = (void *)-1;
5981         char *number;
5982         int ret;
5983
5984         /* hash funcs only work with set_ftrace_filter */
5985         if (!enable)
5986                 return -EINVAL;
5987
5988         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5989
5990         if (glob[0] == '!') {
5991                 unregister_ftrace_function_probe_func(glob+1, ops);
5992                 return 0;
5993         }
5994
5995         if (!param)
5996                 goto out_reg;
5997
5998         number = strsep(&param, ":");
5999
6000         if (!strlen(number))
6001                 goto out_reg;
6002
6003         /*
6004          * We use the callback data field (which is a pointer)
6005          * as our counter.
6006          */
6007         ret = kstrtoul(number, 0, (unsigned long *)&count);
6008         if (ret)
6009                 return ret;
6010
6011  out_reg:
6012         ret = register_ftrace_function_probe(glob, ops, count);
6013
6014         if (ret >= 0)
6015                 alloc_snapshot(&global_trace);
6016
6017         return ret < 0 ? ret : 0;
6018 }
6019
6020 static struct ftrace_func_command ftrace_snapshot_cmd = {
6021         .name                   = "snapshot",
6022         .func                   = ftrace_trace_snapshot_callback,
6023 };
6024
6025 static __init int register_snapshot_cmd(void)
6026 {
6027         return register_ftrace_command(&ftrace_snapshot_cmd);
6028 }
6029 #else
6030 static inline __init int register_snapshot_cmd(void) { return 0; }
6031 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6032
6033 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6034 {
6035         if (WARN_ON(!tr->dir))
6036                 return ERR_PTR(-ENODEV);
6037
6038         /* Top directory uses NULL as the parent */
6039         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6040                 return NULL;
6041
6042         /* All sub buffers have a descriptor */
6043         return tr->dir;
6044 }
6045
6046 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6047 {
6048         struct dentry *d_tracer;
6049
6050         if (tr->percpu_dir)
6051                 return tr->percpu_dir;
6052
6053         d_tracer = tracing_get_dentry(tr);
6054         if (IS_ERR(d_tracer))
6055                 return NULL;
6056
6057         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6058
6059         WARN_ONCE(!tr->percpu_dir,
6060                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6061
6062         return tr->percpu_dir;
6063 }
6064
6065 static struct dentry *
6066 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6067                       void *data, long cpu, const struct file_operations *fops)
6068 {
6069         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6070
6071         if (ret) /* See tracing_get_cpu() */
6072                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6073         return ret;
6074 }
6075
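/*
 * Create the per_cpu/cpuN directory and its files: trace, trace_pipe,
 * trace_pipe_raw, stats, buffer_size_kb and, if configured, the snapshot
 * files.
 */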
6076 static void
6077 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6078 {
6079         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6080         struct dentry *d_cpu;
6081         char cpu_dir[30]; /* 30 characters should be more than enough */
6082
6083         if (!d_percpu)
6084                 return;
6085
6086         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6087         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6088         if (!d_cpu) {
6089                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6090                 return;
6091         }
6092
6093         /* per cpu trace_pipe */
6094         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6095                                 tr, cpu, &tracing_pipe_fops);
6096
6097         /* per cpu trace */
6098         trace_create_cpu_file("trace", 0644, d_cpu,
6099                                 tr, cpu, &tracing_fops);
6100
6101         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6102                                 tr, cpu, &tracing_buffers_fops);
6103
6104         trace_create_cpu_file("stats", 0444, d_cpu,
6105                                 tr, cpu, &tracing_stats_fops);
6106
6107         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6108                                 tr, cpu, &tracing_entries_fops);
6109
6110 #ifdef CONFIG_TRACER_SNAPSHOT
6111         trace_create_cpu_file("snapshot", 0644, d_cpu,
6112                                 tr, cpu, &snapshot_fops);
6113
6114         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6115                                 tr, cpu, &snapshot_raw_fops);
6116 #endif
6117 }
6118
6119 #ifdef CONFIG_FTRACE_SELFTEST
6120 /* Let selftest have access to static functions in this file */
6121 #include "trace_selftest.c"
6122 #endif
6123
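/*
 * options/<opt> files for tracer-specific flags: reads return "0" or "1",
 * writes update the flag through __set_tracer_option().
 */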
6124 static ssize_t
6125 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6126                         loff_t *ppos)
6127 {
6128         struct trace_option_dentry *topt = filp->private_data;
6129         char *buf;
6130
6131         if (topt->flags->val & topt->opt->bit)
6132                 buf = "1\n";
6133         else
6134                 buf = "0\n";
6135
6136         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6137 }
6138
6139 static ssize_t
6140 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6141                          loff_t *ppos)
6142 {
6143         struct trace_option_dentry *topt = filp->private_data;
6144         unsigned long val;
6145         int ret;
6146
6147         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6148         if (ret)
6149                 return ret;
6150
6151         if (val != 0 && val != 1)
6152                 return -EINVAL;
6153
6154         if (!!(topt->flags->val & topt->opt->bit) != val) {
6155                 mutex_lock(&trace_types_lock);
6156                 ret = __set_tracer_option(topt->tr, topt->flags,
6157                                           topt->opt, !val);
6158                 mutex_unlock(&trace_types_lock);
6159                 if (ret)
6160                         return ret;
6161         }
6162
6163         *ppos += cnt;
6164
6165         return cnt;
6166 }
6167
6168
6169 static const struct file_operations trace_options_fops = {
6170         .open = tracing_open_generic,
6171         .read = trace_options_read,
6172         .write = trace_options_write,
6173         .llseek = generic_file_llseek,
6174 };
6175
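/*
 * options/<opt> files for the core trace_options flags; the bit index is
 * carried in the file's private data.
 */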
6176 static ssize_t
6177 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6178                         loff_t *ppos)
6179 {
6180         long index = (long)filp->private_data;
6181         char *buf;
6182
6183         if (trace_flags & (1 << index))
6184                 buf = "1\n";
6185         else
6186                 buf = "0\n";
6187
6188         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6189 }
6190
6191 static ssize_t
6192 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6193                          loff_t *ppos)
6194 {
6195         struct trace_array *tr = &global_trace;
6196         long index = (long)filp->private_data;
6197         unsigned long val;
6198         int ret;
6199
6200         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6201         if (ret)
6202                 return ret;
6203
6204         if (val != 0 && val != 1)
6205                 return -EINVAL;
6206
6207         mutex_lock(&trace_types_lock);
6208         ret = set_tracer_flag(tr, 1 << index, val);
6209         mutex_unlock(&trace_types_lock);
6210
6211         if (ret < 0)
6212                 return ret;
6213
6214         *ppos += cnt;
6215
6216         return cnt;
6217 }
6218
6219 static const struct file_operations trace_options_core_fops = {
6220         .open = tracing_open_generic,
6221         .read = trace_options_core_read,
6222         .write = trace_options_core_write,
6223         .llseek = generic_file_llseek,
6224 };
6225
6226 struct dentry *trace_create_file(const char *name,
6227                                  umode_t mode,
6228                                  struct dentry *parent,
6229                                  void *data,
6230                                  const struct file_operations *fops)
6231 {
6232         struct dentry *ret;
6233
6234         ret = tracefs_create_file(name, mode, parent, data, fops);
6235         if (!ret)
6236                 pr_warning("Could not create tracefs '%s' entry\n", name);
6237
6238         return ret;
6239 }
6240
6241
6242 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6243 {
6244         struct dentry *d_tracer;
6245
6246         if (tr->options)
6247                 return tr->options;
6248
6249         d_tracer = tracing_get_dentry(tr);
6250         if (IS_ERR(d_tracer))
6251                 return NULL;
6252
6253         tr->options = tracefs_create_dir("options", d_tracer);
6254         if (!tr->options) {
6255                 pr_warning("Could not create tracefs directory 'options'\n");
6256                 return NULL;
6257         }
6258
6259         return tr->options;
6260 }
6261
6262 static void
6263 create_trace_option_file(struct trace_array *tr,
6264                          struct trace_option_dentry *topt,
6265                          struct tracer_flags *flags,
6266                          struct tracer_opt *opt)
6267 {
6268         struct dentry *t_options;
6269
6270         t_options = trace_options_init_dentry(tr);
6271         if (!t_options)
6272                 return;
6273
6274         topt->flags = flags;
6275         topt->opt = opt;
6276         topt->tr = tr;
6277
6278         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6279                                     &trace_options_fops);
6280
6281 }
6282
6283 static struct trace_option_dentry *
6284 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6285 {
6286         struct trace_option_dentry *topts;
6287         struct tracer_flags *flags;
6288         struct tracer_opt *opts;
6289         int cnt;
6290
6291         if (!tracer)
6292                 return NULL;
6293
6294         flags = tracer->flags;
6295
6296         if (!flags || !flags->opts)
6297                 return NULL;
6298
6299         opts = flags->opts;
6300
6301         for (cnt = 0; opts[cnt].name; cnt++)
6302                 ;
6303
6304         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6305         if (!topts)
6306                 return NULL;
6307
6308         for (cnt = 0; opts[cnt].name; cnt++) {
6309                 create_trace_option_file(tr, &topts[cnt], flags,
6310                                          &opts[cnt]);
6311                 WARN_ONCE(topts[cnt].entry == NULL,
6312                           "Failed to create trace option: %s",
6313                           opts[cnt].name);
6314         }
6315
6316         return topts;
6317 }
6318
6319 static struct dentry *
6320 create_trace_option_core_file(struct trace_array *tr,
6321                               const char *option, long index)
6322 {
6323         struct dentry *t_options;
6324
6325         t_options = trace_options_init_dentry(tr);
6326         if (!t_options)
6327                 return NULL;
6328
6329         return trace_create_file(option, 0644, t_options, (void *)index,
6330                                     &trace_options_core_fops);
6331 }
6332
6333 static __init void create_trace_options_dir(struct trace_array *tr)
6334 {
6335         struct dentry *t_options;
6336         int i;
6337
6338         t_options = trace_options_init_dentry(tr);
6339         if (!t_options)
6340                 return;
6341
6342         for (i = 0; trace_options[i]; i++)
6343                 create_trace_option_core_file(tr, trace_options[i], i);
6344 }
6345
6346 static ssize_t
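/*
 * tracing_on: report and control whether writes to the ring buffer are
 * enabled, also invoking the current tracer's start/stop callbacks.
 */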
6347 rb_simple_read(struct file *filp, char __user *ubuf,
6348                size_t cnt, loff_t *ppos)
6349 {
6350         struct trace_array *tr = filp->private_data;
6351         char buf[64];
6352         int r;
6353
6354         r = tracer_tracing_is_on(tr);
6355         r = sprintf(buf, "%d\n", r);
6356
6357         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6358 }
6359
6360 static ssize_t
6361 rb_simple_write(struct file *filp, const char __user *ubuf,
6362                 size_t cnt, loff_t *ppos)
6363 {
6364         struct trace_array *tr = filp->private_data;
6365         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6366         unsigned long val;
6367         int ret;
6368
6369         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6370         if (ret)
6371                 return ret;
6372
6373         if (buffer) {
6374                 mutex_lock(&trace_types_lock);
6375                 if (val) {
6376                         tracer_tracing_on(tr);
6377                         if (tr->current_trace->start)
6378                                 tr->current_trace->start(tr);
6379                 } else {
6380                         tracer_tracing_off(tr);
6381                         if (tr->current_trace->stop)
6382                                 tr->current_trace->stop(tr);
6383                 }
6384                 mutex_unlock(&trace_types_lock);
6385         }
6386
6387         (*ppos)++;
6388
6389         return cnt;
6390 }
6391
6392 static const struct file_operations rb_simple_fops = {
6393         .open           = tracing_open_generic_tr,
6394         .read           = rb_simple_read,
6395         .write          = rb_simple_write,
6396         .release        = tracing_release_generic_tr,
6397         .llseek         = default_llseek,
6398 };
6399
6400 struct dentry *trace_instance_dir;
6401
6402 static void
6403 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6404
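/*
 * Allocate one trace_buffer: the ring buffer itself plus its per-cpu
 * trace_array_cpu data.
 */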
6405 static int
6406 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6407 {
6408         enum ring_buffer_flags rb_flags;
6409
6410         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6411
6412         buf->tr = tr;
6413
6414         buf->buffer = ring_buffer_alloc(size, rb_flags);
6415         if (!buf->buffer)
6416                 return -ENOMEM;
6417
6418         buf->data = alloc_percpu(struct trace_array_cpu);
6419         if (!buf->data) {
6420                 ring_buffer_free(buf->buffer);
6421                 return -ENOMEM;
6422         }
6423
6424         /* Allocate the first page for all buffers */
6425         set_buffer_entries(&tr->trace_buffer,
6426                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6427
6428         return 0;
6429 }
6430
6431 static int allocate_trace_buffers(struct trace_array *tr, int size)
6432 {
6433         int ret;
6434
6435         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6436         if (ret)
6437                 return ret;
6438
6439 #ifdef CONFIG_TRACER_MAX_TRACE
6440         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6441                                     allocate_snapshot ? size : 1);
6442         if (WARN_ON(ret)) {
6443                 ring_buffer_free(tr->trace_buffer.buffer);
                     tr->trace_buffer.buffer = NULL;
6444                 free_percpu(tr->trace_buffer.data);
                     tr->trace_buffer.data = NULL;
6445                 return -ENOMEM;
6446         }
6447         tr->allocated_snapshot = allocate_snapshot;
6448
6449         /*
6450          * Only the top level trace array gets its snapshot allocated
6451          * from the kernel command line.
6452          */
6453         allocate_snapshot = false;
6454 #endif
6455         return 0;
6456 }
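/*
 * The allocate_snapshot flag consumed above is normally set from the
 * kernel command line.  A hedged example, assuming CONFIG_TRACER_MAX_TRACE
 * is enabled:
 *
 *   linux ... alloc_snapshot ftrace=function
 *
 * sizes the max/snapshot buffer up front at boot.  Without it, max_buffer
 * is allocated at the minimum size and only resized when a snapshot is
 * actually requested.
 */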
6457
6458 static void free_trace_buffer(struct trace_buffer *buf)
6459 {
6460         if (buf->buffer) {
6461                 ring_buffer_free(buf->buffer);
6462                 buf->buffer = NULL;
6463                 free_percpu(buf->data);
6464                 buf->data = NULL;
6465         }
6466 }
6467
6468 static void free_trace_buffers(struct trace_array *tr)
6469 {
6470         if (!tr)
6471                 return;
6472
6473         free_trace_buffer(&tr->trace_buffer);
6474
6475 #ifdef CONFIG_TRACER_MAX_TRACE
6476         free_trace_buffer(&tr->max_buffer);
6477 #endif
6478 }
6479
6480 static int instance_mkdir(const char *name)
6481 {
6482         struct trace_array *tr;
6483         int ret;
6484
6485         mutex_lock(&trace_types_lock);
6486
6487         ret = -EEXIST;
6488         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6489                 if (tr->name && strcmp(tr->name, name) == 0)
6490                         goto out_unlock;
6491         }
6492
6493         ret = -ENOMEM;
6494         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6495         if (!tr)
6496                 goto out_unlock;
6497
6498         tr->name = kstrdup(name, GFP_KERNEL);
6499         if (!tr->name)
6500                 goto out_free_tr;
6501
6502         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6503                 goto out_free_tr;
6504
6505         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6506
6507         raw_spin_lock_init(&tr->start_lock);
6508
6509         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6510
6511         tr->current_trace = &nop_trace;
6512
6513         INIT_LIST_HEAD(&tr->systems);
6514         INIT_LIST_HEAD(&tr->events);
6515
6516         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6517                 goto out_free_tr;
6518
6519         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6520         if (!tr->dir)
6521                 goto out_free_tr;
6522
6523         ret = event_trace_add_tracer(tr->dir, tr);
6524         if (ret) {
6525                 tracefs_remove_recursive(tr->dir);
6526                 goto out_free_tr;
6527         }
6528
6529         init_tracer_tracefs(tr, tr->dir);
6530
6531         list_add(&tr->list, &ftrace_trace_arrays);
6532
6533         mutex_unlock(&trace_types_lock);
6534
6535         return 0;
6536
6537  out_free_tr:
6538         free_trace_buffers(tr);
6539         free_cpumask_var(tr->tracing_cpumask);
6540         kfree(tr->name);
6541         kfree(tr);
6542
6543  out_unlock:
6544         mutex_unlock(&trace_types_lock);
6545
6546         return ret;
6547
6548 }
6549
6550 static int instance_rmdir(const char *name)
6551 {
6552         struct trace_array *tr;
6553         int found = 0;
6554         int ret;
6555
6556         mutex_lock(&trace_types_lock);
6557
6558         ret = -ENODEV;
6559         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6560                 if (tr->name && strcmp(tr->name, name) == 0) {
6561                         found = 1;
6562                         break;
6563                 }
6564         }
6565         if (!found)
6566                 goto out_unlock;
6567
6568         ret = -EBUSY;
6569         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6570                 goto out_unlock;
6571
6572         list_del(&tr->list);
6573
6574         tracing_set_nop(tr);
6575         event_trace_del_tracer(tr);
6576         ftrace_destroy_function_files(tr);
6577         tracefs_remove_recursive(tr->dir);
6578         free_trace_buffers(tr);
6579
6580         kfree(tr->name);
6581         kfree(tr);
6582
6583         ret = 0;
6584
6585  out_unlock:
6586         mutex_unlock(&trace_types_lock);
6587
6588         return ret;
6589 }
6590
6591 static __init void create_trace_instances(struct dentry *d_tracer)
6592 {
6593         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6594                                                          instance_mkdir,
6595                                                          instance_rmdir);
6596         if (WARN_ON(!trace_instance_dir))
6597                 return;
6598 }
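/*
 * With the "instances" directory registered above, additional trace
 * arrays can be created and destroyed from user space with plain
 * mkdir/rmdir, which end up in instance_mkdir()/instance_rmdir().
 * A sketch, assuming tracefs is mounted at /sys/kernel/debug/tracing
 * and the sched events are available:
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo
 *   echo 1 > /sys/kernel/debug/tracing/instances/foo/events/sched/enable
 *   cat /sys/kernel/debug/tracing/instances/foo/trace
 *   rmdir /sys/kernel/debug/tracing/instances/foo
 */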
6599
6600 static void
6601 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6602 {
6603         int cpu;
6604
6605         trace_create_file("available_tracers", 0444, d_tracer,
6606                         tr, &show_traces_fops);
6607
6608         trace_create_file("current_tracer", 0644, d_tracer,
6609                         tr, &set_tracer_fops);
6610
6611         trace_create_file("tracing_cpumask", 0644, d_tracer,
6612                           tr, &tracing_cpumask_fops);
6613
6614         trace_create_file("trace_options", 0644, d_tracer,
6615                           tr, &tracing_iter_fops);
6616
6617         trace_create_file("trace", 0644, d_tracer,
6618                           tr, &tracing_fops);
6619
6620         trace_create_file("trace_pipe", 0444, d_tracer,
6621                           tr, &tracing_pipe_fops);
6622
6623         trace_create_file("buffer_size_kb", 0644, d_tracer,
6624                           tr, &tracing_entries_fops);
6625
6626         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6627                           tr, &tracing_total_entries_fops);
6628
6629         trace_create_file("free_buffer", 0200, d_tracer,
6630                           tr, &tracing_free_buffer_fops);
6631
6632         trace_create_file("trace_marker", 0220, d_tracer,
6633                           tr, &tracing_mark_fops);
6634
6635         trace_create_file("trace_clock", 0644, d_tracer, tr,
6636                           &trace_clock_fops);
6637
6638         trace_create_file("tracing_on", 0644, d_tracer,
6639                           tr, &rb_simple_fops);
6640
6641 #ifdef CONFIG_TRACER_MAX_TRACE
6642         trace_create_file("tracing_max_latency", 0644, d_tracer,
6643                         &tr->max_latency, &tracing_max_lat_fops);
6644 #endif
6645
6646         if (ftrace_create_function_files(tr, d_tracer))
6647                 WARN(1, "Could not allocate function filter files");
6648
6649 #ifdef CONFIG_TRACER_SNAPSHOT
6650         trace_create_file("snapshot", 0644, d_tracer,
6651                           tr, &snapshot_fops);
6652 #endif
6653
6654         for_each_tracing_cpu(cpu)
6655                 tracing_init_tracefs_percpu(tr, cpu);
6656
6657 }
6658
6659 static struct vfsmount *trace_automount(void *ignore)
6660 {
6661         struct vfsmount *mnt;
6662         struct file_system_type *type;
6663
6664         /*
6665          * To maintain backward compatibility for tools that mount
6666          * debugfs to get to the tracing facility, tracefs is automatically
6667          * mounted to the debugfs/tracing directory.
6668          */
6669         type = get_fs_type("tracefs");
6670         if (!type)
6671                 return NULL;
6672         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6673         put_filesystem(type);
6674         if (IS_ERR(mnt))
6675                 return NULL;
6676         mntget(mnt);
6677
6678         return mnt;
6679 }
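/*
 * The automount above only exists for the legacy debugfs path.  Newer
 * tooling can mount tracefs directly instead; a sketch, assuming the
 * conventional mount point:
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 *
 * while older tools that cd into /sys/kernel/debug/tracing keep working
 * because the directory is automounted on first access.
 */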
6680
6681 /**
6682  * tracing_init_dentry - initialize top level trace array
6683  *
6684  * This is called when creating files or directories in the tracing
6685  * directory. It is called via fs_initcall() by any of the boot up code
6686  * and expects to return the dentry of the top level tracing directory.
6687  */
6688 struct dentry *tracing_init_dentry(void)
6689 {
6690         struct trace_array *tr = &global_trace;
6691
6692         /* The top level trace array uses NULL as parent */
6693         if (tr->dir)
6694                 return NULL;
6695
6696         if (WARN_ON(!debugfs_initialized()))
6697                 return ERR_PTR(-ENODEV);
6698
6699         /*
6700          * As there may still be users that expect the tracing
6701          * files to exist in debugfs/tracing, we must automount
6702          * the tracefs file system there, so older tools still
6703          * work with the newer kernel.
6704          */
6705         tr->dir = debugfs_create_automount("tracing", NULL,
6706                                            trace_automount, NULL);
6707         if (!tr->dir) {
6708                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6709                 return ERR_PTR(-ENOMEM);
6710         }
6711
6712         return NULL;
6713 }
6714
6715 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6716 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6717
6718 static void __init trace_enum_init(void)
6719 {
6720         int len;
6721
6722         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6723         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6724 }
6725
6726 #ifdef CONFIG_MODULES
6727 static void trace_module_add_enums(struct module *mod)
6728 {
6729         if (!mod->num_trace_enums)
6730                 return;
6731
6732         /*
6733          * Modules with bad taint do not have events created, so do
6734          * not bother with enums either.
6735          */
6736         if (trace_module_has_bad_taint(mod))
6737                 return;
6738
6739         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6740 }
6741
6742 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6743 static void trace_module_remove_enums(struct module *mod)
6744 {
6745         union trace_enum_map_item *map;
6746         union trace_enum_map_item **last = &trace_enum_maps;
6747
6748         if (!mod->num_trace_enums)
6749                 return;
6750
6751         mutex_lock(&trace_enum_mutex);
6752
6753         map = trace_enum_maps;
6754
6755         while (map) {
6756                 if (map->head.mod == mod)
6757                         break;
6758                 map = trace_enum_jmp_to_tail(map);
6759                 last = &map->tail.next;
6760                 map = map->tail.next;
6761         }
6762         if (!map)
6763                 goto out;
6764
6765         *last = trace_enum_jmp_to_tail(map)->tail.next;
6766         kfree(map);
6767  out:
6768         mutex_unlock(&trace_enum_mutex);
6769 }
6770 #else
6771 static inline void trace_module_remove_enums(struct module *mod) { }
6772 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6773
6774 static int trace_module_notify(struct notifier_block *self,
6775                                unsigned long val, void *data)
6776 {
6777         struct module *mod = data;
6778
6779         switch (val) {
6780         case MODULE_STATE_COMING:
6781                 trace_module_add_enums(mod);
6782                 break;
6783         case MODULE_STATE_GOING:
6784                 trace_module_remove_enums(mod);
6785                 break;
6786         }
6787
6788         return 0;
6789 }
6790
6791 static struct notifier_block trace_module_nb = {
6792         .notifier_call = trace_module_notify,
6793         .priority = 0,
6794 };
6795 #endif /* CONFIG_MODULES */
6796
6797 static __init int tracer_init_tracefs(void)
6798 {
6799         struct dentry *d_tracer;
6800         struct tracer *t;
6801
6802         trace_access_lock_init();
6803
6804         d_tracer = tracing_init_dentry();
6805         if (IS_ERR(d_tracer))
6806                 return 0;
6807
6808         init_tracer_tracefs(&global_trace, d_tracer);
6809
6810         trace_create_file("tracing_thresh", 0644, d_tracer,
6811                         &global_trace, &tracing_thresh_fops);
6812
6813         trace_create_file("README", 0444, d_tracer,
6814                         NULL, &tracing_readme_fops);
6815
6816         trace_create_file("saved_cmdlines", 0444, d_tracer,
6817                         NULL, &tracing_saved_cmdlines_fops);
6818
6819         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6820                           NULL, &tracing_saved_cmdlines_size_fops);
6821
6822         trace_enum_init();
6823
6824         trace_create_enum_file(d_tracer);
6825
6826 #ifdef CONFIG_MODULES
6827         register_module_notifier(&trace_module_nb);
6828 #endif
6829
6830 #ifdef CONFIG_DYNAMIC_FTRACE
6831         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6832                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6833 #endif
6834
6835         create_trace_instances(d_tracer);
6836
6837         create_trace_options_dir(&global_trace);
6838
6839         mutex_lock(&trace_types_lock);
6840         for (t = trace_types; t; t = t->next)
6841                 add_tracer_options(&global_trace, t);
6842         mutex_unlock(&trace_types_lock);
6843
6844         return 0;
6845 }
6846
6847 static int trace_panic_handler(struct notifier_block *this,
6848                                unsigned long event, void *unused)
6849 {
6850         if (ftrace_dump_on_oops)
6851                 ftrace_dump(ftrace_dump_on_oops);
6852         return NOTIFY_OK;
6853 }
6854
6855 static struct notifier_block trace_panic_notifier = {
6856         .notifier_call  = trace_panic_handler,
6857         .next           = NULL,
6858         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6859 };
6860
6861 static int trace_die_handler(struct notifier_block *self,
6862                              unsigned long val,
6863                              void *data)
6864 {
6865         switch (val) {
6866         case DIE_OOPS:
6867                 if (ftrace_dump_on_oops)
6868                         ftrace_dump(ftrace_dump_on_oops);
6869                 break;
6870         default:
6871                 break;
6872         }
6873         return NOTIFY_OK;
6874 }
6875
6876 static struct notifier_block trace_die_notifier = {
6877         .notifier_call = trace_die_handler,
6878         .priority = 200
6879 };
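/*
 * Both notifiers above only act when ftrace_dump_on_oops is non-zero.
 * A hedged sketch of enabling it, assuming the standard boot parameter
 * and sysctl are available:
 *
 *   linux ... ftrace_dump_on_oops            # dump all CPUs on oops
 *   linux ... ftrace_dump_on_oops=orig_cpu   # dump only the oopsing CPU
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   # at run time
 */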
6880
6881 /*
6882  * printk is set to a max of 1024; we really don't need it that big.
6883  * Nothing should be printing 1000 characters anyway.
6884  */
6885 #define TRACE_MAX_PRINT         1000
6886
6887 /*
6888  * Define here KERN_TRACE so that we have one place to modify
6889  * it if we decide to change what log level the ftrace dump
6890  * should be at.
6891  */
6892 #define KERN_TRACE              KERN_EMERG
6893
6894 void
6895 trace_printk_seq(struct trace_seq *s)
6896 {
6897         /* Probably should print a warning here. */
6898         if (s->seq.len >= TRACE_MAX_PRINT)
6899                 s->seq.len = TRACE_MAX_PRINT;
6900
6901         /*
6902          * More paranoia: although the buffer size is set to
6903          * PAGE_SIZE and TRACE_MAX_PRINT is 1000, this is just
6904          * an extra layer of protection.
6905          */
6906         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
6907                 s->seq.len = s->seq.size - 1;
6908
6909         /* should be zero terminated, but we are paranoid. */
6910         s->buffer[s->seq.len] = 0;
6911
6912         printk(KERN_TRACE "%s", s->buffer);
6913
6914         trace_seq_init(s);
6915 }
6916
6917 void trace_init_global_iter(struct trace_iterator *iter)
6918 {
6919         iter->tr = &global_trace;
6920         iter->trace = iter->tr->current_trace;
6921         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6922         iter->trace_buffer = &global_trace.trace_buffer;
6923
6924         if (iter->trace && iter->trace->open)
6925                 iter->trace->open(iter);
6926
6927         /* Annotate start of buffers if we had overruns */
6928         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6929                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6930
6931         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6932         if (trace_clocks[iter->tr->clock_id].in_ns)
6933                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6934 }
6935
6936 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6937 {
6938         /* use static because iter can be a bit big for the stack */
6939         static struct trace_iterator iter;
6940         static atomic_t dump_running;
6941         unsigned int old_userobj;
6942         unsigned long flags;
6943         int cnt = 0, cpu;
6944
6945         /* Only allow one dump user at a time. */
6946         if (atomic_inc_return(&dump_running) != 1) {
6947                 atomic_dec(&dump_running);
6948                 return;
6949         }
6950
6951         /*
6952          * Always turn off tracing when we dump.
6953          * We don't need to show trace output of what happens
6954          * between multiple crashes.
6955          *
6956          * If the user does a sysrq-z, then they can re-enable
6957          * tracing with echo 1 > tracing_on.
6958          */
6959         tracing_off();
6960
6961         local_irq_save(flags);
6962
6963         /* Simulate the iterator */
6964         trace_init_global_iter(&iter);
6965
6966         for_each_tracing_cpu(cpu) {
6967                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6968         }
6969
6970         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6971
6972         /* don't look at user memory in panic mode */
6973         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6974
6975         switch (oops_dump_mode) {
6976         case DUMP_ALL:
6977                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6978                 break;
6979         case DUMP_ORIG:
6980                 iter.cpu_file = raw_smp_processor_id();
6981                 break;
6982         case DUMP_NONE:
6983                 goto out_enable;
6984         default:
6985                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6986                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6987         }
6988
6989         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6990
6991         /* Did function tracer already get disabled? */
6992         if (ftrace_is_dead()) {
6993                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6994                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6995         }
6996
6997         /*
6998          * We need to stop all tracing on all CPUs to read
6999          * the next buffer. This is a bit expensive, but it is
7000          * not done often. We read everything we can,
7001          * and then release the locks again.
7002          */
7003
7004         while (!trace_empty(&iter)) {
7005
7006                 if (!cnt)
7007                         printk(KERN_TRACE "---------------------------------\n");
7008
7009                 cnt++;
7010
7011                 /* reset all but tr, trace, and overruns */
7012                 memset(&iter.seq, 0,
7013                        sizeof(struct trace_iterator) -
7014                        offsetof(struct trace_iterator, seq));
7015                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7016                 iter.pos = -1;
7017
7018                 if (trace_find_next_entry_inc(&iter) != NULL) {
7019                         int ret;
7020
7021                         ret = print_trace_line(&iter);
7022                         if (ret != TRACE_TYPE_NO_CONSUME)
7023                                 trace_consume(&iter);
7024                 }
7025                 touch_nmi_watchdog();
7026
7027                 trace_printk_seq(&iter.seq);
7028         }
7029
7030         if (!cnt)
7031                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7032         else
7033                 printk(KERN_TRACE "---------------------------------\n");
7034
7035  out_enable:
7036         trace_flags |= old_userobj;
7037
7038         for_each_tracing_cpu(cpu) {
7039                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7040         }
7041         atomic_dec(&dump_running);
7042         local_irq_restore(flags);
7043 }
7044 EXPORT_SYMBOL_GPL(ftrace_dump);
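/*
 * ftrace_dump() is exported so module code can splat the trace buffer to
 * the console when it hits an unrecoverable state.  A minimal sketch,
 * where broken_state is a hypothetical condition in the caller:
 *
 *   if (WARN_ON(broken_state))
 *           ftrace_dump(DUMP_ALL);
 *
 * Note that ftrace_dump() turns tracing off itself, so tracing stays
 * disabled until it is re-enabled through the tracing_on file.
 */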
7045
7046 __init static int tracer_alloc_buffers(void)
7047 {
7048         int ring_buf_size;
7049         int ret = -ENOMEM;
7050
7051         /*
7052          * Make sure we don't accidentally add more trace options
7053          * than we have bits for.
7054          */
7055         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > 32);
7056
7057         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7058                 goto out;
7059
7060         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7061                 goto out_free_buffer_mask;
7062
7063         /* Only allocate trace_printk buffers if a trace_printk exists */
7064         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7065                 /* Must be called before global_trace.buffer is allocated */
7066                 trace_printk_init_buffers();
7067
7068         /* To save memory, keep the ring buffer size at its minimum */
7069         if (ring_buffer_expanded)
7070                 ring_buf_size = trace_buf_size;
7071         else
7072                 ring_buf_size = 1;
7073
7074         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7075         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7076
7077         raw_spin_lock_init(&global_trace.start_lock);
7078
7079         /* Used for event triggers */
7080         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7081         if (!temp_buffer)
7082                 goto out_free_cpumask;
7083
7084         if (trace_create_savedcmd() < 0)
7085                 goto out_free_temp_buffer;
7086
7087         /* TODO: make the number of buffers hotpluggable with CPUs */
7088         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7089                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7090                 WARN_ON(1);
7091                 goto out_free_savedcmd;
7092         }
7093
7094         if (global_trace.buffer_disabled)
7095                 tracing_off();
7096
7097         if (trace_boot_clock) {
7098                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7099                 if (ret < 0)
7100                         pr_warning("Trace clock %s not defined, going back to default\n",
7101                                    trace_boot_clock);
7102         }
7103
7104         /*
7105          * register_tracer() might reference current_trace, so it
7106          * needs to be set before we register anything. This is
7107          * just a bootstrap of current_trace anyway.
7108          */
7109         global_trace.current_trace = &nop_trace;
7110
7111         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7112
7113         ftrace_init_global_array_ops(&global_trace);
7114
7115         register_tracer(&nop_trace);
7116
7117         /* All seems OK, enable tracing */
7118         tracing_disabled = 0;
7119
7120         atomic_notifier_chain_register(&panic_notifier_list,
7121                                        &trace_panic_notifier);
7122
7123         register_die_notifier(&trace_die_notifier);
7124
7125         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7126
7127         INIT_LIST_HEAD(&global_trace.systems);
7128         INIT_LIST_HEAD(&global_trace.events);
7129         list_add(&global_trace.list, &ftrace_trace_arrays);
7130
7131         while (trace_boot_options) {
7132                 char *option;
7133
7134                 option = strsep(&trace_boot_options, ",");
7135                 trace_set_options(&global_trace, option);
7136         }
7137
7138         register_snapshot_cmd();
7139
7140         return 0;
7141
7142 out_free_savedcmd:
7143         free_saved_cmdlines_buffer(savedcmd);
7144 out_free_temp_buffer:
7145         ring_buffer_free(temp_buffer);
7146 out_free_cpumask:
7147         free_cpumask_var(global_trace.tracing_cpumask);
7148 out_free_buffer_mask:
7149         free_cpumask_var(tracing_buffer_mask);
7150 out:
7151         return ret;
7152 }
7153
7154 void __init trace_init(void)
7155 {
7156         if (tracepoint_printk) {
7157                 tracepoint_print_iter =
7158                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7159                 if (WARN_ON(!tracepoint_print_iter))
7160                         tracepoint_printk = 0;
7161         }
7162         tracer_alloc_buffers();
7163         trace_event_init();
7164 }
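/*
 * The tracepoint_printk flag tested above is normally set with the
 * "tp_printk" kernel command line option, which routes tracepoint output
 * to printk() through tracepoint_print_iter.  A hedged example that also
 * enables an event at boot:
 *
 *   linux ... tp_printk trace_event=sched:sched_switch
 */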
7165
7166 __init static int clear_boot_tracer(void)
7167 {
7168         /*
7169          * The default bootup tracer name points into an init section
7170          * that will be freed. This function is called at late_initcall
7171          * time. If the boot tracer was never registered, clear the
7172          * pointer out to prevent a later registration from accessing
7173          * the buffer that is about to be freed.
7174          */
7175         if (!default_bootup_tracer)
7176                 return 0;
7177
7178         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7179                default_bootup_tracer);
7180         default_bootup_tracer = NULL;
7181
7182         return 0;
7183 }
7184
7185 fs_initcall(tracer_init_tracefs);
7186 late_initcall(clear_boot_tracer);