perf core: Separate accounting of contexts and real addresses in a stack trace
author Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 12 May 2016 16:06:21 +0000 (13:06 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 17 May 2016 02:11:53 +0000 (23:11 -0300)
The perf_sample->ip_callchain->nr value counts all the entries in the
ip_callchain->ip[] array, both real addresses and PERF_CONTEXT_{KERNEL,USER,etc}
markers, while what the user expects is that the limit set in the
kernel.perf_event_max_stack sysctl, or in the upcoming per-event
perf_event_attr.sample_max_stack knob, be honoured in terms of IP addresses in
the stack trace.

So allocate a bunch of extra entries for contexts, and do the accounting
via perf_callchain_entry_ctx struct members.

A new sysctl, kernel.perf_event_max_contexts_per_stack is also
introduced for investigating possible bugs in the callchain
implementation by some arch.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-3b4wnqk340c4sg4gwkfdi9yk@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Documentation/sysctl/kernel.txt
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/events/callchain.c
kernel/sysctl.c

index daabdd7..a3683ce 100644 (file)
@@ -61,6 +61,7 @@ show up in /proc/sys/kernel:
 - perf_cpu_time_max_percent
 - perf_event_paranoid
 - perf_event_max_stack
+- perf_event_max_contexts_per_stack
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -668,6 +669,19 @@ The default value is 127.
 
 ==============================================================
 
+perf_event_max_contexts_per_stack:
+
+Controls maximum number of stack frame context entries for
+(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
+instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 8.
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
index 2024b14..6b87be9 100644 (file)
@@ -65,6 +65,8 @@ struct perf_callchain_entry_ctx {
        struct perf_callchain_entry *entry;
        u32                         max_stack;
        u32                         nr;
+       short                       contexts;
+       bool                        contexts_maxed;
 };
 
 struct perf_raw_record {
@@ -1078,12 +1080,24 @@ extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
 extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
 
-#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context)
+static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
+{
+       if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
+               struct perf_callchain_entry *entry = ctx->entry;
+               entry->ip[entry->nr++] = ip;
+               ++ctx->contexts;
+               return 0;
+       } else {
+               ctx->contexts_maxed = true;
+               return -1; /* no more room, stop walking the stack */
+       }
+}
 
 static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
 {
-       if (ctx->nr < ctx->max_stack) {
+       if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
                struct perf_callchain_entry *entry = ctx->entry;
                entry->ip[entry->nr++] = ip;
                ++ctx->nr;
index 43fc8d2..36ce552 100644 (file)
@@ -862,6 +862,7 @@ enum perf_event_type {
 };
 
 #define PERF_MAX_STACK_DEPTH           127
+#define PERF_MAX_CONTEXTS_PER_STACK      8
 
 enum perf_callchain_context {
        PERF_CONTEXT_HV                 = (__u64)-32,
index ca64573..179ef46 100644 (file)
@@ -19,11 +19,13 @@ struct callchain_cpus_entries {
 };
 
 int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;
 
 static inline size_t perf_callchain_entry__sizeof(void)
 {
        return (sizeof(struct perf_callchain_entry) +
-               sizeof(__u64) * sysctl_perf_event_max_stack);
+               sizeof(__u64) * (sysctl_perf_event_max_stack +
+                                sysctl_perf_event_max_contexts_per_stack));
 }
 
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
@@ -197,6 +199,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
        ctx.entry     = entry;
        ctx.max_stack = max_stack;
        ctx.nr        = entry->nr = init_nr;
+       ctx.contexts       = 0;
+       ctx.contexts_maxed = false;
 
        if (kernel && !user_mode(regs)) {
                if (add_mark)
@@ -228,6 +232,10 @@ exit_put:
        return entry;
 }
 
+/*
+ * Used for sysctl_perf_event_max_stack and
+ * sysctl_perf_event_max_contexts_per_stack.
+ */
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
index 0ec6907..bec4c11 100644 (file)
@@ -1156,6 +1156,15 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &six_hundred_forty_kb,
        },
+       {
+               .procname       = "perf_event_max_contexts_per_stack",
+               .data           = &sysctl_perf_event_max_contexts_per_stack,
+               .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
+               .mode           = 0644,
+               .proc_handler   = perf_event_max_stack_handler,
+               .extra1         = &zero,
+               .extra2         = &one_thousand,
+       },
 #endif
 #ifdef CONFIG_KMEMCHECK
        {