Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf..9fc3be0 100644
@@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info)
        return 0;
 }
 
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
 {
        struct stop_event_data sd = {
                .event          = event,
-               .restart        = 1,
+               .restart        = restart,
        };
        int ret = 0;
 
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .group = group,
                        .ret = 0,
                };
-               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
-               /* The event must have been read from an online CPU: */
-               WARN_ON_ONCE(ret);
-               ret = ret ? : data.ret;
+               /*
+                * Purposely ignore the smp_call_function_single() return
+                * value.
+                *
+                * If event->oncpu isn't a valid CPU it means the event got
+                * scheduled out and that will have updated the event count.
+                *
+                * Therefore, either way, we'll have an up-to-date event count
+                * after this.
+                */
+               (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+               ret = data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
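
The window the removed WARN_ON_ONCE() could trip over is worth spelling out. Per the new comment, a failed IPI implies the event was scheduled out, and sched-out itself refreshes the count. A simplified interleaving (illustrative sketch, not the kernel's exact code paths):

    /*
     * CPU A: perf_event_read()           CPU B: scheduler
     * ------------------------           --------------------------------
     * event->state == ACTIVE,
     * event->oncpu == B
     *                                    sched-out of the event:
     *                                      pmu->read() folds the count,
     *                                      event->oncpu goes stale
     * smp_call_function_single(B, ...)
     *   -> may fail or find nothing;
     *      event->count is already
     *      up to date, so the return
     *      value can be discarded.
     */
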
@@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event,
                spin_unlock_irqrestore(&rb->event_lock, flags);
        }
 
+       /*
+        * Avoid racing with perf_mmap_close(AUX): stop the event
+        * before swizzling the event::rb pointer; if it's getting
+        * unmapped, its aux_mmap_count will be 0 and it won't
+        * restart. See the comment in __perf_pmu_output_stop().
+        *
+        * Data will inevitably be lost when set_output is done in
+        * mid-air, but then again, whoever does it like this is
+        * not in for the data anyway.
+        */
+       if (has_aux(event))
+               perf_event_stop(event, 0);
+
        rcu_assign_pointer(event->rb, rb);
 
        if (old_rb) {
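
The comment describes a publish-order requirement; the failure mode it closes is easiest to see side by side (a sketch based on the comments in this patch):

    /*
     * ring_buffer_attach(event, rb)        perf_mmap_close(AUX rb_old)
     * -----------------------------        -------------------------------
     * perf_event_stop(event, 0)            walks events whose event::rb ==
     * rcu_assign_pointer(event->rb, rb)      rb_old to stop AUX writers
     *
     * If the walk reads the new rb pointer and skips this event, the
     * skip is harmless: the event was stopped first, and with
     * aux_mmap_count == 0 it will not restart.
     */
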
@@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
 }
 
 void perf_event_exec(void)
@@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
 
        /*
         * In case of inheritance, it will be the parent that links to the
-        * ring-buffer, but it will be the child that's actually using it:
+        * ring-buffer, but it will be the child that's actually using it.
+        *
+        * We are using event::rb to determine if the event should be stopped,
+        * however this may race with ring_buffer_attach() (through set_output),
+        * which will make us skip the event that actually needs to be stopped.
+        * So ring_buffer_attach() has to stop an aux event before re-assigning
+        * its rb pointer.
         */
        if (rcu_dereference(parent->rb) == rb)
                ro->err = __perf_event_stop(&sd);
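
This is the reader side of that ordering: the rb comparison above is what can race with the pointer swizzle, which is exactly why ring_buffer_attach() must stop the event first. In short:

    /*
     * rcu_dereference(parent->rb) == rb sees either:
     *   - the old rb: the event is matched and stopped here; or
     *   - the new rb: the event is skipped, but ring_buffer_attach()
     *     already stopped it (restart == 0) before the swizzle, so
     *     nothing is left writing into the unmapped AUX area.
     */
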
@@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
 }
 
 /*
@@ -7022,7 +7049,7 @@ static int __perf_event_overflow(struct perf_event *event,
                irq_work_queue(&event->pending);
        }
 
-       event->overflow_handler(event, data, regs);
+       READ_ONCE(event->overflow_handler)(event, data, regs);
 
        if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
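
The READ_ONCE() pairs with the WRITE_ONCE() stores in perf_event_set_bpf_handler()/perf_event_free_bpf_handler() added further down: the handler pointer can be swapped while an overflow is being delivered, so the load must be a single, non-torn access. The idiom in isolation (a minimal sketch using the patch's names, not a complete compilation unit):

    /* ioctl path, perf_event_set_bpf_handler(): single marked store */
    WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);

    /* overflow path, possibly NMI context: one marked load, one call;
     * the compiler may not reload or tear the pointer in between, so
     * either the old handler or the new one runs, never a mix. */
    READ_ONCE(event->overflow_handler)(event, data, regs);
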
@@ -7637,11 +7664,83 @@ static void perf_event_free_filter(struct perf_event *event)
        ftrace_profile_free_filter(event);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+                                struct perf_sample_data *data,
+                                struct pt_regs *regs)
+{
+       struct bpf_perf_event_data_kern ctx = {
+               .data = data,
+               .regs = regs,
+       };
+       int ret = 0;
+
+       preempt_disable();
+       if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+               goto out;
+       rcu_read_lock();
+       ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+       rcu_read_unlock();
+out:
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       if (!ret)
+               return;
+
+       event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+       struct bpf_prog *prog;
+
+       if (event->overflow_handler_context)
+               /* hw breakpoint or kernel counter */
+               return -EINVAL;
+
+       if (event->prog)
+               return -EEXIST;
+
+       prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       event->prog = prog;
+       event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+       WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+       return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+       struct bpf_prog *prog = event->prog;
+
+       if (!prog)
+               return;
+
+       WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+       event->prog = NULL;
+       bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+       return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
        bool is_kprobe, is_tracepoint;
        struct bpf_prog *prog;
 
+       if (event->attr.type == PERF_TYPE_HARDWARE ||
+           event->attr.type == PERF_TYPE_SOFTWARE)
+               return perf_event_set_bpf_handler(event, prog_fd);
+
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -EINVAL;
 
@@ -7682,6 +7781,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 {
        struct bpf_prog *prog;
 
+       perf_event_free_bpf_handler(event);
+
        if (!event->tp_event)
                return;
 
@@ -7859,7 +7960,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
        mmput(mm);
 
 restart:
-       perf_event_restart(event);
+       perf_event_stop(event, 1);
 }
 
 /*
@@ -8998,6 +9099,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        if (!overflow_handler && parent_event) {
                overflow_handler = parent_event->overflow_handler;
                context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+               if (overflow_handler == bpf_overflow_handler) {
+                       struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+                       if (IS_ERR(prog)) {
+                               err = PTR_ERR(prog);
+                               goto err_ns;
+                       }
+                       event->prog = prog;
+                       event->orig_overflow_handler =
+                               parent_event->orig_overflow_handler;
+               }
+#endif
        }
 
        if (overflow_handler) {
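
On inherit (fork of a traced task), the child event copies the parent's handler pointers but takes its own reference on the program, so parent and child can be torn down independently; perf_event_free_bpf_handler() on each event drops exactly one reference. A sketch of the lifetime, using the patch's own helpers:

    /*
     * parent: PERF_EVENT_IOC_SET_BPF        -> bpf_prog_get_type()  (ref 1)
     * fork:   perf_event_alloc(inherit)     -> bpf_prog_inc()       (ref 2)
     * child exit:   perf_event_free_bpf_prog() -> bpf_prog_put()    (ref 1)
     * parent close: perf_event_free_bpf_prog() -> bpf_prog_put()    (ref 0)
     */
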