perf/core: Update filters only on executable mmap

[cascardo/linux.git] / kernel / events / core.c
diff --git a/kernel/events/core.c b/kernel/events/core.c

index 356a6c7..9a030a9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@ unlock:
         return ret;
  }
  
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-       struct event_function_struct efs = {
-               .event = event,
-               .func = func,
-               .data = data,
-       };
-
-       int ret = event_function(&efs);
-       WARN_ON_ONCE(ret);
-}
-
  static void event_function_call(struct perf_event *event, event_f func, void *data)
  {
         struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ again:
         raw_spin_unlock_irq(&ctx->lock);
  }
  
+/*
+ * Like event_function_call() + event_function(), but calls @func right here:
+ * it hard-assumes IRQs are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct task_struct *task = READ_ONCE(ctx->task);
+       struct perf_event_context *task_ctx = NULL;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       if (task) {
+               if (task == TASK_TOMBSTONE)
+                       return;
+
+               task_ctx = ctx; /* task context: hand it to perf_ctx_lock() too */
+       }
+
+       perf_ctx_lock(cpuctx, task_ctx);
+
+       task = ctx->task; /* re-read now that perf_ctx_lock() has been taken */
+       if (task == TASK_TOMBSTONE)
+               goto unlock;
+
+       if (task) {
+               /*
+                * An active task context must belong to current and be the
+                * one installed in cpuctx; otherwise we're screwed, since we
+                * cannot IPI to somewhere else.
+                */
+               if (ctx->is_active) {
+                       if (WARN_ON_ONCE(task != current))
+                               goto unlock;
+
+                       if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+                               goto unlock;
+               }
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx); /* no task: expect cpuctx's own ctx */
+       }
+
+       func(event, cpuctx, ctx, data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+}
+
  #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                        PERF_FLAG_FD_OUTPUT  |\
                        PERF_FLAG_PID_CGROUP |\
@@ -448,7 +484,7 @@ static u64 __report_allowed;
  
  static void perf_duration_warn(struct irq_work *w)
  {
-       printk_ratelimited(KERN_WARNING
+       printk_ratelimited(KERN_INFO
                 "perf: interrupt took too long (%lld > %lld), lowering "
                 "kernel.perf_event_max_sample_rate to %d\n",
                 __report_avg, __report_allowed,
@@ -843,6 +879,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                 }
         }
  }
+
+/*
+ * Adjust ctx->nr_cgroups for @event; set cpuctx->cgrp when the first cgroup
+ * event is added (@add) and clear it when the last one is removed (!@add).
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+       struct perf_cpu_context *cpuctx;
+
+       if (!is_cgroup_event(event))
+               return;
+
+       if (add && ctx->nr_cgroups++) /* count was already non-zero */
+               return;
+       else if (!add && --ctx->nr_cgroups) /* other cgroup events remain */
+               return;
+       /*
+        * Only the first add or the last removal gets here. Because cgroup
+        * events are always per-cpu events, we are on the right CPU.
+        */
+       cpuctx = __get_cpu_context(ctx);
+       cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
  #else /* !CONFIG_CGROUP_PERF */
  
  static inline bool
@@ -920,6 +982,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                          struct perf_event_context *ctx)
  {
  }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{ /* Empty stub (presumably the !CONFIG_CGROUP_PERF variant). */
+}
+
  #endif
  
  /*
@@ -1392,6 +1461,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
  static void
  list_add_event(struct perf_event *event, struct perf_event_context *ctx)
  {
+
         lockdep_assert_held(&ctx->lock);
  
         WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1482,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                 list_add_tail(&event->group_entry, list);
         }
  
-       if (is_cgroup_event(event))
-               ctx->nr_cgroups++;
+       list_update_cgroup_event(event, ctx, true);
  
         list_add_rcu(&event->event_entry, &ctx->event_list);
         ctx->nr_events++;
@@ -1581,8 +1650,6 @@ static void perf_group_attach(struct perf_event *event)
  static void
  list_del_event(struct perf_event *event, struct perf_event_context *ctx)
  {
-       struct perf_cpu_context *cpuctx;
-
         WARN_ON_ONCE(event->ctx != ctx);
         lockdep_assert_held(&ctx->lock);
  
@@ -1594,20 +1661,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
  
         event->attach_state &= ~PERF_ATTACH_CONTEXT;
  
-       if (is_cgroup_event(event)) {
-               ctx->nr_cgroups--;
-               /*
-                * Because cgroup events are always per-cpu events, this will
-                * always be called from the right CPU.
-                */
-               cpuctx = __get_cpu_context(ctx);
-               /*
-                * If there are no more cgroup events then clear cgrp to avoid
-                * stale pointer in update_cgrp_time_from_cpuctx().
-                */
-               if (!ctx->nr_cgroups)
-                       cpuctx->cgrp = NULL;
-       }
+       list_update_cgroup_event(event, ctx, false);
  
         ctx->nr_events--;
         if (event->attr.inherit_stat)
@@ -1716,8 +1770,8 @@ static inline int pmu_filter_match(struct perf_event *event)
  static inline int
  event_filter_match(struct perf_event *event)
  {
-       return (event->cpu == -1 || event->cpu == smp_processor_id())
-           && perf_cgroup_match(event) && pmu_filter_match(event);
+       return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+              perf_cgroup_match(event) && pmu_filter_match(event);
  }
  
  static void
@@ -1737,8 +1791,8 @@ event_sched_out(struct perf_event *event,
          * maintained, otherwise bogus information is return
          * via read() for time_enabled, time_running:
          */
-       if (event->state == PERF_EVENT_STATE_INACTIVE
-           && !event_filter_match(event)) {
+       if (event->state == PERF_EVENT_STATE_INACTIVE &&
+           !event_filter_match(event)) {
                 delta = tstamp - event->tstamp_stopped;
                 event->tstamp_running += delta;
                 event->tstamp_stopped = tstamp;
@@ -2236,10 +2290,15 @@ perf_install_in_context(struct perf_event_context *ctx,
  
         lockdep_assert_held(&ctx->mutex);
  
-       event->ctx = ctx;
         if (event->cpu != -1)
                 event->cpu = cpu;
  
+       /*
+        * Ensures that if we can observe event->ctx, both the event and ctx
+        * will be 'complete'. See perf_iterate_sb_cpu().
+        */
+       smp_store_release(&event->ctx, ctx);
+
         if (!task) {
                 cpu_function_call(cpu, __perf_install_in_context, event);
                 return;
@@ -5969,6 +6028,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
         struct perf_event *event;
  
         list_for_each_entry_rcu(event, &pel->list, sb_list) {
+               /*
+                * Skip events that are not fully formed yet; ensure that
+                * if we observe event->ctx, both event and ctx will be
+                * complete enough. See perf_install_in_context().
+                */
+               if (!smp_load_acquire(&event->ctx))
+                       continue;
+
                 if (event->state < PERF_EVENT_STATE_INACTIVE)
                         continue;
                 if (!event_filter_match(event))
@@ -6622,6 +6689,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
         struct perf_event_context *ctx;
         int ctxn;
  
+       /*
+        * Data tracing isn't supported yet and as such there is no need
+        * to keep track of anything that isn't related to executable code:
+        */
+       if (!(vma->vm_flags & VM_EXEC))
+               return;
+
         rcu_read_lock();
         for_each_task_context_nr(ctxn) {
                 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -7905,8 +7979,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
                                         goto fail;
                         }
  
-                       if (token == IF_SRC_FILE) {
-                               filename = match_strdup(&args[2]);
+                       if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+                               int fpos = filter->range ? 2 : 1;
+
+                               filename = match_strdup(&args[fpos]);
                                 if (!filename) {
                                         ret = -ENOMEM;
                                         goto fail;