perf/core: Free AUX pages in unmap path
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 525d11c..243df4b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1925,8 +1925,13 @@ event_sched_in(struct perf_event *event,
        if (event->state <= PERF_EVENT_STATE_OFF)
                return 0;
 
-       event->state = PERF_EVENT_STATE_ACTIVE;
-       event->oncpu = smp_processor_id();
+       WRITE_ONCE(event->oncpu, smp_processor_id());
+       /*
+        * Order event::oncpu write to happen before the ACTIVE state
+        * is visible.
+        */
+       smp_wmb();
+       WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 
        /*
         * Unthrottle events, since we scheduled we might have missed several
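
The WRITE_ONCE()/smp_wmb() sequence above is the writer half of a
classic publish/observe pairing: ->oncpu is made visible before the
ACTIVE state, so a reader that observes ACTIVE can trust ->oncpu. The
reader half is the smp_rmb() in __perf_event_stop(), added in the next
hunk. A minimal self-contained sketch of the pattern (all names below
are illustrative, not part of this patch):

	struct example_event {
		int	oncpu;
		int	state;
	};

	/* writer, cf. event_sched_in() */
	static void example_publish(struct example_event *ev)
	{
		WRITE_ONCE(ev->oncpu, smp_processor_id());
		smp_wmb();	/* order ->oncpu before ->state */
		WRITE_ONCE(ev->state, PERF_EVENT_STATE_ACTIVE);
	}

	/* reader, cf. __perf_event_stop(); -1 means "not running" */
	static int example_observe_cpu(struct example_event *ev)
	{
		if (READ_ONCE(ev->state) != PERF_EVENT_STATE_ACTIVE)
			return -1;
		smp_rmb();	/* pairs with smp_wmb() in example_publish() */
		return READ_ONCE(ev->oncpu);
	}
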
@@ -2358,6 +2363,29 @@ void perf_event_enable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
+static int __perf_event_stop(void *info)
+{
+       struct perf_event *event = info;
+
+       /* for AUX events, our job is done if the event is already inactive */
+       if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+               return 0;
+
+       /* matches smp_wmb() in event_sched_in() */
+       smp_rmb();
+
+       /*
+        * There is a window with interrupts enabled before we get here,
+        * so we need to check again lest we try to stop another CPU's event.
+        */
+       if (READ_ONCE(event->oncpu) != smp_processor_id())
+               return -EAGAIN;
+
+       event->pmu->stop(event, PERF_EF_UPDATE);
+
+       return 0;
+}
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
        /*
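
__perf_event_stop() is meant to run on the event's own CPU via
cpu_function_call(); the -EAGAIN return tells the caller that the event
migrated away between reading ->oncpu and the cross-call running, so
->oncpu must be re-read and the call retried. A hedged sketch of that
retry contract (example_stop_event() is illustrative only; in this
patch the cross-call actually goes through __perf_pmu_output_stop(),
added below):

	static void example_stop_event(struct perf_event *event)
	{
		int cpu, err;

		do {
			cpu = READ_ONCE(event->oncpu);
			if (cpu == -1)
				return;	/* not running: nothing to stop */
			err = cpu_function_call(cpu, __perf_event_stop,
						event);
		} while (err == -EAGAIN);	/* raced with migration */
	}
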
@@ -4667,6 +4695,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
                event->pmu->event_mapped(event);
 }
 
+static void perf_pmu_output_stop(struct perf_event *event);
+
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -4694,10 +4724,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
         */
        if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
            atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+               /*
+                * Stop all AUX events that are writing to this buffer,
+                * so that we can free its AUX pages and corresponding PMU
+                * data. Note that once rb::aux_mmap_count has dropped to
+                * they won't start any more (see perf_aux_output_begin()).
+                */
+               perf_pmu_output_stop(event);
+
+               /* now it's safe to free the pages */
                atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
                vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
 
+               /* this has to be the last one */
                rb_free_aux(rb);
+               WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+
                mutex_unlock(&event->mmap_mutex);
        }
 
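
The "they won't start any more" guarantee above relies on
perf_aux_output_begin() refusing to start a new AUX transaction once
the mapping is going away. A simplified sketch of that gate, under the
assumption that perf_aux_output_begin() checks roughly the following
(the helper name is illustrative):

	static bool example_aux_output_allowed(struct ring_buffer *rb)
	{
		/* unmap path dropped the last AUX mmap reference */
		if (!atomic_read(&rb->aux_mmap_count))
			return false;

		/* take a usage ref unless the buffer is being freed */
		return atomic_inc_not_zero(&rb->aux_refcount);
	}

With that gate in place, once perf_pmu_output_stop() has stopped every
running writer, no new aux_refcount references can appear; rb_free_aux()
drops the last one, which is what the WARN_ON_ONCE() asserts.
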
@@ -5768,6 +5810,80 @@ next:
        rcu_read_unlock();
 }
 
+struct remote_output {
+       struct ring_buffer      *rb;
+       int                     err;
+};
+
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+       struct perf_event *parent = event->parent;
+       struct remote_output *ro = data;
+       struct ring_buffer *rb = ro->rb;
+
+       if (!has_aux(event))
+               return;
+
+       if (!parent)
+               parent = event;
+
+       /*
+        * In case of inheritance, it will be the parent that links to the
+        * ring-buffer, but it will be the child that's actually using it:
+        */
+       if (rcu_dereference(parent->rb) == rb)
+               ro->err = __perf_event_stop(event);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+       struct perf_event *event = info;
+       struct pmu *pmu = event->pmu;
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+       struct remote_output ro = {
+               .rb     = event->rb,
+       };
+
+       rcu_read_lock();
+       perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro);
+       if (cpuctx->task_ctx)
+               perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+                                  &ro);
+       rcu_read_unlock();
+
+       return ro.err;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+       struct perf_event *iter;
+       int err, cpu;
+
+restart:
+       rcu_read_lock();
+       list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+               /*
+                * For per-CPU events, we need to make sure that neither they
+                * nor their children are running; for cpu==-1 events it's
+                * sufficient to stop the event itself if it's active, since
+                * it can't have children.
+                */
+               cpu = iter->cpu;
+               if (cpu == -1)
+                       cpu = READ_ONCE(iter->oncpu);
+
+               if (cpu == -1)
+                       continue;
+
+               err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+               if (err == -EAGAIN) {
+                       rcu_read_unlock();
+                       goto restart;
+               }
+       }
+       rcu_read_unlock();
+}
+
 /*
  * task tracking -- fork/exit
  *
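
A note on the CPU selection in perf_pmu_output_stop() above: per-CPU
events are stopped by a cross-call to their designated CPU, while task
(cpu == -1) events are chased to whichever CPU they are currently
running on, as published by event_sched_in(). An illustrative-only
helper capturing that choice:

	static int example_target_cpu(struct perf_event *event)
	{
		int cpu = event->cpu;	/* -1 for task events */

		if (cpu == -1)
			cpu = READ_ONCE(event->oncpu);

		/* -1 here means: not running, nothing to stop */
		return cpu;
	}

If the cross-call returns -EAGAIN, the event moved after ->oncpu was
sampled; the walk restarts from the top of rb->event_list, dropping the
RCU read lock in between.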