perf: Fix mux_interval hrtimer wreckage
authorPeter Zijlstra <peterz@infradead.org>
Wed, 15 Apr 2015 09:41:58 +0000 (11:41 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Wed, 22 Apr 2015 15:12:22 +0000 (17:12 +0200)
Thomas stumbled over the hrtimer_forward_now() in
perf_event_mux_interval_ms_store() and noticed its broken-ness.

You cannot just change the expiry time of an active timer, it will
destroy the red-black tree order and cause havoc.

Change it to (re)start the timer instead, (re)starting a timer will
dequeue and enqueue a timer and therefore preserve rb-tree order.

Since we cannot enqueue remotely, wrap the thing in
cpu_function_call(), this however mandates that we restrict ourselves
to online cpus. Also serialize the entire setting so we don't get
multiple concurrent threads trying to update to different values.

Also fix a problem in perf_mux_hrtimer_restart(), checking against
hrtimer_active() can actually loose us the timer when timer->state ==
HRTIMER_STATE_CALLBACK and the callback has already decided NORESTART.

Furthermore it doesn't make any sense to test
hrtimer_callback_running() when we already tested hrtimer_active(),
but with the above change, we explicitly must call it when
callback_running.

Lastly, rename a few functions:

  s/perf_cpu_hrtimer_/perf_mux_hrtimer_/ -- because I could not find
                                            the mux timer function

  s/\<hr\>/timer/ -- because that's the normal way of calling things.

Fixes: 62b856397927 ("perf: Add sysfs entry to adjust multiplexing interval per PMU")
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150415095011.863052571@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/events/core.c

index 05309fd..e7ed00b 100644 (file)
 
 static struct workqueue_struct *perf_wq;
 
+typedef int (*remote_function_f)(void *);
+
 struct remote_function_call {
        struct task_struct      *p;
-       int                     (*func)(void *info);
+       remote_function_f       func;
        void                    *info;
        int                     ret;
 };
@@ -86,7 +88,7 @@ static void remote_function(void *data)
  *         -EAGAIN - when the process moved away
  */
 static int
-task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+task_function_call(struct task_struct *p, remote_function_f func, void *info)
 {
        struct remote_function_call data = {
                .p      = p,
@@ -110,7 +112,7 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
  *
  * returns: @func return value or -ENXIO when the cpu is offline
  */
-static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+static int cpu_function_call(int cpu, remote_function_f func, void *info)
 {
        struct remote_function_call data = {
                .p      = NULL,
@@ -747,7 +749,7 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 /*
  * function must be called with interrupts disbled
  */
-static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
+static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
        struct perf_cpu_context *cpuctx;
        enum hrtimer_restart ret = HRTIMER_NORESTART;
@@ -771,7 +773,7 @@ static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
 }
 
 /* CPU is going down */
-void perf_cpu_hrtimer_cancel(int cpu)
+void perf_mux_hrtimer_cancel(int cpu)
 {
        struct perf_cpu_context *cpuctx;
        struct pmu *pmu;
@@ -798,11 +800,11 @@ void perf_cpu_hrtimer_cancel(int cpu)
        local_irq_restore(flags);
 }
 
-static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
+static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 {
-       struct hrtimer *hr = &cpuctx->hrtimer;
+       struct hrtimer *timer = &cpuctx->hrtimer;
        struct pmu *pmu = cpuctx->ctx.pmu;
-       int timer;
+       u64 interval;
 
        /* no multiplexing needed for SW PMU */
        if (pmu->task_ctx_nr == perf_sw_context)
@@ -812,29 +814,30 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
         * check default is sane, if not set then force to
         * default interval (1/tick)
         */
-       timer = pmu->hrtimer_interval_ms;
-       if (timer < 1)
-               timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
+       interval = pmu->hrtimer_interval_ms;
+       if (interval < 1)
+               interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
 
-       cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
+       cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
-       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
-       hr->function = perf_cpu_hrtimer_handler;
+       hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+       timer->function = perf_mux_hrtimer_handler;
 }
 
-static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
+static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 {
-       struct hrtimer *hr = &cpuctx->hrtimer;
+       struct hrtimer *timer = &cpuctx->hrtimer;
        struct pmu *pmu = cpuctx->ctx.pmu;
 
        /* not for SW PMU */
        if (pmu->task_ctx_nr == perf_sw_context)
-               return;
+               return 0;
 
-       if (hrtimer_active(hr))
-               return;
+       if (hrtimer_is_queued(timer))
+               return 0;
 
-       hrtimer_start(hr, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
+       hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
+       return 0;
 }
 
 void perf_pmu_disable(struct pmu *pmu)
@@ -1913,7 +1916,7 @@ group_sched_in(struct perf_event *group_event,
 
        if (event_sched_in(group_event, cpuctx, ctx)) {
                pmu->cancel_txn(pmu);
-               perf_cpu_hrtimer_restart(cpuctx);
+               perf_mux_hrtimer_restart(cpuctx);
                return -EAGAIN;
        }
 
@@ -1960,7 +1963,7 @@ group_error:
 
        pmu->cancel_txn(pmu);
 
-       perf_cpu_hrtimer_restart(cpuctx);
+       perf_mux_hrtimer_restart(cpuctx);
 
        return -EAGAIN;
 }
@@ -2233,7 +2236,7 @@ static int __perf_event_enable(void *info)
                 */
                if (leader != event) {
                        group_sched_out(leader, cpuctx, ctx);
-                       perf_cpu_hrtimer_restart(cpuctx);
+                       perf_mux_hrtimer_restart(cpuctx);
                }
                if (leader->attr.pinned) {
                        update_group_times(leader);
@@ -7143,6 +7146,8 @@ perf_event_mux_interval_ms_show(struct device *dev,
        return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
 }
 
+static DEFINE_MUTEX(mux_interval_mutex);
+
 static ssize_t
 perf_event_mux_interval_ms_store(struct device *dev,
                                 struct device_attribute *attr,
@@ -7162,17 +7167,21 @@ perf_event_mux_interval_ms_store(struct device *dev,
        if (timer == pmu->hrtimer_interval_ms)
                return count;
 
+       mutex_lock(&mux_interval_mutex);
        pmu->hrtimer_interval_ms = timer;
 
        /* update all cpuctx for this PMU */
-       for_each_possible_cpu(cpu) {
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 
-               if (hrtimer_active(&cpuctx->hrtimer))
-                       hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
+               cpu_function_call(cpu,
+                       (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
        }
+       put_online_cpus();
+       mutex_unlock(&mux_interval_mutex);
 
        return count;
 }
@@ -7277,7 +7286,7 @@ skip_type:
                lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
                cpuctx->ctx.pmu = pmu;
 
-               __perf_cpu_hrtimer_init(cpuctx, cpu);
+               __perf_mux_hrtimer_init(cpuctx, cpu);
 
                cpuctx->unique_pmu = pmu;
        }