sched: Rate-limit newidle
authorMike Galbraith <efault@gmx.de>
Wed, 4 Nov 2009 16:53:50 +0000 (17:53 +0100)
committerIngo Molnar <mingo@elte.hu>
Wed, 4 Nov 2009 18:13:48 +0000 (19:13 +0100)
Rate limit newidle to migration_cost. It's a win for all
stages of sysbench oltp tests.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched.c
kernel/sched_debug.c

index ae026aa..f849212 100644 (file)
@@ -589,6 +589,8 @@ struct rq {
 
        u64 rt_avg;
        u64 age_stamp;
+       u64 idle_stamp;
+       u64 avg_idle;
 #endif
 
        /* calc_load related fields */
@@ -2353,6 +2355,17 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
        if (rq != orig_rq)
                update_rq_clock(rq);
 
+       if (rq->idle_stamp) {
+               u64 delta = rq->clock - rq->idle_stamp;
+               u64 max = 2*sysctl_sched_migration_cost;
+
+               if (delta > max)
+                       rq->avg_idle = max;
+               else
+                       update_avg(&rq->avg_idle, delta);
+               rq->idle_stamp = 0;
+       }
+
        WARN_ON(p->state != TASK_WAKING);
        cpu = task_cpu(p);
 
@@ -4389,6 +4402,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
 
+       this_rq->idle_stamp = this_rq->clock;
+
+       if (this_rq->avg_idle < sysctl_sched_migration_cost)
+               return;
+
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
 
@@ -4403,8 +4421,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                interval = msecs_to_jiffies(sd->balance_interval);
                if (time_after(next_balance, sd->last_balance + interval))
                        next_balance = sd->last_balance + interval;
-               if (pulled_task)
+               if (pulled_task) {
+                       this_rq->idle_stamp = 0;
                        break;
+               }
        }
        if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
                /*
index efb8440..6988cf0 100644 (file)
@@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu)
 
 #ifdef CONFIG_SCHEDSTATS
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
        P(yld_count);
 
        P(sched_switch);
        P(sched_count);
        P(sched_goidle);
+#ifdef CONFIG_SMP
+       P64(avg_idle);
+#endif
 
        P(ttwu_count);
        P(ttwu_local);