sched/fair: Fix PELT integrity for new tasks
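
The hunks below make three coordinated changes: sched_fork() now leaves the child in a new TASK_NEW state and initializes its load-tracking entity via init_entity_runnable_average(), wake_up_new_task() flips the state to TASK_RUNNING under p->pi_lock, and cpu_cgroup_can_attach() takes the same lock and returns -EINVAL for a task that is still TASK_NEW, so a cgroup move can no longer try to detach/attach PELT state on a task that has not been attached yet.

The stand-alone user-space sketch below models only that gating pattern; it is an illustration, not kernel code, and every name in it (toy_task, toy_sched_fork, toy_wake_up_new_task, toy_can_attach) is invented for the example:

/*
 * Minimal user-space sketch (not kernel code) of the gating this patch
 * introduces: a task starts out NEW, the attach path refuses to act on
 * NEW tasks, and the wakeup path flips the state to RUNNING under the
 * same lock.  All names here are invented for illustration.
 */
#include <pthread.h>
#include <stdio.h>

enum toy_state { TOY_TASK_NEW, TOY_TASK_RUNNING };

struct toy_task {
	pthread_mutex_t pi_lock;	/* stands in for p->pi_lock */
	enum toy_state state;
};

/* Mirrors sched_fork(): the child exists but is marked NEW. */
static void toy_sched_fork(struct toy_task *p)
{
	pthread_mutex_init(&p->pi_lock, NULL);
	p->state = TOY_TASK_NEW;
}

/* Mirrors wake_up_new_task(): the state flips under the lock. */
static void toy_wake_up_new_task(struct toy_task *p)
{
	pthread_mutex_lock(&p->pi_lock);
	p->state = TOY_TASK_RUNNING;
	pthread_mutex_unlock(&p->pi_lock);
}

/* Mirrors cpu_cgroup_can_attach(): reject tasks that are still NEW. */
static int toy_can_attach(struct toy_task *p)
{
	int ret = 0;

	pthread_mutex_lock(&p->pi_lock);
	if (p->state == TOY_TASK_NEW)
		ret = -1;		/* -EINVAL in the real code */
	pthread_mutex_unlock(&p->pi_lock);

	return ret;
}

int main(void)
{
	struct toy_task p;

	toy_sched_fork(&p);
	printf("attach before wakeup: %d\n", toy_can_attach(&p));	/* -1 */
	toy_wake_up_new_task(&p);
	printf("attach after wakeup:  %d\n", toy_can_attach(&p));	/*  0 */
	return 0;
}

Built with a pthread-enabled C compiler, the first toy_can_attach() call fails because the task is still NEW; after toy_wake_up_new_task() it succeeds, mirroring the -EINVAL path the last hunk adds to cpu_cgroup_can_attach().
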
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3d856c4..14afa51 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
        __sched_fork(clone_flags, p);
        /*
-        * We mark the process as running here. This guarantees that
+        * We mark the process as NEW here. This guarantees that
         * nobody will actually run it, and a signal or other external
         * event cannot wake it up and insert it on the runqueue either.
         */
-       p->state = TASK_RUNNING;
+       p->state = TASK_NEW;
 
        /*
         * Make sure we do not leak PI boosting priority to the child.
@@ -2383,6 +2383,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
                p->sched_class = &fair_sched_class;
        }
 
+       init_entity_runnable_average(&p->se);
+
        /*
         * The child is not yet in the pid-hash so no cgroup attach races,
         * and the cgroup is pinned to this child due to cgroup_fork()
@@ -2529,9 +2531,8 @@ void wake_up_new_task(struct task_struct *p)
        struct rq_flags rf;
        struct rq *rq;
 
-       /* Initialize new task's runnable average */
-       init_entity_runnable_average(&p->se);
        raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+       p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
        /*
         * Fork balancing, do it here and not earlier because:
@@ -8237,6 +8238,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
        struct task_struct *task;
        struct cgroup_subsys_state *css;
+       int ret = 0;
 
        cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -8247,8 +8249,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
                if (task->sched_class != &fair_sched_class)
                        return -EINVAL;
 #endif
+               /*
+                * Serialize against wake_up_new_task() such that if it's
+                * running, we're sure to observe its full state.
+                */
+               raw_spin_lock_irq(&task->pi_lock);
+               /*
+                * Avoid calling sched_move_task() before wake_up_new_task()
+                * has happened. This would lead to problems with PELT, due to
+                * move wanting to detach+attach while we're not attached yet.
+                */
+               if (task->state == TASK_NEW)
+                       ret = -EINVAL;
+               raw_spin_unlock_irq(&task->pi_lock);
+
+               if (ret)
+                       break;
        }
-       return 0;
+       return ret;
 }
 
 static void cpu_cgroup_attach(struct cgroup_taskset *tset)