Merge branch 'sched/urgent' into sched/core

[cascardo/linux.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 700aa9a..3d1ee42 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -703,45 +703,18 @@ static __read_mostly char *sched_feat_names[] = {
  
  #undef SCHED_FEAT
  
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-       filp->private_data = inode->i_private;
-       return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-               size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
  {
-       char *buf;
-       int r = 0;
-       int len = 0;
         int i;
  
         for (i = 0; sched_feat_names[i]; i++) {
-               len += strlen(sched_feat_names[i]);
-               len += 4;
-       }
-
-       buf = kmalloc(len + 2, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       for (i = 0; sched_feat_names[i]; i++) {
-               if (sysctl_sched_features & (1UL << i))
-                       r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-               else
-                       r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+               if (!(sysctl_sched_features & (1UL << i)))
+                       seq_puts(m, "NO_");
+               seq_printf(m, "%s ", sched_feat_names[i]);
         }
+       seq_puts(m, "\n");
  
-       r += sprintf(buf + r, "\n");
-       WARN_ON(r >= len + 2);
-
-       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-       kfree(buf);
-
-       return r;
+       return 0;
  }
  
  static ssize_t
@@ -786,10 +759,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
         return cnt;
  }
  
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, sched_feat_show, NULL);
+}
+
  static struct file_operations sched_feat_fops = {
-       .open   = sched_feat_open,
-       .read   = sched_feat_read,
-       .write  = sched_feat_write,
+       .open           = sched_feat_open,
+       .write          = sched_feat_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
  };
  
  static __init int sched_init_debug(void)
@@ -1474,27 +1454,13 @@ static void
  update_group_shares_cpu(struct task_group *tg, int cpu,
                         unsigned long sd_shares, unsigned long sd_rq_weight)
  {
-       int boost = 0;
         unsigned long shares;
         unsigned long rq_weight;
  
         if (!tg->se[cpu])
                 return;
  
-       rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-       /*
-        * If there are currently no tasks on the cpu pretend there is one of
-        * average load so that when a new task gets to run here it will not
-        * get delayed by group starvation.
-        */
-       if (!rq_weight) {
-               boost = 1;
-               rq_weight = NICE_0_LOAD;
-       }
-
-       if (unlikely(rq_weight > sd_rq_weight))
-               rq_weight = sd_rq_weight;
+       rq_weight = tg->cfs_rq[cpu]->rq_weight;
  
         /*
          *           \Sum shares * rq_weight
@@ -1502,7 +1468,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
          *               \Sum rq_weight
          *
          */
-       shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+       shares = (sd_shares * rq_weight) / sd_rq_weight;
         shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
  
         if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1511,11 +1477,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
                 unsigned long flags;
  
                 spin_lock_irqsave(&rq->lock, flags);
-               /*
-                * record the actual number of shares, not the boosted amount.
-                */
-               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-               tg->cfs_rq[cpu]->rq_weight = rq_weight;
+               tg->cfs_rq[cpu]->shares = shares;
  
                 __set_se_shares(tg->se[cpu], shares);
                 spin_unlock_irqrestore(&rq->lock, flags);
@@ -1529,13 +1491,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
   */
  static int tg_shares_up(struct task_group *tg, void *data)
  {
-       unsigned long rq_weight = 0;
+       unsigned long weight, rq_weight = 0;
         unsigned long shares = 0;
         struct sched_domain *sd = data;
         int i;
  
         for_each_cpu_mask(i, sd->span) {
-               rq_weight += tg->cfs_rq[i]->load.weight;
+               /*
+                * If there are currently no tasks on the cpu pretend there
+                * is one of average load so that when a new task gets to
+                * run here it will not get delayed by group starvation.
+                */
+               weight = tg->cfs_rq[i]->load.weight;
+               if (!weight)
+                       weight = NICE_0_LOAD;
+
+               tg->cfs_rq[i]->rq_weight = weight;
+               rq_weight += weight;
                 shares += tg->cfs_rq[i]->shares;
         }
  
@@ -1545,9 +1517,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
         if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                 shares = tg->shares;
  
-       if (!rq_weight)
-               rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
         for_each_cpu_mask(i, sd->span)
                 update_group_shares_cpu(tg, i, shares, rq_weight);
  
@@ -2838,7 +2807,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
         return ret;
  }
  
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
  {
         spin_unlock(&busiest->lock);
@@ -6126,7 +6095,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
  
  /*
   * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
   */
  static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  {
@@ -6636,28 +6604,6 @@ early_initcall(migration_init);
  
  #ifdef CONFIG_SCHED_DEBUG
  
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-       switch (lvl) {
-       case SD_LV_NONE:
-                       return "NONE";
-       case SD_LV_SIBLING:
-                       return "SIBLING";
-       case SD_LV_MC:
-                       return "MC";
-       case SD_LV_CPU:
-                       return "CPU";
-       case SD_LV_NODE:
-                       return "NODE";
-       case SD_LV_ALLNODES:
-                       return "ALLNODES";
-       case SD_LV_MAX:
-                       return "MAX";
-
-       }
-       return "MAX";
-}
-
  static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                   cpumask_t *groupmask)
  {
@@ -6677,8 +6623,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 return -1;
         }
  
-       printk(KERN_CONT "span %s level %s\n",
-               str, sd_level_to_string(sd->level));
+       printk(KERN_CONT "span %s level %s\n", str, sd->name);
  
         if (!cpu_isset(cpu, sd->span)) {
                 printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7334,13 +7279,21 @@ struct allmasks {
  };
  
  #if    NR_CPUS > 128
-#define        SCHED_CPUMASK_ALLOC             1
-#define        SCHED_CPUMASK_FREE(v)           kfree(v)
-#define        SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+#define SCHED_CPUMASK_DECLARE(v)       struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+       *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+       kfree(masks);
+}
  #else
-#define        SCHED_CPUMASK_ALLOC             0
-#define        SCHED_CPUMASK_FREE(v)
-#define        SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+#define SCHED_CPUMASK_DECLARE(v)       struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
  #endif
  
  #define        SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
@@ -7416,9 +7369,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 return -ENOMEM;
         }
  
-#if SCHED_CPUMASK_ALLOC
         /* get space for all scratch cpumask variables */
-       allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+       sched_cpumask_alloc(&allmasks);
         if (!allmasks) {
                 printk(KERN_WARNING "Cannot alloc cpumask array\n");
                 kfree(rd);
@@ -7427,7 +7379,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
  #endif
                 return -ENOMEM;
         }
-#endif
+
         tmpmask = (cpumask_t *)allmasks;
  
  
@@ -7681,13 +7633,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 cpu_attach_domain(sd, rd, i);
         }
  
-       SCHED_CPUMASK_FREE((void *)allmasks);
+       sched_cpumask_free(allmasks);
         return 0;
  
  #ifdef CONFIG_NUMA
  error:
         free_sched_groups(cpu_map, tmpmask);
-       SCHED_CPUMASK_FREE((void *)allmasks);
+       sched_cpumask_free(allmasks);
         kfree(rd);
         return -ENOMEM;
  #endif
@@ -7751,8 +7703,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
         cpumask_t tmpmask;
         int i;
  
-       unregister_sched_domain_sysctl();
-
         for_each_cpu_mask_nr(i, *cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
         synchronize_sched();
@@ -7830,7 +7780,7 @@ match1:
                 ndoms_cur = 0;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-               dattr_new = NULL;
+               WARN_ON_ONCE(dattr_new);
         }
  
         /* Build new domains */
@@ -8490,7 +8440,7 @@ static
  int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  {
         struct cfs_rq *cfs_rq;
-       struct sched_entity *se, *parent_se;
+       struct sched_entity *se;
         struct rq *rq;
         int i;
  
@@ -8506,18 +8456,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
  
-               cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+                                     GFP_KERNEL, cpu_to_node(i));
                 if (!cfs_rq)
                         goto err;
  
-               se = kmalloc_node(sizeof(struct sched_entity),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               se = kzalloc_node(sizeof(struct sched_entity),
+                                 GFP_KERNEL, cpu_to_node(i));
                 if (!se)
                         goto err;
  
-               parent_se = parent ? parent->se[i] : NULL;
-               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
         }
  
         return 1;
@@ -8578,7 +8527,7 @@ static
  int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  {
         struct rt_rq *rt_rq;
-       struct sched_rt_entity *rt_se, *parent_se;
+       struct sched_rt_entity *rt_se;
         struct rq *rq;
         int i;
  
@@ -8595,18 +8544,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
  
-               rt_rq = kmalloc_node(sizeof(struct rt_rq),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               rt_rq = kzalloc_node(sizeof(struct rt_rq),
+                                    GFP_KERNEL, cpu_to_node(i));
                 if (!rt_rq)
                         goto err;
  
-               rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+                                    GFP_KERNEL, cpu_to_node(i));
                 if (!rt_se)
                         goto err;
  
-               parent_se = parent ? parent->rt_se[i] : NULL;
-               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
         }
  
         return 1;
@@ -9249,11 +9197,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
   * (balbir@in.ibm.com).
   */
  
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
  struct cpuacct {
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
         u64 *cpuusage;
+       struct cpuacct *parent;
  };
  
  struct cgroup_subsys cpuacct_subsys;
@@ -9287,6 +9236,9 @@ static struct cgroup_subsys_state *cpuacct_create(
                 return ERR_PTR(-ENOMEM);
         }
  
+       if (cgrp->parent)
+               ca->parent = cgroup_ca(cgrp->parent);
+
         return &ca->css;
  }
  
@@ -9366,14 +9318,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
  static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  {
         struct cpuacct *ca;
+       int cpu;
  
         if (!cpuacct_subsys.active)
                 return;
  
+       cpu = task_cpu(tsk);
         ca = task_ca(tsk);
-       if (ca) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
  
+       for (; ca; ca = ca->parent) {
+               u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
  }