Merge branch 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Oct 2016 19:18:50 +0000 (12:18 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Oct 2016 19:18:50 +0000 (12:18 -0700)
Pull cgroup updates from Tejun Heo:

 - tracepoints for basic cgroup management operations added

 - kernfs and cgroup path formatting functions updated to behave in the
   style of strlcpy() (see the usage sketch after this list)

 - non-critical bug fixes
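
A minimal sketch of the new strlcpy()-style convention (illustrative only;
the surrounding function, the "cgrp" pointer, and the error handling are
assumed here, not taken from the merge itself): callers now check an
integer length instead of a returned pointer:

	char buf[PATH_MAX];
	int len;

	/* cgroup_path()/kernfs_path() now return the full path length */
	len = cgroup_path(cgrp, buf, PATH_MAX);
	if (len < 0)
		return len;		/* -errno on failure */
	if (len >= PATH_MAX)
		return -ENAMETOOLONG;	/* buf holds a truncated, NUL-terminated path */
	pr_info("cgroup path: %s\n", buf);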

* 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  blkcg: Unlock blkcg_pol_mutex only once when cpd == NULL
  cgroup: fix error handling regressions in proc_cgroup_show() and cgroup_release_agent()
  cpuset: fix error handling regression in proc_cpuset_show()
  cgroup: add tracepoints for basic operations
  cgroup: make cgroup_path() and friends behave in the style of strlcpy()
  kernfs: remove kernfs_path_len()
  kernfs: make kernfs_path*() behave in the style of strlcpy()
  kernfs: add dummy implementation of kernfs_path_from_node()

fs/kernfs/dir.c
include/linux/blk-cgroup.h
include/linux/cgroup.h
kernel/cgroup.c
kernel/cpuset.c
kernel/sched/debug.c

diff --combined fs/kernfs/dir.c
@@@ -110,8 -110,9 +110,9 @@@ static struct kernfs_node *kernfs_commo
   * kn_to:   /n1/n2/n3         [depth=3]
   * result:  /../..
   *
-  * return value: length of the string.  If greater than buflen,
-  * then contents of buf are undefined.  On error, -1 is returned.
+  * Returns the length of the full path.  If the full length is equal to or
+  * greater than @buflen, @buf contains the truncated path with the trailing
+  * '\0'.  On error, -errno is returned.
   */
  static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
                                        struct kernfs_node *kn_from,
  {
        struct kernfs_node *kn, *common;
        const char parent_str[] = "/..";
-       size_t depth_from, depth_to, len = 0, nlen = 0;
-       char *p;
-       int i;
+       size_t depth_from, depth_to, len = 0;
+       int i, j;
  
        if (!kn_from)
                kn_from = kernfs_root(kn_to)->kn;
  
        common = kernfs_common_ancestor(kn_from, kn_to);
        if (WARN_ON(!common))
-               return -1;
+               return -EINVAL;
  
        depth_to = kernfs_depth(common, kn_to);
        depth_from = kernfs_depth(common, kn_from);
                               len < buflen ? buflen - len : 0);
  
        /* Calculate how many bytes we need for the rest */
-       for (kn = kn_to; kn != common; kn = kn->parent)
-               nlen += strlen(kn->name) + 1;
-       if (len + nlen >= buflen)
-               return len + nlen;
-       p = buf + len + nlen;
-       *p = '\0';
-       for (kn = kn_to; kn != common; kn = kn->parent) {
-               size_t tmp = strlen(kn->name);
-               p -= tmp;
-               memcpy(p, kn->name, tmp);
-               *(--p) = '/';
+       for (i = depth_to - 1; i >= 0; i--) {
+               for (kn = kn_to, j = 0; j < i; j++)
+                       kn = kn->parent;
+               len += strlcpy(buf + len, "/",
+                              len < buflen ? buflen - len : 0);
+               len += strlcpy(buf + len, kn->name,
+                              len < buflen ? buflen - len : 0);
        }
  
-       return len + nlen;
+       return len;
  }
  
  /**
@@@ -185,29 -179,6 +179,6 @@@ int kernfs_name(struct kernfs_node *kn
        return ret;
  }
  
- /**
-  * kernfs_path_len - determine the length of the full path of a given node
-  * @kn: kernfs_node of interest
-  *
-  * The returned length doesn't include the space for the terminating '\0'.
-  */
- size_t kernfs_path_len(struct kernfs_node *kn)
- {
-       size_t len = 0;
-       unsigned long flags;
-       spin_lock_irqsave(&kernfs_rename_lock, flags);
-       do {
-               len += strlen(kn->name) + 1;
-               kn = kn->parent;
-       } while (kn && kn->parent);
-       spin_unlock_irqrestore(&kernfs_rename_lock, flags);
-       return len;
- }
  /**
   * kernfs_path_from_node - build path of node @to relative to @from.
   * @from: parent kernfs_node relative to which we need to build the path
   * path (which includes '..'s) as needed to reach from @from to @to is
   * returned.
   *
-  * If @buf isn't long enough, the return value will be greater than @buflen
-  * and @buf contents are undefined.
+  * Returns the length of the full path.  If the full length is equal to or
+  * greater than @buflen, @buf contains the truncated path with the trailing
+  * '\0'.  On error, -errno is returned.
   */
  int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
                          char *buf, size_t buflen)
  }
  EXPORT_SYMBOL_GPL(kernfs_path_from_node);
  
- /**
-  * kernfs_path - build full path of a given node
-  * @kn: kernfs_node of interest
-  * @buf: buffer to copy @kn's name into
-  * @buflen: size of @buf
-  *
-  * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
-  * path is built from the end of @buf so the returned pointer usually
-  * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
-  * and %NULL is returned.
-  */
- char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
- {
-       int ret;
-       ret = kernfs_path_from_node(kn, NULL, buf, buflen);
-       if (ret < 0 || ret >= buflen)
-               return NULL;
-       return buf;
- }
- EXPORT_SYMBOL_GPL(kernfs_path);
  /**
   * pr_cont_kernfs_name - pr_cont name of a kernfs_node
   * @kn: kernfs_node of interest
@@@ -1096,17 -1046,13 +1046,17 @@@ static int kernfs_iop_rmdir(struct inod
  }
  
  static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
 -                           struct inode *new_dir, struct dentry *new_dentry)
 +                           struct inode *new_dir, struct dentry *new_dentry,
 +                           unsigned int flags)
  {
        struct kernfs_node *kn  = old_dentry->d_fsdata;
        struct kernfs_node *new_parent = new_dir->i_private;
        struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
        int ret;
  
 +      if (flags)
 +              return -EINVAL;
 +
        if (!scops || !scops->rename)
                return -EPERM;
  
@@@ -1130,6 -1076,9 +1080,6 @@@ const struct inode_operations kernfs_di
        .permission     = kernfs_iop_permission,
        .setattr        = kernfs_iop_setattr,
        .getattr        = kernfs_iop_getattr,
 -      .setxattr       = kernfs_iop_setxattr,
 -      .removexattr    = kernfs_iop_removexattr,
 -      .getxattr       = kernfs_iop_getxattr,
        .listxattr      = kernfs_iop_listxattr,
  
        .mkdir          = kernfs_iop_mkdir,

diff --combined include/linux/blk-cgroup.h
@@@ -45,7 -45,7 +45,7 @@@ struct blkcg 
        spinlock_t                      lock;
  
        struct radix_tree_root          blkg_tree;
 -      struct blkcg_gq                 *blkg_hint;
 +      struct blkcg_gq __rcu           *blkg_hint;
        struct hlist_head               blkg_list;
  
        struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
@@@ -343,16 -343,7 +343,7 @@@ static inline struct blkcg *cpd_to_blkc
   */
  static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
  {
-       char *p;
-       p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-       if (!p) {
-               strncpy(buf, "<unavailable>", buflen);
-               return -ENAMETOOLONG;
-       }
-       memmove(buf, p, buf + buflen - p);
-       return 0;
+       return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
  }
  
  /**
diff --combined include/linux/cgroup.h
@@@ -97,7 -97,7 +97,7 @@@ int cgroup_add_legacy_cftypes(struct cg
  int cgroup_rm_cftypes(struct cftype *cfts);
  void cgroup_file_notify(struct cgroup_file *cfile);
  
- char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
+ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
  int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
  int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk);
@@@ -497,23 -497,6 +497,23 @@@ static inline bool cgroup_is_descendant
        return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
  }
  
 +/**
 + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
 + * @task: the task to be tested
 + * @ancestor: possible ancestor of @task's cgroup
 + *
 + * Tests whether the cgroup of @task in the default hierarchy is a
 + * descendant of @ancestor.  It follows all the same rules as
 + * cgroup_is_descendant() and only applies to the default hierarchy.
 + */
 +static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 +                                             struct cgroup *ancestor)
 +{
 +      struct css_set *cset = task_css_set(task);
 +
 +      return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
 +}
 +
  /* no synchronization, the result can only be used as a hint */
  static inline bool cgroup_is_populated(struct cgroup *cgrp)
  {
@@@ -555,8 -538,7 +555,7 @@@ static inline int cgroup_name(struct cg
        return kernfs_name(cgrp->kn, buf, buflen);
  }
  
- static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
-                                             size_t buflen)
+ static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen)
  {
        return kernfs_path(cgrp->kn, buf, buflen);
  }
@@@ -574,7 -556,6 +573,7 @@@ static inline void pr_cont_cgroup_path(
  #else /* !CONFIG_CGROUPS */
  
  struct cgroup_subsys_state;
 +struct cgroup;
  
  static inline void css_put(struct cgroup_subsys_state *css) {}
  static inline int cgroup_attach_task_all(struct task_struct *from,
@@@ -592,11 -573,6 +591,11 @@@ static inline void cgroup_free(struct t
  static inline int cgroup_init_early(void) { return 0; }
  static inline int cgroup_init(void) { return 0; }
  
 +static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 +                                             struct cgroup *ancestor)
 +{
 +      return true;
 +}
  #endif /* !CONFIG_CGROUPS */
  
  /*
@@@ -644,7 -620,6 +643,7 @@@ struct cgroup_namespace 
        atomic_t                count;
        struct ns_common        ns;
        struct user_namespace   *user_ns;
 +      struct ucounts          *ucounts;
        struct css_set          *root_cset;
  };
  
@@@ -658,8 -633,8 +657,8 @@@ struct cgroup_namespace *copy_cgroup_ns
                                        struct user_namespace *user_ns,
                                        struct cgroup_namespace *old_ns);
  
- char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                    struct cgroup_namespace *ns);
+ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns);
  
  #else /* !CONFIG_CGROUPS */
  
diff --combined kernel/cgroup.c
@@@ -64,6 -64,9 +64,9 @@@
  #include <linux/file.h>
  #include <net/sock.h>
  
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/cgroup.h>
  /*
   * pidlists linger the following amount before being destroyed.  The goal
   * is avoiding frequent destruction in the middle of consecutive read calls
@@@ -1176,6 -1179,8 +1179,8 @@@ static void cgroup_destroy_root(struct 
        struct cgroup *cgrp = &root->cgrp;
        struct cgrp_cset_link *link, *tmp_link;
  
+       trace_cgroup_destroy_root(root);
        cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
  
        BUG_ON(atomic_read(&root->nr_cgrps));
@@@ -1874,6 -1879,9 +1879,9 @@@ static int cgroup_remount(struct kernfs
                strcpy(root->release_agent_path, opts.release_agent);
                spin_unlock(&release_agent_path_lock);
        }
+       trace_cgroup_remount(root);
   out_unlock:
        kfree(opts.release_agent);
        kfree(opts.name);
@@@ -2031,6 -2039,8 +2039,8 @@@ static int cgroup_setup_root(struct cgr
        if (ret)
                goto destroy_root;
  
+       trace_cgroup_setup_root(root);
        /*
         * There must be no failure case after here, since rebinding takes
         * care of subsystems' refcounts, which are explicitly dropped in
@@@ -2315,22 -2325,18 +2325,18 @@@ static struct file_system_type cgroup2_
        .fs_flags = FS_USERNS_MOUNT,
  };
  
- static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-                                  struct cgroup_namespace *ns)
+ static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                                struct cgroup_namespace *ns)
  {
        struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-       int ret;
  
-       ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-       if (ret < 0 || ret >= buflen)
-               return NULL;
-       return buf;
+       return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
  }
  
- char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                    struct cgroup_namespace *ns)
+ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns)
  {
-       char *ret;
+       int ret;
  
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
@@@ -2357,12 -2363,12 +2363,12 @@@ EXPORT_SYMBOL_GPL(cgroup_path_ns)
   *
   * Return value is the same as kernfs_path().
   */
- char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  {
        struct cgroup_root *root;
        struct cgroup *cgrp;
        int hierarchy_id = 1;
-       char *path = NULL;
+       int ret;
  
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
  
        if (root) {
                cgrp = task_cgroup_from_root(task, root);
-               path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+               ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
        } else {
                /* if no hierarchy exists, everyone is in "/" */
-               if (strlcpy(buf, "/", buflen) < buflen)
-                       path = buf;
+               ret = strlcpy(buf, "/", buflen);
        }
  
        spin_unlock_irq(&css_set_lock);
        mutex_unlock(&cgroup_mutex);
-       return path;
+       return ret;
  }
  EXPORT_SYMBOL_GPL(task_cgroup_path);
  
@@@ -2830,6 -2835,10 +2835,10 @@@ static int cgroup_attach_task(struct cg
                ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
  
        cgroup_migrate_finish(&preloaded_csets);
+       if (!ret)
+               trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
        return ret;
  }
  
@@@ -3446,28 -3455,9 +3455,28 @@@ static ssize_t cgroup_subtree_control_w
         * Except for the root, subtree_control must be zero for a cgroup
         * with tasks so that child cgroups don't compete against tasks.
         */
 -      if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
 -              ret = -EBUSY;
 -              goto out_unlock;
 +      if (enable && cgroup_parent(cgrp)) {
 +              struct cgrp_cset_link *link;
 +
 +              /*
 +               * Because namespaces pin csets too, @cgrp->cset_links
 +               * might not be empty even when @cgrp is empty.  Walk and
 +               * verify each cset.
 +               */
 +              spin_lock_irq(&css_set_lock);
 +
 +              ret = 0;
 +              list_for_each_entry(link, &cgrp->cset_links, cset_link) {
 +                      if (css_set_populated(link->cset)) {
 +                              ret = -EBUSY;
 +                              break;
 +                      }
 +              }
 +
 +              spin_unlock_irq(&css_set_lock);
 +
 +              if (ret)
 +                      goto out_unlock;
        }
  
        /* save and update control masks and prepare csses */
@@@ -3611,6 -3601,8 +3620,8 @@@ static int cgroup_rename(struct kernfs_
        mutex_lock(&cgroup_mutex);
  
        ret = kernfs_rename(kn, new_parent, new_name_str);
+       if (!ret)
+               trace_cgroup_rename(cgrp);
  
        mutex_unlock(&cgroup_mutex);
  
@@@ -3918,9 -3910,7 +3929,9 @@@ void cgroup_file_notify(struct cgroup_f
   * cgroup_task_count - count the number of tasks in a cgroup.
   * @cgrp: the cgroup in question
   *
 - * Return the number of tasks in the cgroup.
 + * Return the number of tasks in the cgroup.  The returned number can be
 + * higher than the actual number of tasks due to css_set references from
 + * namespace roots and temporary usages.
   */
  static int cgroup_task_count(const struct cgroup *cgrp)
  {
@@@ -4381,6 -4371,8 +4392,8 @@@ int cgroup_transfer_tasks(struct cgrou
  
                if (task) {
                        ret = cgroup_migrate(task, false, to->root);
+                       if (!ret)
+                               trace_cgroup_transfer_tasks(to, task, false);
                        put_task_struct(task);
                }
        } while (task && !ret);
@@@ -5046,6 -5038,8 +5059,8 @@@ static void css_release_work_fn(struct 
                        ss->css_released(css);
        } else {
                /* cgroup release path */
+               trace_cgroup_release(cgrp);
                cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                cgrp->id = -1;
  
@@@ -5332,6 -5326,8 +5347,8 @@@ static int cgroup_mkdir(struct kernfs_n
        if (ret)
                goto out_destroy;
  
+       trace_cgroup_mkdir(cgrp);
        /* let's create and online css's */
        kernfs_activate(kn);
  
@@@ -5507,6 -5503,9 +5524,9 @@@ static int cgroup_rmdir(struct kernfs_n
  
        ret = cgroup_destroy_locked(cgrp);
  
+       if (!ret)
+               trace_cgroup_rmdir(cgrp);
        cgroup_kn_unlock(kn);
        return ret;
  }
@@@ -5627,12 -5626,6 +5647,12 @@@ int __init cgroup_init(void
        BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
        BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
  
 +      /*
 +       * The latency of the synchronize_sched() is too high for cgroups,
 +       * avoid it at the cost of forcing all readers into the slow path.
 +       */
 +      rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
 +
        get_user_ns(init_cgroup_ns.user_ns);
  
        mutex_lock(&cgroup_mutex);
@@@ -5743,7 -5736,7 +5763,7 @@@ core_initcall(cgroup_wq_init)
  int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
  {
-       char *buf, *path;
+       char *buf;
        int retval;
        struct cgroup_root *root;
  
                 * " (deleted)" is appended to the cgroup path.
                 */
                if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                       path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                       retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
                                                current->nsproxy->cgroup_ns);
-                       if (!path) {
+                       if (retval >= PATH_MAX)
                                retval = -ENAMETOOLONG;
+                       if (retval < 0)
                                goto out_unlock;
-                       }
+                       seq_puts(m, buf);
                } else {
-                       path = "/";
+                       seq_puts(m, "/");
                }
  
-               seq_puts(m, path);
                if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
                        seq_puts(m, " (deleted)\n");
                else
@@@ -6062,8 -6055,9 +6082,9 @@@ static void cgroup_release_agent(struc
  {
        struct cgroup *cgrp =
                container_of(work, struct cgroup, release_agent_work);
-       char *pathbuf = NULL, *agentbuf = NULL, *path;
+       char *pathbuf = NULL, *agentbuf = NULL;
        char *argv[3], *envp[3];
+       int ret;
  
        mutex_lock(&cgroup_mutex);
  
                goto out;
  
        spin_lock_irq(&css_set_lock);
-       path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+       ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
        spin_unlock_irq(&css_set_lock);
-       if (!path)
+       if (ret < 0 || ret >= PATH_MAX)
                goto out;
  
        argv[0] = agentbuf;
-       argv[1] = path;
+       argv[1] = pathbuf;
        argv[2] = NULL;
  
        /* minimal command environment */
@@@ -6297,12 -6291,6 +6318,12 @@@ void cgroup_sk_alloc(struct sock_cgroup
        if (cgroup_sk_alloc_disabled)
                return;
  
 +      /* Socket clone path */
 +      if (skcd->val) {
 +              cgroup_get(sock_cgroup_ptr(skcd));
 +              return;
 +      }
 +
        rcu_read_lock();
  
        while (true) {
@@@ -6328,16 -6316,6 +6349,16 @@@ void cgroup_sk_free(struct sock_cgroup_
  
  /* cgroup namespaces */
  
 +static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
 +{
 +      return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
 +}
 +
 +static void dec_cgroup_namespaces(struct ucounts *ucounts)
 +{
 +      dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
 +}
 +
  static struct cgroup_namespace *alloc_cgroup_ns(void)
  {
        struct cgroup_namespace *new_ns;
  void free_cgroup_ns(struct cgroup_namespace *ns)
  {
        put_css_set(ns->root_cset);
 +      dec_cgroup_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
        kfree(ns);
@@@ -6371,7 -6348,6 +6392,7 @@@ struct cgroup_namespace *copy_cgroup_ns
                                        struct cgroup_namespace *old_ns)
  {
        struct cgroup_namespace *new_ns;
 +      struct ucounts *ucounts;
        struct css_set *cset;
  
        BUG_ON(!old_ns);
        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                return ERR_PTR(-EPERM);
  
 +      ucounts = inc_cgroup_namespaces(user_ns);
 +      if (!ucounts)
 +              return ERR_PTR(-ENOSPC);
 +
        /* It is not safe to take cgroup_mutex here */
        spin_lock_irq(&css_set_lock);
        cset = task_css_set(current);
        new_ns = alloc_cgroup_ns();
        if (IS_ERR(new_ns)) {
                put_css_set(cset);
 +              dec_cgroup_namespaces(ucounts);
                return new_ns;
        }
  
        new_ns->user_ns = get_user_ns(user_ns);
 +      new_ns->ucounts = ucounts;
        new_ns->root_cset = cset;
  
        return new_ns;
@@@ -6454,18 -6424,12 +6475,18 @@@ static void cgroupns_put(struct ns_comm
        put_cgroup_ns(to_cg_ns(ns));
  }
  
 +static struct user_namespace *cgroupns_owner(struct ns_common *ns)
 +{
 +      return to_cg_ns(ns)->user_ns;
 +}
 +
  const struct proc_ns_operations cgroupns_operations = {
        .name           = "cgroup",
        .type           = CLONE_NEWCGROUP,
        .get            = cgroupns_get,
        .put            = cgroupns_put,
        .install        = cgroupns_install,
 +      .owner          = cgroupns_owner,
  };
  
  static __init int cgroup_namespaces_init(void)
diff --combined kernel/cpuset.c
@@@ -325,7 -325,8 +325,7 @@@ static struct file_system_type cpuset_f
  /*
   * Return in pmask the portion of a cpusets's cpus_allowed that
   * are online.  If none are online, walk up the cpuset hierarchy
 - * until we find one that does have some online cpus.  The top
 - * cpuset always has some cpus online.
 + * until we find one that does have some online cpus.
   *
   * One way or another, we guarantee to return some non-empty subset
   * of cpu_online_mask.
   */
  static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  {
 -      while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
 +      while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
                cs = parent_cs(cs);
 +              if (unlikely(!cs)) {
 +                      /*
 +                       * The top cpuset doesn't have any online cpu as a
 +                       * consequence of a race between cpuset_hotplug_work
 +                       * and cpu hotplug notifier.  But we know the top
 +                       * cpuset's effective_cpus is on its way to be
 +                       * identical to cpu_online_mask.
 +                       */
 +                      cpumask_copy(pmask, cpu_online_mask);
 +                      return;
 +              }
 +      }
        cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
  }
  
@@@ -2080,20 -2069,6 +2080,20 @@@ static void cpuset_bind(struct cgroup_s
        mutex_unlock(&cpuset_mutex);
  }
  
 +/*
 + * Make sure the new task conforms to the current state of its parent,
 + * which could have been changed by cpuset just after it inherits the
 + * state from the parent and before it sits on the cgroup's task list.
 + */
 +static void cpuset_fork(struct task_struct *task)
 +{
 +      if (task_css_is_root(task, cpuset_cgrp_id))
 +              return;
 +
 +      set_cpus_allowed_ptr(task, &current->cpus_allowed);
 +      task->mems_allowed = current->mems_allowed;
 +}
 +
  struct cgroup_subsys cpuset_cgrp_subsys = {
        .css_alloc      = cpuset_css_alloc,
        .css_online     = cpuset_css_online,
        .attach         = cpuset_attach,
        .post_attach    = cpuset_post_attach,
        .bind           = cpuset_bind,
 +      .fork           = cpuset_fork,
        .legacy_cftypes = files,
        .early_init     = true,
  };
@@@ -2715,7 -2689,7 +2715,7 @@@ void __cpuset_memory_pressure_bump(void
  int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
  {
-       char *buf, *p;
+       char *buf;
        struct cgroup_subsys_state *css;
        int retval;
  
        if (!buf)
                goto out;
  
-       retval = -ENAMETOOLONG;
        css = task_get_css(tsk, cpuset_cgrp_id);
-       p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
-                          current->nsproxy->cgroup_ns);
+       retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+                               current->nsproxy->cgroup_ns);
        css_put(css);
-       if (!p)
+       if (retval >= PATH_MAX)
+               retval = -ENAMETOOLONG;
+       if (retval < 0)
                goto out_free;
-       seq_puts(m, p);
+       seq_puts(m, buf);
        seq_putc(m, '\n');
        retval = 0;
  out_free:
diff --combined kernel/sched/debug.c
@@@ -369,12 -369,8 +369,12 @@@ static void print_cfs_group_stats(struc
  
  #define P(F) \
        SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
 +#define P_SCHEDSTAT(F) \
 +      SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
  #define PN(F) \
        SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
 +#define PN_SCHEDSTAT(F) \
 +      SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
  
        if (!se)
                return;
        PN(se->exec_start);
        PN(se->vruntime);
        PN(se->sum_exec_runtime);
 -#ifdef CONFIG_SCHEDSTATS
        if (schedstat_enabled()) {
 -              PN(se->statistics.wait_start);
 -              PN(se->statistics.sleep_start);
 -              PN(se->statistics.block_start);
 -              PN(se->statistics.sleep_max);
 -              PN(se->statistics.block_max);
 -              PN(se->statistics.exec_max);
 -              PN(se->statistics.slice_max);
 -              PN(se->statistics.wait_max);
 -              PN(se->statistics.wait_sum);
 -              P(se->statistics.wait_count);
 +              PN_SCHEDSTAT(se->statistics.wait_start);
 +              PN_SCHEDSTAT(se->statistics.sleep_start);
 +              PN_SCHEDSTAT(se->statistics.block_start);
 +              PN_SCHEDSTAT(se->statistics.sleep_max);
 +              PN_SCHEDSTAT(se->statistics.block_max);
 +              PN_SCHEDSTAT(se->statistics.exec_max);
 +              PN_SCHEDSTAT(se->statistics.slice_max);
 +              PN_SCHEDSTAT(se->statistics.wait_max);
 +              PN_SCHEDSTAT(se->statistics.wait_sum);
 +              P_SCHEDSTAT(se->statistics.wait_count);
        }
 -#endif
        P(se->load.weight);
  #ifdef CONFIG_SMP
        P(se->avg.load_avg);
        P(se->avg.util_avg);
  #endif
 +
 +#undef PN_SCHEDSTAT
  #undef PN
 +#undef P_SCHEDSTAT
  #undef P
  }
  #endif
@@@ -415,7 -410,8 +415,8 @@@ static char *task_group_path(struct tas
        if (autogroup_path(tg, group_path, PATH_MAX))
                return group_path;
  
-       return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+       cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+       return group_path;
  }
  #endif
  
@@@ -434,9 -430,9 +435,9 @@@ print_task(struct seq_file *m, struct r
                p->prio);
  
        SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 -              SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
 +              SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
                SPLIT_NS(p->se.sum_exec_runtime),
 -              SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
 +              SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
  
  #ifdef CONFIG_NUMA_BALANCING
        SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@@ -631,7 -627,9 +632,7 @@@ do {                                                                       
  #undef P64
  #endif
  
 -#ifdef CONFIG_SCHEDSTATS
 -#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 -
 +#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
        if (schedstat_enabled()) {
                P(yld_count);
                P(sched_count);
                P(ttwu_count);
                P(ttwu_local);
        }
 -
  #undef P
 -#endif
 +
        spin_lock_irqsave(&sched_debug_lock, flags);
        print_cfs_stats(m, cpu);
        print_rt_stats(m, cpu);
@@@ -870,14 -869,10 +871,14 @@@ void proc_sched_show_task(struct task_s
        SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
  #define P(F) \
        SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
 +#define P_SCHEDSTAT(F) \
 +      SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
  #define __PN(F) \
        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
  #define PN(F) \
        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 +#define PN_SCHEDSTAT(F) \
 +      SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
  
        PN(se.exec_start);
        PN(se.vruntime);
  
        P(se.nr_migrations);
  
 -#ifdef CONFIG_SCHEDSTATS
        if (schedstat_enabled()) {
                u64 avg_atom, avg_per_cpu;
  
 -              PN(se.statistics.sum_sleep_runtime);
 -              PN(se.statistics.wait_start);
 -              PN(se.statistics.sleep_start);
 -              PN(se.statistics.block_start);
 -              PN(se.statistics.sleep_max);
 -              PN(se.statistics.block_max);
 -              PN(se.statistics.exec_max);
 -              PN(se.statistics.slice_max);
 -              PN(se.statistics.wait_max);
 -              PN(se.statistics.wait_sum);
 -              P(se.statistics.wait_count);
 -              PN(se.statistics.iowait_sum);
 -              P(se.statistics.iowait_count);
 -              P(se.statistics.nr_migrations_cold);
 -              P(se.statistics.nr_failed_migrations_affine);
 -              P(se.statistics.nr_failed_migrations_running);
 -              P(se.statistics.nr_failed_migrations_hot);
 -              P(se.statistics.nr_forced_migrations);
 -              P(se.statistics.nr_wakeups);
 -              P(se.statistics.nr_wakeups_sync);
 -              P(se.statistics.nr_wakeups_migrate);
 -              P(se.statistics.nr_wakeups_local);
 -              P(se.statistics.nr_wakeups_remote);
 -              P(se.statistics.nr_wakeups_affine);
 -              P(se.statistics.nr_wakeups_affine_attempts);
 -              P(se.statistics.nr_wakeups_passive);
 -              P(se.statistics.nr_wakeups_idle);
 +              PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
 +              PN_SCHEDSTAT(se.statistics.wait_start);
 +              PN_SCHEDSTAT(se.statistics.sleep_start);
 +              PN_SCHEDSTAT(se.statistics.block_start);
 +              PN_SCHEDSTAT(se.statistics.sleep_max);
 +              PN_SCHEDSTAT(se.statistics.block_max);
 +              PN_SCHEDSTAT(se.statistics.exec_max);
 +              PN_SCHEDSTAT(se.statistics.slice_max);
 +              PN_SCHEDSTAT(se.statistics.wait_max);
 +              PN_SCHEDSTAT(se.statistics.wait_sum);
 +              P_SCHEDSTAT(se.statistics.wait_count);
 +              PN_SCHEDSTAT(se.statistics.iowait_sum);
 +              P_SCHEDSTAT(se.statistics.iowait_count);
 +              P_SCHEDSTAT(se.statistics.nr_migrations_cold);
 +              P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
 +              P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
 +              P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
 +              P_SCHEDSTAT(se.statistics.nr_forced_migrations);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_local);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
 +              P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
  
                avg_atom = p->se.sum_exec_runtime;
                if (nr_switches)
                __PN(avg_atom);
                __PN(avg_per_cpu);
        }
 -#endif
 +
        __P(nr_switches);
        SEQ_printf(m, "%-45s:%21Ld\n",
                   "nr_voluntary_switches", (long long)p->nvcsw);
  #endif
        P(policy);
        P(prio);
 +#undef PN_SCHEDSTAT
  #undef PN
  #undef __PN
 +#undef P_SCHEDSTAT
  #undef P
  #undef __P