From: Linus Torvalds
Date: Fri, 14 Oct 2016 19:18:50 +0000 (-0700)
Subject: Merge branch 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
X-Git-Tag: v4.9-rc1~20
X-Git-Url: http://git.cascardo.eti.br/?a=commitdiff_plain;h=f34d3606f76a8121b9d4940d2dd436bebeb2f9d7;hp=-c;p=cascardo%2Flinux.git

Merge branch 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - tracepoints for basic cgroup management operations added

 - kernfs and cgroup path formatting functions updated to behave in the
   style of strlcpy()

 - non-critical bug fixes

* 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  blkcg: Unlock blkcg_pol_mutex only once when cpd == NULL
  cgroup: fix error handling regressions in proc_cgroup_show() and cgroup_release_agent()
  cpuset: fix error handling regression in proc_cpuset_show()
  cgroup: add tracepoints for basic operations
  cgroup: make cgroup_path() and friends behave in the style of strlcpy()
  kernfs: remove kernfs_path_len()
  kernfs: make kernfs_path*() behave in the style of strlcpy()
  kernfs: add dummy implementation of kernfs_path_from_node()
---

f34d3606f76a8121b9d4940d2dd436bebeb2f9d7
diff --combined fs/kernfs/dir.c
index dcd96aac02f5,6e7fd37615f8..cf4c636ff4da
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@@ -110,8 -110,9 +110,9 @@@ static struct kernfs_node *kernfs_commo
  * kn_to:   /n1/n2/n3  [depth=3]
  * result:  /../..
  *
- * return value: length of the string.  If greater than buflen,
- * then contents of buf are undefined.  On error, -1 is returned.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
					 struct kernfs_node *kn_from,
@@@ -119,9 -120,8 +120,8 @@@
 {
	struct kernfs_node *kn, *common;
	const char parent_str[] = "/..";
-	size_t depth_from, depth_to, len = 0, nlen = 0;
-	char *p;
-	int i;
+	size_t depth_from, depth_to, len = 0;
+	int i, j;
 
	if (!kn_from)
		kn_from = kernfs_root(kn_to)->kn;
@@@ -131,7 -131,7 +131,7 @@@
 
	common = kernfs_common_ancestor(kn_from, kn_to);
	if (WARN_ON(!common))
-		return -1;
+		return -EINVAL;
 
	depth_to = kernfs_depth(common, kn_to);
	depth_from = kernfs_depth(common, kn_from);
@@@ -144,22 -144,16 +144,16 @@@
			       len < buflen ? buflen - len : 0);
 
	/* Calculate how many bytes we need for the rest */
-	for (kn = kn_to; kn != common; kn = kn->parent)
-		nlen += strlen(kn->name) + 1;
-
-	if (len + nlen >= buflen)
-		return len + nlen;
-
-	p = buf + len + nlen;
-	*p = '\0';
-	for (kn = kn_to; kn != common; kn = kn->parent) {
-		size_t tmp = strlen(kn->name);
-		p -= tmp;
-		memcpy(p, kn->name, tmp);
-		*(--p) = '/';
+	for (i = depth_to - 1; i >= 0; i--) {
+		for (kn = kn_to, j = 0; j < i; j++)
+			kn = kn->parent;
+		len += strlcpy(buf + len, "/",
+			       len < buflen ? buflen - len : 0);
+		len += strlcpy(buf + len, kn->name,
+			       len < buflen ? buflen - len : 0);
	}
 
-	return len + nlen;
+	return len;
 }
 
 /**
@@@ -185,29 -179,6 +179,6 @@@ int kernfs_name(struct kernfs_node *kn
	return ret;
 }
 
- /**
-  * kernfs_path_len - determine the length of the full path of a given node
-  * @kn: kernfs_node of interest
-  *
-  * The returned length doesn't include the space for the terminating '\0'.
-  */
- size_t kernfs_path_len(struct kernfs_node *kn)
- {
-	size_t len = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kernfs_rename_lock, flags);
-
-	do {
-		len += strlen(kn->name) + 1;
-		kn = kn->parent;
-	} while (kn && kn->parent);
-
-	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
-
-	return len;
- }
-
 /**
  * kernfs_path_from_node - build path of node @to relative to @from.
  * @from: parent kernfs_node relative to which we need to build the path
@@@ -220,8 -191,9 +191,9 @@@
  * path (which includes '..'s) as needed to reach from @from to @to is
  * returned.
  *
- * If @buf isn't long enough, the return value will be greater than @buflen
- * and @buf contents are undefined.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
			   char *buf, size_t buflen)
@@@ -236,28 -208,6 +208,6 @@@
 }
 EXPORT_SYMBOL_GPL(kernfs_path_from_node);
 
- /**
-  * kernfs_path - build full path of a given node
-  * @kn: kernfs_node of interest
-  * @buf: buffer to copy @kn's name into
-  * @buflen: size of @buf
-  *
-  * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
-  * path is built from the end of @buf so the returned pointer usually
-  * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
-  * and %NULL is returned.
-  */
- char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
- {
-	int ret;
-
-	ret = kernfs_path_from_node(kn, NULL, buf, buflen);
-	if (ret < 0 || ret >= buflen)
-		return NULL;
-	return buf;
- }
- EXPORT_SYMBOL_GPL(kernfs_path);
-
 /**
  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
  * @kn: kernfs_node of interest
@@@ -1096,17 -1046,13 +1046,17 @@@ static int kernfs_iop_rmdir(struct inod
 }
 
 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
-			     struct inode *new_dir, struct dentry *new_dentry)
+			     struct inode *new_dir, struct dentry *new_dentry,
+			     unsigned int flags)
 {
	struct kernfs_node *kn = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
	int ret;
 
+	if (flags)
+		return -EINVAL;
+
	if (!scops || !scops->rename)
		return -EPERM;
 
@@@ -1130,6 -1076,9 +1080,6 @@@ const struct inode_operations kernfs_di
	.permission = kernfs_iop_permission,
	.setattr = kernfs_iop_setattr,
	.getattr = kernfs_iop_getattr,
-	.setxattr = kernfs_iop_setxattr,
-	.removexattr = kernfs_iop_removexattr,
-	.getxattr = kernfs_iop_getxattr,
	.listxattr = kernfs_iop_listxattr,
 
	.mkdir = kernfs_iop_mkdir,
diff --combined include/linux/blk-cgroup.h
index cbdbf34de5b6,4e8c215e185c..3bf5d33800ab
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@@ -45,7 -45,7 +45,7 @@@ struct blkcg
	spinlock_t lock;
 
	struct radix_tree_root blkg_tree;
-	struct blkcg_gq *blkg_hint;
+	struct blkcg_gq __rcu *blkg_hint;
	struct hlist_head blkg_list;
 
	struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
@@@ -343,16 -343,7 +343,7 @@@ static inline struct blkcg *cpd_to_blkc
  */
 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
 {
-	char *p;
-
-	p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-	if (!p) {
-		strncpy(buf, "", buflen);
-		return -ENAMETOOLONG;
-	}
-
-	memmove(buf, p, buf + buflen - p);
-	return 0;
+	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
 }
 
 /**
diff --combined include/linux/cgroup.h
index 440a72164a11,6df36361a492..c83c23f0577b
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@@ -97,7 -97,7 +97,7 @@@ int cgroup_add_legacy_cftypes(struct cg
 int cgroup_rm_cftypes(struct cftype *cfts);
 void cgroup_file_notify(struct cgroup_file *cfile);
 
- char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
+ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		      struct pid *pid, struct task_struct *tsk);
@@@ -497,23 -497,6 +497,23 @@@ static inline bool cgroup_is_descendant
	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@@ -555,8 -538,7 +555,7 @@@ static inline int cgroup_name(struct cg
	return kernfs_name(cgrp->kn, buf, buflen);
 }
 
- static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
-					       size_t buflen)
+ static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen)
 {
	return kernfs_path(cgrp->kn, buf, buflen);
 }
@@@ -574,7 -556,6 +573,7 @@@ static inline void pr_cont_cgroup_path(
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@@ -592,11 -573,6 +591,11 @@@ static inline void cgroup_free(struct t
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
@@@ -644,7 -620,6 +643,7 @@@ struct cgroup_namespace
	atomic_t count;
	struct ns_common ns;
	struct user_namespace *user_ns;
+	struct ucounts *ucounts;
	struct css_set *root_cset;
 };
 
@@@ -658,8 -633,8 +657,8 @@@ struct cgroup_namespace *copy_cgroup_ns
					struct user_namespace *user_ns,
					struct cgroup_namespace *old_ns);
 
- char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-		      struct cgroup_namespace *ns);
+ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+		    struct cgroup_namespace *ns);
 
 #else /* !CONFIG_CGROUPS */
 
diff --combined kernel/cgroup.c
index 44066158f0d1,a7f9fb4e1fc7..85bc9beb046d
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@@ -64,6 -64,9 +64,9 @@@
 #include
 #include
 
+ #define CREATE_TRACE_POINTS
+ #include
+
 /*
  * pidlists linger the following amount before being destroyed.  The goal
  * is avoiding frequent destruction in the middle of consecutive read calls
@@@ -1176,6 -1179,8 +1179,8 @@@ static void cgroup_destroy_root(struct
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;
 
+	trace_cgroup_destroy_root(root);
+
	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
	BUG_ON(atomic_read(&root->nr_cgrps));
@@@ -1874,6 -1879,9 +1879,9 @@@ static int cgroup_remount(struct kernfs
		strcpy(root->release_agent_path, opts.release_agent);
		spin_unlock(&release_agent_path_lock);
	}
+
+	trace_cgroup_remount(root);
+
  out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
@@@ -2031,6 -2039,8 +2039,8 @@@ static int cgroup_setup_root(struct cgr
	if (ret)
		goto destroy_root;
 
+	trace_cgroup_setup_root(root);
+
	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
@@@ -2315,22 -2325,18 +2325,18 @@@ static struct file_system_type cgroup2_
	.fs_flags = FS_USERNS_MOUNT,
 };
 
- static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-				    struct cgroup_namespace *ns)
+ static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+				  struct cgroup_namespace *ns)
 {
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-	int ret;
 
-	ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-	if (ret < 0 || ret >= buflen)
-		return NULL;
-	return buf;
+	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
 }
 
- char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-		      struct cgroup_namespace *ns)
+ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+		    struct cgroup_namespace *ns)
 {
-	char *ret;
+	int ret;
 
	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);
@@@ -2357,12 -2363,12 +2363,12 @@@ EXPORT_SYMBOL_GPL(cgroup_path_ns)
  *
  * Return value is the same as kernfs_path().
  */
- char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
-	char *path = NULL;
+	int ret;
 
	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);
@@@ -2371,16 -2377,15 +2377,15 @@@
 
	if (root) {
		cgrp = task_cgroup_from_root(task, root);
-		path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
-		if (strlcpy(buf, "/", buflen) < buflen)
-			path = buf;
+		ret = strlcpy(buf, "/", buflen);
	}
 
	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
-	return path;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(task_cgroup_path);
 
@@@ -2830,6 -2835,10 +2835,10 @@@ static int cgroup_attach_task(struct cg
		ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
 
	cgroup_migrate_finish(&preloaded_csets);
+
+	if (!ret)
+		trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
	return ret;
 }
 
@@@ -3446,28 -3455,9 +3455,28 @@@ static ssize_t cgroup_subtree_control_w
	 * Except for the root, subtree_control must be zero for a cgroup
	 * with tasks so that child cgroups don't compete against tasks.
	 */
-	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
-		ret = -EBUSY;
-		goto out_unlock;
+	if (enable && cgroup_parent(cgrp)) {
+		struct cgrp_cset_link *link;
+
+		/*
+		 * Because namespaces pin csets too, @cgrp->cset_links
+		 * might not be empty even when @cgrp is empty.  Walk and
+		 * verify each cset.
+		 */
+		spin_lock_irq(&css_set_lock);
+
+		ret = 0;
+		list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+			if (css_set_populated(link->cset)) {
+				ret = -EBUSY;
+				break;
+			}
+		}
+
+		spin_unlock_irq(&css_set_lock);
+
+		if (ret)
+			goto out_unlock;
	}
 
	/* save and update control masks and prepare csses */
@@@ -3611,6 -3601,8 +3620,8 @@@ static int cgroup_rename(struct kernfs_
	mutex_lock(&cgroup_mutex);
 
	ret = kernfs_rename(kn, new_parent, new_name_str);
+	if (!ret)
+		trace_cgroup_rename(cgrp);
 
	mutex_unlock(&cgroup_mutex);
 
@@@ -3918,9 -3910,7 +3929,9 @@@ void cgroup_file_notify(struct cgroup_f
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
  *
- * Return the number of tasks in the cgroup.
+ * Return the number of tasks in the cgroup.  The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
  */
 static int cgroup_task_count(const struct cgroup *cgrp)
 {
@@@ -4381,6 -4371,8 +4392,8 @@@ int cgroup_transfer_tasks(struct cgrou
 
		if (task) {
			ret = cgroup_migrate(task, false, to->root);
+			if (!ret)
+				trace_cgroup_transfer_tasks(to, task, false);
			put_task_struct(task);
		}
	} while (task && !ret);
@@@ -5046,6 -5038,8 +5059,8 @@@ static void css_release_work_fn(struct
			ss->css_released(css);
	} else {
		/* cgroup release path */
+		trace_cgroup_release(cgrp);
+
		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
		cgrp->id = -1;
 
@@@ -5332,6 -5326,8 +5347,8 @@@ static int cgroup_mkdir(struct kernfs_n
	if (ret)
		goto out_destroy;
 
+	trace_cgroup_mkdir(cgrp);
+
	/* let's create and online css's */
	kernfs_activate(kn);
 
@@@ -5507,6 -5503,9 +5524,9 @@@ static int cgroup_rmdir(struct kernfs_n
 
	ret = cgroup_destroy_locked(cgrp);
 
+	if (!ret)
+		trace_cgroup_rmdir(cgrp);
+
	cgroup_kn_unlock(kn);
	return ret;
 }
@@@ -5627,12 -5626,6 +5647,12 @@@ int __init cgroup_init(void
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
+	/*
+	 * The latency of the synchronize_sched() is too high for cgroups,
+	 * avoid it at the cost of forcing all readers into the slow path.
+	 */
+	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
	get_user_ns(init_cgroup_ns.user_ns);
 
	mutex_lock(&cgroup_mutex);
@@@ -5743,7 -5736,7 +5763,7 @@@ core_initcall(cgroup_wq_init)
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
 {
-	char *buf, *path;
+	char *buf;
	int retval;
	struct cgroup_root *root;
 
@@@ -5786,18 -5779,18 +5806,18 @@@
		 * " (deleted)" is appended to the cgroup path.
		 */
		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-			path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
						current->nsproxy->cgroup_ns);
-			if (!path) {
+			if (retval >= PATH_MAX)
				retval = -ENAMETOOLONG;
+			if (retval < 0)
				goto out_unlock;
-			}
+
+			seq_puts(m, buf);
		} else {
-			path = "/";
+			seq_puts(m, "/");
		}
 
-		seq_puts(m, path);
-
		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
			seq_puts(m, " (deleted)\n");
		else
@@@ -6062,8 -6055,9 +6082,9 @@@ static void cgroup_release_agent(struc
 {
	struct cgroup *cgrp =
		container_of(work, struct cgroup, release_agent_work);
-	char *pathbuf = NULL, *agentbuf = NULL, *path;
+	char *pathbuf = NULL, *agentbuf = NULL;
	char *argv[3], *envp[3];
+	int ret;
 
	mutex_lock(&cgroup_mutex);
 
@@@ -6073,13 -6067,13 +6094,13 @@@
		goto out;
 
	spin_lock_irq(&css_set_lock);
-	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+	ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
	spin_unlock_irq(&css_set_lock);
-	if (!path)
+	if (ret < 0 || ret >= PATH_MAX)
		goto out;
 
	argv[0] = agentbuf;
-	argv[1] = path;
+	argv[1] = pathbuf;
	argv[2] = NULL;
 
	/* minimal command environment */
@@@ -6297,12 -6291,6 +6318,12 @@@ void cgroup_sk_alloc(struct sock_cgroup
	if (cgroup_sk_alloc_disabled)
		return;
 
+	/* Socket clone path */
+	if (skcd->val) {
+		cgroup_get(sock_cgroup_ptr(skcd));
+		return;
+	}
+
	rcu_read_lock();
 
	while (true) {
@@@ -6328,16 -6316,6 +6349,16 @@@ void cgroup_sk_free(struct sock_cgroup_
 
 /* cgroup namespaces */
 
+static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct ucounts *ucounts)
+{
+	dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
+}
+
 static struct cgroup_namespace *alloc_cgroup_ns(void)
 {
	struct cgroup_namespace *new_ns;
@@@ -6359,7 -6337,6 +6380,7 @@@
 void free_cgroup_ns(struct cgroup_namespace *ns)
 {
	put_css_set(ns->root_cset);
+	dec_cgroup_namespaces(ns->ucounts);
	put_user_ns(ns->user_ns);
	ns_free_inum(&ns->ns);
	kfree(ns);
@@@ -6371,7 -6348,6 +6392,7 @@@ struct cgroup_namespace *copy_cgroup_ns
					struct cgroup_namespace *old_ns)
 {
	struct cgroup_namespace *new_ns;
+	struct ucounts *ucounts;
	struct css_set *cset;
 
	BUG_ON(!old_ns);
@@@ -6385,10 -6361,6 +6406,10 @@@
	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
 
+	ucounts = inc_cgroup_namespaces(user_ns);
+	if (!ucounts)
+		return ERR_PTR(-ENOSPC);
+
	/* It is not safe to take cgroup_mutex here */
	spin_lock_irq(&css_set_lock);
	cset = task_css_set(current);
@@@ -6398,12 -6370,10 +6419,12 @@@
	new_ns = alloc_cgroup_ns();
	if (IS_ERR(new_ns)) {
		put_css_set(cset);
+		dec_cgroup_namespaces(ucounts);
		return new_ns;
	}
 
	new_ns->user_ns = get_user_ns(user_ns);
+	new_ns->ucounts = ucounts;
	new_ns->root_cset = cset;
 
	return new_ns;
@@@ -6454,18 -6424,12 +6475,18 @@@ static void cgroupns_put(struct ns_comm
	put_cgroup_ns(to_cg_ns(ns));
 }
 
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+	return to_cg_ns(ns)->user_ns;
+}
+
 const struct proc_ns_operations cgroupns_operations = {
	.name = "cgroup",
	.type = CLONE_NEWCGROUP,
	.get = cgroupns_get,
	.put = cgroupns_put,
	.install = cgroupns_install,
+	.owner = cgroupns_owner,
 };
 
 static __init int cgroup_namespaces_init(void)
diff --combined kernel/cpuset.c
index 2b4c20ab5bbe,97dd8e178786..29f815d2ef7e
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@@ -325,7 -325,8 +325,7 @@@ static struct file_system_type cpuset_f
 /*
  * Return in pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.  The top
- * cpuset always has some cpus online.
+ * until we find one that does have some online cpus.
  *
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
@@@ -334,20 -335,8 +334,20 @@@
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
-	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
+	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
		cs = parent_cs(cs);
+		if (unlikely(!cs)) {
+			/*
+			 * The top cpuset doesn't have any online cpu as a
+			 * consequence of a race between cpuset_hotplug_work
+			 * and cpu hotplug notifier.  But we know the top
+			 * cpuset's effective_cpus is on its way to to be
+			 * identical to cpu_online_mask.
+			 */
+			cpumask_copy(pmask, cpu_online_mask);
+			return;
+		}
+	}
	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
 }
 
@@@ -2080,20 -2069,6 +2080,20 @@@ static void cpuset_bind(struct cgroup_s
	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+static void cpuset_fork(struct task_struct *task)
+{
+	if (task_css_is_root(task, cpuset_cgrp_id))
+		return;
+
+	set_cpus_allowed_ptr(task, &current->cpus_allowed);
+	task->mems_allowed = current->mems_allowed;
+}
+
 struct cgroup_subsys cpuset_cgrp_subsys = {
	.css_alloc = cpuset_css_alloc,
	.css_online = cpuset_css_online,
@@@ -2104,7 -2079,6 +2104,7 @@@
	.attach = cpuset_attach,
	.post_attach = cpuset_post_attach,
	.bind = cpuset_bind,
+	.fork = cpuset_fork,
	.legacy_cftypes = files,
	.early_init = true,
 };
@@@ -2715,7 -2689,7 +2715,7 @@@ void __cpuset_memory_pressure_bump(void
 int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
 {
-	char *buf, *p;
+	char *buf;
	struct cgroup_subsys_state *css;
	int retval;
 
@@@ -2724,14 -2698,15 +2724,15 @@@
	if (!buf)
		goto out;
 
-	retval = -ENAMETOOLONG;
	css = task_get_css(tsk, cpuset_cgrp_id);
-	p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
-			   current->nsproxy->cgroup_ns);
+	retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+				current->nsproxy->cgroup_ns);
	css_put(css);
-	if (!p)
+	if (retval >= PATH_MAX)
+		retval = -ENAMETOOLONG;
+	if (retval < 0)
		goto out_free;
-	seq_puts(m, p);
+	seq_puts(m, buf);
	seq_putc(m, '\n');
	retval = 0;
 out_free:
diff --combined kernel/sched/debug.c
index 13935886a471,23cb609ba4eb..fa178b62ea79
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@@ -369,12 -369,8 +369,12 @@@ static void print_cfs_group_stats(struc
 #define P(F) \
	SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
 #define PN(F) \
	SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
 
	if (!se)
		return;
@@@ -382,27 -378,26 +382,27 @@@
 
	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);
-#ifdef CONFIG_SCHEDSTATS
	if (schedstat_enabled()) {
-		PN(se->statistics.wait_start);
-		PN(se->statistics.sleep_start);
-		PN(se->statistics.block_start);
-		PN(se->statistics.sleep_max);
-		PN(se->statistics.block_max);
-		PN(se->statistics.exec_max);
-		PN(se->statistics.slice_max);
-		PN(se->statistics.wait_max);
-		PN(se->statistics.wait_sum);
-		P(se->statistics.wait_count);
+		PN_SCHEDSTAT(se->statistics.wait_start);
+		PN_SCHEDSTAT(se->statistics.sleep_start);
+		PN_SCHEDSTAT(se->statistics.block_start);
+		PN_SCHEDSTAT(se->statistics.sleep_max);
+		PN_SCHEDSTAT(se->statistics.block_max);
+		PN_SCHEDSTAT(se->statistics.exec_max);
+		PN_SCHEDSTAT(se->statistics.slice_max);
+		PN_SCHEDSTAT(se->statistics.wait_max);
+		PN_SCHEDSTAT(se->statistics.wait_sum);
+		P_SCHEDSTAT(se->statistics.wait_count);
	}
-#endif
	P(se->load.weight);
 #ifdef CONFIG_SMP
	P(se->avg.load_avg);
	P(se->avg.util_avg);
 #endif
+
+#undef PN_SCHEDSTAT
 #undef PN
+#undef P_SCHEDSTAT
 #undef P
 }
 #endif
@@@ -415,7 -410,8 +415,8 @@@ static char *task_group_path(struct tas
	if (autogroup_path(tg, group_path, PATH_MAX))
		return group_path;
 
-	return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	return group_path;
 }
 #endif
 
@@@ -434,9 -430,9 +435,9 @@@ print_task(struct seq_file *m, struct r
		p->prio);
 
	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@@ -631,7 -627,9 +632,7 @@@ do {
 #undef P64
 #endif
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n));
	if (schedstat_enabled()) {
		P(yld_count);
		P(sched_count);
@@@ -639,8 -637,9 +640,8 @@@
		P(ttwu_count);
		P(ttwu_local);
	}
-
 #undef P
-#endif
+
	spin_lock_irqsave(&sched_debug_lock, flags);
	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);
@@@ -870,14 -869,10 +871,14 @@@ void proc_sched_show_task(struct task_s
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
 #define P(F) \
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
 #define __PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 #define PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
 
	PN(se.exec_start);
	PN(se.vruntime);
@@@ -887,36 -882,37 +888,36 @@@
 
	P(se.nr_migrations);
 
-#ifdef CONFIG_SCHEDSTATS
	if (schedstat_enabled()) {
		u64 avg_atom, avg_per_cpu;
 
-		PN(se.statistics.sum_sleep_runtime);
-		PN(se.statistics.wait_start);
-		PN(se.statistics.sleep_start);
-		PN(se.statistics.block_start);
-		PN(se.statistics.sleep_max);
-		PN(se.statistics.block_max);
-		PN(se.statistics.exec_max);
-		PN(se.statistics.slice_max);
-		PN(se.statistics.wait_max);
-		PN(se.statistics.wait_sum);
-		P(se.statistics.wait_count);
-		PN(se.statistics.iowait_sum);
-		P(se.statistics.iowait_count);
-		P(se.statistics.nr_migrations_cold);
-		P(se.statistics.nr_failed_migrations_affine);
-		P(se.statistics.nr_failed_migrations_running);
-		P(se.statistics.nr_failed_migrations_hot);
-		P(se.statistics.nr_forced_migrations);
-		P(se.statistics.nr_wakeups);
-		P(se.statistics.nr_wakeups_sync);
-		P(se.statistics.nr_wakeups_migrate);
-		P(se.statistics.nr_wakeups_local);
-		P(se.statistics.nr_wakeups_remote);
-		P(se.statistics.nr_wakeups_affine);
-		P(se.statistics.nr_wakeups_affine_attempts);
-		P(se.statistics.nr_wakeups_passive);
-		P(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+		PN_SCHEDSTAT(se.statistics.wait_start);
+		PN_SCHEDSTAT(se.statistics.sleep_start);
+		PN_SCHEDSTAT(se.statistics.block_start);
+		PN_SCHEDSTAT(se.statistics.sleep_max);
+		PN_SCHEDSTAT(se.statistics.block_max);
+		PN_SCHEDSTAT(se.statistics.exec_max);
+		PN_SCHEDSTAT(se.statistics.slice_max);
+		PN_SCHEDSTAT(se.statistics.wait_max);
+		PN_SCHEDSTAT(se.statistics.wait_sum);
+		P_SCHEDSTAT(se.statistics.wait_count);
+		PN_SCHEDSTAT(se.statistics.iowait_sum);
+		P_SCHEDSTAT(se.statistics.iowait_count);
+		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+		P_SCHEDSTAT(se.statistics.nr_wakeups);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
 
		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
@@@ -935,7 -931,7 +936,7 @@@
		__PN(avg_atom);
		__PN(avg_per_cpu);
	}
-#endif
+
	__P(nr_switches);
	SEQ_printf(m, "%-45s:%21Ld\n", "nr_voluntary_switches", (long long)p->nvcsw);
@@@ -952,10 -948,8 +953,10 @@@
 #endif
	P(policy);
	P(prio);
+#undef PN_SCHEDSTAT
 #undef PN
 #undef __PN
+#undef P_SCHEDSTAT
 #undef P
 #undef __P
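
For reference, the caller convention this series converges on (visible above in the updated
proc_cgroup_show(), proc_cpuset_show() and cgroup_release_agent()) is the usual strlcpy() one:
cgroup_path(), cgroup_path_ns(), task_cgroup_path() and kernfs_path_from_node() now return the
length of the full path (or -errno on failure) and leave a nul-terminated, possibly truncated
string in the buffer, so truncation is detected by comparing the return value against the buffer
size.  Below is a minimal sketch of such a caller; show_cgrp_path() and its seq_file context are
hypothetical and only illustrate the return-value handling, they are not part of this series.

	/* Hypothetical caller following the strlcpy-style convention above. */
	static int show_cgrp_path(struct seq_file *m, struct cgroup *cgrp)
	{
		char *buf;
		int ret;

		buf = kmalloc(PATH_MAX, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		ret = cgroup_path(cgrp, buf, PATH_MAX);	/* full length or -errno */
		if (ret >= PATH_MAX)			/* full path didn't fit */
			ret = -ENAMETOOLONG;
		if (ret < 0)
			goto out_free;

		seq_puts(m, buf);			/* buf is nul-terminated */
		ret = 0;
	out_free:
		kfree(buf);
		return ret;
	}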