#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include "internal.h"
+#include <net/sock.h>
+#include <net/tcp_memcontrol.h>
#include <asm/uaccess.h>
* the counter to account for mem+swap usage.
*/
struct res_counter memsw;
+ /*
+ * the counter to account for kmem usage.
+ */
+ struct res_counter kmem;
/*
* Per cgroup active and inactive list, similar to the
* per zone LRU lists.
* mem_cgroup ? And what type of charges should we move ?
*/
unsigned long move_charge_at_immigrate;
+ /*
+ * Should kernel memory limits be established independently
+ * of user memory?
+ */
+ int kmem_independent_accounting;
/*
* percpu counter.
*/
*/
struct mem_cgroup_stat_cpu nocpu_base;
spinlock_t pcp_counter_lock;
+
+#ifdef CONFIG_INET
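+ /*
+ * Per-memcg TCP memory pressure bookkeeping; struct tcp_memcontrol
+ * comes from the <net/tcp_memcontrol.h> header included above.
+ */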
+ struct tcp_memcontrol tcp_mem;
+#endif
};
/* Stuffs for move charges at task migration. */
};
/* for encoding cft->private value on file */
-#define _MEM (0)
-#define _MEMSWAP (1)
-#define _OOM_TYPE (2)
+
+enum mem_type {
+ _MEM = 0,
+ _MEMSWAP,
+ _OOM_TYPE,
+ _KMEM,
+};
+
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
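+
+/*
+ * Worked example: MEMFILE_PRIVATE(_KMEM, RES_USAGE) packs _KMEM (3, from
+ * the enum above) into the upper 16 bits and RES_USAGE into the lower 16,
+ * which MEMFILE_TYPE() and MEMFILE_ATTR() recover in mem_cgroup_read().
+ */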
static void mem_cgroup_get(struct mem_cgroup *memcg);
static void mem_cgroup_put(struct mem_cgroup *memcg);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
+/* Writing them here to avoid exposing memcg's inner layout */
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_INET
+#include <net/ip.h>
+
+static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
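+
+/*
+ * Called once when a socket is created: if socket accounting is enabled
+ * (the static branch is on), pin the creating task's memcg with
+ * mem_cgroup_get() and cache the matching cg_proto in sk->sk_cgrp.
+ * The reference is dropped by sock_release_memcg() at destruction time.
+ */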
+void sock_update_memcg(struct sock *sk)
+{
+ /* A socket spends its whole life in the same cgroup */
+ if (sk->sk_cgrp) {
+ WARN_ON(1);
+ return;
+ }
+ if (static_branch(&memcg_socket_limit_enabled)) {
+ struct mem_cgroup *memcg;
+
+ BUG_ON(!sk->sk_prot->proto_cgroup);
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ if (!mem_cgroup_is_root(memcg)) {
+ mem_cgroup_get(memcg);
+ sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
+ }
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
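+/*
+ * Destruction-side counterpart of sock_update_memcg(): drop the memcg
+ * reference taken when sk->sk_cgrp was set up.
+ */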
+void sock_release_memcg(struct sock *sk)
+{
+ if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) {
+ struct mem_cgroup *memcg;
+ WARN_ON(!sk->sk_cgrp->memcg);
+ memcg = sk->sk_cgrp->memcg;
+ mem_cgroup_put(memcg);
+ }
+}
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
+{
+ if (!memcg || mem_cgroup_is_root(memcg))
+ return NULL;
+
+ return &memcg->tcp_mem.cg_proto;
+}
+EXPORT_SYMBOL(tcp_proto_cgroup);
+#endif /* CONFIG_INET */
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
static void drain_all_stock_async(struct mem_cgroup *memcg);
static struct mem_cgroup_per_zone *
preempt_enable();
}
-static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
mem_cgroup_subsys_id), struct mem_cgroup,
u64 val;
if (!mem_cgroup_is_root(memcg)) {
+ val = 0;
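+ /*
+ * Unless kmem is accounted for independently, fold its usage into
+ * the memory (or mem+swap) total reported below.
+ */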
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ if (!memcg->kmem_independent_accounting)
+ val = res_counter_read_u64(&memcg->kmem, RES_USAGE);
+#endif
if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
+ val += res_counter_read_u64(&memcg->res, RES_USAGE);
else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ val += res_counter_read_u64(&memcg->memsw, RES_USAGE);
+
+ return val;
}
val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
else
val = res_counter_read_u64(&memcg->memsw, name);
break;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ case _KMEM:
+ val = res_counter_read_u64(&memcg->kmem, name);
+ break;
+#endif
default:
BUG();
break;
}
#endif /* CONFIG_NUMA */
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+static u64 kmem_limit_independent_read(struct cgroup *cgroup, struct cftype *cft)
+{
+ return mem_cgroup_from_cont(cgroup)->kmem_independent_accounting;
+}
+
+static int kmem_limit_independent_write(struct cgroup *cgroup, struct cftype *cft,
+ u64 val)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
+ struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+
+ val = !!val;
+
+ /*
+ * This follows the same hierarchy restrictions as
+ * mem_cgroup_hierarchy_write().
+ */
+ if (!parent || !parent->use_hierarchy) {
+ if (list_empty(&cgroup->children))
+ memcg->kmem_independent_accounting = val;
+ else
+ return -EBUSY;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct cftype kmem_cgroup_files[] = {
+ {
+ .name = "independent_kmem_limit",
+ .read_u64 = kmem_limit_independent_read,
+ .write_u64 = kmem_limit_independent_write,
+ },
+ {
+ .name = "kmem.usage_in_bytes",
+ .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
+ .read_u64 = mem_cgroup_read,
+ },
+ {
+ .name = "kmem.limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+ .read_u64 = mem_cgroup_read,
+ },
+};
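+
+/*
+ * These entries appear in each memory cgroup directory as
+ * memory.independent_kmem_limit, memory.kmem.usage_in_bytes and
+ * memory.kmem.limit_in_bytes (the "memory." prefix is added by the
+ * cgroup core; a typical mount point is /sys/fs/cgroup/memory).
+ */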
+
+static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+ int ret = 0;
+
+ ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
+ ARRAY_SIZE(kmem_cgroup_files));
+
+ /*
+ * Part of this would be better living in a separate allocation
+ * function, leaving us with just the cgroup tree population work.
+ * We, however, depend on state such as network's proto_list that
+ * is only initialized after cgroup creation. The least cumbersome
+ * way to deal with this is to defer it all to populate time.
+ */
+ if (!ret)
+ ret = mem_cgroup_sockets_init(cont, ss);
+ return ret;
+}
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+ mem_cgroup_sockets_destroy(cont, ss);
+}
+#else
+static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+ return 0;
+}
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+}
+#endif
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
/*
* Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
*/
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
if (!memcg->res.parent)
return NULL;
return mem_cgroup_from_res_counter(memcg->res.parent, res);
}
+EXPORT_SYMBOL(parent_mem_cgroup);
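+/*
+ * parent_mem_cgroup() is non-static and exported so that code outside
+ * mm/, such as the tcp memory accounting above, can walk up the
+ * hierarchy without knowing memcg's inner layout.
+ */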
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
static void __init enable_swap_cgroup(void)
if (parent && parent->use_hierarchy) {
res_counter_init(&memcg->res, &parent->res);
res_counter_init(&memcg->memsw, &parent->memsw);
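+ /*
+ * Chain the new kmem counter to the parent's, as is done for res
+ * and memsw, so hierarchical limits cover kmem too.
+ */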
+ res_counter_init(&memcg->kmem, &parent->kmem);
/*
* We increment refcnt of the parent to ensure that we can
* safely access it on res_counter_charge/uncharge.
} else {
res_counter_init(&memcg->res, NULL);
res_counter_init(&memcg->memsw, NULL);
+ res_counter_init(&memcg->kmem, NULL);
}
memcg->last_scanned_child = 0;
memcg->last_scanned_node = MAX_NUMNODES;
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ kmem_cgroup_destroy(ss, cont);
+
mem_cgroup_put(memcg);
}
if (!ret)
ret = register_memsw_files(cont, ss);
+
+ if (!ret)
+ ret = register_kmem_files(cont, ss);
+
return ret;
}