mlx4_core: Add "native" argument to mlx4_cmd and its callers (where needed)
[cascardo/linux.git] / mm / memcontrol.c
index 7af1d5e..7266202 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
 #include <linux/limits.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/slab.h>
@@ -49,6 +50,8 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include "internal.h"
+#include <net/sock.h>
+#include <net/tcp_memcontrol.h>
 
 #include <asm/uaccess.h>
 
@@ -225,6 +228,10 @@ struct mem_cgroup {
         * the counter to account for mem+swap usage.
         */
        struct res_counter memsw;
+       /*
+        * the counter to account for kmem usage.
+        */
+       struct res_counter kmem;
        /*
         * Per cgroup active and inactive list, similar to the
         * per zone LRU lists.
@@ -275,6 +282,11 @@ struct mem_cgroup {
         * mem_cgroup ? And what type of charges should we move ?
         */
        unsigned long   move_charge_at_immigrate;
+       /*
+        * Should kernel memory limits be established independently
+        * of user memory ?
+        */
+       int             kmem_independent_accounting;
        /*
         * percpu counter.
         */
@@ -285,6 +297,10 @@ struct mem_cgroup {
         */
        struct mem_cgroup_stat_cpu nocpu_base;
        spinlock_t pcp_counter_lock;
+
+#ifdef CONFIG_INET
+       struct tcp_memcontrol tcp_mem;
+#endif
 };
 
 /* Stuffs for move charges at task migration. */
@@ -343,9 +359,14 @@ enum charge_type {
 };
 
 /* for encoding cft->private value on file */
-#define _MEM                   (0)
-#define _MEMSWAP               (1)
-#define _OOM_TYPE              (2)
+
+enum mem_type {
+       _MEM = 0,
+       _MEMSWAP,
+       _OOM_TYPE,
+       _KMEM,
+};
+
 #define MEMFILE_PRIVATE(x, val)        (((x) << 16) | (val))
 #define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
 #define MEMFILE_ATTR(val)      ((val) & 0xffff)
@@ -364,7 +385,58 @@ enum charge_type {
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
+/* Writing them here to avoid exposing memcg's inner layout */
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_INET
+#include <net/sock.h>
+#include <net/ip.h>
+
+static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
+void sock_update_memcg(struct sock *sk)
+{
+       /* A socket spends its whole life in the same cgroup */
+       if (sk->sk_cgrp) {
+               WARN_ON(1);
+               return;
+       }
+       if (static_branch(&memcg_socket_limit_enabled)) {
+               struct mem_cgroup *memcg;
+
+               BUG_ON(!sk->sk_prot->proto_cgroup);
+
+               rcu_read_lock();
+               memcg = mem_cgroup_from_task(current);
+               if (!mem_cgroup_is_root(memcg)) {
+                       mem_cgroup_get(memcg);
+                       sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
+               }
+               rcu_read_unlock();
+       }
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
+void sock_release_memcg(struct sock *sk)
+{
+       if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) {
+               struct mem_cgroup *memcg;
+               WARN_ON(!sk->sk_cgrp->memcg);
+               memcg = sk->sk_cgrp->memcg;
+               mem_cgroup_put(memcg);
+       }
+}
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
+{
+       if (!memcg || mem_cgroup_is_root(memcg))
+               return NULL;
+
+       return &memcg->tcp_mem.cg_proto;
+}
+EXPORT_SYMBOL(tcp_proto_cgroup);
+#endif /* CONFIG_INET */
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -744,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
        preempt_enable();
 }
 
-static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
        return container_of(cgroup_subsys_state(cont,
                                mem_cgroup_subsys_id), struct mem_cgroup,
@@ -3847,10 +3919,17 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
        u64 val;
 
        if (!mem_cgroup_is_root(memcg)) {
+               val = 0;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+               if (!memcg->kmem_independent_accounting)
+                       val = res_counter_read_u64(&memcg->kmem, RES_USAGE);
+#endif
                if (!swap)
-                       return res_counter_read_u64(&memcg->res, RES_USAGE);
+                       val += res_counter_read_u64(&memcg->res, RES_USAGE);
                else
-                       return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+                       val += res_counter_read_u64(&memcg->memsw, RES_USAGE);
+
+               return val;
        }
 
        val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
@@ -3883,6 +3962,11 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
                else
                        val = res_counter_read_u64(&memcg->memsw, name);
                break;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+       case _KMEM:
+               val = res_counter_read_u64(&memcg->kmem, name);
+               break;
+#endif
        default:
                BUG();
                break;
@@ -4611,6 +4695,89 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
 }
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+static u64 kmem_limit_independent_read(struct cgroup *cgroup, struct cftype *cft)
+{
+       return mem_cgroup_from_cont(cgroup)->kmem_independent_accounting;
+}
+
+static int kmem_limit_independent_write(struct cgroup *cgroup, struct cftype *cft,
+                                       u64 val)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
+       struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+
+       val = !!val;
+
+       /*
+        * This follows the same hierarchy restrictions as
+        * mem_cgroup_hierarchy_write()
+        */
+       if (!parent || !parent->use_hierarchy) {
+               if (list_empty(&cgroup->children))
+                       memcg->kmem_independent_accounting = val;
+               else
+                       return -EBUSY;
+       }
+       else
+               return -EINVAL;
+
+       return 0;
+}
+static struct cftype kmem_cgroup_files[] = {
+       {
+               .name = "independent_kmem_limit",
+               .read_u64 = kmem_limit_independent_read,
+               .write_u64 = kmem_limit_independent_write,
+       },
+       {
+               .name = "kmem.usage_in_bytes",
+               .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
+               .read_u64 = mem_cgroup_read,
+       },
+       {
+               .name = "kmem.limit_in_bytes",
+               .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+               .read_u64 = mem_cgroup_read,
+       },
+};
+
+static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+       int ret = 0;
+
+       ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
+                              ARRAY_SIZE(kmem_cgroup_files));
+
+       /*
+        * Part of this would be better living in a separate allocation
+        * function, leaving us with just the cgroup tree population work.
+        * We, however, depend on state such as network's proto_list that
+        * is only initialized after cgroup creation. I found the least
+        * cumbersome way to deal with it is to defer it all to populate time.
+        */
+       if (!ret)
+               ret = mem_cgroup_sockets_init(cont, ss);
+       return ret;
+};
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+                               struct cgroup *cont)
+{
+       mem_cgroup_sockets_destroy(cont, ss);
+}
+#else
+static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
+{
+       return 0;
+}
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+                               struct cgroup *cont)
+{
+}
+#endif
+
 static struct cftype mem_cgroup_files[] = {
        {
                .name = "usage_in_bytes",
@@ -4842,12 +5009,13 @@ static void mem_cgroup_put(struct mem_cgroup *memcg)
 /*
  * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
  */
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
        if (!memcg->res.parent)
                return NULL;
        return mem_cgroup_from_res_counter(memcg->res.parent, res);
 }
+EXPORT_SYMBOL(parent_mem_cgroup);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 static void __init enable_swap_cgroup(void)
@@ -4924,6 +5092,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        if (parent && parent->use_hierarchy) {
                res_counter_init(&memcg->res, &parent->res);
                res_counter_init(&memcg->memsw, &parent->memsw);
+               res_counter_init(&memcg->kmem, &parent->kmem);
                /*
                 * We increment refcnt of the parent to ensure that we can
                 * safely access it on res_counter_charge/uncharge.
@@ -4934,6 +5103,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        } else {
                res_counter_init(&memcg->res, NULL);
                res_counter_init(&memcg->memsw, NULL);
+               res_counter_init(&memcg->kmem, NULL);
        }
        memcg->last_scanned_child = 0;
        memcg->last_scanned_node = MAX_NUMNODES;
@@ -4964,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+       kmem_cgroup_destroy(ss, cont);
+
        mem_cgroup_put(memcg);
 }
 
@@ -4977,6 +5149,10 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
 
        if (!ret)
                ret = register_memsw_files(cont, ss);
+
+       if (!ret)
+               ret = register_kmem_files(cont, ss);
+
        return ret;
 }