tcp buffer limitation: per-cgroup limit
[cascardo/linux.git] / net / ipv4 / tcp_memcontrol.c
1 #include <net/tcp.h>
2 #include <net/tcp_memcontrol.h>
3 #include <net/sock.h>
4 #include <net/ip.h>
5 #include <linux/nsproxy.h>
6 #include <linux/memcontrol.h>
7 #include <linux/module.h>
8
9 static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft);
10 static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
11                             const char *buffer);
12
13 static struct cftype tcp_files[] = {
14         {
15                 .name = "kmem.tcp.limit_in_bytes",
16                 .write_string = tcp_cgroup_write,
17                 .read_u64 = tcp_cgroup_read,
18                 .private = RES_LIMIT,
19         },
20 };
21
22 static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
23 {
24         return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
25 }
26
27 static void memcg_tcp_enter_memory_pressure(struct sock *sk)
28 {
29         if (!sk->sk_cgrp->memory_pressure)
30                 *sk->sk_cgrp->memory_pressure = 1;
31 }
32 EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
33
34 int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
35 {
36         /*
37          * The root cgroup does not use res_counters, but rather,
38          * rely on the data already collected by the network
39          * subsystem
40          */
41         struct res_counter *res_parent = NULL;
42         struct cg_proto *cg_proto, *parent_cg;
43         struct tcp_memcontrol *tcp;
44         struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
45         struct mem_cgroup *parent = parent_mem_cgroup(memcg);
46         struct net *net = current->nsproxy->net_ns;
47
48         cg_proto = tcp_prot.proto_cgroup(memcg);
49         if (!cg_proto)
50                 goto create_files;
51
52         tcp = tcp_from_cgproto(cg_proto);
53
54         tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
55         tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
56         tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
57         tcp->tcp_memory_pressure = 0;
58
59         parent_cg = tcp_prot.proto_cgroup(parent);
60         if (parent_cg)
61                 res_parent = parent_cg->memory_allocated;
62
63         res_counter_init(&tcp->tcp_memory_allocated, res_parent);
64         percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
65
66         cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
67         cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
68         cg_proto->sysctl_mem = tcp->tcp_prot_mem;
69         cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
70         cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
71         cg_proto->memcg = memcg;
72
73 create_files:
74         return cgroup_add_files(cgrp, ss, tcp_files,
75                                 ARRAY_SIZE(tcp_files));
76 }
77 EXPORT_SYMBOL(tcp_init_cgroup);
78
79 void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
80 {
81         struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
82         struct cg_proto *cg_proto;
83         struct tcp_memcontrol *tcp;
84         u64 val;
85
86         cg_proto = tcp_prot.proto_cgroup(memcg);
87         if (!cg_proto)
88                 return;
89
90         tcp = tcp_from_cgproto(cg_proto);
91         percpu_counter_destroy(&tcp->tcp_sockets_allocated);
92
93         val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
94
95         if (val != RESOURCE_MAX)
96                 jump_label_dec(&memcg_socket_limit_enabled);
97 }
98 EXPORT_SYMBOL(tcp_destroy_cgroup);
99
100 static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
101 {
102         struct net *net = current->nsproxy->net_ns;
103         struct tcp_memcontrol *tcp;
104         struct cg_proto *cg_proto;
105         u64 old_lim;
106         int i;
107         int ret;
108
109         cg_proto = tcp_prot.proto_cgroup(memcg);
110         if (!cg_proto)
111                 return -EINVAL;
112
113         if (val > RESOURCE_MAX)
114                 val = RESOURCE_MAX;
115
116         tcp = tcp_from_cgproto(cg_proto);
117
118         old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
119         ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
120         if (ret)
121                 return ret;
122
123         for (i = 0; i < 3; i++)
124                 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
125                                              net->ipv4.sysctl_tcp_mem[i]);
126
127         if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
128                 jump_label_dec(&memcg_socket_limit_enabled);
129         else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
130                 jump_label_inc(&memcg_socket_limit_enabled);
131
132         return 0;
133 }
134
135 static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
136                             const char *buffer)
137 {
138         struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
139         unsigned long long val;
140         int ret = 0;
141
142         switch (cft->private) {
143         case RES_LIMIT:
144                 /* see memcontrol.c */
145                 ret = res_counter_memparse_write_strategy(buffer, &val);
146                 if (ret)
147                         break;
148                 ret = tcp_update_limit(memcg, val);
149                 break;
150         default:
151                 ret = -EINVAL;
152                 break;
153         }
154         return ret;
155 }
156
157 static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
158 {
159         struct tcp_memcontrol *tcp;
160         struct cg_proto *cg_proto;
161
162         cg_proto = tcp_prot.proto_cgroup(memcg);
163         if (!cg_proto)
164                 return default_val;
165
166         tcp = tcp_from_cgproto(cg_proto);
167         return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
168 }
169
170 static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
171 {
172         struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
173         u64 val;
174
175         switch (cft->private) {
176         case RES_LIMIT:
177                 val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
178                 break;
179         default:
180                 BUG();
181         }
182         return val;
183 }
184
185 unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
186 {
187         struct tcp_memcontrol *tcp;
188         struct cg_proto *cg_proto;
189
190         cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
191         if (!cg_proto)
192                 return 0;
193
194         tcp = tcp_from_cgproto(cg_proto);
195         return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
196 }
197
198 void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
199 {
200         struct tcp_memcontrol *tcp;
201         struct cg_proto *cg_proto;
202
203         cg_proto = tcp_prot.proto_cgroup(memcg);
204         if (!cg_proto)
205                 return;
206
207         tcp = tcp_from_cgproto(cg_proto);
208
209         tcp->tcp_prot_mem[idx] = val;
210 }