/* net/ipv4/tcp_memcontrol.c */
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

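/**
 * tcp_init_cgroup - set up TCP memory accounting for a memory cgroup
 * @memcg: the memory cgroup being set up
 * @ss: the cgroup subsystem (unused here)
 *
 * Seeds the per-memcg thresholds from the global sysctl_tcp_mem and links
 * the group's page counter under its parent's, so that charges propagate
 * up the hierarchy. The root cgroup has no cg_proto and keeps using the
 * plain network-stack counters.
 */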
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
        /*
         * The root cgroup does not use page_counters, but rather
         * relies on the data already collected by the network
         * subsystem.
         */
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
        struct page_counter *counter_parent = NULL;
        struct cg_proto *cg_proto, *parent_cg;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;

        /* Start from the global thresholds: min, pressure, max. */
        cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
        cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
        cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
        cg_proto->memory_pressure = 0;
        cg_proto->memcg = memcg;

        /* Chain the counter under the parent's so charges propagate up. */
        parent_cg = tcp_prot.proto_cgroup(parent);
        if (parent_cg)
                counter_parent = &parent_cg->memory_allocated;

        page_counter_init(&cg_proto->memory_allocated, counter_parent);

        /* Propagate a counter allocation failure instead of ignoring it. */
        return percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
}
EXPORT_SYMBOL(tcp_init_cgroup);

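/**
 * tcp_destroy_cgroup - tear down TCP memory accounting for a memory cgroup
 * @memcg: the memory cgroup being destroyed
 *
 * Releases the per-cpu socket counter and, if a limit was ever set on this
 * group, drops the reference taken on the accounting static key.
 */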
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        percpu_counter_destroy(&cg_proto->sockets_allocated);

        /* Undo the static-key reference taken when a limit was first set. */
        if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
                static_key_slow_dec(&memcg_socket_limit_enabled);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

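/*
 * Apply a new limit, in pages, to a group's TCP memory consumption. Besides
 * updating the page counter, this clamps the group's three thresholds to the
 * limit and, the first time a finite limit is set, enables the jump label
 * that patches the socket accounting code in.
 */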
static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
        struct cg_proto *cg_proto;
        int i;
        int ret;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return -EINVAL;

        ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
        if (ret)
                return ret;

        /* Clamp all three thresholds (min/pressure/max) to the new limit. */
        for (i = 0; i < 3; i++)
                cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
                                                sysctl_tcp_mem[i]);

        if (nr_pages == PAGE_COUNTER_MAX)
                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        else {
                /*
                 * The active bit needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
                 * function is the last one to run. See sock_update_memcg() for
                 * details, and note that we don't mark any socket as belonging
                 * to this memcg until that flag is up.
                 *
                 * We need to do this because static_keys will span multiple
                 * sites, but we can't control their order. If we mark a socket
                 * as accounted, but the accounting functions are not patched
                 * in yet, we'll lose accounting.
                 *
                 * We never race with the readers in sock_update_memcg(),
                 * because when this value changes, the code to process it is
                 * not patched in yet.
                 *
                 * The activated bit is used to guarantee that no two writers
                 * will do the update in the same memcg. Without that, we
                 * can't properly shut down the static key.
                 */
                if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
                        static_key_slow_inc(&memcg_socket_limit_enabled);
                set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
        }

        return 0;
}

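/* Resource identifiers stored in cftype->private; they mirror memcontrol. */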
enum {
        RES_USAGE,
        RES_LIMIT,
        RES_MAX_USAGE,
        RES_FAILCNT,
};

static DEFINE_MUTEX(tcp_limit_mutex);

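/*
 * Write handler for memory.kmem.tcp.limit_in_bytes. The buffer holds a
 * human-readable size ("16M", "1G", ...) or "-1" for unlimited; updates
 * are serialized by tcp_limit_mutex.
 */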
static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
                                char *buf, size_t nbytes, loff_t off)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
        unsigned long nr_pages;
        int ret = 0;

        buf = strstrip(buf);

        switch (of_cft(of)->private) {
        case RES_LIMIT:
                /* "-1" means no limit, i.e. PAGE_COUNTER_MAX; see memcontrol.c */
                ret = page_counter_memparse(buf, "-1", &nr_pages);
                if (ret)
                        break;
                mutex_lock(&tcp_limit_mutex);
                ret = tcp_update_limit(memcg, nr_pages);
                mutex_unlock(&tcp_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

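/*
 * .read_u64 handler shared by all four files. Limit, usage and watermark
 * are kept in pages internally and reported in bytes; failcnt is a plain
 * event count.
 */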
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
        u64 val;

        switch (cft->private) {
        case RES_LIMIT:
                if (!cg_proto)
                        return PAGE_COUNTER_MAX;
                val = cg_proto->memory_allocated.limit;
                val *= PAGE_SIZE;
                break;
        case RES_USAGE:
                /* The root cgroup reports the global protocol counter. */
                if (!cg_proto)
                        val = atomic_long_read(&tcp_memory_allocated);
                else
                        val = page_counter_read(&cg_proto->memory_allocated);
                val *= PAGE_SIZE;
                break;
        case RES_FAILCNT:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated.failcnt;
                break;
        case RES_MAX_USAGE:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated.watermark;
                val *= PAGE_SIZE;
                break;
        default:
                BUG();
        }
        return val;
}

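/*
 * Write handler for the max_usage and failcnt files: any write resets the
 * corresponding statistic, and the written value itself is ignored.
 */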
static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
                                char *buf, size_t nbytes, loff_t off)
{
        struct mem_cgroup *memcg;
        struct cg_proto *cg_proto;

        memcg = mem_cgroup_from_css(of_css(of));
        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return nbytes;

        switch (of_cft(of)->private) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(&cg_proto->memory_allocated);
                break;
        case RES_FAILCNT:
                cg_proto->memory_allocated.failcnt = 0;
                break;
        }

        return nbytes;
}

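/*
 * These files are registered with the legacy memory controller, so they
 * show up with a "memory." prefix. A sketch of typical usage from the
 * shell, assuming a standard cgroup v1 mount and a hypothetical group
 * named "mygrp":
 *
 *   echo 16M > /sys/fs/cgroup/memory/mygrp/memory.kmem.tcp.limit_in_bytes
 *   cat /sys/fs/cgroup/memory/mygrp/memory.kmem.tcp.usage_in_bytes
 *   echo 0 > /sys/fs/cgroup/memory/mygrp/memory.kmem.tcp.max_usage_in_bytes
 */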
static struct cftype tcp_files[] = {
        {
                .name = "kmem.tcp.limit_in_bytes",
                .write = tcp_cgroup_write,
                .read_u64 = tcp_cgroup_read,
                .private = RES_LIMIT,
        },
        {
                .name = "kmem.tcp.usage_in_bytes",
                .read_u64 = tcp_cgroup_read,
                .private = RES_USAGE,
        },
        {
                .name = "kmem.tcp.failcnt",
                .private = RES_FAILCNT,
                .write = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        {
                .name = "kmem.tcp.max_usage_in_bytes",
                .private = RES_MAX_USAGE,
                .write = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        { }     /* terminate */
};

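/*
 * Register the kmem.tcp.* files with the legacy (cgroup v1) hierarchy of
 * the memory controller at boot.
 */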
static int __init tcp_memcontrol_init(void)
{
        WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
        return 0;
}
__initcall(tcp_memcontrol_init);