/* net/ipv4/tcp_memcontrol.c */
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

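/*
 * Set up TCP memory accounting state for a new memory cgroup:
 * initialize its page counter and chain it to the parent group's
 * counter, so that charges propagate up the hierarchy.
 */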
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	/*
	 * The root cgroup does not use page_counters, but rather
	 * relies on the data already collected by the network
	 * subsystem.
	 */
	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
	struct page_counter *counter_parent = NULL;
	struct cg_proto *cg_proto, *parent_cg;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;

	cg_proto->memory_pressure = 0;
	cg_proto->memcg = memcg;

	parent_cg = tcp_prot.proto_cgroup(parent);
	if (parent_cg)
		counter_parent = &parent_cg->memory_allocated;

	page_counter_init(&cg_proto->memory_allocated, counter_parent);

	return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

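/*
 * Tear down TCP accounting for a dying memory cgroup. The page counter
 * is embedded in cg_proto, so the only cleanup needed is dropping the
 * static key reference taken when a limit was first set.
 */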
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return;

	if (cg_proto->active)
		static_key_slow_dec(&memcg_socket_limit_enabled);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

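/*
 * Apply a new limit to the group's page counter and, on the first
 * successful limit write, enable the jump label that patches the
 * socket accounting paths in (see the comment below).
 */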
static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
	struct cg_proto *cg_proto;
	int ret;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return -EINVAL;

	ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
	if (ret)
		return ret;

	if (!cg_proto->active) {
		/*
		 * The active flag needs to be written after the static_key
		 * update. This is what guarantees that the socket activation
		 * function is the last one to run. See sock_update_memcg() for
		 * details, and note that we don't mark any socket as belonging
		 * to this memcg until that flag is up.
		 *
		 * We need to do this, because static_keys will span multiple
		 * sites, but we can't control their order. If we mark a socket
		 * as accounted, but the accounting functions are not patched in
		 * yet, we'll lose accounting.
		 *
		 * We never race with the readers in sock_update_memcg(),
		 * because when this value changes, the code to process it is
		 * not patched in yet.
		 */
		static_key_slow_inc(&memcg_socket_limit_enabled);
		cg_proto->active = true;
	}

	return 0;
}

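/* Keys stored in cftype->private to select which resource field to act on. */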
enum {
	RES_USAGE,
	RES_LIMIT,
	RES_MAX_USAGE,
	RES_FAILCNT,
};

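/* Serializes concurrent limit writes in tcp_update_limit(). */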
static DEFINE_MUTEX(tcp_limit_mutex);

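/*
 * Handler for writes to memory.kmem.tcp.limit_in_bytes. The buffer is
 * parsed as a byte count (with an optional size suffix); the string
 * "-1" means "no limit".
 */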
static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	unsigned long nr_pages;
	int ret = 0;

	buf = strstrip(buf);

	switch (of_cft(of)->private) {
	case RES_LIMIT:
		/* see memcontrol.c */
		ret = page_counter_memparse(buf, "-1", &nr_pages);
		if (ret)
			break;
		mutex_lock(&tcp_limit_mutex);
		ret = tcp_update_limit(memcg, nr_pages);
		mutex_unlock(&tcp_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

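/*
 * Read handler shared by all four files. Counter values are kept in
 * pages and converted to bytes for userspace; groups without a
 * cg_proto (the root) report the global TCP usage and no limit.
 */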
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
	u64 val;

	switch (cft->private) {
	case RES_LIMIT:
		if (!cg_proto)
			return PAGE_COUNTER_MAX;
		val = cg_proto->memory_allocated.limit;
		val *= PAGE_SIZE;
		break;
	case RES_USAGE:
		if (!cg_proto)
			val = atomic_long_read(&tcp_memory_allocated);
		else
			val = page_counter_read(&cg_proto->memory_allocated);
		val *= PAGE_SIZE;
		break;
	case RES_FAILCNT:
		if (!cg_proto)
			return 0;
		val = cg_proto->memory_allocated.failcnt;
		break;
	case RES_MAX_USAGE:
		if (!cg_proto)
			return 0;
		val = cg_proto->memory_allocated.watermark;
		val *= PAGE_SIZE;
		break;
	default:
		BUG();
	}
	return val;
}

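/*
 * Writing to max_usage_in_bytes or failcnt (any value) resets the
 * high-water mark or the failure counter, respectively.
 */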
static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg;
	struct cg_proto *cg_proto;

	memcg = mem_cgroup_from_css(of_css(of));
	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return nbytes;

	switch (of_cft(of)->private) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(&cg_proto->memory_allocated);
		break;
	case RES_FAILCNT:
		cg_proto->memory_allocated.failcnt = 0;
		break;
	}

	return nbytes;
}

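/*
 * Control files exposed on the legacy (v1) hierarchy of the memory
 * controller, which prefixes each name with "memory.". Example usage,
 * assuming the controller is mounted at /sys/fs/cgroup/memory and a
 * group "g0" exists (paths are illustrative):
 *
 *   echo 64M > /sys/fs/cgroup/memory/g0/memory.kmem.tcp.limit_in_bytes
 *   cat /sys/fs/cgroup/memory/g0/memory.kmem.tcp.usage_in_bytes
 *   echo 0 > /sys/fs/cgroup/memory/g0/memory.kmem.tcp.max_usage_in_bytes
 */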
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};

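/* Register the files with the memory controller during boot. */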
static int __init tcp_memcontrol_init(void)
{
	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
	return 0;
}
__initcall(tcp_memcontrol_init);