]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/netfilter/ipvs/ip_vs_ctl.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
[karo-tx-linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* Debug level; file-scope statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70 #endif
71
72
73 /*  Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net *net,
80                                      const struct in6_addr *addr)
81 {
82         struct flowi6 fl6 = {
83                 .daddr = *addr,
84         };
85         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86         bool is_local;
87
88         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
89
90         dst_release(dst);
91         return is_local;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash table: for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned int hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned int hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned int hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned int porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned int hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del_init(&dest->d_list);
543         }
544
545         return 1;
546 }
547
548 /*
549  *      Lookup real service by <proto,addr,port> in the real service table.
550  */
551 struct ip_vs_dest *
552 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
553                           const union nf_inet_addr *daddr,
554                           __be16 dport)
555 {
556         struct netns_ipvs *ipvs = net_ipvs(net);
557         unsigned int hash;
558         struct ip_vs_dest *dest;
559
560         /*
561          *      Check for "full" addressed entries
562          *      Return the first found entry
563          */
564         hash = ip_vs_rs_hashkey(af, daddr, dport);
565
566         read_lock(&ipvs->rs_lock);
567         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
568                 if ((dest->af == af)
569                     && ip_vs_addr_equal(af, &dest->addr, daddr)
570                     && (dest->port == dport)
571                     && ((dest->protocol == protocol) ||
572                         dest->vfwmark)) {
573                         /* HIT */
574                         read_unlock(&ipvs->rs_lock);
575                         return dest;
576                 }
577         }
578         read_unlock(&ipvs->rs_lock);
579
580         return NULL;
581 }
582
583 /*
584  *      Lookup destination by {addr,port} in the given service
585  */
586 static struct ip_vs_dest *
587 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
588                   __be16 dport)
589 {
590         struct ip_vs_dest *dest;
591
592         /*
593          * Find the destination for the given service
594          */
595         list_for_each_entry(dest, &svc->destinations, n_list) {
596                 if ((dest->af == svc->af)
597                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
598                     && (dest->port == dport)) {
599                         /* HIT */
600                         return dest;
601                 }
602         }
603
604         return NULL;
605 }
606
607 /*
608  * Find destination by {daddr,dport,vaddr,protocol}
609  * Cretaed to be used in ip_vs_process_message() in
610  * the backup synchronization daemon. It finds the
611  * destination to be bound to the received connection
612  * on the backup.
613  *
614  * ip_vs_lookup_real_service() looked promissing, but
615  * seems not working as expected.
616  */
617 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
618                                    const union nf_inet_addr *daddr,
619                                    __be16 dport,
620                                    const union nf_inet_addr *vaddr,
621                                    __be16 vport, __u16 protocol, __u32 fwmark,
622                                    __u32 flags)
623 {
624         struct ip_vs_dest *dest;
625         struct ip_vs_service *svc;
626         __be16 port = dport;
627
628         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
629         if (!svc)
630                 return NULL;
631         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
632                 port = 0;
633         dest = ip_vs_lookup_dest(svc, daddr, port);
634         if (!dest)
635                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
636         if (dest)
637                 atomic_inc(&dest->refcnt);
638         ip_vs_service_put(svc);
639         return dest;
640 }
641
642 /*
643  *  Lookup dest by {svc,addr,port} in the destination trash.
644  *  The destination trash is used to hold the destinations that are removed
645  *  from the service table but are still referenced by some conn entries.
646  *  The reason to add the destination trash is when the dest is temporary
647  *  down (either by administrator or by monitor program), the dest can be
648  *  picked back from the trash, the remaining connections to the dest can
649  *  continue, and the counting information of the dest is also useful for
650  *  scheduling.
651  */
652 static struct ip_vs_dest *
653 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
654                      __be16 dport)
655 {
656         struct ip_vs_dest *dest, *nxt;
657         struct netns_ipvs *ipvs = net_ipvs(svc->net);
658
659         /*
660          * Find the destination in trash
661          */
662         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
663                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
664                               "dest->refcnt=%d\n",
665                               dest->vfwmark,
666                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
667                               ntohs(dest->port),
668                               atomic_read(&dest->refcnt));
669                 if (dest->af == svc->af &&
670                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
671                     dest->port == dport &&
672                     dest->vfwmark == svc->fwmark &&
673                     dest->protocol == svc->protocol &&
674                     (svc->fwmark ||
675                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
676                       dest->vport == svc->port))) {
677                         /* HIT */
678                         return dest;
679                 }
680
681                 /*
682                  * Try to purge the destination from trash if not referenced
683                  */
684                 if (atomic_read(&dest->refcnt) == 1) {
685                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
686                                       "from trash\n",
687                                       dest->vfwmark,
688                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
689                                       ntohs(dest->port));
690                         list_del(&dest->n_list);
691                         ip_vs_dst_reset(dest);
692                         __ip_vs_unbind_svc(dest);
693                         free_percpu(dest->stats.cpustats);
694                         kfree(dest);
695                 }
696         }
697
698         return NULL;
699 }
700
701
702 /*
703  *  Clean up all the destinations in the trash
704  *  Called by the ip_vs_control_cleanup()
705  *
706  *  When the ip_vs_control_clearup is activated by ipvs module exit,
707  *  the service tables must have been flushed and all the connections
708  *  are expired, and the refcnt of each destination in the trash must
709  *  be 1, so we simply release them here.
710  */
711 static void ip_vs_trash_cleanup(struct net *net)
712 {
713         struct ip_vs_dest *dest, *nxt;
714         struct netns_ipvs *ipvs = net_ipvs(net);
715
716         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
717                 list_del(&dest->n_list);
718                 ip_vs_dst_reset(dest);
719                 __ip_vs_unbind_svc(dest);
720                 free_percpu(dest->stats.cpustats);
721                 kfree(dest);
722         }
723 }
724
725 static void
726 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
727 {
728 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
729
730         spin_lock_bh(&src->lock);
731
732         IP_VS_SHOW_STATS_COUNTER(conns);
733         IP_VS_SHOW_STATS_COUNTER(inpkts);
734         IP_VS_SHOW_STATS_COUNTER(outpkts);
735         IP_VS_SHOW_STATS_COUNTER(inbytes);
736         IP_VS_SHOW_STATS_COUNTER(outbytes);
737
738         ip_vs_read_estimator(dst, src);
739
740         spin_unlock_bh(&src->lock);
741 }
742
743 static void
744 ip_vs_zero_stats(struct ip_vs_stats *stats)
745 {
746         spin_lock_bh(&stats->lock);
747
748         /* get current counters as zero point, rates are zeroed */
749
750 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
751
752         IP_VS_ZERO_STATS_COUNTER(conns);
753         IP_VS_ZERO_STATS_COUNTER(inpkts);
754         IP_VS_ZERO_STATS_COUNTER(outpkts);
755         IP_VS_ZERO_STATS_COUNTER(inbytes);
756         IP_VS_ZERO_STATS_COUNTER(outbytes);
757
758         ip_vs_zero_estimator(stats);
759
760         spin_unlock_bh(&stats->lock);
761 }
762
763 /*
764  *      Update a destination in the given service
765  */
766 static void
767 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
768                     struct ip_vs_dest_user_kern *udest, int add)
769 {
770         struct netns_ipvs *ipvs = net_ipvs(svc->net);
771         int conn_flags;
772
773         /* set the weight and the flags */
774         atomic_set(&dest->weight, udest->weight);
775         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776         conn_flags |= IP_VS_CONN_F_INACTIVE;
777
778         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781         } else {
782                 /*
783                  *    Put the real service in rs_table if not present.
784                  *    For now only for NAT!
785                  */
786                 write_lock_bh(&ipvs->rs_lock);
787                 ip_vs_rs_hash(ipvs, dest);
788                 write_unlock_bh(&ipvs->rs_lock);
789         }
790         atomic_set(&dest->conn_flags, conn_flags);
791
792         /* bind the service */
793         if (!dest->svc) {
794                 __ip_vs_bind_svc(dest, svc);
795         } else {
796                 if (dest->svc != svc) {
797                         __ip_vs_unbind_svc(dest);
798                         ip_vs_zero_stats(&dest->stats);
799                         __ip_vs_bind_svc(dest, svc);
800                 }
801         }
802
803         /* set the dest status flags */
804         dest->flags |= IP_VS_DEST_F_AVAILABLE;
805
806         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808         dest->u_threshold = udest->u_threshold;
809         dest->l_threshold = udest->l_threshold;
810
811         spin_lock_bh(&dest->dst_lock);
812         ip_vs_dst_reset(dest);
813         spin_unlock_bh(&dest->dst_lock);
814
815         if (add)
816                 ip_vs_start_estimator(svc->net, &dest->stats);
817
818         write_lock_bh(&__ip_vs_svc_lock);
819
820         /* Wait until all other svc users go away */
821         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823         if (add) {
824                 list_add(&dest->n_list, &svc->destinations);
825                 svc->num_dests++;
826         }
827
828         /* call the update_service, because server weight may be changed */
829         if (svc->scheduler->update_service)
830                 svc->scheduler->update_service(svc);
831
832         write_unlock_bh(&__ip_vs_svc_lock);
833 }
834
835
836 /*
837  *      Create a destination for the given service
838  */
839 static int
840 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841                struct ip_vs_dest **dest_p)
842 {
843         struct ip_vs_dest *dest;
844         unsigned int atype;
845
846         EnterFunction(2);
847
848 #ifdef CONFIG_IP_VS_IPV6
849         if (svc->af == AF_INET6) {
850                 atype = ipv6_addr_type(&udest->addr.in6);
851                 if ((!(atype & IPV6_ADDR_UNICAST) ||
852                         atype & IPV6_ADDR_LINKLOCAL) &&
853                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854                         return -EINVAL;
855         } else
856 #endif
857         {
858                 atype = inet_addr_type(svc->net, udest->addr.ip);
859                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860                         return -EINVAL;
861         }
862
863         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
864         if (dest == NULL)
865                 return -ENOMEM;
866
867         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
868         if (!dest->stats.cpustats)
869                 goto err_alloc;
870
871         dest->af = svc->af;
872         dest->protocol = svc->protocol;
873         dest->vaddr = svc->addr;
874         dest->vport = svc->port;
875         dest->vfwmark = svc->fwmark;
876         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877         dest->port = udest->port;
878
879         atomic_set(&dest->activeconns, 0);
880         atomic_set(&dest->inactconns, 0);
881         atomic_set(&dest->persistconns, 0);
882         atomic_set(&dest->refcnt, 1);
883
884         INIT_LIST_HEAD(&dest->d_list);
885         spin_lock_init(&dest->dst_lock);
886         spin_lock_init(&dest->stats.lock);
887         __ip_vs_update_dest(svc, dest, udest, 1);
888
889         *dest_p = dest;
890
891         LeaveFunction(2);
892         return 0;
893
894 err_alloc:
895         kfree(dest);
896         return -ENOMEM;
897 }
898
899
900 /*
901  *      Add a destination into an existing service
902  */
903 static int
904 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
905 {
906         struct ip_vs_dest *dest;
907         union nf_inet_addr daddr;
908         __be16 dport = udest->port;
909         int ret;
910
911         EnterFunction(2);
912
913         if (udest->weight < 0) {
914                 pr_err("%s(): server weight less than zero\n", __func__);
915                 return -ERANGE;
916         }
917
918         if (udest->l_threshold > udest->u_threshold) {
919                 pr_err("%s(): lower threshold is higher than upper threshold\n",
920                         __func__);
921                 return -ERANGE;
922         }
923
924         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
925
926         /*
927          * Check if the dest already exists in the list
928          */
929         dest = ip_vs_lookup_dest(svc, &daddr, dport);
930
931         if (dest != NULL) {
932                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
933                 return -EEXIST;
934         }
935
936         /*
937          * Check if the dest already exists in the trash and
938          * is from the same service
939          */
940         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
941
942         if (dest != NULL) {
943                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
944                               "dest->refcnt=%d, service %u/%s:%u\n",
945                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
946                               atomic_read(&dest->refcnt),
947                               dest->vfwmark,
948                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
949                               ntohs(dest->vport));
950
951                 /*
952                  * Get the destination from the trash
953                  */
954                 list_del(&dest->n_list);
955
956                 __ip_vs_update_dest(svc, dest, udest, 1);
957                 ret = 0;
958         } else {
959                 /*
960                  * Allocate and initialize the dest structure
961                  */
962                 ret = ip_vs_new_dest(svc, udest, &dest);
963         }
964         LeaveFunction(2);
965
966         return ret;
967 }
968
969
970 /*
971  *      Edit a destination in the given service
972  */
973 static int
974 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
975 {
976         struct ip_vs_dest *dest;
977         union nf_inet_addr daddr;
978         __be16 dport = udest->port;
979
980         EnterFunction(2);
981
982         if (udest->weight < 0) {
983                 pr_err("%s(): server weight less than zero\n", __func__);
984                 return -ERANGE;
985         }
986
987         if (udest->l_threshold > udest->u_threshold) {
988                 pr_err("%s(): lower threshold is higher than upper threshold\n",
989                         __func__);
990                 return -ERANGE;
991         }
992
993         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
994
995         /*
996          *  Lookup the destination list
997          */
998         dest = ip_vs_lookup_dest(svc, &daddr, dport);
999
1000         if (dest == NULL) {
1001                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1002                 return -ENOENT;
1003         }
1004
1005         __ip_vs_update_dest(svc, dest, udest, 0);
1006         LeaveFunction(2);
1007
1008         return 0;
1009 }
1010
1011
1012 /*
1013  *      Delete a destination (must be already unlinked from the service)
1014  */
1015 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1016 {
1017         struct netns_ipvs *ipvs = net_ipvs(net);
1018
1019         ip_vs_stop_estimator(net, &dest->stats);
1020
1021         /*
1022          *  Remove it from the d-linked list with the real services.
1023          */
1024         write_lock_bh(&ipvs->rs_lock);
1025         ip_vs_rs_unhash(dest);
1026         write_unlock_bh(&ipvs->rs_lock);
1027
1028         /*
1029          *  Decrease the refcnt of the dest, and free the dest
1030          *  if nobody refers to it (refcnt=0). Otherwise, throw
1031          *  the destination into the trash.
1032          */
1033         if (atomic_dec_and_test(&dest->refcnt)) {
1034                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035                               dest->vfwmark,
1036                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037                               ntohs(dest->port));
1038                 ip_vs_dst_reset(dest);
1039                 /* simply decrease svc->refcnt here, let the caller check
1040                    and release the service if nobody refers to it.
1041                    Only user context can release destination and service,
1042                    and only one user context can update virtual service at a
1043                    time, so the operation here is OK */
1044                 atomic_dec(&dest->svc->refcnt);
1045                 free_percpu(dest->stats.cpustats);
1046                 kfree(dest);
1047         } else {
1048                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049                               "dest->refcnt=%d\n",
1050                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051                               ntohs(dest->port),
1052                               atomic_read(&dest->refcnt));
1053                 list_add(&dest->n_list, &ipvs->dest_trash);
1054                 atomic_inc(&dest->refcnt);
1055         }
1056 }
1057
1058
1059 /*
1060  *      Unlink a destination from the given service
1061  */
1062 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1063                                 struct ip_vs_dest *dest,
1064                                 int svcupd)
1065 {
1066         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1067
1068         /*
1069          *  Remove it from the d-linked destination list.
1070          */
1071         list_del(&dest->n_list);
1072         svc->num_dests--;
1073
1074         /*
1075          *  Call the update_service function of its scheduler
1076          */
1077         if (svcupd && svc->scheduler->update_service)
1078                         svc->scheduler->update_service(svc);
1079 }
1080
1081
1082 /*
1083  *      Delete a destination server in the given service
1084  */
1085 static int
1086 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1087 {
1088         struct ip_vs_dest *dest;
1089         __be16 dport = udest->port;
1090
1091         EnterFunction(2);
1092
1093         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1094
1095         if (dest == NULL) {
1096                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097                 return -ENOENT;
1098         }
1099
1100         write_lock_bh(&__ip_vs_svc_lock);
1101
1102         /*
1103          *      Wait until all other svc users go away.
1104          */
1105         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106
1107         /*
1108          *      Unlink dest from the service
1109          */
1110         __ip_vs_unlink_dest(svc, dest, 1);
1111
1112         write_unlock_bh(&__ip_vs_svc_lock);
1113
1114         /*
1115          *      Delete the destination
1116          */
1117         __ip_vs_del_dest(svc->net, dest);
1118
1119         LeaveFunction(2);
1120
1121         return 0;
1122 }
1123
1124
1125 /*
1126  *      Add a service into the service hash table
1127  */
1128 static int
1129 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1130                   struct ip_vs_service **svc_p)
1131 {
1132         int ret = 0;
1133         struct ip_vs_scheduler *sched = NULL;
1134         struct ip_vs_pe *pe = NULL;
1135         struct ip_vs_service *svc = NULL;
1136         struct netns_ipvs *ipvs = net_ipvs(net);
1137
1138         /* increase the module use count */
1139         ip_vs_use_count_inc();
1140
1141         /* Lookup the scheduler by 'u->sched_name' */
1142         sched = ip_vs_scheduler_get(u->sched_name);
1143         if (sched == NULL) {
1144                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1145                 ret = -ENOENT;
1146                 goto out_err;
1147         }
1148
1149         if (u->pe_name && *u->pe_name) {
1150                 pe = ip_vs_pe_getbyname(u->pe_name);
1151                 if (pe == NULL) {
1152                         pr_info("persistence engine module ip_vs_pe_%s "
1153                                 "not found\n", u->pe_name);
1154                         ret = -ENOENT;
1155                         goto out_err;
1156                 }
1157         }
1158
1159 #ifdef CONFIG_IP_VS_IPV6
1160         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1161                 ret = -EINVAL;
1162                 goto out_err;
1163         }
1164 #endif
1165
1166         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1167         if (svc == NULL) {
1168                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1169                 ret = -ENOMEM;
1170                 goto out_err;
1171         }
1172         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1173         if (!svc->stats.cpustats) {
1174                 ret = -ENOMEM;
1175                 goto out_err;
1176         }
1177
1178         /* I'm the first user of the service */
1179         atomic_set(&svc->usecnt, 0);
1180         atomic_set(&svc->refcnt, 0);
1181
1182         svc->af = u->af;
1183         svc->protocol = u->protocol;
1184         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1185         svc->port = u->port;
1186         svc->fwmark = u->fwmark;
1187         svc->flags = u->flags;
1188         svc->timeout = u->timeout * HZ;
1189         svc->netmask = u->netmask;
1190         svc->net = net;
1191
1192         INIT_LIST_HEAD(&svc->destinations);
1193         rwlock_init(&svc->sched_lock);
1194         spin_lock_init(&svc->stats.lock);
1195
1196         /* Bind the scheduler */
1197         ret = ip_vs_bind_scheduler(svc, sched);
1198         if (ret)
1199                 goto out_err;
1200         sched = NULL;
1201
1202         /* Bind the ct retriever */
1203         ip_vs_bind_pe(svc, pe);
1204         pe = NULL;
1205
1206         /* Update the virtual service counters */
1207         if (svc->port == FTPPORT)
1208                 atomic_inc(&ipvs->ftpsvc_counter);
1209         else if (svc->port == 0)
1210                 atomic_inc(&ipvs->nullsvc_counter);
1211
1212         ip_vs_start_estimator(net, &svc->stats);
1213
1214         /* Count only IPv4 services for old get/setsockopt interface */
1215         if (svc->af == AF_INET)
1216                 ipvs->num_services++;
1217
1218         /* Hash the service into the service table */
1219         write_lock_bh(&__ip_vs_svc_lock);
1220         ip_vs_svc_hash(svc);
1221         write_unlock_bh(&__ip_vs_svc_lock);
1222
1223         *svc_p = svc;
1224         /* Now there is a service - full throttle */
1225         ipvs->enable = 1;
1226         return 0;
1227
1228
1229  out_err:
1230         if (svc != NULL) {
1231                 ip_vs_unbind_scheduler(svc);
1232                 if (svc->inc) {
1233                         local_bh_disable();
1234                         ip_vs_app_inc_put(svc->inc);
1235                         local_bh_enable();
1236                 }
1237                 if (svc->stats.cpustats)
1238                         free_percpu(svc->stats.cpustats);
1239                 kfree(svc);
1240         }
1241         ip_vs_scheduler_put(sched);
1242         ip_vs_pe_put(pe);
1243
1244         /* decrease the module use count */
1245         ip_vs_use_count_dec();
1246
1247         return ret;
1248 }
1249
1250
1251 /*
1252  *      Edit a service and bind it with a new scheduler
1253  */
1254 static int
1255 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256 {
1257         struct ip_vs_scheduler *sched, *old_sched;
1258         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1259         int ret = 0;
1260
1261         /*
1262          * Lookup the scheduler, by 'u->sched_name'
1263          */
1264         sched = ip_vs_scheduler_get(u->sched_name);
1265         if (sched == NULL) {
1266                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1267                 return -ENOENT;
1268         }
1269         old_sched = sched;
1270
1271         if (u->pe_name && *u->pe_name) {
1272                 pe = ip_vs_pe_getbyname(u->pe_name);
1273                 if (pe == NULL) {
1274                         pr_info("persistence engine module ip_vs_pe_%s "
1275                                 "not found\n", u->pe_name);
1276                         ret = -ENOENT;
1277                         goto out;
1278                 }
1279                 old_pe = pe;
1280         }
1281
1282 #ifdef CONFIG_IP_VS_IPV6
1283         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1284                 ret = -EINVAL;
1285                 goto out;
1286         }
1287 #endif
1288
1289         write_lock_bh(&__ip_vs_svc_lock);
1290
1291         /*
1292          * Wait until all other svc users go away.
1293          */
1294         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1295
1296         /*
1297          * Set the flags and timeout value
1298          */
1299         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1300         svc->timeout = u->timeout * HZ;
1301         svc->netmask = u->netmask;
1302
1303         old_sched = svc->scheduler;
1304         if (sched != old_sched) {
1305                 /*
1306                  * Unbind the old scheduler
1307                  */
1308                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1309                         old_sched = sched;
1310                         goto out_unlock;
1311                 }
1312
1313                 /*
1314                  * Bind the new scheduler
1315                  */
1316                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317                         /*
1318                          * If ip_vs_bind_scheduler fails, restore the old
1319                          * scheduler.
1320                          * The main reason of failure is out of memory.
1321                          *
1322                          * The question is if the old scheduler can be
1323                          * restored all the time. TODO: if it cannot be
1324                          * restored some time, we must delete the service,
1325                          * otherwise the system may crash.
1326                          */
1327                         ip_vs_bind_scheduler(svc, old_sched);
1328                         old_sched = sched;
1329                         goto out_unlock;
1330                 }
1331         }
1332
1333         old_pe = svc->pe;
1334         if (pe != old_pe) {
1335                 ip_vs_unbind_pe(svc);
1336                 ip_vs_bind_pe(svc, pe);
1337         }
1338
1339 out_unlock:
1340         write_unlock_bh(&__ip_vs_svc_lock);
1341 out:
1342         ip_vs_scheduler_put(old_sched);
1343         ip_vs_pe_put(old_pe);
1344         return ret;
1345 }
1346
1347
1348 /*
1349  *      Delete a service from the service list
1350  *      - The service must be unlinked, unlocked and not referenced!
1351  *      - We are called under _bh lock
1352  */
1353 static void __ip_vs_del_service(struct ip_vs_service *svc)
1354 {
1355         struct ip_vs_dest *dest, *nxt;
1356         struct ip_vs_scheduler *old_sched;
1357         struct ip_vs_pe *old_pe;
1358         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1359
1360         pr_info("%s: enter\n", __func__);
1361
1362         /* Count only IPv4 services for old get/setsockopt interface */
1363         if (svc->af == AF_INET)
1364                 ipvs->num_services--;
1365
1366         ip_vs_stop_estimator(svc->net, &svc->stats);
1367
1368         /* Unbind scheduler */
1369         old_sched = svc->scheduler;
1370         ip_vs_unbind_scheduler(svc);
1371         ip_vs_scheduler_put(old_sched);
1372
1373         /* Unbind persistence engine */
1374         old_pe = svc->pe;
1375         ip_vs_unbind_pe(svc);
1376         ip_vs_pe_put(old_pe);
1377
1378         /* Unbind app inc */
1379         if (svc->inc) {
1380                 ip_vs_app_inc_put(svc->inc);
1381                 svc->inc = NULL;
1382         }
1383
1384         /*
1385          *    Unlink the whole destination list
1386          */
1387         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388                 __ip_vs_unlink_dest(svc, dest, 0);
1389                 __ip_vs_del_dest(svc->net, dest);
1390         }
1391
1392         /*
1393          *    Update the virtual service counters
1394          */
1395         if (svc->port == FTPPORT)
1396                 atomic_dec(&ipvs->ftpsvc_counter);
1397         else if (svc->port == 0)
1398                 atomic_dec(&ipvs->nullsvc_counter);
1399
1400         /*
1401          *    Free the service if nobody refers to it
1402          */
1403         if (atomic_read(&svc->refcnt) == 0) {
1404                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1405                               svc->fwmark,
1406                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407                               ntohs(svc->port), atomic_read(&svc->usecnt));
1408                 free_percpu(svc->stats.cpustats);
1409                 kfree(svc);
1410         }
1411
1412         /* decrease the module use count */
1413         ip_vs_use_count_dec();
1414 }
1415
1416 /*
1417  * Unlink a service from list and try to delete it if its refcnt reached 0
1418  */
1419 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1420 {
1421         /*
1422          * Unhash it from the service table
1423          */
1424         write_lock_bh(&__ip_vs_svc_lock);
1425
1426         ip_vs_svc_unhash(svc);
1427
1428         /*
1429          * Wait until all the svc users go away.
1430          */
1431         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432
1433         __ip_vs_del_service(svc);
1434
1435         write_unlock_bh(&__ip_vs_svc_lock);
1436 }
1437
1438 /*
1439  *      Delete a service from the service list
1440  */
1441 static int ip_vs_del_service(struct ip_vs_service *svc)
1442 {
1443         if (svc == NULL)
1444                 return -EEXIST;
1445         ip_vs_unlink_service(svc);
1446
1447         return 0;
1448 }
1449
1450
1451 /*
1452  *      Flush all the virtual services
1453  */
1454 static int ip_vs_flush(struct net *net)
1455 {
1456         int idx;
1457         struct ip_vs_service *svc, *nxt;
1458
1459         /*
1460          * Flush the service table hashed by <netns,protocol,addr,port>
1461          */
1462         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1464                                          s_list) {
1465                         if (net_eq(svc->net, net))
1466                                 ip_vs_unlink_service(svc);
1467                 }
1468         }
1469
1470         /*
1471          * Flush the service table hashed by fwmark
1472          */
1473         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474                 list_for_each_entry_safe(svc, nxt,
1475                                          &ip_vs_svc_fwm_table[idx], f_list) {
1476                         if (net_eq(svc->net, net))
1477                                 ip_vs_unlink_service(svc);
1478                 }
1479         }
1480
1481         return 0;
1482 }
1483
1484 /*
1485  *      Delete service by {netns} in the service table.
1486  *      Called by __ip_vs_cleanup()
1487  */
1488 void ip_vs_service_net_cleanup(struct net *net)
1489 {
1490         EnterFunction(2);
1491         /* Check for "full" addressed entries */
1492         mutex_lock(&__ip_vs_mutex);
1493         ip_vs_flush(net);
1494         mutex_unlock(&__ip_vs_mutex);
1495         LeaveFunction(2);
1496 }
1497 /*
1498  * Release dst hold by dst_cache
1499  */
1500 static inline void
1501 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1502 {
1503         spin_lock_bh(&dest->dst_lock);
1504         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1505                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506                               dev->name,
1507                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508                               ntohs(dest->port),
1509                               atomic_read(&dest->refcnt));
1510                 ip_vs_dst_reset(dest);
1511         }
1512         spin_unlock_bh(&dest->dst_lock);
1513
1514 }
1515 /*
1516  * Netdev event receiver
1517  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518  * a device that is "unregister" it must be released.
1519  */
1520 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521                             void *ptr)
1522 {
1523         struct net_device *dev = ptr;
1524         struct net *net = dev_net(dev);
1525         struct netns_ipvs *ipvs = net_ipvs(net);
1526         struct ip_vs_service *svc;
1527         struct ip_vs_dest *dest;
1528         unsigned int idx;
1529
1530         if (event != NETDEV_UNREGISTER || !ipvs)
1531                 return NOTIFY_DONE;
1532         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533         EnterFunction(2);
1534         mutex_lock(&__ip_vs_mutex);
1535         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537                         if (net_eq(svc->net, net)) {
1538                                 list_for_each_entry(dest, &svc->destinations,
1539                                                     n_list) {
1540                                         __ip_vs_dev_reset(dest, dev);
1541                                 }
1542                         }
1543                 }
1544
1545                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546                         if (net_eq(svc->net, net)) {
1547                                 list_for_each_entry(dest, &svc->destinations,
1548                                                     n_list) {
1549                                         __ip_vs_dev_reset(dest, dev);
1550                                 }
1551                         }
1552
1553                 }
1554         }
1555
1556         list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1557                 __ip_vs_dev_reset(dest, dev);
1558         }
1559         mutex_unlock(&__ip_vs_mutex);
1560         LeaveFunction(2);
1561         return NOTIFY_DONE;
1562 }
1563
1564 /*
1565  *      Zero counters in a service or all services
1566  */
1567 static int ip_vs_zero_service(struct ip_vs_service *svc)
1568 {
1569         struct ip_vs_dest *dest;
1570
1571         write_lock_bh(&__ip_vs_svc_lock);
1572         list_for_each_entry(dest, &svc->destinations, n_list) {
1573                 ip_vs_zero_stats(&dest->stats);
1574         }
1575         ip_vs_zero_stats(&svc->stats);
1576         write_unlock_bh(&__ip_vs_svc_lock);
1577         return 0;
1578 }
1579
1580 static int ip_vs_zero_all(struct net *net)
1581 {
1582         int idx;
1583         struct ip_vs_service *svc;
1584
1585         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587                         if (net_eq(svc->net, net))
1588                                 ip_vs_zero_service(svc);
1589                 }
1590         }
1591
1592         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594                         if (net_eq(svc->net, net))
1595                                 ip_vs_zero_service(svc);
1596                 }
1597         }
1598
1599         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1600         return 0;
1601 }
1602
1603 #ifdef CONFIG_SYSCTL
1604
/* Bounds referenced through .extra1/.extra2 by the "sync_retries" sysctl */
static int three = 3;
static int zero;
1607
1608 static int
1609 proc_do_defense_mode(ctl_table *table, int write,
1610                      void __user *buffer, size_t *lenp, loff_t *ppos)
1611 {
1612         struct net *net = current->nsproxy->net_ns;
1613         int *valp = table->data;
1614         int val = *valp;
1615         int rc;
1616
1617         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1618         if (write && (*valp != val)) {
1619                 if ((*valp < 0) || (*valp > 3)) {
1620                         /* Restore the correct value */
1621                         *valp = val;
1622                 } else {
1623                         update_defense_level(net_ipvs(net));
1624                 }
1625         }
1626         return rc;
1627 }
1628
1629 static int
1630 proc_do_sync_threshold(ctl_table *table, int write,
1631                        void __user *buffer, size_t *lenp, loff_t *ppos)
1632 {
1633         int *valp = table->data;
1634         int val[2];
1635         int rc;
1636
1637         /* backup the value first */
1638         memcpy(val, valp, sizeof(val));
1639
1640         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1641         if (write && (valp[0] < 0 || valp[1] < 0 ||
1642             (valp[0] >= valp[1] && valp[1]))) {
1643                 /* Restore the correct value */
1644                 memcpy(valp, val, sizeof(val));
1645         }
1646         return rc;
1647 }
1648
1649 static int
1650 proc_do_sync_mode(ctl_table *table, int write,
1651                      void __user *buffer, size_t *lenp, loff_t *ppos)
1652 {
1653         int *valp = table->data;
1654         int val = *valp;
1655         int rc;
1656
1657         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1658         if (write && (*valp != val)) {
1659                 if ((*valp < 0) || (*valp > 1)) {
1660                         /* Restore the correct value */
1661                         *valp = val;
1662                 }
1663         }
1664         return rc;
1665 }
1666
1667 static int
1668 proc_do_sync_ports(ctl_table *table, int write,
1669                    void __user *buffer, size_t *lenp, loff_t *ppos)
1670 {
1671         int *valp = table->data;
1672         int val = *valp;
1673         int rc;
1674
1675         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1676         if (write && (*valp != val)) {
1677                 if (*valp < 1 || !is_power_of_2(*valp)) {
1678                         /* Restore the correct value */
1679                         *valp = val;
1680                 }
1681         }
1682         return rc;
1683 }
1684
1685 /*
1686  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1687  *      Do not change the order or insert new entries without
1688  *      aligning them with the netns init in ip_vs_control_net_init()
1689  */
1690
1691 static struct ctl_table vs_vars[] = {
1692         {
1693                 .procname       = "amemthresh",
1694                 .maxlen         = sizeof(int),
1695                 .mode           = 0644,
1696                 .proc_handler   = proc_dointvec,
1697         },
1698         {
1699                 .procname       = "am_droprate",
1700                 .maxlen         = sizeof(int),
1701                 .mode           = 0644,
1702                 .proc_handler   = proc_dointvec,
1703         },
1704         {
1705                 .procname       = "drop_entry",
1706                 .maxlen         = sizeof(int),
1707                 .mode           = 0644,
1708                 .proc_handler   = proc_do_defense_mode,
1709         },
1710         {
1711                 .procname       = "drop_packet",
1712                 .maxlen         = sizeof(int),
1713                 .mode           = 0644,
1714                 .proc_handler   = proc_do_defense_mode,
1715         },
1716 #ifdef CONFIG_IP_VS_NFCT
1717         {
1718                 .procname       = "conntrack",
1719                 .maxlen         = sizeof(int),
1720                 .mode           = 0644,
1721                 .proc_handler   = &proc_dointvec,
1722         },
1723 #endif
1724         {
1725                 .procname       = "secure_tcp",
1726                 .maxlen         = sizeof(int),
1727                 .mode           = 0644,
1728                 .proc_handler   = proc_do_defense_mode,
1729         },
1730         {
1731                 .procname       = "snat_reroute",
1732                 .maxlen         = sizeof(int),
1733                 .mode           = 0644,
1734                 .proc_handler   = &proc_dointvec,
1735         },
1736         {
1737                 .procname       = "sync_version",
1738                 .maxlen         = sizeof(int),
1739                 .mode           = 0644,
1740                 .proc_handler   = &proc_do_sync_mode,
1741         },
1742         {
1743                 .procname       = "sync_ports",
1744                 .maxlen         = sizeof(int),
1745                 .mode           = 0644,
1746                 .proc_handler   = &proc_do_sync_ports,
1747         },
1748         {
1749                 .procname       = "sync_qlen_max",
1750                 .maxlen         = sizeof(int),
1751                 .mode           = 0644,
1752                 .proc_handler   = proc_dointvec,
1753         },
1754         {
1755                 .procname       = "sync_sock_size",
1756                 .maxlen         = sizeof(int),
1757                 .mode           = 0644,
1758                 .proc_handler   = proc_dointvec,
1759         },
1760         {
1761                 .procname       = "cache_bypass",
1762                 .maxlen         = sizeof(int),
1763                 .mode           = 0644,
1764                 .proc_handler   = proc_dointvec,
1765         },
1766         {
1767                 .procname       = "expire_nodest_conn",
1768                 .maxlen         = sizeof(int),
1769                 .mode           = 0644,
1770                 .proc_handler   = proc_dointvec,
1771         },
1772         {
1773                 .procname       = "expire_quiescent_template",
1774                 .maxlen         = sizeof(int),
1775                 .mode           = 0644,
1776                 .proc_handler   = proc_dointvec,
1777         },
1778         {
1779                 .procname       = "sync_threshold",
1780                 .maxlen         =
1781                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1782                 .mode           = 0644,
1783                 .proc_handler   = proc_do_sync_threshold,
1784         },
1785         {
1786                 .procname       = "sync_refresh_period",
1787                 .maxlen         = sizeof(int),
1788                 .mode           = 0644,
1789                 .proc_handler   = proc_dointvec_jiffies,
1790         },
1791         {
1792                 .procname       = "sync_retries",
1793                 .maxlen         = sizeof(int),
1794                 .mode           = 0644,
1795                 .proc_handler   = proc_dointvec_minmax,
1796                 .extra1         = &zero,
1797                 .extra2         = &three,
1798         },
1799         {
1800                 .procname       = "nat_icmp_send",
1801                 .maxlen         = sizeof(int),
1802                 .mode           = 0644,
1803                 .proc_handler   = proc_dointvec,
1804         },
1805         {
1806                 .procname       = "pmtu_disc",
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0644,
1809                 .proc_handler   = proc_dointvec,
1810         },
1811         {
1812                 .procname       = "backup_only",
1813                 .maxlen         = sizeof(int),
1814                 .mode           = 0644,
1815                 .proc_handler   = proc_dointvec,
1816         },
1817 #ifdef CONFIG_IP_VS_DEBUG
1818         {
1819                 .procname       = "debug_level",
1820                 .data           = &sysctl_ip_vs_debug_level,
1821                 .maxlen         = sizeof(int),
1822                 .mode           = 0644,
1823                 .proc_handler   = proc_dointvec,
1824         },
1825 #endif
1826 #if 0
1827         {
1828                 .procname       = "timeout_established",
1829                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1830                 .maxlen         = sizeof(int),
1831                 .mode           = 0644,
1832                 .proc_handler   = proc_dointvec_jiffies,
1833         },
1834         {
1835                 .procname       = "timeout_synsent",
1836                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1837                 .maxlen         = sizeof(int),
1838                 .mode           = 0644,
1839                 .proc_handler   = proc_dointvec_jiffies,
1840         },
1841         {
1842                 .procname       = "timeout_synrecv",
1843                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1844                 .maxlen         = sizeof(int),
1845                 .mode           = 0644,
1846                 .proc_handler   = proc_dointvec_jiffies,
1847         },
1848         {
1849                 .procname       = "timeout_finwait",
1850                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1851                 .maxlen         = sizeof(int),
1852                 .mode           = 0644,
1853                 .proc_handler   = proc_dointvec_jiffies,
1854         },
1855         {
1856                 .procname       = "timeout_timewait",
1857                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1858                 .maxlen         = sizeof(int),
1859                 .mode           = 0644,
1860                 .proc_handler   = proc_dointvec_jiffies,
1861         },
1862         {
1863                 .procname       = "timeout_close",
1864                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1865                 .maxlen         = sizeof(int),
1866                 .mode           = 0644,
1867                 .proc_handler   = proc_dointvec_jiffies,
1868         },
1869         {
1870                 .procname       = "timeout_closewait",
1871                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1872                 .maxlen         = sizeof(int),
1873                 .mode           = 0644,
1874                 .proc_handler   = proc_dointvec_jiffies,
1875         },
1876         {
1877                 .procname       = "timeout_lastack",
1878                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1879                 .maxlen         = sizeof(int),
1880                 .mode           = 0644,
1881                 .proc_handler   = proc_dointvec_jiffies,
1882         },
1883         {
1884                 .procname       = "timeout_listen",
1885                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1886                 .maxlen         = sizeof(int),
1887                 .mode           = 0644,
1888                 .proc_handler   = proc_dointvec_jiffies,
1889         },
1890         {
1891                 .procname       = "timeout_synack",
1892                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1893                 .maxlen         = sizeof(int),
1894                 .mode           = 0644,
1895                 .proc_handler   = proc_dointvec_jiffies,
1896         },
1897         {
1898                 .procname       = "timeout_udp",
1899                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1900                 .maxlen         = sizeof(int),
1901                 .mode           = 0644,
1902                 .proc_handler   = proc_dointvec_jiffies,
1903         },
1904         {
1905                 .procname       = "timeout_icmp",
1906                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1907                 .maxlen         = sizeof(int),
1908                 .mode           = 0644,
1909                 .proc_handler   = proc_dointvec_jiffies,
1910         },
1911 #endif
1912         { }
1913 };
1914
1915 #endif
1916
1917 #ifdef CONFIG_PROC_FS
1918
/* Cursor kept in seq->private while the service tables are being listed. */
struct ip_vs_iter {
        struct seq_net_private p;  /* Do not move this, netns depends upon it*/
        /* table currently walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
        struct list_head *table;
        /* index of the hash bucket the current entry lives in */
        int bucket;
};
1924
1925 /*
1926  *      Write the contents of the VS rule table to a PROCfs file.
1927  *      (It is kept just for backward compatibility)
1928  */
1929 static inline const char *ip_vs_fwd_name(unsigned int flags)
1930 {
1931         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1932         case IP_VS_CONN_F_LOCALNODE:
1933                 return "Local";
1934         case IP_VS_CONN_F_TUNNEL:
1935                 return "Tunnel";
1936         case IP_VS_CONN_F_DROUTE:
1937                 return "Route";
1938         default:
1939                 return "Masq";
1940         }
1941 }
1942
1943
1944 /* Get the Nth entry in the two lists */
1945 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1946 {
1947         struct net *net = seq_file_net(seq);
1948         struct ip_vs_iter *iter = seq->private;
1949         int idx;
1950         struct ip_vs_service *svc;
1951
1952         /* look in hash by protocol */
1953         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1954                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1955                         if (net_eq(svc->net, net) && pos-- == 0) {
1956                                 iter->table = ip_vs_svc_table;
1957                                 iter->bucket = idx;
1958                                 return svc;
1959                         }
1960                 }
1961         }
1962
1963         /* keep looking in fwmark */
1964         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1965                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1966                         if (net_eq(svc->net, net) && pos-- == 0) {
1967                                 iter->table = ip_vs_svc_fwm_table;
1968                                 iter->bucket = idx;
1969                                 return svc;
1970                         }
1971                 }
1972         }
1973
1974         return NULL;
1975 }
1976
1977 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1978 __acquires(__ip_vs_svc_lock)
1979 {
1980
1981         read_lock_bh(&__ip_vs_svc_lock);
1982         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1983 }
1984
1985
1986 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987 {
1988         struct list_head *e;
1989         struct ip_vs_iter *iter;
1990         struct ip_vs_service *svc;
1991
1992         ++*pos;
1993         if (v == SEQ_START_TOKEN)
1994                 return ip_vs_info_array(seq,0);
1995
1996         svc = v;
1997         iter = seq->private;
1998
1999         if (iter->table == ip_vs_svc_table) {
2000                 /* next service in table hashed by protocol */
2001                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
2002                         return list_entry(e, struct ip_vs_service, s_list);
2003
2004
2005                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2006                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2007                                             s_list) {
2008                                 return svc;
2009                         }
2010                 }
2011
2012                 iter->table = ip_vs_svc_fwm_table;
2013                 iter->bucket = -1;
2014                 goto scan_fwmark;
2015         }
2016
2017         /* next service in hashed by fwmark */
2018         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2019                 return list_entry(e, struct ip_vs_service, f_list);
2020
2021  scan_fwmark:
2022         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2023                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2024                                     f_list)
2025                         return svc;
2026         }
2027
2028         return NULL;
2029 }
2030
/* seq_file ->stop: drop the read lock taken in ip_vs_info_seq_start(). */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
        read_unlock_bh(&__ip_vs_svc_lock);
}
2036
2037
2038 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2039 {
2040         if (v == SEQ_START_TOKEN) {
2041                 seq_printf(seq,
2042                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2043                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2044                 seq_puts(seq,
2045                          "Prot LocalAddress:Port Scheduler Flags\n");
2046                 seq_puts(seq,
2047                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2048         } else {
2049                 const struct ip_vs_service *svc = v;
2050                 const struct ip_vs_iter *iter = seq->private;
2051                 const struct ip_vs_dest *dest;
2052
2053                 if (iter->table == ip_vs_svc_table) {
2054 #ifdef CONFIG_IP_VS_IPV6
2055                         if (svc->af == AF_INET6)
2056                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2057                                            ip_vs_proto_name(svc->protocol),
2058                                            &svc->addr.in6,
2059                                            ntohs(svc->port),
2060                                            svc->scheduler->name);
2061                         else
2062 #endif
2063                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2064                                            ip_vs_proto_name(svc->protocol),
2065                                            ntohl(svc->addr.ip),
2066                                            ntohs(svc->port),
2067                                            svc->scheduler->name,
2068                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2069                 } else {
2070                         seq_printf(seq, "FWM  %08X %s %s",
2071                                    svc->fwmark, svc->scheduler->name,
2072                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2073                 }
2074
2075                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2076                         seq_printf(seq, "persistent %d %08X\n",
2077                                 svc->timeout,
2078                                 ntohl(svc->netmask));
2079                 else
2080                         seq_putc(seq, '\n');
2081
2082                 list_for_each_entry(dest, &svc->destinations, n_list) {
2083 #ifdef CONFIG_IP_VS_IPV6
2084                         if (dest->af == AF_INET6)
2085                                 seq_printf(seq,
2086                                            "  -> [%pI6]:%04X"
2087                                            "      %-7s %-6d %-10d %-10d\n",
2088                                            &dest->addr.in6,
2089                                            ntohs(dest->port),
2090                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2091                                            atomic_read(&dest->weight),
2092                                            atomic_read(&dest->activeconns),
2093                                            atomic_read(&dest->inactconns));
2094                         else
2095 #endif
2096                                 seq_printf(seq,
2097                                            "  -> %08X:%04X      "
2098                                            "%-7s %-6d %-10d %-10d\n",
2099                                            ntohl(dest->addr.ip),
2100                                            ntohs(dest->port),
2101                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2102                                            atomic_read(&dest->weight),
2103                                            atomic_read(&dest->activeconns),
2104                                            atomic_read(&dest->inactconns));
2105
2106                 }
2107         }
2108         return 0;
2109 }
2110
/* seq_file iterator callbacks for the service listing */
static const struct seq_operations ip_vs_info_seq_ops = {
        .start = ip_vs_info_seq_start,
        .next  = ip_vs_info_seq_next,
        .stop  = ip_vs_info_seq_stop,
        .show  = ip_vs_info_seq_show,
};
2117
/*
 * Open handler for the service listing: hands ip_vs_info_seq_ops to
 * seq_open_net() and asks for a private area the size of
 * struct ip_vs_iter (read back through seq->private by the callbacks).
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
}
2123
/* File operations of the service listing; release pairs with seq_open_net() */
static const struct file_operations ip_vs_info_fops = {
        .owner   = THIS_MODULE,
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
2131
2132 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2133 {
2134         struct net *net = seq_file_single_net(seq);
2135         struct ip_vs_stats_user show;
2136
2137 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2138         seq_puts(seq,
2139                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2140         seq_printf(seq,
2141                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2142
2143         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2144         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2145                    show.inpkts, show.outpkts,
2146                    (unsigned long long) show.inbytes,
2147                    (unsigned long long) show.outbytes);
2148
2149 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2150         seq_puts(seq,
2151                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2152         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2153                         show.cps, show.inpps, show.outpps,
2154                         show.inbps, show.outbps);
2155
2156         return 0;
2157 }
2158
/* Open handler: single-shot seq_file rendered by ip_vs_stats_show() */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, ip_vs_stats_show);
}
2163
/* File operations of the total statistics file; release pairs with single_open_net() */
static const struct file_operations ip_vs_stats_fops = {
        .owner = THIS_MODULE,
        .open = ip_vs_stats_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release_net,
};
2171
2172 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2173 {
2174         struct net *net = seq_file_single_net(seq);
2175         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2176         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2177         struct ip_vs_stats_user rates;
2178         int i;
2179
2180 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2181         seq_puts(seq,
2182                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2183         seq_printf(seq,
2184                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2185
2186         for_each_possible_cpu(i) {
2187                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2188                 unsigned int start;
2189                 __u64 inbytes, outbytes;
2190
2191                 do {
2192                         start = u64_stats_fetch_begin_bh(&u->syncp);
2193                         inbytes = u->ustats.inbytes;
2194                         outbytes = u->ustats.outbytes;
2195                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2196
2197                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2198                            i, u->ustats.conns, u->ustats.inpkts,
2199                            u->ustats.outpkts, (__u64)inbytes,
2200                            (__u64)outbytes);
2201         }
2202
2203         spin_lock_bh(&tot_stats->lock);
2204
2205         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2206                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2207                    tot_stats->ustats.outpkts,
2208                    (unsigned long long) tot_stats->ustats.inbytes,
2209                    (unsigned long long) tot_stats->ustats.outbytes);
2210
2211         ip_vs_read_estimator(&rates, tot_stats);
2212
2213         spin_unlock_bh(&tot_stats->lock);
2214
2215 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2216         seq_puts(seq,
2217                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2218         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2219                         rates.cps,
2220                         rates.inpps,
2221                         rates.outpps,
2222                         rates.inbps,
2223                         rates.outbps);
2224
2225         return 0;
2226 }
2227
/* Open handler: single-shot seq_file rendered by ip_vs_stats_percpu_show() */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
2232
/* File operations of the per-CPU statistics file; release pairs with single_open_net() */
static const struct file_operations ip_vs_stats_percpu_fops = {
        .owner = THIS_MODULE,
        .open = ip_vs_stats_percpu_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release_net,
};
2240 #endif
2241
2242 /*
2243  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2244  */
2245 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2246 {
2247 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2248         struct ip_vs_proto_data *pd;
2249 #endif
2250
2251         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2252                   u->tcp_timeout,
2253                   u->tcp_fin_timeout,
2254                   u->udp_timeout);
2255
2256 #ifdef CONFIG_IP_VS_PROTO_TCP
2257         if (u->tcp_timeout) {
2258                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2259                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2260                         = u->tcp_timeout * HZ;
2261         }
2262
2263         if (u->tcp_fin_timeout) {
2264                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2265                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2266                         = u->tcp_fin_timeout * HZ;
2267         }
2268 #endif
2269
2270 #ifdef CONFIG_IP_VS_PROTO_UDP
2271         if (u->udp_timeout) {
2272                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2273                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2274                         = u->udp_timeout * HZ;
2275         }
2276 #endif
2277         return 0;
2278 }
2279
2280
/* Zero-based index of a set command, relative to IP_VS_BASE_CTL */
#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
/* Argument sizes of the individual set commands */
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
/* a service struct immediately followed by a destination struct */
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
/* size of the on-stack argument buffer in do_ip_vs_set_ctl() */
#define MAX_ARG_LEN             SVCDEST_ARG_LEN

/*
 * Exact argument length expected for each set command, indexed by
 * SET_CMDID(); do_ip_vs_set_ctl() rejects any other length with -EINVAL.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
};
2302
2303 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2304                                   struct ip_vs_service_user *usvc_compat)
2305 {
2306         memset(usvc, 0, sizeof(*usvc));
2307
2308         usvc->af                = AF_INET;
2309         usvc->protocol          = usvc_compat->protocol;
2310         usvc->addr.ip           = usvc_compat->addr;
2311         usvc->port              = usvc_compat->port;
2312         usvc->fwmark            = usvc_compat->fwmark;
2313
2314         /* Deep copy of sched_name is not needed here */
2315         usvc->sched_name        = usvc_compat->sched_name;
2316
2317         usvc->flags             = usvc_compat->flags;
2318         usvc->timeout           = usvc_compat->timeout;
2319         usvc->netmask           = usvc_compat->netmask;
2320 }
2321
2322 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2323                                    struct ip_vs_dest_user *udest_compat)
2324 {
2325         memset(udest, 0, sizeof(*udest));
2326
2327         udest->addr.ip          = udest_compat->addr;
2328         udest->port             = udest_compat->port;
2329         udest->conn_flags       = udest_compat->conn_flags;
2330         udest->weight           = udest_compat->weight;
2331         udest->u_threshold      = udest_compat->u_threshold;
2332         udest->l_threshold      = udest_compat->l_threshold;
2333 }
2334
2335 static int
2336 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2337 {
2338         struct net *net = sock_net(sk);
2339         int ret;
2340         unsigned char arg[MAX_ARG_LEN];
2341         struct ip_vs_service_user *usvc_compat;
2342         struct ip_vs_service_user_kern usvc;
2343         struct ip_vs_service *svc;
2344         struct ip_vs_dest_user *udest_compat;
2345         struct ip_vs_dest_user_kern udest;
2346         struct netns_ipvs *ipvs = net_ipvs(net);
2347
2348         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2349                 return -EPERM;
2350
2351         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2352                 return -EINVAL;
2353         if (len < 0 || len >  MAX_ARG_LEN)
2354                 return -EINVAL;
2355         if (len != set_arglen[SET_CMDID(cmd)]) {
2356                 pr_err("set_ctl: len %u != %u\n",
2357                        len, set_arglen[SET_CMDID(cmd)]);
2358                 return -EINVAL;
2359         }
2360
2361         if (copy_from_user(arg, user, len) != 0)
2362                 return -EFAULT;
2363
2364         /* increase the module use count */
2365         ip_vs_use_count_inc();
2366
2367         /* Handle daemons since they have another lock */
2368         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2369             cmd == IP_VS_SO_SET_STOPDAEMON) {
2370                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2371
2372                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2373                         ret = -ERESTARTSYS;
2374                         goto out_dec;
2375                 }
2376                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2377                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2378                                                 dm->syncid);
2379                 else
2380                         ret = stop_sync_thread(net, dm->state);
2381                 mutex_unlock(&ipvs->sync_mutex);
2382                 goto out_dec;
2383         }
2384
2385         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2386                 ret = -ERESTARTSYS;
2387                 goto out_dec;
2388         }
2389
2390         if (cmd == IP_VS_SO_SET_FLUSH) {
2391                 /* Flush the virtual service */
2392                 ret = ip_vs_flush(net);
2393                 goto out_unlock;
2394         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2395                 /* Set timeout values for (tcp tcpfin udp) */
2396                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2397                 goto out_unlock;
2398         }
2399
2400         usvc_compat = (struct ip_vs_service_user *)arg;
2401         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2402
2403         /* We only use the new structs internally, so copy userspace compat
2404          * structs to extended internal versions */
2405         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2406         ip_vs_copy_udest_compat(&udest, udest_compat);
2407
2408         if (cmd == IP_VS_SO_SET_ZERO) {
2409                 /* if no service address is set, zero counters in all */
2410                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2411                         ret = ip_vs_zero_all(net);
2412                         goto out_unlock;
2413                 }
2414         }
2415
2416         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2417         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2418             usvc.protocol != IPPROTO_SCTP) {
2419                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2420                        usvc.protocol, &usvc.addr.ip,
2421                        ntohs(usvc.port), usvc.sched_name);
2422                 ret = -EFAULT;
2423                 goto out_unlock;
2424         }
2425
2426         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2427         if (usvc.fwmark == 0)
2428                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2429                                            &usvc.addr, usvc.port);
2430         else
2431                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2432
2433         if (cmd != IP_VS_SO_SET_ADD
2434             && (svc == NULL || svc->protocol != usvc.protocol)) {
2435                 ret = -ESRCH;
2436                 goto out_unlock;
2437         }
2438
2439         switch (cmd) {
2440         case IP_VS_SO_SET_ADD:
2441                 if (svc != NULL)
2442                         ret = -EEXIST;
2443                 else
2444                         ret = ip_vs_add_service(net, &usvc, &svc);
2445                 break;
2446         case IP_VS_SO_SET_EDIT:
2447                 ret = ip_vs_edit_service(svc, &usvc);
2448                 break;
2449         case IP_VS_SO_SET_DEL:
2450                 ret = ip_vs_del_service(svc);
2451                 if (!ret)
2452                         goto out_unlock;
2453                 break;
2454         case IP_VS_SO_SET_ZERO:
2455                 ret = ip_vs_zero_service(svc);
2456                 break;
2457         case IP_VS_SO_SET_ADDDEST:
2458                 ret = ip_vs_add_dest(svc, &udest);
2459                 break;
2460         case IP_VS_SO_SET_EDITDEST:
2461                 ret = ip_vs_edit_dest(svc, &udest);
2462                 break;
2463         case IP_VS_SO_SET_DELDEST:
2464                 ret = ip_vs_del_dest(svc, &udest);
2465                 break;
2466         default:
2467                 ret = -EINVAL;
2468         }
2469
2470   out_unlock:
2471         mutex_unlock(&__ip_vs_mutex);
2472   out_dec:
2473         /* decrease the module use count */
2474         ip_vs_use_count_dec();
2475
2476         return ret;
2477 }
2478
2479
2480 static void
2481 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2482 {
2483         dst->protocol = src->protocol;
2484         dst->addr = src->addr.ip;
2485         dst->port = src->port;
2486         dst->fwmark = src->fwmark;
2487         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2488         dst->flags = src->flags;
2489         dst->timeout = src->timeout / HZ;
2490         dst->netmask = src->netmask;
2491         dst->num_dests = src->num_dests;
2492         ip_vs_copy_stats(&dst->stats, &src->stats);
2493 }
2494
2495 static inline int
2496 __ip_vs_get_service_entries(struct net *net,
2497                             const struct ip_vs_get_services *get,
2498                             struct ip_vs_get_services __user *uptr)
2499 {
2500         int idx, count=0;
2501         struct ip_vs_service *svc;
2502         struct ip_vs_service_entry entry;
2503         int ret = 0;
2504
2505         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2506                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2507                         /* Only expose IPv4 entries to old interface */
2508                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2509                                 continue;
2510
2511                         if (count >= get->num_services)
2512                                 goto out;
2513                         memset(&entry, 0, sizeof(entry));
2514                         ip_vs_copy_service(&entry, svc);
2515                         if (copy_to_user(&uptr->entrytable[count],
2516                                          &entry, sizeof(entry))) {
2517                                 ret = -EFAULT;
2518                                 goto out;
2519                         }
2520                         count++;
2521                 }
2522         }
2523
2524         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2525                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2526                         /* Only expose IPv4 entries to old interface */
2527                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2528                                 continue;
2529
2530                         if (count >= get->num_services)
2531                                 goto out;
2532                         memset(&entry, 0, sizeof(entry));
2533                         ip_vs_copy_service(&entry, svc);
2534                         if (copy_to_user(&uptr->entrytable[count],
2535                                          &entry, sizeof(entry))) {
2536                                 ret = -EFAULT;
2537                                 goto out;
2538                         }
2539                         count++;
2540                 }
2541         }
2542 out:
2543         return ret;
2544 }
2545
2546 static inline int
2547 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2548                          struct ip_vs_get_dests __user *uptr)
2549 {
2550         struct ip_vs_service *svc;
2551         union nf_inet_addr addr = { .ip = get->addr };
2552         int ret = 0;
2553
2554         if (get->fwmark)
2555                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2556         else
2557                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2558                                            get->port);
2559
2560         if (svc) {
2561                 int count = 0;
2562                 struct ip_vs_dest *dest;
2563                 struct ip_vs_dest_entry entry;
2564
2565                 list_for_each_entry(dest, &svc->destinations, n_list) {
2566                         if (count >= get->num_dests)
2567                                 break;
2568
2569                         entry.addr = dest->addr.ip;
2570                         entry.port = dest->port;
2571                         entry.conn_flags = atomic_read(&dest->conn_flags);
2572                         entry.weight = atomic_read(&dest->weight);
2573                         entry.u_threshold = dest->u_threshold;
2574                         entry.l_threshold = dest->l_threshold;
2575                         entry.activeconns = atomic_read(&dest->activeconns);
2576                         entry.inactconns = atomic_read(&dest->inactconns);
2577                         entry.persistconns = atomic_read(&dest->persistconns);
2578                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2579                         if (copy_to_user(&uptr->entrytable[count],
2580                                          &entry, sizeof(entry))) {
2581                                 ret = -EFAULT;
2582                                 break;
2583                         }
2584                         count++;
2585                 }
2586         } else
2587                 ret = -ESRCH;
2588         return ret;
2589 }
2590
2591 static inline void
2592 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2593 {
2594 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2595         struct ip_vs_proto_data *pd;
2596 #endif
2597
2598         memset(u, 0, sizeof (*u));
2599
2600 #ifdef CONFIG_IP_VS_PROTO_TCP
2601         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2602         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2603         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2604 #endif
2605 #ifdef CONFIG_IP_VS_PROTO_UDP
2606         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2607         u->udp_timeout =
2608                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2609 #endif
2610 }
2611
2612
/*
 * Zero-based index of a get-sockopt command, relative to IP_VS_BASE_CTL.
 * The argument is parenthesized so that any expression may be passed.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)

/* Sizes of the fixed-length request structures of the get commands. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* The daemon query covers two ip_vs_daemon_user records. */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2620
2621 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2622         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2623         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2624         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2625         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2626         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2627         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2628         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2629 };
2630
2631 static int
2632 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2633 {
2634         unsigned char arg[128];
2635         int ret = 0;
2636         unsigned int copylen;
2637         struct net *net = sock_net(sk);
2638         struct netns_ipvs *ipvs = net_ipvs(net);
2639
2640         BUG_ON(!net);
2641         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2642                 return -EPERM;
2643
2644         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2645                 return -EINVAL;
2646
2647         if (*len < get_arglen[GET_CMDID(cmd)]) {
2648                 pr_err("get_ctl: len %u < %u\n",
2649                        *len, get_arglen[GET_CMDID(cmd)]);
2650                 return -EINVAL;
2651         }
2652
2653         copylen = get_arglen[GET_CMDID(cmd)];
2654         if (copylen > 128)
2655                 return -EINVAL;
2656
2657         if (copy_from_user(arg, user, copylen) != 0)
2658                 return -EFAULT;
2659         /*
2660          * Handle daemons first since it has its own locking
2661          */
2662         if (cmd == IP_VS_SO_GET_DAEMON) {
2663                 struct ip_vs_daemon_user d[2];
2664
2665                 memset(&d, 0, sizeof(d));
2666                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2667                         return -ERESTARTSYS;
2668
2669                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2670                         d[0].state = IP_VS_STATE_MASTER;
2671                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2672                                 sizeof(d[0].mcast_ifn));
2673                         d[0].syncid = ipvs->master_syncid;
2674                 }
2675                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2676                         d[1].state = IP_VS_STATE_BACKUP;
2677                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2678                                 sizeof(d[1].mcast_ifn));
2679                         d[1].syncid = ipvs->backup_syncid;
2680                 }
2681                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2682                         ret = -EFAULT;
2683                 mutex_unlock(&ipvs->sync_mutex);
2684                 return ret;
2685         }
2686
2687         if (mutex_lock_interruptible(&__ip_vs_mutex))
2688                 return -ERESTARTSYS;
2689
2690         switch (cmd) {
2691         case IP_VS_SO_GET_VERSION:
2692         {
2693                 char buf[64];
2694
2695                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2696                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2697                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2698                         ret = -EFAULT;
2699                         goto out;
2700                 }
2701                 *len = strlen(buf)+1;
2702         }
2703         break;
2704
2705         case IP_VS_SO_GET_INFO:
2706         {
2707                 struct ip_vs_getinfo info;
2708                 info.version = IP_VS_VERSION_CODE;
2709                 info.size = ip_vs_conn_tab_size;
2710                 info.num_services = ipvs->num_services;
2711                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2712                         ret = -EFAULT;
2713         }
2714         break;
2715
2716         case IP_VS_SO_GET_SERVICES:
2717         {
2718                 struct ip_vs_get_services *get;
2719                 int size;
2720
2721                 get = (struct ip_vs_get_services *)arg;
2722                 size = sizeof(*get) +
2723                         sizeof(struct ip_vs_service_entry) * get->num_services;
2724                 if (*len != size) {
2725                         pr_err("length: %u != %u\n", *len, size);
2726                         ret = -EINVAL;
2727                         goto out;
2728                 }
2729                 ret = __ip_vs_get_service_entries(net, get, user);
2730         }
2731         break;
2732
2733         case IP_VS_SO_GET_SERVICE:
2734         {
2735                 struct ip_vs_service_entry *entry;
2736                 struct ip_vs_service *svc;
2737                 union nf_inet_addr addr;
2738
2739                 entry = (struct ip_vs_service_entry *)arg;
2740                 addr.ip = entry->addr;
2741                 if (entry->fwmark)
2742                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2743                 else
2744                         svc = __ip_vs_service_find(net, AF_INET,
2745                                                    entry->protocol, &addr,
2746                                                    entry->port);
2747                 if (svc) {
2748                         ip_vs_copy_service(entry, svc);
2749                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2750                                 ret = -EFAULT;
2751                 } else
2752                         ret = -ESRCH;
2753         }
2754         break;
2755
2756         case IP_VS_SO_GET_DESTS:
2757         {
2758                 struct ip_vs_get_dests *get;
2759                 int size;
2760
2761                 get = (struct ip_vs_get_dests *)arg;
2762                 size = sizeof(*get) +
2763                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2764                 if (*len != size) {
2765                         pr_err("length: %u != %u\n", *len, size);
2766                         ret = -EINVAL;
2767                         goto out;
2768                 }
2769                 ret = __ip_vs_get_dest_entries(net, get, user);
2770         }
2771         break;
2772
2773         case IP_VS_SO_GET_TIMEOUT:
2774         {
2775                 struct ip_vs_timeout_user t;
2776
2777                 __ip_vs_get_timeouts(net, &t);
2778                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2779                         ret = -EFAULT;
2780         }
2781         break;
2782
2783         default:
2784                 ret = -EINVAL;
2785         }
2786
2787 out:
2788         mutex_unlock(&__ip_vs_mutex);
2789         return ret;
2790 }
2791
2792
2793 static struct nf_sockopt_ops ip_vs_sockopts = {
2794         .pf             = PF_INET,
2795         .set_optmin     = IP_VS_BASE_CTL,
2796         .set_optmax     = IP_VS_SO_SET_MAX+1,
2797         .set            = do_ip_vs_set_ctl,
2798         .get_optmin     = IP_VS_BASE_CTL,
2799         .get_optmax     = IP_VS_SO_GET_MAX+1,
2800         .get            = do_ip_vs_get_ctl,
2801         .owner          = THIS_MODULE,
2802 };
2803
2804 /*
2805  * Generic Netlink interface
2806  */
2807
2808 /* IPVS genetlink family */
2809 static struct genl_family ip_vs_genl_family = {
2810         .id             = GENL_ID_GENERATE,
2811         .hdrsize        = 0,
2812         .name           = IPVS_GENL_NAME,
2813         .version        = IPVS_GENL_VERSION,
2814         .maxattr        = IPVS_CMD_MAX,
2815         .netnsok        = true,         /* Make ipvsadm to work on netns */
2816 };
2817
2818 /* Policy used for first-level command attributes */
2819 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2820         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2821         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2822         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2823         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2824         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2825         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2826 };
2827
2828 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2829 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2830         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2831         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2832                                             .len = IP_VS_IFNAME_MAXLEN },
2833         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2834 };
2835
2836 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2837 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2838         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2839         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2840         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2841                                             .len = sizeof(union nf_inet_addr) },
2842         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2843         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2844         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2845                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2846         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2847                                             .len = IP_VS_PENAME_MAXLEN },
2848         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2849                                             .len = sizeof(struct ip_vs_flags) },
2850         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2851         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2852         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2853 };
2854
2855 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2856 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2857         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2858                                             .len = sizeof(union nf_inet_addr) },
2859         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2860         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2861         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2862         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2863         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2864         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2865         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2866         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2867         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2868 };
2869
2870 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2871                                  struct ip_vs_stats *stats)
2872 {
2873         struct ip_vs_stats_user ustats;
2874         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2875         if (!nl_stats)
2876                 return -EMSGSIZE;
2877
2878         ip_vs_copy_stats(&ustats, stats);
2879
2880         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2881             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2882             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2883             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2884             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2885             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2886             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2887             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2888             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2889             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2890                 goto nla_put_failure;
2891         nla_nest_end(skb, nl_stats);
2892
2893         return 0;
2894
2895 nla_put_failure:
2896         nla_nest_cancel(skb, nl_stats);
2897         return -EMSGSIZE;
2898 }
2899
2900 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2901                                    struct ip_vs_service *svc)
2902 {
2903         struct nlattr *nl_service;
2904         struct ip_vs_flags flags = { .flags = svc->flags,
2905                                      .mask = ~0 };
2906
2907         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2908         if (!nl_service)
2909                 return -EMSGSIZE;
2910
2911         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2912                 goto nla_put_failure;
2913         if (svc->fwmark) {
2914                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2915                         goto nla_put_failure;
2916         } else {
2917                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2918                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2919                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2920                         goto nla_put_failure;
2921         }
2922
2923         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2924             (svc->pe &&
2925              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2926             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2927             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2928             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2929                 goto nla_put_failure;
2930         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2931                 goto nla_put_failure;
2932
2933         nla_nest_end(skb, nl_service);
2934
2935         return 0;
2936
2937 nla_put_failure:
2938         nla_nest_cancel(skb, nl_service);
2939         return -EMSGSIZE;
2940 }
2941
2942 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2943                                    struct ip_vs_service *svc,
2944                                    struct netlink_callback *cb)
2945 {
2946         void *hdr;
2947
2948         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2949                           &ip_vs_genl_family, NLM_F_MULTI,
2950                           IPVS_CMD_NEW_SERVICE);
2951         if (!hdr)
2952                 return -EMSGSIZE;
2953
2954         if (ip_vs_genl_fill_service(skb, svc) < 0)
2955                 goto nla_put_failure;
2956
2957         return genlmsg_end(skb, hdr);
2958
2959 nla_put_failure:
2960         genlmsg_cancel(skb, hdr);
2961         return -EMSGSIZE;
2962 }
2963
2964 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2965                                     struct netlink_callback *cb)
2966 {
2967         int idx = 0, i;
2968         int start = cb->args[0];
2969         struct ip_vs_service *svc;
2970         struct net *net = skb_sknet(skb);
2971
2972         mutex_lock(&__ip_vs_mutex);
2973         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2974                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2975                         if (++idx <= start || !net_eq(svc->net, net))
2976                                 continue;
2977                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2978                                 idx--;
2979                                 goto nla_put_failure;
2980                         }
2981                 }
2982         }
2983
2984         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2985                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2986                         if (++idx <= start || !net_eq(svc->net, net))
2987                                 continue;
2988                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2989                                 idx--;
2990                                 goto nla_put_failure;
2991                         }
2992                 }
2993         }
2994
2995 nla_put_failure:
2996         mutex_unlock(&__ip_vs_mutex);
2997         cb->args[0] = idx;
2998
2999         return skb->len;
3000 }
3001
3002 static int ip_vs_genl_parse_service(struct net *net,
3003                                     struct ip_vs_service_user_kern *usvc,
3004                                     struct nlattr *nla, int full_entry,
3005                                     struct ip_vs_service **ret_svc)
3006 {
3007         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3008         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3009         struct ip_vs_service *svc;
3010
3011         /* Parse mandatory identifying service fields first */
3012         if (nla == NULL ||
3013             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3014                 return -EINVAL;
3015
3016         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3017         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3018         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3019         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3020         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3021
3022         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3023                 return -EINVAL;
3024
3025         memset(usvc, 0, sizeof(*usvc));
3026
3027         usvc->af = nla_get_u16(nla_af);
3028 #ifdef CONFIG_IP_VS_IPV6
3029         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3030 #else
3031         if (usvc->af != AF_INET)
3032 #endif
3033                 return -EAFNOSUPPORT;
3034
3035         if (nla_fwmark) {
3036                 usvc->protocol = IPPROTO_TCP;
3037                 usvc->fwmark = nla_get_u32(nla_fwmark);
3038         } else {
3039                 usvc->protocol = nla_get_u16(nla_protocol);
3040                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3041                 usvc->port = nla_get_u16(nla_port);
3042                 usvc->fwmark = 0;
3043         }
3044
3045         if (usvc->fwmark)
3046                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3047         else
3048                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3049                                            &usvc->addr, usvc->port);
3050         *ret_svc = svc;
3051
3052         /* If a full entry was requested, check for the additional fields */
3053         if (full_entry) {
3054                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3055                               *nla_netmask;
3056                 struct ip_vs_flags flags;
3057
3058                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3059                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3060                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3061                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3062                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3063
3064                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3065                         return -EINVAL;
3066
3067                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3068
3069                 /* prefill flags from service if it already exists */
3070                 if (svc)
3071                         usvc->flags = svc->flags;
3072
3073                 /* set new flags from userland */
3074                 usvc->flags = (usvc->flags & ~flags.mask) |
3075                               (flags.flags & flags.mask);
3076                 usvc->sched_name = nla_data(nla_sched);
3077                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3078                 usvc->timeout = nla_get_u32(nla_timeout);
3079                 usvc->netmask = nla_get_u32(nla_netmask);
3080         }
3081
3082         return 0;
3083 }
3084
3085 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3086                                                      struct nlattr *nla)
3087 {
3088         struct ip_vs_service_user_kern usvc;
3089         struct ip_vs_service *svc;
3090         int ret;
3091
3092         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3093         return ret ? ERR_PTR(ret) : svc;
3094 }
3095
3096 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3097 {
3098         struct nlattr *nl_dest;
3099
3100         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3101         if (!nl_dest)
3102                 return -EMSGSIZE;
3103
3104         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3105             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3106             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3107                         (atomic_read(&dest->conn_flags) &
3108                          IP_VS_CONN_F_FWD_MASK)) ||
3109             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3110                         atomic_read(&dest->weight)) ||
3111             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3112             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3113             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3114                         atomic_read(&dest->activeconns)) ||
3115             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3116                         atomic_read(&dest->inactconns)) ||
3117             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3118                         atomic_read(&dest->persistconns)))
3119                 goto nla_put_failure;
3120         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3121                 goto nla_put_failure;
3122
3123         nla_nest_end(skb, nl_dest);
3124
3125         return 0;
3126
3127 nla_put_failure:
3128         nla_nest_cancel(skb, nl_dest);
3129         return -EMSGSIZE;
3130 }
3131
3132 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3133                                 struct netlink_callback *cb)
3134 {
3135         void *hdr;
3136
3137         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3138                           &ip_vs_genl_family, NLM_F_MULTI,
3139                           IPVS_CMD_NEW_DEST);
3140         if (!hdr)
3141                 return -EMSGSIZE;
3142
3143         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3144                 goto nla_put_failure;
3145
3146         return genlmsg_end(skb, hdr);
3147
3148 nla_put_failure:
3149         genlmsg_cancel(skb, hdr);
3150         return -EMSGSIZE;
3151 }
3152
3153 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3154                                  struct netlink_callback *cb)
3155 {
3156         int idx = 0;
3157         int start = cb->args[0];
3158         struct ip_vs_service *svc;
3159         struct ip_vs_dest *dest;
3160         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3161         struct net *net = skb_sknet(skb);
3162
3163         mutex_lock(&__ip_vs_mutex);
3164
3165         /* Try to find the service for which to dump destinations */
3166         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3167                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3168                 goto out_err;
3169
3170
3171         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3172         if (IS_ERR(svc) || svc == NULL)
3173                 goto out_err;
3174
3175         /* Dump the destinations */
3176         list_for_each_entry(dest, &svc->destinations, n_list) {
3177                 if (++idx <= start)
3178                         continue;
3179                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3180                         idx--;
3181                         goto nla_put_failure;
3182                 }
3183         }
3184
3185 nla_put_failure:
3186         cb->args[0] = idx;
3187
3188 out_err:
3189         mutex_unlock(&__ip_vs_mutex);
3190
3191         return skb->len;
3192 }
3193
3194 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3195                                  struct nlattr *nla, int full_entry)
3196 {
3197         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3198         struct nlattr *nla_addr, *nla_port;
3199
3200         /* Parse mandatory identifying destination fields first */
3201         if (nla == NULL ||
3202             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3203                 return -EINVAL;
3204
3205         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3206         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3207
3208         if (!(nla_addr && nla_port))
3209                 return -EINVAL;
3210
3211         memset(udest, 0, sizeof(*udest));
3212
3213         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3214         udest->port = nla_get_u16(nla_port);
3215
3216         /* If a full entry was requested, check for the additional fields */
3217         if (full_entry) {
3218                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3219                               *nla_l_thresh;
3220
3221                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3222                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3223                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3224                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3225
3226                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3227                         return -EINVAL;
3228
3229                 udest->conn_flags = nla_get_u32(nla_fwd)
3230                                     & IP_VS_CONN_F_FWD_MASK;
3231                 udest->weight = nla_get_u32(nla_weight);
3232                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3233                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3234         }
3235
3236         return 0;
3237 }
3238
3239 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3240                                   const char *mcast_ifn, __be32 syncid)
3241 {
3242         struct nlattr *nl_daemon;
3243
3244         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3245         if (!nl_daemon)
3246                 return -EMSGSIZE;
3247
3248         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3249             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3250             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3251                 goto nla_put_failure;
3252         nla_nest_end(skb, nl_daemon);
3253
3254         return 0;
3255
3256 nla_put_failure:
3257         nla_nest_cancel(skb, nl_daemon);
3258         return -EMSGSIZE;
3259 }
3260
3261 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3262                                   const char *mcast_ifn, __be32 syncid,
3263                                   struct netlink_callback *cb)
3264 {
3265         void *hdr;
3266         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3267                           &ip_vs_genl_family, NLM_F_MULTI,
3268                           IPVS_CMD_NEW_DAEMON);
3269         if (!hdr)
3270                 return -EMSGSIZE;
3271
3272         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3273                 goto nla_put_failure;
3274
3275         return genlmsg_end(skb, hdr);
3276
3277 nla_put_failure:
3278         genlmsg_cancel(skb, hdr);
3279         return -EMSGSIZE;
3280 }
3281
3282 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3283                                    struct netlink_callback *cb)
3284 {
3285         struct net *net = skb_sknet(skb);
3286         struct netns_ipvs *ipvs = net_ipvs(net);
3287
3288         mutex_lock(&ipvs->sync_mutex);
3289         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3290                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3291                                            ipvs->master_mcast_ifn,
3292                                            ipvs->master_syncid, cb) < 0)
3293                         goto nla_put_failure;
3294
3295                 cb->args[0] = 1;
3296         }
3297
3298         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3299                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3300                                            ipvs->backup_mcast_ifn,
3301                                            ipvs->backup_syncid, cb) < 0)
3302                         goto nla_put_failure;
3303
3304                 cb->args[1] = 1;
3305         }
3306
3307 nla_put_failure:
3308         mutex_unlock(&ipvs->sync_mutex);
3309
3310         return skb->len;
3311 }
3312
3313 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3314 {
3315         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3316               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3317               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3318                 return -EINVAL;
3319
3320         return start_sync_thread(net,
3321                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3322                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3323                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3324 }
3325
3326 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3327 {
3328         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3329                 return -EINVAL;
3330
3331         return stop_sync_thread(net,
3332                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3333 }
3334
3335 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3336 {
3337         struct ip_vs_timeout_user t;
3338
3339         __ip_vs_get_timeouts(net, &t);
3340
3341         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3342                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3343
3344         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3345                 t.tcp_fin_timeout =
3346                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3347
3348         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3349                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3350
3351         return ip_vs_set_timeout(net, &t);
3352 }
3353
3354 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3355 {
3356         int ret = 0, cmd;
3357         struct net *net;
3358         struct netns_ipvs *ipvs;
3359
3360         net = skb_sknet(skb);
3361         ipvs = net_ipvs(net);
3362         cmd = info->genlhdr->cmd;
3363
3364         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3365                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3366
3367                 mutex_lock(&ipvs->sync_mutex);
3368                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3369                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3370                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3371                                      ip_vs_daemon_policy)) {
3372                         ret = -EINVAL;
3373                         goto out;
3374                 }
3375
3376                 if (cmd == IPVS_CMD_NEW_DAEMON)
3377                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3378                 else
3379                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3380 out:
3381                 mutex_unlock(&ipvs->sync_mutex);
3382         }
3383         return ret;
3384 }
3385
3386 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3387 {
3388         struct ip_vs_service *svc = NULL;
3389         struct ip_vs_service_user_kern usvc;
3390         struct ip_vs_dest_user_kern udest;
3391         int ret = 0, cmd;
3392         int need_full_svc = 0, need_full_dest = 0;
3393         struct net *net;
3394
3395         net = skb_sknet(skb);
3396         cmd = info->genlhdr->cmd;
3397
3398         mutex_lock(&__ip_vs_mutex);
3399
3400         if (cmd == IPVS_CMD_FLUSH) {
3401                 ret = ip_vs_flush(net);
3402                 goto out;
3403         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3404                 ret = ip_vs_genl_set_config(net, info->attrs);
3405                 goto out;
3406         } else if (cmd == IPVS_CMD_ZERO &&
3407                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3408                 ret = ip_vs_zero_all(net);
3409                 goto out;
3410         }
3411
3412         /* All following commands require a service argument, so check if we
3413          * received a valid one. We need a full service specification when
3414          * adding / editing a service. Only identifying members otherwise. */
3415         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3416                 need_full_svc = 1;
3417
3418         ret = ip_vs_genl_parse_service(net, &usvc,
3419                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3420                                        need_full_svc, &svc);
3421         if (ret)
3422                 goto out;
3423
3424         /* Unless we're adding a new service, the service must already exist */
3425         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3426                 ret = -ESRCH;
3427                 goto out;
3428         }
3429
3430         /* Destination commands require a valid destination argument. For
3431          * adding / editing a destination, we need a full destination
3432          * specification. */
3433         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3434             cmd == IPVS_CMD_DEL_DEST) {
3435                 if (cmd != IPVS_CMD_DEL_DEST)
3436                         need_full_dest = 1;
3437
3438                 ret = ip_vs_genl_parse_dest(&udest,
3439                                             info->attrs[IPVS_CMD_ATTR_DEST],
3440                                             need_full_dest);
3441                 if (ret)
3442                         goto out;
3443         }
3444
3445         switch (cmd) {
3446         case IPVS_CMD_NEW_SERVICE:
3447                 if (svc == NULL)
3448                         ret = ip_vs_add_service(net, &usvc, &svc);
3449                 else
3450                         ret = -EEXIST;
3451                 break;
3452         case IPVS_CMD_SET_SERVICE:
3453                 ret = ip_vs_edit_service(svc, &usvc);
3454                 break;
3455         case IPVS_CMD_DEL_SERVICE:
3456                 ret = ip_vs_del_service(svc);
3457                 /* do not use svc, it can be freed */
3458                 break;
3459         case IPVS_CMD_NEW_DEST:
3460                 ret = ip_vs_add_dest(svc, &udest);
3461                 break;
3462         case IPVS_CMD_SET_DEST:
3463                 ret = ip_vs_edit_dest(svc, &udest);
3464                 break;
3465         case IPVS_CMD_DEL_DEST:
3466                 ret = ip_vs_del_dest(svc, &udest);
3467                 break;
3468         case IPVS_CMD_ZERO:
3469                 ret = ip_vs_zero_service(svc);
3470                 break;
3471         default:
3472                 ret = -EINVAL;
3473         }
3474
3475 out:
3476         mutex_unlock(&__ip_vs_mutex);
3477
3478         return ret;
3479 }
3480
3481 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3482 {
3483         struct sk_buff *msg;
3484         void *reply;
3485         int ret, cmd, reply_cmd;
3486         struct net *net;
3487
3488         net = skb_sknet(skb);
3489         cmd = info->genlhdr->cmd;
3490
3491         if (cmd == IPVS_CMD_GET_SERVICE)
3492                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3493         else if (cmd == IPVS_CMD_GET_INFO)
3494                 reply_cmd = IPVS_CMD_SET_INFO;
3495         else if (cmd == IPVS_CMD_GET_CONFIG)
3496                 reply_cmd = IPVS_CMD_SET_CONFIG;
3497         else {
3498                 pr_err("unknown Generic Netlink command\n");
3499                 return -EINVAL;
3500         }
3501
3502         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3503         if (!msg)
3504                 return -ENOMEM;
3505
3506         mutex_lock(&__ip_vs_mutex);
3507
3508         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3509         if (reply == NULL)
3510                 goto nla_put_failure;
3511
3512         switch (cmd) {
3513         case IPVS_CMD_GET_SERVICE:
3514         {
3515                 struct ip_vs_service *svc;
3516
3517                 svc = ip_vs_genl_find_service(net,
3518                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3519                 if (IS_ERR(svc)) {
3520                         ret = PTR_ERR(svc);
3521                         goto out_err;
3522                 } else if (svc) {
3523                         ret = ip_vs_genl_fill_service(msg, svc);
3524                         if (ret)
3525                                 goto nla_put_failure;
3526                 } else {
3527                         ret = -ESRCH;
3528                         goto out_err;
3529                 }
3530
3531                 break;
3532         }
3533
3534         case IPVS_CMD_GET_CONFIG:
3535         {
3536                 struct ip_vs_timeout_user t;
3537
3538                 __ip_vs_get_timeouts(net, &t);
3539 #ifdef CONFIG_IP_VS_PROTO_TCP
3540                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3541                                 t.tcp_timeout) ||
3542                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3543                                 t.tcp_fin_timeout))
3544                         goto nla_put_failure;
3545 #endif
3546 #ifdef CONFIG_IP_VS_PROTO_UDP
3547                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3548                         goto nla_put_failure;
3549 #endif
3550
3551                 break;
3552         }
3553
3554         case IPVS_CMD_GET_INFO:
3555                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3556                                 IP_VS_VERSION_CODE) ||
3557                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3558                                 ip_vs_conn_tab_size))
3559                         goto nla_put_failure;
3560                 break;
3561         }
3562
3563         genlmsg_end(msg, reply);
3564         ret = genlmsg_reply(msg, info);
3565         goto out;
3566
3567 nla_put_failure:
3568         pr_err("not enough space in Netlink message\n");
3569         ret = -EMSGSIZE;
3570
3571 out_err:
3572         nlmsg_free(msg);
3573 out:
3574         mutex_unlock(&__ip_vs_mutex);
3575
3576         return ret;
3577 }
3578
3579
3580 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3581         {
3582                 .cmd    = IPVS_CMD_NEW_SERVICE,
3583                 .flags  = GENL_ADMIN_PERM,
3584                 .policy = ip_vs_cmd_policy,
3585                 .doit   = ip_vs_genl_set_cmd,
3586         },
3587         {
3588                 .cmd    = IPVS_CMD_SET_SERVICE,
3589                 .flags  = GENL_ADMIN_PERM,
3590                 .policy = ip_vs_cmd_policy,
3591                 .doit   = ip_vs_genl_set_cmd,
3592         },
3593         {
3594                 .cmd    = IPVS_CMD_DEL_SERVICE,
3595                 .flags  = GENL_ADMIN_PERM,
3596                 .policy = ip_vs_cmd_policy,
3597                 .doit   = ip_vs_genl_set_cmd,
3598         },
3599         {
3600                 .cmd    = IPVS_CMD_GET_SERVICE,
3601                 .flags  = GENL_ADMIN_PERM,
3602                 .doit   = ip_vs_genl_get_cmd,
3603                 .dumpit = ip_vs_genl_dump_services,
3604                 .policy = ip_vs_cmd_policy,
3605         },
3606         {
3607                 .cmd    = IPVS_CMD_NEW_DEST,
3608                 .flags  = GENL_ADMIN_PERM,
3609                 .policy = ip_vs_cmd_policy,
3610                 .doit   = ip_vs_genl_set_cmd,
3611         },
3612         {
3613                 .cmd    = IPVS_CMD_SET_DEST,
3614                 .flags  = GENL_ADMIN_PERM,
3615                 .policy = ip_vs_cmd_policy,
3616                 .doit   = ip_vs_genl_set_cmd,
3617         },
3618         {
3619                 .cmd    = IPVS_CMD_DEL_DEST,
3620                 .flags  = GENL_ADMIN_PERM,
3621                 .policy = ip_vs_cmd_policy,
3622                 .doit   = ip_vs_genl_set_cmd,
3623         },
3624         {
3625                 .cmd    = IPVS_CMD_GET_DEST,
3626                 .flags  = GENL_ADMIN_PERM,
3627                 .policy = ip_vs_cmd_policy,
3628                 .dumpit = ip_vs_genl_dump_dests,
3629         },
3630         {
3631                 .cmd    = IPVS_CMD_NEW_DAEMON,
3632                 .flags  = GENL_ADMIN_PERM,
3633                 .policy = ip_vs_cmd_policy,
3634                 .doit   = ip_vs_genl_set_daemon,
3635         },
3636         {
3637                 .cmd    = IPVS_CMD_DEL_DAEMON,
3638                 .flags  = GENL_ADMIN_PERM,
3639                 .policy = ip_vs_cmd_policy,
3640                 .doit   = ip_vs_genl_set_daemon,
3641         },
3642         {
3643                 .cmd    = IPVS_CMD_GET_DAEMON,
3644                 .flags  = GENL_ADMIN_PERM,
3645                 .dumpit = ip_vs_genl_dump_daemons,
3646         },
3647         {
3648                 .cmd    = IPVS_CMD_SET_CONFIG,
3649                 .flags  = GENL_ADMIN_PERM,
3650                 .policy = ip_vs_cmd_policy,
3651                 .doit   = ip_vs_genl_set_cmd,
3652         },
3653         {
3654                 .cmd    = IPVS_CMD_GET_CONFIG,
3655                 .flags  = GENL_ADMIN_PERM,
3656                 .doit   = ip_vs_genl_get_cmd,
3657         },
3658         {
3659                 .cmd    = IPVS_CMD_GET_INFO,
3660                 .flags  = GENL_ADMIN_PERM,
3661                 .doit   = ip_vs_genl_get_cmd,
3662         },
3663         {
3664                 .cmd    = IPVS_CMD_ZERO,
3665                 .flags  = GENL_ADMIN_PERM,
3666                 .policy = ip_vs_cmd_policy,
3667                 .doit   = ip_vs_genl_set_cmd,
3668         },
3669         {
3670                 .cmd    = IPVS_CMD_FLUSH,
3671                 .flags  = GENL_ADMIN_PERM,
3672                 .doit   = ip_vs_genl_set_cmd,
3673         },
3674 };
3675
3676 static int __init ip_vs_genl_register(void)
3677 {
3678         return genl_register_family_with_ops(&ip_vs_genl_family,
3679                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3680 }
3681
3682 static void ip_vs_genl_unregister(void)
3683 {
3684         genl_unregister_family(&ip_vs_genl_family);
3685 }
3686
3687 /* End of Generic Netlink interface definitions */
3688
/*
 * Per-netns init/exit functions.
 */
3692 #ifdef CONFIG_SYSCTL
3693 static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3694 {
3695         int idx;
3696         struct netns_ipvs *ipvs = net_ipvs(net);
3697         struct ctl_table *tbl;
3698
3699         atomic_set(&ipvs->dropentry, 0);
3700         spin_lock_init(&ipvs->dropentry_lock);
3701         spin_lock_init(&ipvs->droppacket_lock);
3702         spin_lock_init(&ipvs->securetcp_lock);
3703
3704         if (!net_eq(net, &init_net)) {
3705                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3706                 if (tbl == NULL)
3707                         return -ENOMEM;
3708
3709                 /* Don't export sysctls to unprivileged users */
3710                 if (net->user_ns != &init_user_ns)
3711                         tbl[0].procname = NULL;
3712         } else
3713                 tbl = vs_vars;
3714         /* Initialize sysctl defaults */
3715         idx = 0;
3716         ipvs->sysctl_amemthresh = 1024;
3717         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3718         ipvs->sysctl_am_droprate = 10;
3719         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3720         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3721         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3722 #ifdef CONFIG_IP_VS_NFCT
3723         tbl[idx++].data = &ipvs->sysctl_conntrack;
3724 #endif
3725         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3726         ipvs->sysctl_snat_reroute = 1;
3727         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3728         ipvs->sysctl_sync_ver = 1;
3729         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3730         ipvs->sysctl_sync_ports = 1;
3731         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3732         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3733         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3734         ipvs->sysctl_sync_sock_size = 0;
3735         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3736         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3737         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3738         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3739         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3740         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3741         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3742         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3743         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3744         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3745         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3746         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3747         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3748         ipvs->sysctl_pmtu_disc = 1;
3749         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3750         tbl[idx++].data = &ipvs->sysctl_backup_only;
3751
3752
3753         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3754         if (ipvs->sysctl_hdr == NULL) {
3755                 if (!net_eq(net, &init_net))
3756                         kfree(tbl);
3757                 return -ENOMEM;
3758         }
3759         ip_vs_start_estimator(net, &ipvs->tot_stats);
3760         ipvs->sysctl_tbl = tbl;
3761         /* Schedule defense work */
3762         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3763         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3764
3765         return 0;
3766 }
3767
3768 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3769 {
3770         struct netns_ipvs *ipvs = net_ipvs(net);
3771
3772         cancel_delayed_work_sync(&ipvs->defense_work);
3773         cancel_work_sync(&ipvs->defense_work.work);
3774         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3775 }
3776
3777 #else
3778
3779 static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3780 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3781
3782 #endif
3783
3784 static struct notifier_block ip_vs_dst_notifier = {
3785         .notifier_call = ip_vs_dst_event,
3786 };
3787
3788 int __net_init ip_vs_control_net_init(struct net *net)
3789 {
3790         int idx;
3791         struct netns_ipvs *ipvs = net_ipvs(net);
3792
3793         rwlock_init(&ipvs->rs_lock);
3794
3795         /* Initialize rs_table */
3796         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3797                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3798
3799         INIT_LIST_HEAD(&ipvs->dest_trash);
3800         atomic_set(&ipvs->ftpsvc_counter, 0);
3801         atomic_set(&ipvs->nullsvc_counter, 0);
3802
3803         /* procfs stats */
3804         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3805         if (!ipvs->tot_stats.cpustats)
3806                 return -ENOMEM;
3807
3808         spin_lock_init(&ipvs->tot_stats.lock);
3809
3810         proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3811         proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3812         proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3813                     &ip_vs_stats_percpu_fops);
3814
3815         if (ip_vs_control_net_init_sysctl(net))
3816                 goto err;
3817
3818         return 0;
3819
3820 err:
3821         free_percpu(ipvs->tot_stats.cpustats);
3822         return -ENOMEM;
3823 }
3824
3825 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3826 {
3827         struct netns_ipvs *ipvs = net_ipvs(net);
3828
3829         ip_vs_trash_cleanup(net);
3830         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3831         ip_vs_control_net_cleanup_sysctl(net);
3832         remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3833         remove_proc_entry("ip_vs_stats", net->proc_net);
3834         remove_proc_entry("ip_vs", net->proc_net);
3835         free_percpu(ipvs->tot_stats.cpustats);
3836 }
3837
3838 int __init ip_vs_register_nl_ioctl(void)
3839 {
3840         int ret;
3841
3842         ret = nf_register_sockopt(&ip_vs_sockopts);
3843         if (ret) {
3844                 pr_err("cannot register sockopt.\n");
3845                 goto err_sock;
3846         }
3847
3848         ret = ip_vs_genl_register();
3849         if (ret) {
3850                 pr_err("cannot register Generic Netlink interface.\n");
3851                 goto err_genl;
3852         }
3853         return 0;
3854
3855 err_genl:
3856         nf_unregister_sockopt(&ip_vs_sockopts);
3857 err_sock:
3858         return ret;
3859 }
3860
3861 void ip_vs_unregister_nl_ioctl(void)
3862 {
3863         ip_vs_genl_unregister();
3864         nf_unregister_sockopt(&ip_vs_sockopts);
3865 }
3866
3867 int __init ip_vs_control_init(void)
3868 {
3869         int idx;
3870         int ret;
3871
3872         EnterFunction(2);
3873
3874         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3875         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3876                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3877                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3878         }
3879
3880         smp_wmb();      /* Do we really need it now ? */
3881
3882         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3883         if (ret < 0)
3884                 return ret;
3885
3886         LeaveFunction(2);
3887         return 0;
3888 }
3889
3890
3891 void ip_vs_control_cleanup(void)
3892 {
3893         EnterFunction(2);
3894         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3895         LeaveFunction(2);
3896 }