2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
61 /* sysctl variables */
63 #ifdef CONFIG_IP_VS_DEBUG
64 static int sysctl_ip_vs_debug_level = 0;
/* Return the current IPVS debug level (set via the "debug_level" sysctl).
 * NOTE(review): the function's braces (orig lines 67/69) were lost in this
 * extraction; code below is otherwise unchanged.
 */
66 int ip_vs_get_debug_level(void)
68 return sysctl_ip_vs_debug_level;
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Decide whether @addr is local to @net: do a route lookup and treat the
 * address as local when the output device carries IFF_LOOPBACK.
 * NOTE(review): the flowi6 setup and return statements are missing from
 * this extraction — confirm against the full source. */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
82 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
83 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
/* Recompute the three IPVS DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) from available memory vs. the amemthresh sysctl.  Called from
 * the defense work handler and from sysctl writes. */
95 static void update_defense_level(struct netns_ipvs *ipvs)
/* NOTE(review): 'old_secure_tcp' is function-static, so it is shared by
 * every network namespace even though all other defense state is per-netns
 * (struct netns_ipvs) — per-netns state would be safer here. */
98 static int old_secure_tcp = 0;
103 /* we only count free and buffered memory (in pages) */
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
111 nomem = (availmem < ipvs->sysctl_amemthresh);
/* drop_entry strategy: mode 1/2 enable random entry dropping while memory
 * is low; mode 2 is the sticky (auto-set) variant.  NOTE(review): the case
 * labels and nomem tests between these atomic_set()s were dropped by the
 * extraction. */
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
119 atomic_set(&ipvs->dropentry, 0);
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
126 atomic_set(&ipvs->dropentry, 0);
131 atomic_set(&ipvs->dropentry, 1);
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
138 atomic_set(&ipvs->dropentry, 1);
141 spin_unlock(&ipvs->dropentry_lock);
/* drop_packet strategy: derive a drop rate from how far below amemthresh
 * we are (amemthresh / (amemthresh - availmem)), or use the fixed
 * am_droprate in the always-on mode. */
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
166 ipvs->sysctl_drop_packet = 1;
170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
173 spin_unlock(&ipvs->droppacket_lock);
/* secure_tcp strategy: switch protocol timeouts to the "secure" table when
 * the effective mode goes above 1 (see timeout change call below). */
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
179 if (old_secure_tcp >= 2)
184 if (old_secure_tcp < 2)
186 ipvs->sysctl_secure_tcp = 2;
188 if (old_secure_tcp >= 2)
194 if (old_secure_tcp < 2)
197 if (old_secure_tcp >= 2)
199 ipvs->sysctl_secure_tcp = 1;
203 if (old_secure_tcp < 2)
207 old_secure_tcp = ipvs->sysctl_secure_tcp;
209 ip_vs_protocol_timeout_change(ipvs,
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
218 * Timer for checking the defense
220 #define DEFENSE_TIMER_PERIOD 1*HZ
/* Periodic defense worker: refresh the defense level, randomly drop
 * connection entries when drop_entry is active, then re-arm itself with
 * DEFENSE_TIMER_PERIOD. */
222 static void defense_work_handler(struct work_struct *work)
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
/* Module use-count helpers: inc pins the module (try_module_get can fail
 * while the module is unloading), dec drops the reference.
 * NOTE(review): the return-type lines (orig 234/240) are missing from
 * this extraction. */
235 ip_vs_use_count_inc(void)
237 return try_module_get(THIS_MODULE);
241 ip_vs_use_count_dec(void)
243 module_put(THIS_MODULE);
248 * Hash table: for virtual service lookups
250 #define IP_VS_SVC_TAB_BITS 8
251 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254 /* the service table hashed by <protocol, addr, port> */
255 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256 /* the service table hashed by fwmark */
257 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
261 * Returns hash value for virtual service
/* Hash a virtual service by <netns, protocol, addr, port> into the
 * IP_VS_SVC_TAB_SIZE-bucket table.  For IPv6 the four address words are
 * XOR-folded; the net pointer is folded in (shifted to skip low bits that
 * are equal for all allocations). */
263 static inline unsigned
264 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
265 const union nf_inet_addr *addr, __be16 port)
267 register unsigned porth = ntohs(port);
268 __be32 addr_fold = addr->ip;
270 #ifdef CONFIG_IP_VS_IPV6
272 addr_fold = addr->ip6[0]^addr->ip6[1]^
273 addr->ip6[2]^addr->ip6[3];
275 addr_fold ^= ((size_t)net>>8);
277 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
278 & IP_VS_SVC_TAB_MASK;
282 * Returns hash value of fwmark for virtual service lookup
/* Hash a virtual service by <netns, fwmark> into the fwmark table. */
284 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
286 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
290 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
291 * or in the ip_vs_svc_fwm_table by fwmark.
292 * Should be called with locked tables.
/* Insert @svc into ip_vs_svc_table (by <netns,proto,addr,port>) or, for
 * fwmark services, into ip_vs_svc_fwm_table.  Caller holds the service
 * table lock.  Refuses (with an error log) if already hashed; takes a
 * refcnt for the table's reference. */
294 static int ip_vs_svc_hash(struct ip_vs_service *svc)
298 if (svc->flags & IP_VS_SVC_F_HASHED) {
299 pr_err("%s(): request for already hashed, called from %pF\n",
300 __func__, __builtin_return_address(0));
304 if (svc->fwmark == 0) {
306 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
308 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
309 &svc->addr, svc->port);
310 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
313 * Hash it by fwmark in svc_fwm_table
315 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
316 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
319 svc->flags |= IP_VS_SVC_F_HASHED;
320 /* increase its refcnt because it is referenced by the svc table */
321 atomic_inc(&svc->refcnt);
327 * Unhashes a service from svc_table / svc_fwm_table.
328 * Should be called with locked tables.
/* Remove @svc from whichever service table it lives in (normal or fwmark)
 * and drop the table's refcnt.  Caller holds the service table lock.
 * Refuses (with an error log) if the service is not flagged as hashed. */
330 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
332 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
333 pr_err("%s(): request for unhash flagged, called from %pF\n",
334 __func__, __builtin_return_address(0));
338 if (svc->fwmark == 0) {
339 /* Remove it from the svc_table table */
340 list_del(&svc->s_list);
342 /* Remove it from the svc_fwm_table table */
343 list_del(&svc->f_list);
346 svc->flags &= ~IP_VS_SVC_F_HASHED;
347 atomic_dec(&svc->refcnt);
353 * Get service by {netns, proto,addr,port} in the service table.
/* Lookup a service by exact <netns, af, protocol, addr, port> in the hash
 * table.  Caller must hold the service table lock.
 * NOTE(review): the af comparison on the first condition line and the
 * found/not-found returns were dropped by the extraction. */
355 static inline struct ip_vs_service *
356 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
357 const union nf_inet_addr *vaddr, __be16 vport)
360 struct ip_vs_service *svc;
362 /* Check for "full" addressed entries */
363 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
365 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
368 && (svc->port == vport)
369 && (svc->protocol == protocol)
370 && net_eq(svc->net, net)) {
381 * Get service by {fwmark} in the service table.
/* Lookup a service by <netns, af, fwmark> in the fwmark hash table.
 * Caller must hold the service table lock. */
383 static inline struct ip_vs_service *
384 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
387 struct ip_vs_service *svc;
389 /* Check for fwmark addressed entries */
390 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
393 if (svc->fwmark == fwmark && svc->af == af
394 && net_eq(svc->net, net)) {
/* Public service lookup, under the svc read lock.  Order of attempts:
 * 1) fwmark table (when fwmark != 0), 2) exact <proto,addr,port>,
 * 3) FTP control port fallback when an FTP service exists and this could
 * be an FTP data connection, 4) catch-all port-zero service.  On a hit the
 * service's usecnt is bumped before the lock is released; callers must
 * pair with ip_vs_service_put(). */
403 struct ip_vs_service *
404 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
405 const union nf_inet_addr *vaddr, __be16 vport)
407 struct ip_vs_service *svc;
408 struct netns_ipvs *ipvs = net_ipvs(net);
410 read_lock(&__ip_vs_svc_lock);
413 * Check the table hashed by fwmark first
416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422 * Check the table hashed by <protocol,addr,port>
423 * for "full" addressed entries
425 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
428 && protocol == IPPROTO_TCP
429 && atomic_read(&ipvs->ftpsvc_counter)
430 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
432 * Check if ftp service entry exists, the packet
433 * might belong to FTP data connections.
435 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT)
439 && atomic_read(&ipvs->nullsvc_counter)) {
441 * Check if the catch-all port (port zero) exists
443 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
448 atomic_inc(&svc->usecnt);
449 read_unlock(&__ip_vs_svc_lock);
451 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
452 fwmark, ip_vs_proto_name(protocol),
453 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
454 svc ? "hit" : "not hit");
/* Bind @dest to @svc, taking a service refcnt on its behalf.
 * NOTE(review): the return type and the dest->svc assignment are missing
 * from this extraction. */
461 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463 atomic_inc(&svc->refcnt);
/* Unbind @dest from its service; when that drops the last service refcnt,
 * free the service's per-cpu stats (the kfree of svc itself is presumably
 * on a line lost in this extraction — confirm against full source). */
468 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
470 struct ip_vs_service *svc = dest->svc;
473 if (atomic_dec_and_test(&svc->refcnt)) {
474 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
476 IP_VS_DBG_ADDR(svc->af, &svc->addr),
477 ntohs(svc->port), atomic_read(&svc->usecnt));
478 free_percpu(svc->stats.cpustats);
485 * Returns hash value for real service
/* Hash a real server by <af, addr, port> for the rs_table; IPv6 addresses
 * are XOR-folded into one word. */
487 static inline unsigned ip_vs_rs_hashkey(int af,
488 const union nf_inet_addr *addr,
491 register unsigned porth = ntohs(port);
492 __be32 addr_fold = addr->ip;
494 #ifdef CONFIG_IP_VS_IPV6
496 addr_fold = addr->ip6[0]^addr->ip6[1]^
497 addr->ip6[2]^addr->ip6[3];
500 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
505 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
506 * should be called with locked tables.
/* Insert @dest into the per-netns real-server table, keyed by
 * <af,addr,port>.  A non-empty d_list means it is already hashed, so the
 * early branch (body lost in extraction) bails out.  Caller holds
 * rs_lock for writing. */
508 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
512 if (!list_empty(&dest->d_list)) {
517 * Hash by proto,addr,port,
518 * which are the parameters of the real service.
520 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
522 list_add(&dest->d_list, &ipvs->rs_table[hash]);
528 * UNhashes ip_vs_dest from rs_table.
529 * should be called with locked tables.
/* Remove @dest from the real-server table if hashed, re-initializing
 * d_list so a later list_empty() check sees it as unhashed.  Caller holds
 * rs_lock for writing. */
531 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
534 * Remove it from the rs_table table.
536 if (!list_empty(&dest->d_list)) {
537 list_del(&dest->d_list);
538 INIT_LIST_HEAD(&dest->d_list);
545 * Lookup real service by <proto,addr,port> in the real service table.
/* Find the first real server matching <af, addr, port> (and protocol, see
 * the partially-visible condition) in the per-netns rs_table, under
 * rs_lock.  NOTE(review): the return type line and the found-entry return
 * were dropped by the extraction. */
548 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
549 const union nf_inet_addr *daddr,
552 struct netns_ipvs *ipvs = net_ipvs(net);
554 struct ip_vs_dest *dest;
557 * Check for "full" addressed entries
558 * Return the first found entry
560 hash = ip_vs_rs_hashkey(af, daddr, dport);
562 read_lock(&ipvs->rs_lock);
563 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
565 && ip_vs_addr_equal(af, &dest->addr, daddr)
566 && (dest->port == dport)
567 && ((dest->protocol == protocol) ||
570 read_unlock(&ipvs->rs_lock);
574 read_unlock(&ipvs->rs_lock);
580 * Lookup destination by {addr,port} in the given service
/* Find the destination with <addr, port> in @svc's destination list, or
 * NULL (the returns are on lines lost in this extraction). */
582 static struct ip_vs_dest *
583 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
586 struct ip_vs_dest *dest;
589 * Find the destination for the given service
591 list_for_each_entry(dest, &svc->destinations, n_list) {
592 if ((dest->af == svc->af)
593 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
594 && (dest->port == dport)) {
604 * Find destination by {daddr,dport,vaddr,protocol}
605 * Created to be used in ip_vs_process_message() in
606 * the backup synchronization daemon. It finds the
607 * destination to be bound to the received connection
610 * ip_vs_lookup_real_service() looked promising, but
611 * seems not working as expected.
/* Resolve a destination for the sync backup daemon: look up the virtual
 * service first, then its destination; on success take a dest refcnt and
 * always release the service reference taken by ip_vs_service_get(). */
613 struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
614 const union nf_inet_addr *daddr,
616 const union nf_inet_addr *vaddr,
617 __be16 vport, __u16 protocol, __u32 fwmark)
619 struct ip_vs_dest *dest;
620 struct ip_vs_service *svc;
622 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
625 dest = ip_vs_lookup_dest(svc, daddr, dport);
627 atomic_inc(&dest->refcnt);
628 ip_vs_service_put(svc);
633 * Lookup dest by {svc,addr,port} in the destination trash.
634 * The destination trash is used to hold the destinations that are removed
635 * from the service table but are still referenced by some conn entries.
636 * The reason to add the destination trash is when the dest is temporary
637 * down (either by administrator or by monitor program), the dest can be
638 * picked back from the trash, the remaining connections to the dest can
639 * continue, and the counting information of the dest is also useful for
642 static struct ip_vs_dest *
643 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
646 struct ip_vs_dest *dest, *nxt;
647 struct netns_ipvs *ipvs = net_ipvs(svc->net);
650 * Find the destination in trash
652 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
653 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
656 IP_VS_DBG_ADDR(svc->af, &dest->addr),
658 atomic_read(&dest->refcnt));
659 if (dest->af == svc->af &&
660 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
661 dest->port == dport &&
662 dest->vfwmark == svc->fwmark &&
663 dest->protocol == svc->protocol &&
665 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
666 dest->vport == svc->port))) {
672 * Try to purge the destination from trash if not referenced
674 if (atomic_read(&dest->refcnt) == 1) {
675 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
678 IP_VS_DBG_ADDR(svc->af, &dest->addr),
680 list_del(&dest->n_list);
681 ip_vs_dst_reset(dest);
682 __ip_vs_unbind_svc(dest);
683 free_percpu(dest->stats.cpustats);
693 * Clean up all the destinations in the trash
694 * Called by the ip_vs_control_cleanup()
696 * When the ip_vs_control_cleanup is activated by ipvs module exit,
697 * the service tables must have been flushed and all the connections
698 * are expired, and the refcnt of each destination in the trash must
699 * be 1, so we simply release them here.
/* Release every destination left in the trash at cleanup time.  By this
 * point the service tables are flushed and connections expired, so each
 * trash entry holds the sole remaining reference (see comment above). */
701 static void ip_vs_trash_cleanup(struct net *net)
703 struct ip_vs_dest *dest, *nxt;
704 struct netns_ipvs *ipvs = net_ipvs(net);
706 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
707 list_del(&dest->n_list);
708 ip_vs_dst_reset(dest);
709 __ip_vs_unbind_svc(dest);
710 free_percpu(dest->stats.cpustats);
/* Copy stats to user-visible form: each counter is reported relative to
 * its zero point (ustats - ustats0, see ip_vs_zero_stats), plus the
 * estimator rates, all under the stats lock. */
716 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
718 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
720 spin_lock_bh(&src->lock);
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
728 ip_vs_read_estimator(dst, src);
730 spin_unlock_bh(&src->lock);
/* "Zero" the stats without losing the live counters: snapshot the current
 * values into ustats0 so future reads report deltas, and reset the
 * estimator, under the stats lock. */
734 ip_vs_zero_stats(struct ip_vs_stats *stats)
736 spin_lock_bh(&stats->lock);
738 /* get current counters as zero point, rates are zeroed */
740 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
748 ip_vs_zero_estimator(stats);
750 spin_unlock_bh(&stats->lock);
754 * Update a destination in the given service
/* Apply user-supplied parameters to @dest (weight, forwarding flags,
 * thresholds), (re)bind it to @svc, reset its cached route, and — when
 * @add (the add/edit flag) selects it — link it into the service's
 * destination list under the svc write lock, then let the scheduler
 * recompute state. */
757 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
758 struct ip_vs_dest_user_kern *udest, int add)
760 struct netns_ipvs *ipvs = net_ipvs(svc->net);
763 /* set the weight and the flags */
764 atomic_set(&dest->weight, udest->weight);
765 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
766 conn_flags |= IP_VS_CONN_F_INACTIVE;
768 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
769 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
770 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
773 * Put the real service in rs_table if not present.
774 * For now only for NAT!
776 write_lock_bh(&ipvs->rs_lock);
777 ip_vs_rs_hash(ipvs, dest);
778 write_unlock_bh(&ipvs->rs_lock);
780 atomic_set(&dest->conn_flags, conn_flags);
782 /* bind the service */
784 __ip_vs_bind_svc(dest, svc);
786 if (dest->svc != svc) {
787 __ip_vs_unbind_svc(dest);
788 ip_vs_zero_stats(&dest->stats);
789 __ip_vs_bind_svc(dest, svc);
793 /* set the dest status flags */
794 dest->flags |= IP_VS_DEST_F_AVAILABLE;
796 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
797 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
798 dest->u_threshold = udest->u_threshold;
799 dest->l_threshold = udest->l_threshold;
/* Invalidate the cached route so the next packet re-resolves it. */
801 spin_lock_bh(&dest->dst_lock);
802 ip_vs_dst_reset(dest);
803 spin_unlock_bh(&dest->dst_lock);
806 ip_vs_start_estimator(svc->net, &dest->stats);
808 write_lock_bh(&__ip_vs_svc_lock);
810 /* Wait until all other svc users go away */
811 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
814 list_add(&dest->n_list, &svc->destinations);
818 /* call the update_service, because server weight may be changed */
819 if (svc->scheduler->update_service)
820 svc->scheduler->update_service(svc);
822 write_unlock_bh(&__ip_vs_svc_lock);
827 * Create a destination for the given service
/* Allocate and initialize a new destination for @svc.  Validates the
 * address first (IPv6: must be usable unicast or local; IPv4: must route
 * as LOCAL or UNICAST), then allocates the dest and its per-cpu stats,
 * copies identity fields from the service and @udest, and hands off to
 * __ip_vs_update_dest() for the rest.  NOTE(review): error-return lines
 * and *dest_p assignment are missing from this extraction. */
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831 struct ip_vs_dest **dest_p)
833 struct ip_vs_dest *dest;
838 #ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
843 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
848 atype = inet_addr_type(svc->net, udest->addr.ip);
849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
855 pr_err("%s(): no memory.\n", __func__);
858 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859 if (!dest->stats.cpustats) {
860 pr_err("%s() alloc_percpu failed\n", __func__);
865 dest->protocol = svc->protocol;
866 dest->vaddr = svc->addr;
867 dest->vport = svc->port;
868 dest->vfwmark = svc->fwmark;
869 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
870 dest->port = udest->port;
872 atomic_set(&dest->activeconns, 0);
873 atomic_set(&dest->inactconns, 0);
874 atomic_set(&dest->persistconns, 0);
875 atomic_set(&dest->refcnt, 1);
877 INIT_LIST_HEAD(&dest->d_list);
878 spin_lock_init(&dest->dst_lock);
879 spin_lock_init(&dest->stats.lock);
880 __ip_vs_update_dest(svc, dest, udest, 1);
894 * Add a destination into an existing service
/* Add a destination to @svc.  Validates weight and thresholds, rejects a
 * duplicate, then tries to resurrect a matching dest from the trash (so
 * its connections and counters survive) before falling back to allocating
 * a brand-new one via ip_vs_new_dest(). */
897 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
899 struct ip_vs_dest *dest;
900 union nf_inet_addr daddr;
901 __be16 dport = udest->port;
906 if (udest->weight < 0) {
907 pr_err("%s(): server weight less than zero\n", __func__);
911 if (udest->l_threshold > udest->u_threshold) {
912 pr_err("%s(): lower threshold is higher than upper threshold\n",
917 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
920 * Check if the dest already exists in the list
922 dest = ip_vs_lookup_dest(svc, &daddr, dport);
925 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
930 * Check if the dest already exists in the trash and
931 * is from the same service
933 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
936 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
937 "dest->refcnt=%d, service %u/%s:%u\n",
938 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
939 atomic_read(&dest->refcnt),
941 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
945 * Get the destination from the trash
947 list_del(&dest->n_list);
949 __ip_vs_update_dest(svc, dest, udest, 1);
953 * Allocate and initialize the dest structure
955 ret = ip_vs_new_dest(svc, udest, &dest);
964 * Edit a destination in the given service
/* Edit an existing destination of @svc: same parameter validation as
 * ip_vs_add_dest(), then update in place (add flag = 0, so the dest is
 * not re-linked into the service list). */
967 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
969 struct ip_vs_dest *dest;
970 union nf_inet_addr daddr;
971 __be16 dport = udest->port;
975 if (udest->weight < 0) {
976 pr_err("%s(): server weight less than zero\n", __func__);
980 if (udest->l_threshold > udest->u_threshold) {
981 pr_err("%s(): lower threshold is higher than upper threshold\n",
986 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
989 * Lookup the destination list
991 dest = ip_vs_lookup_dest(svc, &daddr, dport);
994 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
998 __ip_vs_update_dest(svc, dest, udest, 0);
1006 * Delete a destination (must be already unlinked from the service)
/* Delete an already-unlinked destination: stop its estimator, unhash it
 * from rs_table, then either free it (refcnt hit zero — the kfree is
 * presumably on a line lost in this extraction) or park it in the trash
 * with an extra reference so lingering connections can keep using it. */
1008 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1010 struct netns_ipvs *ipvs = net_ipvs(net);
1012 ip_vs_stop_estimator(net, &dest->stats);
1015 * Remove it from the d-linked list with the real services.
1017 write_lock_bh(&ipvs->rs_lock);
1018 ip_vs_rs_unhash(dest);
1019 write_unlock_bh(&ipvs->rs_lock);
1022 * Decrease the refcnt of the dest, and free the dest
1023 * if nobody refers to it (refcnt=0). Otherwise, throw
1024 * the destination into the trash.
1026 if (atomic_dec_and_test(&dest->refcnt)) {
1027 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031 ip_vs_dst_reset(dest);
1032 /* simply decrease svc->refcnt here, let the caller check
1033 and release the service if nobody refers to it.
1034 Only user context can release destination and service,
1035 and only one user context can update virtual service at a
1036 time, so the operation here is OK */
1037 atomic_dec(&dest->svc->refcnt);
1038 free_percpu(dest->stats.cpustats);
1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045 atomic_read(&dest->refcnt));
1046 list_add(&dest->n_list, &ipvs->dest_trash);
1047 atomic_inc(&dest->refcnt);
1053 * Unlink a destination from the given service
/* Unlink @dest from @svc's destination list, marking it unavailable.
 * The scheduler's update_service hook is invoked only when the (third,
 * partially-visible) svcupd flag asks for it. */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1062 * Remove it from the d-linked destination list.
1064 list_del(&dest->n_list);
1068 * Call the update_service function of its scheduler
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
1076 * Delete a destination server in the given service
/* Delete a destination from @svc by <addr, port>: look it up, unlink it
 * under the svc write lock (after waiting for readers to drain), then
 * free-or-trash it via __ip_vs_del_dest(). */
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1081 struct ip_vs_dest *dest;
1082 __be16 dport = udest->port;
1086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1093 write_lock_bh(&__ip_vs_svc_lock);
1096 * Wait until all other svc users go away.
1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1101 * Unlink dest from the service
1103 __ip_vs_unlink_dest(svc, dest, 1);
1105 write_unlock_bh(&__ip_vs_svc_lock);
1108 * Delete the destination
1110 __ip_vs_del_dest(svc->net, dest);
1119 * Add a service into the service hash table
/* Create a new virtual service from the user request @u: pin the module,
 * resolve the scheduler and optional persistence engine by name, validate
 * the IPv6 netmask, allocate the service plus per-cpu stats, bind
 * scheduler/PE, bump the FTP / port-zero counters, start the estimator,
 * and hash the service into the table.  The tail (after the labels lost
 * in this extraction) is the goto-based unwind path releasing everything
 * acquired so far. */
1122 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1123 struct ip_vs_service **svc_p)
1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1128 struct ip_vs_service *svc = NULL;
1129 struct netns_ipvs *ipvs = net_ipvs(net);
1131 /* increase the module use count */
1132 ip_vs_use_count_inc();
1134 /* Lookup the scheduler by 'u->sched_name' */
1135 sched = ip_vs_scheduler_get(u->sched_name);
1136 if (sched == NULL) {
1137 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1142 if (u->pe_name && *u->pe_name) {
1143 pe = ip_vs_pe_getbyname(u->pe_name);
1145 pr_info("persistence engine module ip_vs_pe_%s "
1146 "not found\n", u->pe_name);
1152 #ifdef CONFIG_IP_VS_IPV6
1153 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1159 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1161 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1165 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166 if (!svc->stats.cpustats) {
1167 pr_err("%s() alloc_percpu failed\n", __func__);
1171 /* I'm the first user of the service */
1172 atomic_set(&svc->usecnt, 0);
1173 atomic_set(&svc->refcnt, 0);
1176 svc->protocol = u->protocol;
1177 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1178 svc->port = u->port;
1179 svc->fwmark = u->fwmark;
1180 svc->flags = u->flags;
1181 svc->timeout = u->timeout * HZ;
1182 svc->netmask = u->netmask;
1185 INIT_LIST_HEAD(&svc->destinations);
1186 rwlock_init(&svc->sched_lock);
1187 spin_lock_init(&svc->stats.lock);
1189 /* Bind the scheduler */
1190 ret = ip_vs_bind_scheduler(svc, sched);
1195 /* Bind the ct retriever */
1196 ip_vs_bind_pe(svc, pe);
1199 /* Update the virtual service counters */
1200 if (svc->port == FTPPORT)
1201 atomic_inc(&ipvs->ftpsvc_counter);
1202 else if (svc->port == 0)
1203 atomic_inc(&ipvs->nullsvc_counter);
1205 ip_vs_start_estimator(net, &svc->stats);
1207 /* Count only IPv4 services for old get/setsockopt interface */
1208 if (svc->af == AF_INET)
1209 ipvs->num_services++;
1211 /* Hash the service into the service table */
1212 write_lock_bh(&__ip_vs_svc_lock);
1213 ip_vs_svc_hash(svc);
1214 write_unlock_bh(&__ip_vs_svc_lock);
/* Error-unwind path (labels lost in extraction): undo in reverse order. */
1222 ip_vs_unbind_scheduler(svc);
1225 ip_vs_app_inc_put(svc->inc);
1228 if (svc->stats.cpustats)
1229 free_percpu(svc->stats.cpustats);
1232 ip_vs_scheduler_put(sched);
1235 /* decrease the module use count */
1236 ip_vs_use_count_dec();
1243 * Edit a service and bind it with a new scheduler
/* Edit a virtual service in place: resolve the (possibly new) scheduler
 * and persistence engine, validate the IPv6 netmask, then under the svc
 * write lock (with readers drained) update flags/timeout/netmask, swap
 * schedulers if changed, and swap the PE.  Old scheduler/PE references
 * are released after the lock is dropped. */
1246 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1248 struct ip_vs_scheduler *sched, *old_sched;
1249 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1253 * Lookup the scheduler, by 'u->sched_name'
1255 sched = ip_vs_scheduler_get(u->sched_name);
1256 if (sched == NULL) {
1257 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1262 if (u->pe_name && *u->pe_name) {
1263 pe = ip_vs_pe_getbyname(u->pe_name);
1265 pr_info("persistence engine module ip_vs_pe_%s "
1266 "not found\n", u->pe_name);
1273 #ifdef CONFIG_IP_VS_IPV6
1274 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1280 write_lock_bh(&__ip_vs_svc_lock);
1283 * Wait until all other svc users go away.
1285 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1288 * Set the flags and timeout value
1290 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291 svc->timeout = u->timeout * HZ;
1292 svc->netmask = u->netmask;
1294 old_sched = svc->scheduler;
1295 if (sched != old_sched) {
1297 * Unbind the old scheduler
1299 if ((ret = ip_vs_unbind_scheduler(svc))) {
1305 * Bind the new scheduler
1307 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1309 * If ip_vs_bind_scheduler fails, restore the old
1311 * The main reason of failure is out of memory.
1313 * The question is if the old scheduler can be
1314 * restored all the time. TODO: if it cannot be
1315 * restored some time, we must delete the service,
1316 * otherwise the system may crash.
1318 ip_vs_bind_scheduler(svc, old_sched);
1326 ip_vs_unbind_pe(svc);
1327 ip_vs_bind_pe(svc, pe);
1331 write_unlock_bh(&__ip_vs_svc_lock);
1333 ip_vs_scheduler_put(old_sched);
1334 ip_vs_pe_put(old_pe);
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
/* Tear down an unlinked, unreferenced service: stop its estimator, unbind
 * scheduler/PE/app-inc, delete every destination, drop the FTP/port-zero
 * counters, free it when refcnt is zero, and release the module pin.
 * Caller holds the svc write lock (_bh). */
1344 static void __ip_vs_del_service(struct ip_vs_service *svc)
1346 struct ip_vs_dest *dest, *nxt;
1347 struct ip_vs_scheduler *old_sched;
1348 struct ip_vs_pe *old_pe;
1349 struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* NOTE(review): stray debug trace left in a hot admin path — consider
 * removing or demoting to IP_VS_DBG. */
1351 pr_info("%s: enter\n", __func__);
1353 /* Count only IPv4 services for old get/setsockopt interface */
1354 if (svc->af == AF_INET)
1355 ipvs->num_services--;
1357 ip_vs_stop_estimator(svc->net, &svc->stats);
1359 /* Unbind scheduler */
1360 old_sched = svc->scheduler;
1361 ip_vs_unbind_scheduler(svc);
1362 ip_vs_scheduler_put(old_sched);
1364 /* Unbind persistence engine */
1366 ip_vs_unbind_pe(svc);
1367 ip_vs_pe_put(old_pe);
1369 /* Unbind app inc */
1371 ip_vs_app_inc_put(svc->inc);
1376 * Unlink the whole destination list
1378 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1379 __ip_vs_unlink_dest(svc, dest, 0);
1380 __ip_vs_del_dest(svc->net, dest);
1384 * Update the virtual service counters
1386 if (svc->port == FTPPORT)
1387 atomic_dec(&ipvs->ftpsvc_counter);
1388 else if (svc->port == 0)
1389 atomic_dec(&ipvs->nullsvc_counter);
1392 * Free the service if nobody refers to it
1394 if (atomic_read(&svc->refcnt) == 0) {
1395 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1397 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1398 ntohs(svc->port), atomic_read(&svc->usecnt));
1399 free_percpu(svc->stats.cpustats);
1403 /* decrease the module use count */
1404 ip_vs_use_count_dec();
1408 * Unlink a service from list and try to delete it if its refcnt reached 0
/* Unhash @svc from the service table under the write lock, wait for all
 * readers (usecnt) to drain, then delete it while still holding the lock. */
1410 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1413 * Unhash it from the service table
1415 write_lock_bh(&__ip_vs_svc_lock);
1417 ip_vs_svc_unhash(svc);
1420 * Wait until all the svc users go away.
1422 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1424 __ip_vs_del_service(svc);
1426 write_unlock_bh(&__ip_vs_svc_lock);
1430 * Delete a service from the service list
/* Delete @svc (thin wrapper over ip_vs_unlink_service; the NULL check and
 * return are on lines lost in this extraction). */
1432 static int ip_vs_del_service(struct ip_vs_service *svc)
1436 ip_vs_unlink_service(svc);
1443 * Flush all the virtual services
/* Remove every virtual service belonging to @net: walk both hash tables
 * (normal and fwmark) and unlink each matching service. */
1445 static int ip_vs_flush(struct net *net)
1448 struct ip_vs_service *svc, *nxt;
1451 * Flush the service table hashed by <netns,protocol,addr,port>
1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1454 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1456 if (net_eq(svc->net, net))
1457 ip_vs_unlink_service(svc);
1462 * Flush the service table hashed by fwmark
1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465 list_for_each_entry_safe(svc, nxt,
1466 &ip_vs_svc_fwm_table[idx], f_list) {
1467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
1477 * Zero counters in a service or all services
/* Zero the stats of @svc and of each of its destinations, under the svc
 * write lock. */
1479 static int ip_vs_zero_service(struct ip_vs_service *svc)
1481 struct ip_vs_dest *dest;
1483 write_lock_bh(&__ip_vs_svc_lock);
1484 list_for_each_entry(dest, &svc->destinations, n_list) {
1485 ip_vs_zero_stats(&dest->stats);
1487 ip_vs_zero_stats(&svc->stats);
1488 write_unlock_bh(&__ip_vs_svc_lock);
/* Zero stats for every service in @net (both hash tables) plus the
 * netns-global totals. */
1492 static int ip_vs_zero_all(struct net *net)
1495 struct ip_vs_service *svc;
1497 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1498 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1499 if (net_eq(svc->net, net))
1500 ip_vs_zero_service(svc);
1504 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1505 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1506 if (net_eq(svc->net, net))
1507 ip_vs_zero_service(svc);
1511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1515 #ifdef CONFIG_SYSCTL
/* sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp): accept writes only in [0, 3] (restore the saved value
 * otherwise — restore line lost in extraction) and re-run
 * update_defense_level() on a valid change. */
1517 proc_do_defense_mode(ctl_table *table, int write,
1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1520 struct net *net = current->nsproxy->net_ns;
1521 int *valp = table->data;
1525 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1531 update_defense_level(net_ipvs(net));
/* sysctl handler for the two-integer sync_threshold: snapshot the pair
 * before the write, and roll back if the result is negative or
 * threshold >= period. */
1538 proc_do_sync_threshold(ctl_table *table, int write,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1541 int *valp = table->data;
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
/* sysctl handler for sync_version: only 0 or 1 is accepted (invalid
 * writes restore the old value — restore line lost in extraction); a
 * valid change switches the sync protocol mode for this netns. */
1557 proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1560 int *valp = table->data;
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
/*
 * Sysctl table exposed under /proc/sys/net/ipv4/vs/.  Plain integer
 * knobs use proc_dointvec; range-checked knobs use the custom handlers
 * above; the timeout_* entries (secure_tcp/DoS table) are stored in
 * jiffies and therefore use proc_dointvec_jiffies.
 */
1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1579 * Do not change order or insert new entries without
1580 * align with netns init in __ip_vs_control_init()
1583 static struct ctl_table vs_vars[] = {
1585 .procname = "amemthresh",
1586 .maxlen = sizeof(int),
1588 .proc_handler = proc_dointvec,
1591 .procname = "am_droprate",
1592 .maxlen = sizeof(int),
1594 .proc_handler = proc_dointvec,
/* defense-mode knobs: validated to 0..3 by proc_do_defense_mode */
1597 .procname = "drop_entry",
1598 .maxlen = sizeof(int),
1600 .proc_handler = proc_do_defense_mode,
1603 .procname = "drop_packet",
1604 .maxlen = sizeof(int),
1606 .proc_handler = proc_do_defense_mode,
1608 #ifdef CONFIG_IP_VS_NFCT
1610 .procname = "conntrack",
1611 .maxlen = sizeof(int),
1613 .proc_handler = &proc_dointvec,
1617 .procname = "secure_tcp",
1618 .maxlen = sizeof(int),
1620 .proc_handler = proc_do_defense_mode,
1623 .procname = "snat_reroute",
1624 .maxlen = sizeof(int),
1626 .proc_handler = &proc_dointvec,
1629 .procname = "sync_version",
1630 .maxlen = sizeof(int),
1632 .proc_handler = &proc_do_sync_mode,
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1638 .proc_handler = proc_dointvec,
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1644 .proc_handler = proc_dointvec,
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1650 .proc_handler = proc_dointvec,
/* two-int vector; validated by proc_do_sync_threshold */
1653 .procname = "sync_threshold",
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1657 .proc_handler = proc_do_sync_threshold,
1660 .procname = "nat_icmp_send",
1661 .maxlen = sizeof(int),
1663 .proc_handler = proc_dointvec,
1665 #ifdef CONFIG_IP_VS_DEBUG
1667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1671 .proc_handler = proc_dointvec,
/* per-state timeouts for the secure_tcp/DoS table, kept in jiffies */
1676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1680 .proc_handler = proc_dointvec_jiffies,
1683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1687 .proc_handler = proc_dointvec_jiffies,
1690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1694 .proc_handler = proc_dointvec_jiffies,
1697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1701 .proc_handler = proc_dointvec_jiffies,
1704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1708 .proc_handler = proc_dointvec_jiffies,
1711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1715 .proc_handler = proc_dointvec_jiffies,
1718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1722 .proc_handler = proc_dointvec_jiffies,
1725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1729 .proc_handler = proc_dointvec_jiffies,
1732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1736 .proc_handler = proc_dointvec_jiffies,
1739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1743 .proc_handler = proc_dointvec_jiffies,
1746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1750 .proc_handler = proc_dointvec_jiffies,
1753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1757 .proc_handler = proc_dointvec_jiffies,
/* sysctl path "net/ipv4/vs" under which vs_vars is registered;
 * exported so other IPVS modules can register their own entries. */
1763 const struct ctl_path net_vs_ctl_path[] = {
1764 { .procname = "net", },
1765 { .procname = "ipv4", },
1766 { .procname = "vs", },
1769 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1772 #ifdef CONFIG_PROC_FS
1775 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1776 struct list_head *table;
1781 * Write the contents of the VS rule table to a PROCfs file.
1782 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding-method bits (flags masked with
 * IP_VS_CONN_F_FWD_MASK) to the short name printed in /proc output.
 */
1784 static inline const char *ip_vs_fwd_name(unsigned flags)
1786 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1787 case IP_VS_CONN_F_LOCALNODE:
1789 case IP_VS_CONN_F_TUNNEL:
1791 case IP_VS_CONN_F_DROUTE:
1799 /* Get the Nth entry in the two lists */
/*
 * seq_file helper: find the pos-th service (within this netns) across
 * both hash tables, recording in the iterator which table and bucket
 * the entry came from so _next can resume from there.
 */
1800 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1802 struct net *net = seq_file_net(seq);
1803 struct ip_vs_iter *iter = seq->private;
1805 struct ip_vs_service *svc;
1807 /* look in hash by protocol */
1808 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1810 if (net_eq(svc->net, net) && pos-- == 0) {
1811 iter->table = ip_vs_svc_table;
1818 /* keep looking in fwmark */
1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1821 if (net_eq(svc->net, net) && pos-- == 0) {
1822 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file .start: take the service table read lock for the whole
 * iteration (released in _stop) and position at entry *pos - 1, or
 * return SEQ_START_TOKEN so _show prints the header first.
 */
1832 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1833 __acquires(__ip_vs_svc_lock)
1836 read_lock_bh(&__ip_vs_svc_lock);
1837 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: advance to the following service.  Continue within
 * the current bucket of whichever table the iterator is in, then scan
 * later buckets; when the protocol table is exhausted, switch the
 * iterator over to the fwmark table.
 */
1841 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843 struct list_head *e;
1844 struct ip_vs_iter *iter;
1845 struct ip_vs_service *svc;
1848 if (v == SEQ_START_TOKEN)
1849 return ip_vs_info_array(seq,0);
1852 iter = seq->private;
1854 if (iter->table == ip_vs_svc_table) {
1855 /* next service in table hashed by protocol */
1856 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1857 return list_entry(e, struct ip_vs_service, s_list);
1860 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1861 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1867 iter->table = ip_vs_svc_fwm_table;
1872 /* next service in hashed by fwmark */
1873 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1874 return list_entry(e, struct ip_vs_service, f_list);
1877 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1878 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file .stop: drop the read lock taken in ip_vs_info_seq_start(). */
1886 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1887 __releases(__ip_vs_svc_lock)
1889 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file .show: print one row of the legacy /proc IPVS table.  For
 * SEQ_START_TOKEN print the version banner and column headers; for a
 * service print its address (IPv6, IPv4 or fwmark form), scheduler and
 * flags, followed by one indented line per destination.
 */
1893 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1895 if (v == SEQ_START_TOKEN) {
1897 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1898 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1900 "Prot LocalAddress:Port Scheduler Flags\n");
1902 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1904 const struct ip_vs_service *svc = v;
1905 const struct ip_vs_iter *iter = seq->private;
1906 const struct ip_vs_dest *dest;
/* address-keyed services print proto+addr; the else branch is fwmark */
1908 if (iter->table == ip_vs_svc_table) {
1909 #ifdef CONFIG_IP_VS_IPV6
1910 if (svc->af == AF_INET6)
1911 seq_printf(seq, "%s [%pI6]:%04X %s ",
1912 ip_vs_proto_name(svc->protocol),
1915 svc->scheduler->name);
1918 seq_printf(seq, "%s %08X:%04X %s %s ",
1919 ip_vs_proto_name(svc->protocol),
1920 ntohl(svc->addr.ip),
1922 svc->scheduler->name,
1923 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1925 seq_printf(seq, "FWM %08X %s %s",
1926 svc->fwmark, svc->scheduler->name,
1927 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1930 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1931 seq_printf(seq, "persistent %d %08X\n",
1933 ntohl(svc->netmask));
1935 seq_putc(seq, '\n');
/* one line per real server */
1937 list_for_each_entry(dest, &svc->destinations, n_list) {
1938 #ifdef CONFIG_IP_VS_IPV6
1939 if (dest->af == AF_INET6)
1942 " %-7s %-6d %-10d %-10d\n",
1945 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946 atomic_read(&dest->weight),
1947 atomic_read(&dest->activeconns),
1948 atomic_read(&dest->inactconns));
1953 "%-7s %-6d %-10d %-10d\n",
1954 ntohl(dest->addr.ip),
1956 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957 atomic_read(&dest->weight),
1958 atomic_read(&dest->activeconns),
1959 atomic_read(&dest->inactconns));
/* seq_file operations for /proc/net/ip_vs */
1966 static const struct seq_operations ip_vs_info_seq_ops = {
1967 .start = ip_vs_info_seq_start,
1968 .next = ip_vs_info_seq_next,
1969 .stop = ip_vs_info_seq_stop,
1970 .show = ip_vs_info_seq_show,
/* open() for /proc/net/ip_vs: netns-aware seq_file with a private
 * struct ip_vs_iter cursor. */
1973 static int ip_vs_info_open(struct inode *inode, struct file *file)
1975 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1976 sizeof(struct ip_vs_iter));
/* file operations for /proc/net/ip_vs */
1979 static const struct file_operations ip_vs_info_fops = {
1980 .owner = THIS_MODULE,
1981 .open = ip_vs_info_open,
1983 .llseek = seq_lseek,
1984 .release = seq_release_private,
/*
 * single-shot seq_file show for /proc/net/ip_vs_stats: snapshot the
 * per-namespace totals with ip_vs_copy_stats() and print the absolute
 * counters followed by the per-second rates.
 */
1987 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1989 struct net *net = seq_file_single_net(seq);
1990 struct ip_vs_stats_user show;
1992 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1994 " Total Incoming Outgoing Incoming Outgoing\n");
1996 " Conns Packets Packets Bytes Bytes\n");
1998 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
1999 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2000 show.inpkts, show.outpkts,
2001 (unsigned long long) show.inbytes,
2002 (unsigned long long) show.outbytes);
2004 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2006 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2007 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2008 show.cps, show.inpps, show.outpps,
2009 show.inbps, show.outbps);
/* open() for /proc/net/ip_vs_stats (single-record, netns-aware). */
2014 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2016 return single_open_net(inode, file, ip_vs_stats_show);
/* file operations for /proc/net/ip_vs_stats */
2019 static const struct file_operations ip_vs_stats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = ip_vs_stats_seq_open,
2023 .llseek = seq_lseek,
2024 .release = single_release,
/*
 * single-shot show for /proc/net/ip_vs_stats_percpu: one line per
 * possible CPU read under its u64_stats seqcount (so 64-bit byte
 * counters are torn-read safe on 32-bit), then the "~" totals line and
 * the estimator rates, both under tot_stats->lock.
 */
2027 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2029 struct net *net = seq_file_single_net(seq);
2030 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2031 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2032 struct ip_vs_stats_user rates;
2035 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2037 " Total Incoming Outgoing Incoming Outgoing\n");
2039 "CPU Conns Packets Packets Bytes Bytes\n");
2041 for_each_possible_cpu(i) {
2042 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2044 __u64 inbytes, outbytes;
/* retry loop: re-read if a writer updated the counters meanwhile */
2047 start = u64_stats_fetch_begin_bh(&u->syncp);
2048 inbytes = u->ustats.inbytes;
2049 outbytes = u->ustats.outbytes;
2050 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2052 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2053 i, u->ustats.conns, u->ustats.inpkts,
2054 u->ustats.outpkts, (__u64)inbytes,
2058 spin_lock_bh(&tot_stats->lock);
2060 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2061 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2062 tot_stats->ustats.outpkts,
2063 (unsigned long long) tot_stats->ustats.inbytes,
2064 (unsigned long long) tot_stats->ustats.outbytes,
2066 ip_vs_read_estimator(&rates, tot_stats);
2068 spin_unlock_bh(&tot_stats->lock);
2070 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2072 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2073 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
/* open() for /proc/net/ip_vs_stats_percpu (single-record, netns-aware). */
2083 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2085 return single_open_net(inode, file, ip_vs_stats_percpu_show);
/* file operations for /proc/net/ip_vs_stats_percpu */
2088 static const struct file_operations ip_vs_stats_percpu_fops = {
2089 .owner = THIS_MODULE,
2090 .open = ip_vs_stats_percpu_seq_open,
2092 .llseek = seq_lseek,
2093 .release = single_release,
2098 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied protocol timeouts (seconds) to the per-netns
 * protocol data, converting to jiffies.  A zero field means "leave
 * unchanged".  Compiled out entirely when neither TCP nor UDP protocol
 * support is configured.
 */
2100 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2102 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2103 struct ip_vs_proto_data *pd;
2106 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2111 #ifdef CONFIG_IP_VS_PROTO_TCP
2112 if (u->tcp_timeout) {
2113 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2114 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2115 = u->tcp_timeout * HZ;
2118 if (u->tcp_fin_timeout) {
2119 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2120 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2121 = u->tcp_fin_timeout * HZ;
2125 #ifdef CONFIG_IP_VS_PROTO_UDP
2126 if (u->udp_timeout) {
2127 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2128 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2129 = u->udp_timeout * HZ;
/*
 * Expected argument sizes for the IP_VS_SO_SET_* sockopt commands.
 * SET_CMDID() rebases a command number to an index into set_arglen[];
 * do_ip_vs_set_ctl() rejects any request whose length does not match.
 */
2136 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2137 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2138 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2139 sizeof(struct ip_vs_dest_user))
2140 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2141 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2142 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2144 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2145 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2146 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2147 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2148 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2149 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2151 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2152 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the legacy IPv4-only sockopt service struct into the
 * extended kernel-internal representation (zeroed first, so fields the
 * old ABI lacks — e.g. af, pe_name — stay 0/NULL).
 */
2158 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2159 struct ip_vs_service_user *usvc_compat)
2161 memset(usvc, 0, sizeof(*usvc));
2164 usvc->protocol = usvc_compat->protocol;
2165 usvc->addr.ip = usvc_compat->addr;
2166 usvc->port = usvc_compat->port;
2167 usvc->fwmark = usvc_compat->fwmark;
2169 /* Deep copy of sched_name is not needed here */
2170 usvc->sched_name = usvc_compat->sched_name;
2172 usvc->flags = usvc_compat->flags;
2173 usvc->timeout = usvc_compat->timeout;
2174 usvc->netmask = usvc_compat->netmask;
/*
 * Convert the legacy IPv4-only sockopt destination struct into the
 * extended kernel-internal representation (zeroed first).
 */
2177 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2178 struct ip_vs_dest_user *udest_compat)
2180 memset(udest, 0, sizeof(*udest));
2182 udest->addr.ip = udest_compat->addr;
2183 udest->port = udest_compat->port;
2184 udest->conn_flags = udest_compat->conn_flags;
2185 udest->weight = udest_compat->weight;
2186 udest->u_threshold = udest_compat->u_threshold;
2187 udest->l_threshold = udest_compat->l_threshold;
/*
 * setsockopt() entry point for the legacy IPVS control interface.
 * Requires CAP_NET_ADMIN; validates the command number and exact
 * argument length, copies the argument in, pins the module, and runs
 * the command under __ip_vs_mutex.  Flush/timeout/daemon commands are
 * handled directly; service/dest commands first convert the compat
 * structs, look up the target service, then dispatch on cmd.
 */
2191 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2193 struct net *net = sock_net(sk);
2195 unsigned char arg[MAX_ARG_LEN];
2196 struct ip_vs_service_user *usvc_compat;
2197 struct ip_vs_service_user_kern usvc;
2198 struct ip_vs_service *svc;
2199 struct ip_vs_dest_user *udest_compat;
2200 struct ip_vs_dest_user_kern udest;
2202 if (!capable(CAP_NET_ADMIN))
2205 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2207 if (len < 0 || len > MAX_ARG_LEN)
/* each command takes a fixed-size argument; reject anything else */
2209 if (len != set_arglen[SET_CMDID(cmd)]) {
2210 pr_err("set_ctl: len %u != %u\n",
2211 len, set_arglen[SET_CMDID(cmd)]);
2215 if (copy_from_user(arg, user, len) != 0)
2218 /* increase the module use count */
2219 ip_vs_use_count_inc();
2221 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2226 if (cmd == IP_VS_SO_SET_FLUSH) {
2227 /* Flush the virtual service */
2228 ret = ip_vs_flush(net);
2230 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2231 /* Set timeout values for (tcp tcpfin udp) */
2232 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2234 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2235 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2236 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2239 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2240 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2241 ret = stop_sync_thread(net, dm->state);
/* remaining commands operate on a service (+ optional dest) argument */
2245 usvc_compat = (struct ip_vs_service_user *)arg;
2246 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2248 /* We only use the new structs internally, so copy userspace compat
2249 * structs to extended internal versions */
2250 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2251 ip_vs_copy_udest_compat(&udest, udest_compat);
2253 if (cmd == IP_VS_SO_SET_ZERO) {
2254 /* if no service address is set, zero counters in all */
2255 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2256 ret = ip_vs_zero_all(net);
2261 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2262 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2263 usvc.protocol != IPPROTO_SCTP) {
2264 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2265 usvc.protocol, &usvc.addr.ip,
2266 ntohs(usvc.port), usvc.sched_name);
2271 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2272 if (usvc.fwmark == 0)
2273 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2274 &usvc.addr, usvc.port);
2276 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
/* every command except ADD requires an existing, matching service */
2278 if (cmd != IP_VS_SO_SET_ADD
2279 && (svc == NULL || svc->protocol != usvc.protocol)) {
2285 case IP_VS_SO_SET_ADD:
2289 ret = ip_vs_add_service(net, &usvc, &svc);
2291 case IP_VS_SO_SET_EDIT:
2292 ret = ip_vs_edit_service(svc, &usvc);
2294 case IP_VS_SO_SET_DEL:
2295 ret = ip_vs_del_service(svc);
2299 case IP_VS_SO_SET_ZERO:
2300 ret = ip_vs_zero_service(svc);
2302 case IP_VS_SO_SET_ADDDEST:
2303 ret = ip_vs_add_dest(svc, &udest);
2305 case IP_VS_SO_SET_EDITDEST:
2306 ret = ip_vs_edit_dest(svc, &udest);
2308 case IP_VS_SO_SET_DELDEST:
2309 ret = ip_vs_del_dest(svc, &udest);
2316 mutex_unlock(&__ip_vs_mutex);
2318 /* decrease the module use count */
2319 ip_vs_use_count_dec();
/*
 * Fill a userspace ip_vs_service_entry from an in-kernel service:
 * identity fields, scheduler name, flags, timeout converted from
 * jiffies back to seconds, and a stats snapshot.
 */
2326 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2328 dst->protocol = src->protocol;
2329 dst->addr = src->addr.ip;
2330 dst->port = src->port;
2331 dst->fwmark = src->fwmark;
2332 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2333 dst->flags = src->flags;
2334 dst->timeout = src->timeout / HZ;
2335 dst->netmask = src->netmask;
2336 dst->num_dests = src->num_dests;
2337 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * IP_VS_SO_GET_SERVICES helper: copy up to get->num_services service
 * entries for this netns into the user buffer, walking both hash
 * tables.  Only IPv4 (AF_INET) services are visible through this
 * legacy interface.
 */
2341 __ip_vs_get_service_entries(struct net *net,
2342 const struct ip_vs_get_services *get,
2343 struct ip_vs_get_services __user *uptr)
2346 struct ip_vs_service *svc;
2347 struct ip_vs_service_entry entry;
2350 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2351 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2352 /* Only expose IPv4 entries to old interface */
2353 if (svc->af != AF_INET || !net_eq(svc->net, net))
2356 if (count >= get->num_services)
2358 memset(&entry, 0, sizeof(entry));
2359 ip_vs_copy_service(&entry, svc);
2360 if (copy_to_user(&uptr->entrytable[count],
2361 &entry, sizeof(entry))) {
/* second pass: services hashed by fwmark */
2369 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2370 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2371 /* Only expose IPv4 entries to old interface */
2372 if (svc->af != AF_INET || !net_eq(svc->net, net))
2375 if (count >= get->num_services)
2377 memset(&entry, 0, sizeof(entry));
2378 ip_vs_copy_service(&entry, svc);
2379 if (copy_to_user(&uptr->entrytable[count],
2380 &entry, sizeof(entry))) {
/*
 * IP_VS_SO_GET_DESTS helper: look up the (IPv4) service identified by
 * fwmark or <protocol,addr,port> and copy up to get->num_dests of its
 * destination entries, including atomic counter snapshots and stats,
 * to the user buffer.
 */
2392 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2393 struct ip_vs_get_dests __user *uptr)
2395 struct ip_vs_service *svc;
2396 union nf_inet_addr addr = { .ip = get->addr };
2400 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2402 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2407 struct ip_vs_dest *dest;
2408 struct ip_vs_dest_entry entry;
2410 list_for_each_entry(dest, &svc->destinations, n_list) {
2411 if (count >= get->num_dests)
2414 entry.addr = dest->addr.ip;
2415 entry.port = dest->port;
2416 entry.conn_flags = atomic_read(&dest->conn_flags);
2417 entry.weight = atomic_read(&dest->weight);
2418 entry.u_threshold = dest->u_threshold;
2419 entry.l_threshold = dest->l_threshold;
2420 entry.activeconns = atomic_read(&dest->activeconns);
2421 entry.inactconns = atomic_read(&dest->inactconns);
2422 entry.persistconns = atomic_read(&dest->persistconns);
2423 ip_vs_copy_stats(&entry.stats, &dest->stats);
2424 if (copy_to_user(&uptr->entrytable[count],
2425 &entry, sizeof(entry))) {
/*
 * Report the current TCP/TCP-FIN/UDP timeouts for this netns,
 * converted from jiffies to seconds.  Compiled out when neither TCP
 * nor UDP protocol support is configured.
 */
2437 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2439 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2440 struct ip_vs_proto_data *pd;
2443 #ifdef CONFIG_IP_VS_PROTO_TCP
2444 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2445 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2446 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2448 #ifdef CONFIG_IP_VS_PROTO_UDP
2449 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2451 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/*
 * Minimum argument sizes for the IP_VS_SO_GET_* sockopt commands,
 * indexed via GET_CMDID(); do_ip_vs_get_ctl() rejects shorter buffers.
 */
2456 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2457 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2458 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2459 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2460 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2461 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2462 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2464 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2465 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2466 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2467 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2468 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2469 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * getsockopt() entry point for the legacy IPVS control interface.
 * Requires CAP_NET_ADMIN; validates the command and minimum buffer
 * length, copies the request header in, then serves each query under
 * __ip_vs_mutex: version string, global info, service/dest dumps
 * (with an explicit length re-check against the caller-declared
 * counts), timeouts, and sync-daemon state.
 */
2475 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2477 unsigned char arg[128];
2479 unsigned int copylen;
2480 struct net *net = sock_net(sk);
2481 struct netns_ipvs *ipvs = net_ipvs(net);
2484 if (!capable(CAP_NET_ADMIN))
2487 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2490 if (*len < get_arglen[GET_CMDID(cmd)]) {
2491 pr_err("get_ctl: len %u < %u\n",
2492 *len, get_arglen[GET_CMDID(cmd)]);
2496 copylen = get_arglen[GET_CMDID(cmd)];
2500 if (copy_from_user(arg, user, copylen) != 0)
2503 if (mutex_lock_interruptible(&__ip_vs_mutex))
2504 return -ERESTARTSYS;
2507 case IP_VS_SO_GET_VERSION:
2511 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2512 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2513 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2517 *len = strlen(buf)+1;
2521 case IP_VS_SO_GET_INFO:
2523 struct ip_vs_getinfo info;
2524 info.version = IP_VS_VERSION_CODE;
2525 info.size = ip_vs_conn_tab_size;
2526 info.num_services = ipvs->num_services;
2527 if (copy_to_user(user, &info, sizeof(info)) != 0)
2532 case IP_VS_SO_GET_SERVICES:
2534 struct ip_vs_get_services *get;
2537 get = (struct ip_vs_get_services *)arg;
/* total size must cover header + num_services entries */
2538 size = sizeof(*get) +
2539 sizeof(struct ip_vs_service_entry) * get->num_services;
2541 pr_err("length: %u != %u\n", *len, size);
2545 ret = __ip_vs_get_service_entries(net, get, user);
2549 case IP_VS_SO_GET_SERVICE:
2551 struct ip_vs_service_entry *entry;
2552 struct ip_vs_service *svc;
2553 union nf_inet_addr addr;
2555 entry = (struct ip_vs_service_entry *)arg;
2556 addr.ip = entry->addr;
2558 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2560 svc = __ip_vs_service_find(net, AF_INET,
2561 entry->protocol, &addr,
2564 ip_vs_copy_service(entry, svc);
2565 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2572 case IP_VS_SO_GET_DESTS:
2574 struct ip_vs_get_dests *get;
2577 get = (struct ip_vs_get_dests *)arg;
2578 size = sizeof(*get) +
2579 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2581 pr_err("length: %u != %u\n", *len, size);
2585 ret = __ip_vs_get_dest_entries(net, get, user);
2589 case IP_VS_SO_GET_TIMEOUT:
2591 struct ip_vs_timeout_user t;
2593 __ip_vs_get_timeouts(net, &t);
2594 if (copy_to_user(user, &t, sizeof(t)) != 0)
2599 case IP_VS_SO_GET_DAEMON:
2601 struct ip_vs_daemon_user d[2];
/* d[0] describes the master daemon, d[1] the backup daemon */
2603 memset(&d, 0, sizeof(d));
2604 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2605 d[0].state = IP_VS_STATE_MASTER;
2606 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2607 sizeof(d[0].mcast_ifn));
2608 d[0].syncid = ipvs->master_syncid;
2610 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2611 d[1].state = IP_VS_STATE_BACKUP;
2612 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2613 sizeof(d[1].mcast_ifn));
2614 d[1].syncid = ipvs->backup_syncid;
2616 if (copy_to_user(user, &d, sizeof(d)) != 0)
2626 mutex_unlock(&__ip_vs_mutex);
/* netfilter sockopt registration: routes the IP_VS_SO_SET_*/IP_VS_SO_GET_*
 * command ranges to the two handlers above. */
2631 static struct nf_sockopt_ops ip_vs_sockopts = {
2633 .set_optmin = IP_VS_BASE_CTL,
2634 .set_optmax = IP_VS_SO_SET_MAX+1,
2635 .set = do_ip_vs_set_ctl,
2636 .get_optmin = IP_VS_BASE_CTL,
2637 .get_optmax = IP_VS_SO_GET_MAX+1,
2638 .get = do_ip_vs_get_ctl,
2639 .owner = THIS_MODULE,
2643 * Generic Netlink interface
2646 /* IPVS genetlink family */
/* Generic netlink family for the modern IPVS control interface;
 * netnsok allows per-namespace use (e.g. by ipvsadm). */
2647 static struct genl_family ip_vs_genl_family = {
2648 .id = GENL_ID_GENERATE,
2650 .name = IPVS_GENL_NAME,
2651 .version = IPVS_GENL_VERSION,
2652 .maxattr = IPVS_CMD_MAX,
2653 .netnsok = true, /* Make ipvsadm to work on netns */
2656 /* Policy used for first-level command attributes */
/* top-level attributes: nested service/dest/daemon blobs plus the
 * three u32 timeout values */
2657 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2658 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2659 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2660 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2661 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2662 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2663 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2666 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2667 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2668 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2669 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2670 .len = IP_VS_IFNAME_MAXLEN },
2671 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2674 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
/* address is an opaque union nf_inet_addr blob; names are
 * NUL-terminated strings with bounded lengths */
2675 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2676 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2677 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2678 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2679 .len = sizeof(union nf_inet_addr) },
2680 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2681 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2682 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2683 .len = IP_VS_SCHEDNAME_MAXLEN },
2684 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2685 .len = IP_VS_PENAME_MAXLEN },
2686 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2687 .len = sizeof(struct ip_vs_flags) },
2688 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2689 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2690 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2693 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2694 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2695 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2696 .len = sizeof(union nf_inet_addr) },
2697 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2698 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2699 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2700 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2701 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit an ip_vs_stats snapshot as a nested netlink attribute of type
 * @container_type.  On any NLA_PUT failure the partially-built nest is
 * cancelled (nla_put_failure label).
 */
2708 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2709 struct ip_vs_stats *stats)
2711 struct ip_vs_stats_user ustats;
2712 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2716 ip_vs_copy_stats(&ustats, stats);
2718 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2719 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2720 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2721 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2722 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2724 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2725 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2729 nla_nest_end(skb, nl_stats);
2734 nla_nest_cancel(skb, nl_stats);
/*
 * Emit one service as a nested IPVS_CMD_ATTR_SERVICE attribute:
 * identity (fwmark or proto/addr/port), scheduler/pe names, flags,
 * timeout in seconds, netmask and a nested stats block.  The nest is
 * cancelled on failure.
 */
2738 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2739 struct ip_vs_service *svc)
2741 struct nlattr *nl_service;
2742 struct ip_vs_flags flags = { .flags = svc->flags,
2745 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2749 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
/* fwmark-keyed service, else proto/addr/port-keyed */
2752 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2754 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2755 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2756 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2759 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2761 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2762 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2763 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2764 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2766 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2767 goto nla_put_failure;
2769 nla_nest_end(skb, nl_service);
2774 nla_nest_cancel(skb, nl_service);
/*
 * Emit one NLM_F_MULTI IPVS_CMD_NEW_SERVICE message for a dump: start
 * the genetlink header, fill the service attributes, and finalize —
 * or cancel the whole message on failure.
 */
2778 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2779 struct ip_vs_service *svc,
2780 struct netlink_callback *cb)
2784 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2785 &ip_vs_genl_family, NLM_F_MULTI,
2786 IPVS_CMD_NEW_SERVICE);
2790 if (ip_vs_genl_fill_service(skb, svc) < 0)
2791 goto nla_put_failure;
2793 return genlmsg_end(skb, hdr);
2796 genlmsg_cancel(skb, hdr);
/*
 * genetlink dumpit for IPVS_CMD_GET_SERVICE: walk both hash tables
 * under __ip_vs_mutex, skipping entries already delivered in previous
 * dump calls (cb->args[0]) and entries from other namespaces, until
 * the skb fills up.
 */
2800 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2801 struct netlink_callback *cb)
2804 int start = cb->args[0];
2805 struct ip_vs_service *svc;
2806 struct net *net = skb_sknet(skb);
2808 mutex_lock(&__ip_vs_mutex);
2809 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2810 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2811 if (++idx <= start || !net_eq(svc->net, net))
2813 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2815 goto nla_put_failure;
2820 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2821 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2822 if (++idx <= start || !net_eq(svc->net, net))
2824 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2826 goto nla_put_failure;
2832 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into @usvc and look
 * up the matching service into *ret_svc.  The identity fields (af plus
 * either fwmark or proto/addr/port) are always required; when
 * @full_entry is set the configuration fields (sched name, flags,
 * timeout, netmask; pe name optional) are required as well, and flags
 * are merged with the existing service's flags via the user mask.
 */
2838 static int ip_vs_genl_parse_service(struct net *net,
2839 struct ip_vs_service_user_kern *usvc,
2840 struct nlattr *nla, int full_entry,
2841 struct ip_vs_service **ret_svc)
2843 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2844 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2845 struct ip_vs_service *svc;
2847 /* Parse mandatory identifying service fields first */
2849 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2852 nla_af = attrs[IPVS_SVC_ATTR_AF];
2853 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2854 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2855 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2856 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
/* need af and either a fwmark or the full proto/addr/port triple */
2858 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2861 memset(usvc, 0, sizeof(*usvc));
2863 usvc->af = nla_get_u16(nla_af);
2864 #ifdef CONFIG_IP_VS_IPV6
2865 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2867 if (usvc->af != AF_INET)
2869 return -EAFNOSUPPORT;
/* fwmark-keyed services record IPPROTO_TCP as a placeholder proto */
2872 usvc->protocol = IPPROTO_TCP;
2873 usvc->fwmark = nla_get_u32(nla_fwmark);
2875 usvc->protocol = nla_get_u16(nla_protocol);
2876 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2877 usvc->port = nla_get_u16(nla_port);
2882 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2884 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2885 &usvc->addr, usvc->port);
2888 /* If a full entry was requested, check for the additional fields */
2890 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2892 struct ip_vs_flags flags;
2894 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2895 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2896 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2897 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2898 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2900 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2903 nla_memcpy(&flags, nla_flags, sizeof(flags));
2905 /* prefill flags from service if it already exists */
2907 usvc->flags = svc->flags;
2909 /* set new flags from userland */
2910 usvc->flags = (usvc->flags & ~flags.mask) |
2911 (flags.flags & flags.mask);
2912 usvc->sched_name = nla_data(nla_sched);
2913 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2914 usvc->timeout = nla_get_u32(nla_timeout);
2915 usvc->netmask = nla_get_u32(nla_netmask);
/*
 * Resolve a nested service attribute to an existing service.  Returns
 * the service, NULL when no match exists, or ERR_PTR() on parse error.
 */
static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	/* identifying fields only: full_entry == 0 */
	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
	return ret ? ERR_PTR(ret) : svc;
/*
 * Fill a nested IPVS_CMD_ATTR_DEST attribute describing one real server:
 * address/port, forwarding method, weight, thresholds, connection
 * counters and statistics.  On failure the nest is cancelled.
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
	struct nlattr *nl_dest;
	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
	/* export only the forwarding-method bits of conn_flags */
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
		    atomic_read(&dest->activeconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
		    atomic_read(&dest->inactconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
		    atomic_read(&dest->persistconns));
	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
		goto nla_put_failure;
	nla_nest_end(skb, nl_dest);
	nla_nest_cancel(skb, nl_dest);
/*
 * Emit one NLM_F_MULTI genetlink message describing a single destination
 * as part of an IPVS_CMD_GET_DEST dump.
 */
static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
				struct netlink_callback *cb)
	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
	if (ip_vs_genl_fill_dest(skb, dest) < 0)
		goto nla_put_failure;
	/* finalize the message; returns its length */
	return genlmsg_end(skb, hdr);
	genlmsg_cancel(skb, hdr);
/*
 * Dump the destinations of one service (IPVS_CMD_GET_DEST).  The service
 * is taken from the IPVS_CMD_ATTR_SERVICE attribute of the request
 * header; cb->args[0] holds the resume position across dump calls.
 */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = skb_sknet(skb);

	mutex_lock(&__ip_vs_mutex);
	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
	if (IS_ERR(svc) || svc == NULL)
	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			goto nla_put_failure;
	mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_DEST attribute into *udest.  Address and
 * port are always mandatory; with full_entry != 0 the forwarding method,
 * weight and both thresholds are required too.  Returns 0 or a negative
 * errno.
 */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, int full_entry)
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;
	/* Parse mandatory identifying destination fields first */
	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
	nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
	nla_port = attrs[IPVS_DEST_ATTR_PORT];
	if (!(nla_addr && nla_port))
	memset(udest, 0, sizeof(*udest));
	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_u16(nla_port);
	/* If a full entry was requested, check for the additional fields */
	struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
	nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
	nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
	nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
	nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
	if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
	/* keep only the forwarding-method bits, mirroring fill_dest() */
	udest->conn_flags = nla_get_u32(nla_fwd)
			    & IP_VS_CONN_F_FWD_MASK;
	udest->weight = nla_get_u32(nla_weight);
	udest->u_threshold = nla_get_u32(nla_u_thresh);
	udest->l_threshold = nla_get_u32(nla_l_thresh);
3074 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3075 const char *mcast_ifn, __be32 syncid)
3077 struct nlattr *nl_daemon;
3079 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3083 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3084 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3085 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3087 nla_nest_end(skb, nl_daemon);
3092 nla_nest_cancel(skb, nl_daemon);
3096 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3097 const char *mcast_ifn, __be32 syncid,
3098 struct netlink_callback *cb)
3101 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3102 &ip_vs_genl_family, NLM_F_MULTI,
3103 IPVS_CMD_NEW_DAEMON);
3107 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3108 goto nla_put_failure;
3110 return genlmsg_end(skb, hdr);
3113 genlmsg_cancel(skb, hdr);
/*
 * IPVS_CMD_GET_DAEMON dump: report the master and/or backup sync daemon
 * configuration for this netns.  cb->args[0] and cb->args[1] mark
 * whether the master resp. backup record has already been emitted.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
	struct net *net = skb_sknet(skb);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   ipvs->master_mcast_ifn,
					   ipvs->master_syncid, cb) < 0)
			goto nla_put_failure;
	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   ipvs->backup_mcast_ifn,
					   ipvs->backup_syncid, cb) < 0)
			goto nla_put_failure;
	mutex_unlock(&__ip_vs_mutex);
/*
 * IPVS_CMD_NEW_DAEMON: start a sync daemon thread.  The state, multicast
 * interface and sync ID attributes are all mandatory.
 */
static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
	return start_sync_thread(net,
				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/*
 * IPVS_CMD_DEL_DAEMON: stop the sync daemon selected by the mandatory
 * state attribute (master or backup).
 */
static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
	if (!attrs[IPVS_DAEMON_ATTR_STATE])
	return stop_sync_thread(net,
				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * IPVS_CMD_SET_CONFIG: update protocol timeouts.  The current values are
 * read first so any timeout attribute the caller omitted keeps its
 * existing setting.
 */
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
	struct ip_vs_timeout_user t;

	__ip_vs_get_timeouts(net, &t);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);

	return ip_vs_set_timeout(net, &t);
/*
 * Common .doit handler for every mutating Generic Netlink command
 * (flush, set-config, daemon start/stop, service and destination
 * add/edit/delete, zero).  Commands without a service argument are
 * dispatched early; the rest first parse/resolve the service (and,
 * where needed, the destination) and then switch on the command.
 * The whole operation runs under __ip_vs_mutex.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int need_full_svc = 0, need_full_dest = 0;
	struct netns_ipvs *ipvs;

	net = skb_sknet(skb);
	ipvs = net_ipvs(net);
	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* commands that take no service argument */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(net);
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(net, info->attrs);
	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
		   cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
		/* daemon parameters arrive in a nested attribute */
		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy)) {
		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* zero without a service attribute zeroes everything */
		ret = ip_vs_zero_all(net);

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
	ret = ip_vs_genl_parse_service(net, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],

	case IPVS_CMD_NEW_SERVICE:
		ret = ip_vs_add_service(net, &usvc, &svc);
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		ret = ip_vs_zero_service(svc);

	mutex_unlock(&__ip_vs_mutex);
/*
 * Common .doit handler for the non-dump GET commands (GET_SERVICE,
 * GET_INFO, GET_CONFIG): allocate a reply message, fill it according to
 * the command and send it back to the requester.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
	struct sk_buff *msg;
	int ret, cmd, reply_cmd;
	struct netns_ipvs *ipvs;

	net = skb_sknet(skb);
	ipvs = net_ipvs(net);
	cmd = info->genlhdr->cmd;

	/* a GET request is answered with the matching NEW/SET command */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
		pr_err("unknown Generic Netlink command\n");

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
		goto nla_put_failure;

	case IPVS_CMD_GET_SERVICE:
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(net,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		ret = ip_vs_genl_fill_service(msg, svc);
			goto nla_put_failure;

	case IPVS_CMD_GET_CONFIG:
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
#ifdef CONFIG_IP_VS_PROTO_UDP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);

	case IPVS_CMD_GET_INFO:
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
			    ip_vs_conn_tab_size);

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);

	pr_err("not enough space in Netlink message\n");

	mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink operations table.  All mutating commands share
 * ip_vs_genl_set_cmd(), the GET commands use either a .doit reply
 * handler or a .dumpit callback.  Every op requires CAP_NET_ADMIN
 * (GENL_ADMIN_PERM).
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	.cmd = IPVS_CMD_NEW_SERVICE,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_SET_SERVICE,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_DEL_SERVICE,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_GET_SERVICE,
	.flags = GENL_ADMIN_PERM,
	.doit = ip_vs_genl_get_cmd,
	.dumpit = ip_vs_genl_dump_services,
	.policy = ip_vs_cmd_policy,

	.cmd = IPVS_CMD_NEW_DEST,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_SET_DEST,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_DEL_DEST,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_GET_DEST,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.dumpit = ip_vs_genl_dump_dests,

	.cmd = IPVS_CMD_NEW_DAEMON,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_DEL_DAEMON,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	/* NOTE(review): unlike GET_SERVICE/GET_DEST this dump op carries
	 * no .policy -- confirm whether attribute validation is intended
	 * to be skipped for GET_DAEMON. */
	.cmd = IPVS_CMD_GET_DAEMON,
	.flags = GENL_ADMIN_PERM,
	.dumpit = ip_vs_genl_dump_daemons,

	.cmd = IPVS_CMD_SET_CONFIG,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_GET_CONFIG,
	.flags = GENL_ADMIN_PERM,
	.doit = ip_vs_genl_get_cmd,

	.cmd = IPVS_CMD_GET_INFO,
	.flags = GENL_ADMIN_PERM,
	.doit = ip_vs_genl_get_cmd,

	.cmd = IPVS_CMD_ZERO,
	.flags = GENL_ADMIN_PERM,
	.policy = ip_vs_cmd_policy,
	.doit = ip_vs_genl_set_cmd,

	.cmd = IPVS_CMD_FLUSH,
	.flags = GENL_ADMIN_PERM,
	.doit = ip_vs_genl_set_cmd,
/* Register the IPVS Generic Netlink family with all ops above. */
static int __init ip_vs_genl_register(void)
	return genl_register_family_with_ops(&ip_vs_genl_family,
					     ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
/* Unregister the family; also removes its registered ops. */
static void ip_vs_genl_unregister(void)
	genl_unregister_family(&ip_vs_genl_family);
3507 /* End of Generic Netlink interface definitions */
 * per netns init/exit func.
3512 #ifdef CONFIG_SYSCTL
/*
 * Per-netns sysctl setup: initialise the defense-strategy state and
 * locks, duplicate the sysctl template for non-initial namespaces,
 * point each table entry at this netns' variables, register the table
 * and schedule the periodic defense work.  Returns 0 on success.
 */
int __net_init __ip_vs_control_init_sysctl(struct net *net)
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		/* child namespaces get their own writable copy of the table */
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);

	/* Initialize sysctl defaults */
	/* idx walks the table in declaration order; each .data pointer is
	 * redirected to the per-netns field */
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* sync_threshold is a two-element array: set maxlen to cover both */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;

	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
	if (ipvs->sysctl_hdr == NULL) {
		/* only the duplicated table must be freed on failure */
		if (!net_eq(net, &init_net))
	ip_vs_start_estimator(net, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3572 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3574 struct netns_ipvs *ipvs = net_ipvs(net);
3576 cancel_delayed_work_sync(&ipvs->defense_work);
3577 cancel_work_sync(&ipvs->defense_work.work);
3578 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3583 int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3584 void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
/*
 * Per-netns control-plane init: real-server hash table, destination
 * trash list, service counters, per-cpu totals, /proc entries and the
 * sysctl subtree.  Paired with __ip_vs_control_cleanup().
 */
int __net_init __ip_vs_control_init(struct net *net)
	struct netns_ipvs *ipvs = net_ipvs(net);

	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);

	/* Initialize rs_table */
	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
		INIT_LIST_HEAD(&ipvs->rs_table[idx]);

	INIT_LIST_HEAD(&ipvs->dest_trash);
	atomic_set(&ipvs->ftpsvc_counter, 0);
	atomic_set(&ipvs->nullsvc_counter, 0);

	/* per-cpu counters backing the netns-wide statistics */
	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!ipvs->tot_stats.cpustats) {
		pr_err("%s(): alloc_percpu.\n", __func__);
	spin_lock_init(&ipvs->tot_stats.lock);

	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
			     &ip_vs_stats_percpu_fops);

	if (__ip_vs_control_init_sysctl(net))

	/* error unwind: release the per-cpu stats */
	free_percpu(ipvs->tot_stats.cpustats);
/*
 * Per-netns teardown: reverse of __ip_vs_control_init() -- drain the
 * destination trash, stop the totals estimator, remove sysctl and /proc
 * entries, then free the per-cpu statistics.
 */
static void __net_exit __ip_vs_control_cleanup(struct net *net)
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_trash_cleanup(net);
	ip_vs_stop_estimator(net, &ipvs->tot_stats);
	__ip_vs_control_cleanup_sysctl(net);
	proc_net_remove(net, "ip_vs_stats_percpu");
	proc_net_remove(net, "ip_vs_stats");
	proc_net_remove(net, "ip_vs");
	free_percpu(ipvs->tot_stats.cpustats);
/* Per-netns lifecycle hooks for the IPVS control plane. */
static struct pernet_operations ipvs_control_ops = {
	.init = __ip_vs_control_init,
	.exit = __ip_vs_control_cleanup,
/*
 * Module-wide initialisation: global service hash tables, the pernet
 * subsystem, the sockopt interface and the Generic Netlink interface.
 * Earlier registrations are unwound when a later one fails.
 */
int __init ip_vs_control_init(void)
	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);

	ret = register_pernet_subsys(&ipvs_control_ops);
		pr_err("cannot register namespace.\n");

	smp_wmb();	/* Do we really need it now ? */

	ret = nf_register_sockopt(&ip_vs_sockopts);
		pr_err("cannot register sockopt.\n");

	ret = ip_vs_genl_register();
		pr_err("cannot register Generic Netlink interface.\n");
		/* unwind: genl failed, drop the sockopt registration */
		nf_unregister_sockopt(&ip_vs_sockopts);

	/* error unwind for earlier failures */
	unregister_pernet_subsys(&ipvs_control_ops);
3688 void ip_vs_control_cleanup(void)
3691 unregister_pernet_subsys(&ipvs_control_ops);
3692 ip_vs_genl_unregister();
3693 nf_unregister_sockopt(&ip_vs_sockopts);