]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/netfilter/ipvs/ip_vs_proto_sctp.c
Merge tag 'ipvs-for-v4.4' of https://git.kernel.org/pub/scm/linux/kernel/git/horms...
[karo-tx-linux.git] / net / netfilter / ipvs / ip_vs_proto_sctp.c
1 #include <linux/kernel.h>
2 #include <linux/ip.h>
3 #include <linux/sctp.h>
4 #include <net/ip.h>
5 #include <net/ip6_checksum.h>
6 #include <linux/netfilter.h>
7 #include <linux/netfilter_ipv4.h>
8 #include <net/sctp/checksum.h>
9 #include <net/ip_vs.h>
10
11 static int
12 sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13                    int *verdict, struct ip_vs_conn **cpp,
14                    struct ip_vs_iphdr *iph)
15 {
16         struct net *net;
17         struct ip_vs_service *svc;
18         struct netns_ipvs *ipvs;
19         sctp_chunkhdr_t _schunkh, *sch;
20         sctp_sctphdr_t *sh, _sctph;
21         __be16 _ports[2], *ports = NULL;
22
23         if (likely(!ip_vs_iph_icmp(iph))) {
24                 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
25                 if (sh) {
26                         sch = skb_header_pointer(
27                                 skb, iph->len + sizeof(sctp_sctphdr_t),
28                                 sizeof(_schunkh), &_schunkh);
29                         if (sch && (sch->type == SCTP_CID_INIT ||
30                                     sysctl_sloppy_sctp(ipvs)))
31                                 ports = &sh->source;
32                 }
33         } else {
34                 ports = skb_header_pointer(
35                         skb, iph->len, sizeof(_ports), &_ports);
36         }
37
38         if (!ports) {
39                 *verdict = NF_DROP;
40                 return 0;
41         }
42
43         net = skb_net(skb);
44         ipvs = net_ipvs(net);
45         rcu_read_lock();
46         if (likely(!ip_vs_iph_inverse(iph)))
47                 svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
48                                          &iph->daddr, ports[1]);
49         else
50                 svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
51                                          &iph->saddr, ports[0]);
52         if (svc) {
53                 int ignored;
54
55                 if (ip_vs_todrop(ipvs)) {
56                         /*
57                          * It seems that we are very loaded.
58                          * We have to drop this packet :(
59                          */
60                         rcu_read_unlock();
61                         *verdict = NF_DROP;
62                         return 0;
63                 }
64                 /*
65                  * Let the virtual server select a real server for the
66                  * incoming connection, and create a connection entry.
67                  */
68                 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
69                 if (!*cpp && ignored <= 0) {
70                         if (!ignored)
71                                 *verdict = ip_vs_leave(svc, skb, pd, iph);
72                         else
73                                 *verdict = NF_DROP;
74                         rcu_read_unlock();
75                         return 0;
76                 }
77         }
78         rcu_read_unlock();
79         /* NF_ACCEPT */
80         return 1;
81 }
82
/*
 * Store a freshly computed SCTP checksum in @sctph and mark the skb so
 * that no further checksum work is requested for it.
 *
 * @skb:      packet whose SCTP header was modified
 * @sctph:    SCTP header inside @skb that receives the new checksum
 * @sctphoff: offset passed to sctp_compute_cksum() as the start of the
 *            SCTP header
 */
static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
                          unsigned int sctphoff)
{
        sctph->checksum = sctp_compute_cksum(skb, sctphoff);
        skb->ip_summed = CHECKSUM_UNNECESSARY;
}
89
90 static int
91 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
92                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
93 {
94         sctp_sctphdr_t *sctph;
95         unsigned int sctphoff = iph->len;
96         bool payload_csum = false;
97
98 #ifdef CONFIG_IP_VS_IPV6
99         if (cp->af == AF_INET6 && iph->fragoffs)
100                 return 1;
101 #endif
102
103         /* csum_check requires unshared skb */
104         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
105                 return 0;
106
107         if (unlikely(cp->app != NULL)) {
108                 int ret;
109
110                 /* Some checks before mangling */
111                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
112                         return 0;
113
114                 /* Call application helper if needed */
115                 ret = ip_vs_app_pkt_out(cp, skb);
116                 if (ret == 0)
117                         return 0;
118                 /* ret=2: csum update is needed after payload mangling */
119                 if (ret == 2)
120                         payload_csum = true;
121         }
122
123         sctph = (void *) skb_network_header(skb) + sctphoff;
124
125         /* Only update csum if we really have to */
126         if (sctph->source != cp->vport || payload_csum ||
127             skb->ip_summed == CHECKSUM_PARTIAL) {
128                 sctph->source = cp->vport;
129                 sctp_nat_csum(skb, sctph, sctphoff);
130         } else {
131                 skb->ip_summed = CHECKSUM_UNNECESSARY;
132         }
133
134         return 1;
135 }
136
137 static int
138 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
139                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
140 {
141         sctp_sctphdr_t *sctph;
142         unsigned int sctphoff = iph->len;
143         bool payload_csum = false;
144
145 #ifdef CONFIG_IP_VS_IPV6
146         if (cp->af == AF_INET6 && iph->fragoffs)
147                 return 1;
148 #endif
149
150         /* csum_check requires unshared skb */
151         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
152                 return 0;
153
154         if (unlikely(cp->app != NULL)) {
155                 int ret;
156
157                 /* Some checks before mangling */
158                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
159                         return 0;
160
161                 /* Call application helper if needed */
162                 ret = ip_vs_app_pkt_in(cp, skb);
163                 if (ret == 0)
164                         return 0;
165                 /* ret=2: csum update is needed after payload mangling */
166                 if (ret == 2)
167                         payload_csum = true;
168         }
169
170         sctph = (void *) skb_network_header(skb) + sctphoff;
171
172         /* Only update csum if we really have to */
173         if (sctph->dest != cp->dport || payload_csum ||
174             (skb->ip_summed == CHECKSUM_PARTIAL &&
175              !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
176                 sctph->dest = cp->dport;
177                 sctp_nat_csum(skb, sctph, sctphoff);
178         } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
179                 skb->ip_summed = CHECKSUM_UNNECESSARY;
180         }
181
182         return 1;
183 }
184
185 static int
186 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
187 {
188         unsigned int sctphoff;
189         struct sctphdr *sh, _sctph;
190         __le32 cmp, val;
191
192 #ifdef CONFIG_IP_VS_IPV6
193         if (af == AF_INET6)
194                 sctphoff = sizeof(struct ipv6hdr);
195         else
196 #endif
197                 sctphoff = ip_hdrlen(skb);
198
199         sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
200         if (sh == NULL)
201                 return 0;
202
203         cmp = sh->checksum;
204         val = sctp_compute_cksum(skb, sctphoff);
205
206         if (val != cmp) {
207                 /* CRC failure, dump it. */
208                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
209                                 "Failed checksum for");
210                 return 0;
211         }
212         return 1;
213 }
214
/*
 * Events fed into the SCTP state machine.  The values are stored in
 * sctp_events[] and used as the middle index of sctp_states[][][];
 * IP_VS_SCTP_EVENT_LAST is the number of events and sizes that dimension.
 */
enum ipvs_sctp_event_t {
        IP_VS_SCTP_DATA = 0,            /* DATA, SACK, HEARTBEATs */
        IP_VS_SCTP_INIT,
        IP_VS_SCTP_INIT_ACK,
        IP_VS_SCTP_COOKIE_ECHO,
        IP_VS_SCTP_COOKIE_ACK,
        IP_VS_SCTP_SHUTDOWN,
        IP_VS_SCTP_SHUTDOWN_ACK,
        IP_VS_SCTP_SHUTDOWN_COMPLETE,
        IP_VS_SCTP_ERROR,
        IP_VS_SCTP_ABORT,
        IP_VS_SCTP_EVENT_LAST
};
228
/*
 * Chunk type -> state machine event (chunk types: RFC 2960, 3.2 Chunk
 * Field Descriptions).
 *
 * Indexes inside the array without an explicit entry are zero-initialised
 * and therefore map to IP_VS_SCTP_DATA (== 0).  set_sctp_state() bounds
 * checks the chunk type and uses IP_VS_SCTP_DATA for larger values too.
 */
static __u8 sctp_events[] = {
        [SCTP_CID_DATA]                 = IP_VS_SCTP_DATA,
        [SCTP_CID_INIT]                 = IP_VS_SCTP_INIT,
        [SCTP_CID_INIT_ACK]             = IP_VS_SCTP_INIT_ACK,
        [SCTP_CID_SACK]                 = IP_VS_SCTP_DATA,
        [SCTP_CID_HEARTBEAT]            = IP_VS_SCTP_DATA,
        [SCTP_CID_HEARTBEAT_ACK]        = IP_VS_SCTP_DATA,
        [SCTP_CID_ABORT]                = IP_VS_SCTP_ABORT,
        [SCTP_CID_SHUTDOWN]             = IP_VS_SCTP_SHUTDOWN,
        [SCTP_CID_SHUTDOWN_ACK]         = IP_VS_SCTP_SHUTDOWN_ACK,
        [SCTP_CID_ERROR]                = IP_VS_SCTP_ERROR,
        [SCTP_CID_COOKIE_ECHO]          = IP_VS_SCTP_COOKIE_ECHO,
        [SCTP_CID_COOKIE_ACK]           = IP_VS_SCTP_COOKIE_ACK,
        [SCTP_CID_ECN_ECNE]             = IP_VS_SCTP_DATA,
        [SCTP_CID_ECN_CWR]              = IP_VS_SCTP_DATA,
        [SCTP_CID_SHUTDOWN_COMPLETE]    = IP_VS_SCTP_SHUTDOWN_COMPLETE,
};
247
248 /* SCTP States:
249  * See RFC 2960, 4. SCTP Association State Diagram
250  *
251  * New states (not in diagram):
252  * - INIT1 state: use shorter timeout for dropped INIT packets
253  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
254  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
255  *
256  * The states are as seen in real server. In the diagram, INIT1, INIT,
257  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
258  *
259  * States as per packets from client (C) and server (S):
260  *
261  * Setup of client connection:
262  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
263  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
264  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
265  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
266  *
267  * Setup of server connection:
268  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
269  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
270  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
271  */
272
/*
 * Three-character aliases for the IP_VS_SCTP_S_* connection states, used
 * as the cell values and column headings of the sctp_states[] table.
 */
#define sNO IP_VS_SCTP_S_NONE
#define sI1 IP_VS_SCTP_S_INIT1
#define sIN IP_VS_SCTP_S_INIT
#define sCS IP_VS_SCTP_S_COOKIE_SENT
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
#define sCO IP_VS_SCTP_S_COOKIE
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
#define sES IP_VS_SCTP_S_ESTABLISHED
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ IP_VS_SCTP_S_REJECTED
#define sCL IP_VS_SCTP_S_CLOSED
287
/*
 * State transition table: sctp_states[direction][event][current state]
 * yields the next state (see the lookup in set_sctp_state()).
 *
 * Rows follow enum ipvs_sctp_event_t order:
 *   d = DATA, i = INIT, i_a = INIT_ACK, c_e = COOKIE_ECHO,
 *   c_a = COOKIE_ACK, s = SHUTDOWN, s_a = SHUTDOWN_ACK,
 *   s_c = SHUTDOWN_COMPLETE, err = ERROR, ab = ABORT.
 * Columns are the current state, in the order given by the header comment
 * of each sub-table.
 */
static const __u8 sctp_states
        [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
        { /* INPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
        },
        { /* OUTPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
        },
        { /* INPUT-ONLY */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
        },
};
330
/*
 * Timeout used for most non-established states: 61 seconds worth of
 * jiffies.  NOTE(review): presumably the 60 s SCTP maximum RTO plus one
 * second of slack -- confirm against the SCTP specification.
 */
#define IP_VS_SCTP_MAX_RTO      ((60 + 1) * HZ)

/*
 * Default timeout table[state], in jiffies.  It is the template handed to
 * ip_vs_create_timeout_table() in __ip_vs_sctp_init() and the fallback
 * used by set_sctp_state() when no protocol data is available.
 */
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
        [IP_VS_SCTP_S_NONE]                     = 2 * HZ,
        [IP_VS_SCTP_S_INIT1]                    = (0 + 3 + 1) * HZ,
        [IP_VS_SCTP_S_INIT]                     = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_COOKIE_SENT]              = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_COOKIE_REPLIED]           = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_COOKIE_WAIT]              = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_COOKIE]                   = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_COOKIE_ECHOED]            = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_ESTABLISHED]              = 15 * 60 * HZ,
        [IP_VS_SCTP_S_SHUTDOWN_SENT]            = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_REJECTED]                 = (0 + 3 + 1) * HZ,
        [IP_VS_SCTP_S_CLOSED]                   = IP_VS_SCTP_MAX_RTO,
        [IP_VS_SCTP_S_LAST]                     = 2 * HZ,
};
351
/*
 * Printable name for each connection state, indexed by IP_VS_SCTP_S_*.
 * The trailing [IP_VS_SCTP_S_LAST] entry is not reachable through
 * sctp_state_name(), which rejects state >= IP_VS_SCTP_S_LAST first.
 */
static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
        [IP_VS_SCTP_S_NONE]                     = "NONE",
        [IP_VS_SCTP_S_INIT1]                    = "INIT1",
        [IP_VS_SCTP_S_INIT]                     = "INIT",
        [IP_VS_SCTP_S_COOKIE_SENT]              = "C-SENT",
        [IP_VS_SCTP_S_COOKIE_REPLIED]           = "C-REPLIED",
        [IP_VS_SCTP_S_COOKIE_WAIT]              = "C-WAIT",
        [IP_VS_SCTP_S_COOKIE]                   = "COOKIE",
        [IP_VS_SCTP_S_COOKIE_ECHOED]            = "C-ECHOED",
        [IP_VS_SCTP_S_ESTABLISHED]              = "ESTABLISHED",
        [IP_VS_SCTP_S_SHUTDOWN_SENT]            = "S-SENT",
        [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = "S-RECEIVED",
        [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = "S-ACK-SENT",
        [IP_VS_SCTP_S_REJECTED]                 = "REJECTED",
        [IP_VS_SCTP_S_CLOSED]                   = "CLOSED",
        [IP_VS_SCTP_S_LAST]                     = "BUG!",
};
369
370
371 static const char *sctp_state_name(int state)
372 {
373         if (state >= IP_VS_SCTP_S_LAST)
374                 return "ERR!";
375         if (sctp_state_name_table[state])
376                 return sctp_state_name_table[state];
377         return "?";
378 }
379
380 static inline void
381 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
382                 int direction, const struct sk_buff *skb)
383 {
384         sctp_chunkhdr_t _sctpch, *sch;
385         unsigned char chunk_type;
386         int event, next_state;
387         int ihl, cofs;
388
389 #ifdef CONFIG_IP_VS_IPV6
390         ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
391 #else
392         ihl = ip_hdrlen(skb);
393 #endif
394
395         cofs = ihl + sizeof(sctp_sctphdr_t);
396         sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
397         if (sch == NULL)
398                 return;
399
400         chunk_type = sch->type;
401         /*
402          * Section 3: Multiple chunks can be bundled into one SCTP packet
403          * up to the MTU size, except for the INIT, INIT ACK, and
404          * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
405          * any other chunk in a packet.
406          *
407          * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
408          * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
409          * bundled with an ABORT, but they MUST be placed before the ABORT
410          * in the SCTP packet or they will be ignored by the receiver.
411          */
412         if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
413             (sch->type == SCTP_CID_COOKIE_ACK)) {
414                 int clen = ntohs(sch->length);
415
416                 if (clen >= sizeof(sctp_chunkhdr_t)) {
417                         sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
418                                                  sizeof(_sctpch), &_sctpch);
419                         if (sch && sch->type == SCTP_CID_ABORT)
420                                 chunk_type = sch->type;
421                 }
422         }
423
424         event = (chunk_type < sizeof(sctp_events)) ?
425                 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
426
427         /* Update direction to INPUT_ONLY if necessary
428          * or delete NO_OUTPUT flag if output packet detected
429          */
430         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
431                 if (direction == IP_VS_DIR_OUTPUT)
432                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
433                 else
434                         direction = IP_VS_DIR_INPUT_ONLY;
435         }
436
437         next_state = sctp_states[direction][event][cp->state];
438
439         if (next_state != cp->state) {
440                 struct ip_vs_dest *dest = cp->dest;
441
442                 IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
443                                 "%s:%d state: %s->%s conn->refcnt:%d\n",
444                                 pd->pp->name,
445                                 ((direction == IP_VS_DIR_OUTPUT) ?
446                                  "output " : "input "),
447                                 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
448                                 ntohs(cp->dport),
449                                 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
450                                 ntohs(cp->cport),
451                                 sctp_state_name(cp->state),
452                                 sctp_state_name(next_state),
453                                 atomic_read(&cp->refcnt));
454                 if (dest) {
455                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
456                                 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
457                                 atomic_dec(&dest->activeconns);
458                                 atomic_inc(&dest->inactconns);
459                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
460                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
461                                    (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
462                                 atomic_inc(&dest->activeconns);
463                                 atomic_dec(&dest->inactconns);
464                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
465                         }
466                 }
467         }
468         if (likely(pd))
469                 cp->timeout = pd->timeout_table[cp->state = next_state];
470         else    /* What to do ? */
471                 cp->timeout = sctp_timeouts[cp->state = next_state];
472 }
473
/*
 * state_transition hook of ip_vs_protocol_sctp: runs set_sctp_state() for
 * the packet while holding cp->lock with bottom halves disabled.
 *
 * @cp:        connection whose state is updated
 * @direction: IP_VS_DIR_* value, passed through to set_sctp_state()
 * @skb:       packet that triggers the transition
 * @pd:        per-netns protocol data, passed through to set_sctp_state()
 */
static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
                const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
        spin_lock_bh(&cp->lock);
        set_sctp_state(pd, cp, direction, skb);
        spin_unlock_bh(&cp->lock);
}
482
483 static inline __u16 sctp_app_hashkey(__be16 port)
484 {
485         return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
486                 & SCTP_APP_TAB_MASK;
487 }
488
489 static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
490 {
491         struct ip_vs_app *i;
492         __u16 hash;
493         __be16 port = inc->port;
494         int ret = 0;
495         struct netns_ipvs *ipvs = net_ipvs(net);
496         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
497
498         hash = sctp_app_hashkey(port);
499
500         list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
501                 if (i->port == port) {
502                         ret = -EEXIST;
503                         goto out;
504                 }
505         }
506         list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
507         atomic_inc(&pd->appcnt);
508 out:
509
510         return ret;
511 }
512
/*
 * Undo sctp_register_app(): decrement the protocol's application counter
 * and unlink @inc from its hash bucket with the RCU list primitive.
 */
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);

        atomic_dec(&pd->appcnt);
        list_del_rcu(&inc->p_list);
}
520
521 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
522 {
523         struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
524         int hash;
525         struct ip_vs_app *inc;
526         int result = 0;
527
528         /* Default binding: bind app only for NAT */
529         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
530                 return 0;
531         /* Lookup application incarnations and bind the right one */
532         hash = sctp_app_hashkey(cp->vport);
533
534         rcu_read_lock();
535         list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
536                 if (inc->port == cp->vport) {
537                         if (unlikely(!ip_vs_app_inc_get(inc)))
538                                 break;
539                         rcu_read_unlock();
540
541                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
542                                         "%s:%u to app %s on port %u\n",
543                                         __func__,
544                                         IP_VS_DBG_ADDR(cp->af, &cp->caddr),
545                                         ntohs(cp->cport),
546                                         IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
547                                         ntohs(cp->vport),
548                                         inc->name, ntohs(inc->port));
549                         cp->app = inc;
550                         if (inc->init_conn)
551                                 result = inc->init_conn(inc, cp);
552                         goto out;
553                 }
554         }
555         rcu_read_unlock();
556 out:
557         return result;
558 }
559
/* ---------------------------------------------
 *   Timeouts are per network namespace now.
 * ---------------------------------------------
 */
564 static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
565 {
566         struct netns_ipvs *ipvs = net_ipvs(net);
567
568         ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
569         pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
570                                                         sizeof(sctp_timeouts));
571         if (!pd->timeout_table)
572                 return -ENOMEM;
573         return 0;
574 }
575
/*
 * Per-netns teardown: free the timeout table that __ip_vs_sctp_init()
 * stored in @pd.  @net is unused.
 */
static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
        kfree(pd->timeout_table);
}
580
/*
 * IPVS protocol descriptor for SCTP.  It wires the handlers defined in
 * this file into the ip_vs_protocol callbacks; connection lookup and
 * packet debugging use the helpers named ip_vs_conn_{in,out}_get_proto
 * and ip_vs_tcpudp_debug_packet, which are defined elsewhere.
 */
struct ip_vs_protocol ip_vs_protocol_sctp = {
        .name           = "SCTP",
        .protocol       = IPPROTO_SCTP,
        .num_states     = IP_VS_SCTP_S_LAST,
        .dont_defrag    = 0,
        .init           = NULL,
        .exit           = NULL,
        .init_netns     = __ip_vs_sctp_init,
        .exit_netns     = __ip_vs_sctp_exit,
        .register_app   = sctp_register_app,
        .unregister_app = sctp_unregister_app,
        .conn_schedule  = sctp_conn_schedule,
        .conn_in_get    = ip_vs_conn_in_get_proto,
        .conn_out_get   = ip_vs_conn_out_get_proto,
        .snat_handler   = sctp_snat_handler,
        .dnat_handler   = sctp_dnat_handler,
        .csum_check     = sctp_csum_check,
        .state_name     = sctp_state_name,
        .state_transition = sctp_state_transition,
        .app_conn_bind  = sctp_app_conn_bind,
        .debug_packet   = ip_vs_tcpudp_debug_packet,
        .timeout_change = NULL,
};