/*
 * net/netfilter/ipvs/ip_vs_proto_sctp.c
 *
 * Source: git.kernelconcepts.de, karo-tx-linux.git (blob view);
 * commit subject: "Merge remote-tracking branch 'ipsec/master'".
 */
1 #include <linux/kernel.h>
2 #include <linux/ip.h>
3 #include <linux/sctp.h>
4 #include <net/ip.h>
5 #include <net/ip6_checksum.h>
6 #include <linux/netfilter.h>
7 #include <linux/netfilter_ipv4.h>
8 #include <net/sctp/checksum.h>
9 #include <net/ip_vs.h>
10
11 static int
12 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
13                    struct ip_vs_proto_data *pd,
14                    int *verdict, struct ip_vs_conn **cpp,
15                    struct ip_vs_iphdr *iph)
16 {
17         struct ip_vs_service *svc;
18         sctp_chunkhdr_t _schunkh, *sch;
19         sctp_sctphdr_t *sh, _sctph;
20         __be16 _ports[2], *ports = NULL;
21
22         if (likely(!ip_vs_iph_icmp(iph))) {
23                 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
24                 if (sh) {
25                         sch = skb_header_pointer(
26                                 skb, iph->len + sizeof(sctp_sctphdr_t),
27                                 sizeof(_schunkh), &_schunkh);
28                         if (sch && (sch->type == SCTP_CID_INIT ||
29                                     sysctl_sloppy_sctp(ipvs)))
30                                 ports = &sh->source;
31                 }
32         } else {
33                 ports = skb_header_pointer(
34                         skb, iph->len, sizeof(_ports), &_ports);
35         }
36
37         if (!ports) {
38                 *verdict = NF_DROP;
39                 return 0;
40         }
41
42         rcu_read_lock();
43         if (likely(!ip_vs_iph_inverse(iph)))
44                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
45                                          &iph->daddr, ports[1]);
46         else
47                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
48                                          &iph->saddr, ports[0]);
49         if (svc) {
50                 int ignored;
51
52                 if (ip_vs_todrop(ipvs)) {
53                         /*
54                          * It seems that we are very loaded.
55                          * We have to drop this packet :(
56                          */
57                         rcu_read_unlock();
58                         *verdict = NF_DROP;
59                         return 0;
60                 }
61                 /*
62                  * Let the virtual server select a real server for the
63                  * incoming connection, and create a connection entry.
64                  */
65                 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
66                 if (!*cpp && ignored <= 0) {
67                         if (!ignored)
68                                 *verdict = ip_vs_leave(svc, skb, pd, iph);
69                         else
70                                 *verdict = NF_DROP;
71                         rcu_read_unlock();
72                         return 0;
73                 }
74         }
75         rcu_read_unlock();
76         /* NF_ACCEPT */
77         return 1;
78 }
79
80 static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
81                           unsigned int sctphoff)
82 {
83         sctph->checksum = sctp_compute_cksum(skb, sctphoff);
84         skb->ip_summed = CHECKSUM_UNNECESSARY;
85 }
86
87 static int
88 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
89                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
90 {
91         sctp_sctphdr_t *sctph;
92         unsigned int sctphoff = iph->len;
93         bool payload_csum = false;
94
95 #ifdef CONFIG_IP_VS_IPV6
96         if (cp->af == AF_INET6 && iph->fragoffs)
97                 return 1;
98 #endif
99
100         /* csum_check requires unshared skb */
101         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
102                 return 0;
103
104         if (unlikely(cp->app != NULL)) {
105                 int ret;
106
107                 /* Some checks before mangling */
108                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
109                         return 0;
110
111                 /* Call application helper if needed */
112                 ret = ip_vs_app_pkt_out(cp, skb);
113                 if (ret == 0)
114                         return 0;
115                 /* ret=2: csum update is needed after payload mangling */
116                 if (ret == 2)
117                         payload_csum = true;
118         }
119
120         sctph = (void *) skb_network_header(skb) + sctphoff;
121
122         /* Only update csum if we really have to */
123         if (sctph->source != cp->vport || payload_csum ||
124             skb->ip_summed == CHECKSUM_PARTIAL) {
125                 sctph->source = cp->vport;
126                 sctp_nat_csum(skb, sctph, sctphoff);
127         } else {
128                 skb->ip_summed = CHECKSUM_UNNECESSARY;
129         }
130
131         return 1;
132 }
133
134 static int
135 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
136                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
137 {
138         sctp_sctphdr_t *sctph;
139         unsigned int sctphoff = iph->len;
140         bool payload_csum = false;
141
142 #ifdef CONFIG_IP_VS_IPV6
143         if (cp->af == AF_INET6 && iph->fragoffs)
144                 return 1;
145 #endif
146
147         /* csum_check requires unshared skb */
148         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
149                 return 0;
150
151         if (unlikely(cp->app != NULL)) {
152                 int ret;
153
154                 /* Some checks before mangling */
155                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
156                         return 0;
157
158                 /* Call application helper if needed */
159                 ret = ip_vs_app_pkt_in(cp, skb);
160                 if (ret == 0)
161                         return 0;
162                 /* ret=2: csum update is needed after payload mangling */
163                 if (ret == 2)
164                         payload_csum = true;
165         }
166
167         sctph = (void *) skb_network_header(skb) + sctphoff;
168
169         /* Only update csum if we really have to */
170         if (sctph->dest != cp->dport || payload_csum ||
171             (skb->ip_summed == CHECKSUM_PARTIAL &&
172              !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
173                 sctph->dest = cp->dport;
174                 sctp_nat_csum(skb, sctph, sctphoff);
175         } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
176                 skb->ip_summed = CHECKSUM_UNNECESSARY;
177         }
178
179         return 1;
180 }
181
182 static int
183 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
184 {
185         unsigned int sctphoff;
186         struct sctphdr *sh, _sctph;
187         __le32 cmp, val;
188
189 #ifdef CONFIG_IP_VS_IPV6
190         if (af == AF_INET6)
191                 sctphoff = sizeof(struct ipv6hdr);
192         else
193 #endif
194                 sctphoff = ip_hdrlen(skb);
195
196         sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
197         if (sh == NULL)
198                 return 0;
199
200         cmp = sh->checksum;
201         val = sctp_compute_cksum(skb, sctphoff);
202
203         if (val != cmp) {
204                 /* CRC failure, dump it. */
205                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
206                                 "Failed checksum for");
207                 return 0;
208         }
209         return 1;
210 }
211
/*
 * Events driving the connection state table.  Each value selects one
 * row (per direction) of the transition table; chunk types are mapped to
 * events through the sctp_events[] lookup table.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA			= 0,	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT			= 1,	/* INIT chunk */
	IP_VS_SCTP_INIT_ACK		= 2,	/* INIT ACK chunk */
	IP_VS_SCTP_COOKIE_ECHO		= 3,	/* COOKIE ECHO chunk */
	IP_VS_SCTP_COOKIE_ACK		= 4,	/* COOKIE ACK chunk */
	IP_VS_SCTP_SHUTDOWN		= 5,	/* SHUTDOWN chunk */
	IP_VS_SCTP_SHUTDOWN_ACK		= 6,	/* SHUTDOWN ACK chunk */
	IP_VS_SCTP_SHUTDOWN_COMPLETE	= 7,	/* SHUTDOWN COMPLETE chunk */
	IP_VS_SCTP_ERROR		= 8,	/* ERROR chunk */
	IP_VS_SCTP_ABORT		= 9,	/* ABORT chunk */
	IP_VS_SCTP_EVENT_LAST		= 10	/* number of events, not an event */
};
225
226 /* RFC 2960, 3.2 Chunk Field Descriptions */
227 static __u8 sctp_events[] = {
228         [SCTP_CID_DATA]                 = IP_VS_SCTP_DATA,
229         [SCTP_CID_INIT]                 = IP_VS_SCTP_INIT,
230         [SCTP_CID_INIT_ACK]             = IP_VS_SCTP_INIT_ACK,
231         [SCTP_CID_SACK]                 = IP_VS_SCTP_DATA,
232         [SCTP_CID_HEARTBEAT]            = IP_VS_SCTP_DATA,
233         [SCTP_CID_HEARTBEAT_ACK]        = IP_VS_SCTP_DATA,
234         [SCTP_CID_ABORT]                = IP_VS_SCTP_ABORT,
235         [SCTP_CID_SHUTDOWN]             = IP_VS_SCTP_SHUTDOWN,
236         [SCTP_CID_SHUTDOWN_ACK]         = IP_VS_SCTP_SHUTDOWN_ACK,
237         [SCTP_CID_ERROR]                = IP_VS_SCTP_ERROR,
238         [SCTP_CID_COOKIE_ECHO]          = IP_VS_SCTP_COOKIE_ECHO,
239         [SCTP_CID_COOKIE_ACK]           = IP_VS_SCTP_COOKIE_ACK,
240         [SCTP_CID_ECN_ECNE]             = IP_VS_SCTP_DATA,
241         [SCTP_CID_ECN_CWR]              = IP_VS_SCTP_DATA,
242         [SCTP_CID_SHUTDOWN_COMPLETE]    = IP_VS_SCTP_SHUTDOWN_COMPLETE,
243 };
244
245 /* SCTP States:
246  * See RFC 2960, 4. SCTP Association State Diagram
247  *
248  * New states (not in diagram):
249  * - INIT1 state: use shorter timeout for dropped INIT packets
250  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
251  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
252  *
253  * The states are as seen in real server. In the diagram, INIT1, INIT,
254  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
255  *
256  * States as per packets from client (C) and server (S):
257  *
258  * Setup of client connection:
259  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
260  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
261  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
262  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
263  *
264  * Setup of server connection:
265  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
266  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
267  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
268  */
269
270 #define sNO IP_VS_SCTP_S_NONE
271 #define sI1 IP_VS_SCTP_S_INIT1
272 #define sIN IP_VS_SCTP_S_INIT
273 #define sCS IP_VS_SCTP_S_COOKIE_SENT
274 #define sCR IP_VS_SCTP_S_COOKIE_REPLIED
275 #define sCW IP_VS_SCTP_S_COOKIE_WAIT
276 #define sCO IP_VS_SCTP_S_COOKIE
277 #define sCE IP_VS_SCTP_S_COOKIE_ECHOED
278 #define sES IP_VS_SCTP_S_ESTABLISHED
279 #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
280 #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
281 #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
282 #define sRJ IP_VS_SCTP_S_REJECTED
283 #define sCL IP_VS_SCTP_S_CLOSED
284
285 static const __u8 sctp_states
286         [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
287         { /* INPUT */
288 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
289 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
290 /* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
291 /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
292 /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
293 /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
294 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
295 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
296 /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
297 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
298 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
299         },
300         { /* OUTPUT */
301 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
302 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
303 /* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
304 /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
305 /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
306 /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
307 /* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
308 /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
309 /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
310 /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
311 /* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
312         },
313         { /* INPUT-ONLY */
314 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
315 /* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
316 /* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
317 /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
318 /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
319 /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
320 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
321 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
322 /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
323 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
324 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
325         },
326 };
327
328 #define IP_VS_SCTP_MAX_RTO      ((60 + 1) * HZ)
329
330 /* Timeout table[state] */
331 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
332         [IP_VS_SCTP_S_NONE]                     = 2 * HZ,
333         [IP_VS_SCTP_S_INIT1]                    = (0 + 3 + 1) * HZ,
334         [IP_VS_SCTP_S_INIT]                     = IP_VS_SCTP_MAX_RTO,
335         [IP_VS_SCTP_S_COOKIE_SENT]              = IP_VS_SCTP_MAX_RTO,
336         [IP_VS_SCTP_S_COOKIE_REPLIED]           = IP_VS_SCTP_MAX_RTO,
337         [IP_VS_SCTP_S_COOKIE_WAIT]              = IP_VS_SCTP_MAX_RTO,
338         [IP_VS_SCTP_S_COOKIE]                   = IP_VS_SCTP_MAX_RTO,
339         [IP_VS_SCTP_S_COOKIE_ECHOED]            = IP_VS_SCTP_MAX_RTO,
340         [IP_VS_SCTP_S_ESTABLISHED]              = 15 * 60 * HZ,
341         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = IP_VS_SCTP_MAX_RTO,
342         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = IP_VS_SCTP_MAX_RTO,
343         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = IP_VS_SCTP_MAX_RTO,
344         [IP_VS_SCTP_S_REJECTED]                 = (0 + 3 + 1) * HZ,
345         [IP_VS_SCTP_S_CLOSED]                   = IP_VS_SCTP_MAX_RTO,
346         [IP_VS_SCTP_S_LAST]                     = 2 * HZ,
347 };
348
349 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
350         [IP_VS_SCTP_S_NONE]                     = "NONE",
351         [IP_VS_SCTP_S_INIT1]                    = "INIT1",
352         [IP_VS_SCTP_S_INIT]                     = "INIT",
353         [IP_VS_SCTP_S_COOKIE_SENT]              = "C-SENT",
354         [IP_VS_SCTP_S_COOKIE_REPLIED]           = "C-REPLIED",
355         [IP_VS_SCTP_S_COOKIE_WAIT]              = "C-WAIT",
356         [IP_VS_SCTP_S_COOKIE]                   = "COOKIE",
357         [IP_VS_SCTP_S_COOKIE_ECHOED]            = "C-ECHOED",
358         [IP_VS_SCTP_S_ESTABLISHED]              = "ESTABLISHED",
359         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = "S-SENT",
360         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = "S-RECEIVED",
361         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = "S-ACK-SENT",
362         [IP_VS_SCTP_S_REJECTED]                 = "REJECTED",
363         [IP_VS_SCTP_S_CLOSED]                   = "CLOSED",
364         [IP_VS_SCTP_S_LAST]                     = "BUG!",
365 };
366
367
368 static const char *sctp_state_name(int state)
369 {
370         if (state >= IP_VS_SCTP_S_LAST)
371                 return "ERR!";
372         if (sctp_state_name_table[state])
373                 return sctp_state_name_table[state];
374         return "?";
375 }
376
377 static inline void
378 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
379                 int direction, const struct sk_buff *skb)
380 {
381         sctp_chunkhdr_t _sctpch, *sch;
382         unsigned char chunk_type;
383         int event, next_state;
384         int ihl, cofs;
385
386 #ifdef CONFIG_IP_VS_IPV6
387         ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
388 #else
389         ihl = ip_hdrlen(skb);
390 #endif
391
392         cofs = ihl + sizeof(sctp_sctphdr_t);
393         sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
394         if (sch == NULL)
395                 return;
396
397         chunk_type = sch->type;
398         /*
399          * Section 3: Multiple chunks can be bundled into one SCTP packet
400          * up to the MTU size, except for the INIT, INIT ACK, and
401          * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
402          * any other chunk in a packet.
403          *
404          * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
405          * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
406          * bundled with an ABORT, but they MUST be placed before the ABORT
407          * in the SCTP packet or they will be ignored by the receiver.
408          */
409         if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
410             (sch->type == SCTP_CID_COOKIE_ACK)) {
411                 int clen = ntohs(sch->length);
412
413                 if (clen >= sizeof(sctp_chunkhdr_t)) {
414                         sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
415                                                  sizeof(_sctpch), &_sctpch);
416                         if (sch && sch->type == SCTP_CID_ABORT)
417                                 chunk_type = sch->type;
418                 }
419         }
420
421         event = (chunk_type < sizeof(sctp_events)) ?
422                 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
423
424         /* Update direction to INPUT_ONLY if necessary
425          * or delete NO_OUTPUT flag if output packet detected
426          */
427         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
428                 if (direction == IP_VS_DIR_OUTPUT)
429                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
430                 else
431                         direction = IP_VS_DIR_INPUT_ONLY;
432         }
433
434         next_state = sctp_states[direction][event][cp->state];
435
436         if (next_state != cp->state) {
437                 struct ip_vs_dest *dest = cp->dest;
438
439                 IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
440                                 "%s:%d state: %s->%s conn->refcnt:%d\n",
441                                 pd->pp->name,
442                                 ((direction == IP_VS_DIR_OUTPUT) ?
443                                  "output " : "input "),
444                                 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
445                                 ntohs(cp->dport),
446                                 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
447                                 ntohs(cp->cport),
448                                 sctp_state_name(cp->state),
449                                 sctp_state_name(next_state),
450                                 atomic_read(&cp->refcnt));
451                 if (dest) {
452                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
453                                 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
454                                 atomic_dec(&dest->activeconns);
455                                 atomic_inc(&dest->inactconns);
456                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
457                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
458                                    (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
459                                 atomic_inc(&dest->activeconns);
460                                 atomic_dec(&dest->inactconns);
461                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
462                         }
463                 }
464         }
465         if (likely(pd))
466                 cp->timeout = pd->timeout_table[cp->state = next_state];
467         else    /* What to do ? */
468                 cp->timeout = sctp_timeouts[cp->state = next_state];
469 }
470
471 static void
472 sctp_state_transition(struct ip_vs_conn *cp, int direction,
473                 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
474 {
475         spin_lock_bh(&cp->lock);
476         set_sctp_state(pd, cp, direction, skb);
477         spin_unlock_bh(&cp->lock);
478 }
479
480 static inline __u16 sctp_app_hashkey(__be16 port)
481 {
482         return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
483                 & SCTP_APP_TAB_MASK;
484 }
485
486 static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
487 {
488         struct ip_vs_app *i;
489         __u16 hash;
490         __be16 port = inc->port;
491         int ret = 0;
492         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
493
494         hash = sctp_app_hashkey(port);
495
496         list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
497                 if (i->port == port) {
498                         ret = -EEXIST;
499                         goto out;
500                 }
501         }
502         list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
503         atomic_inc(&pd->appcnt);
504 out:
505
506         return ret;
507 }
508
509 static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
510 {
511         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
512
513         atomic_dec(&pd->appcnt);
514         list_del_rcu(&inc->p_list);
515 }
516
517 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
518 {
519         struct netns_ipvs *ipvs = cp->ipvs;
520         int hash;
521         struct ip_vs_app *inc;
522         int result = 0;
523
524         /* Default binding: bind app only for NAT */
525         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
526                 return 0;
527         /* Lookup application incarnations and bind the right one */
528         hash = sctp_app_hashkey(cp->vport);
529
530         rcu_read_lock();
531         list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
532                 if (inc->port == cp->vport) {
533                         if (unlikely(!ip_vs_app_inc_get(inc)))
534                                 break;
535                         rcu_read_unlock();
536
537                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
538                                         "%s:%u to app %s on port %u\n",
539                                         __func__,
540                                         IP_VS_DBG_ADDR(cp->af, &cp->caddr),
541                                         ntohs(cp->cport),
542                                         IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
543                                         ntohs(cp->vport),
544                                         inc->name, ntohs(inc->port));
545                         cp->app = inc;
546                         if (inc->init_conn)
547                                 result = inc->init_conn(inc, cp);
548                         goto out;
549                 }
550         }
551         rcu_read_unlock();
552 out:
553         return result;
554 }
555
556 /* ---------------------------------------------
557  *   timeouts is netns related now.
558  * ---------------------------------------------
559  */
560 static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
561 {
562         ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
563         pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
564                                                         sizeof(sctp_timeouts));
565         if (!pd->timeout_table)
566                 return -ENOMEM;
567         return 0;
568 }
569
570 static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
571 {
572         kfree(pd->timeout_table);
573 }
574
575 struct ip_vs_protocol ip_vs_protocol_sctp = {
576         .name           = "SCTP",
577         .protocol       = IPPROTO_SCTP,
578         .num_states     = IP_VS_SCTP_S_LAST,
579         .dont_defrag    = 0,
580         .init           = NULL,
581         .exit           = NULL,
582         .init_netns     = __ip_vs_sctp_init,
583         .exit_netns     = __ip_vs_sctp_exit,
584         .register_app   = sctp_register_app,
585         .unregister_app = sctp_unregister_app,
586         .conn_schedule  = sctp_conn_schedule,
587         .conn_in_get    = ip_vs_conn_in_get_proto,
588         .conn_out_get   = ip_vs_conn_out_get_proto,
589         .snat_handler   = sctp_snat_handler,
590         .dnat_handler   = sctp_dnat_handler,
591         .csum_check     = sctp_csum_check,
592         .state_name     = sctp_state_name,
593         .state_transition = sctp_state_transition,
594         .app_conn_bind  = sctp_app_conn_bind,
595         .debug_packet   = ip_vs_tcpudp_debug_packet,
596         .timeout_change = NULL,
597 };