]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/netfilter/nf_conntrack_proto_tcp.c
xfrm: dst_entries_init() per-net dst_ops
[karo-tx-linux.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_log.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35
36 /* "Be conservative in what you do,
37     be liberal in what you accept from others."
38     If it's non-zero, we mark only out of window RST segments as INVALID. */
39 static int nf_ct_tcp_be_liberal __read_mostly = 0;
40
41 /* If it is set to zero, we disable picking up already established
42    connections. */
43 static int nf_ct_tcp_loose __read_mostly = 1;
44
45 /* Max number of the retransmitted packets without receiving an (acceptable)
46    ACK from the destination. If this number is reached, a shorter timer
47    will be started. */
48 static int nf_ct_tcp_max_retrans __read_mostly = 3;
49
50   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51      closely.  They're more complex. --RR */
52
/*
 * Printable names of the conntrack TCP states.  The table is looked up
 * with the numeric state value, so the order of the entries matters.
 */
static const char *const tcp_conntrack_names[] = {
	/* 0 */ "NONE",
	/* 1 */ "SYN_SENT",
	/* 2 */ "SYN_RECV",
	/* 3 */ "ESTABLISHED",
	/* 4 */ "FIN_WAIT",
	/* 5 */ "CLOSE_WAIT",
	/* 6 */ "LAST_ACK",
	/* 7 */ "TIME_WAIT",
	/* 8 */ "CLOSE",
	/* 9 */ "SYN_SENT2",
};
65
66 #define SECS * HZ
67 #define MINS * 60 SECS
68 #define HOURS * 60 MINS
69 #define DAYS * 24 HOURS
70
71 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
72         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
73         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
74         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
75         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
76         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
77         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
78         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
79         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
80         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
81 /* RFC1122 says the R2 limit should be at least 100 seconds.
82    Linux uses 15 packets as limit, which corresponds
83    to ~13-30min depending on RTO. */
84         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
85         [TCP_CONNTRACK_UNACK]           = 5 MINS,
86 };
87
/*
 * Three-letter shorthands for the conntrack TCP states, used to keep the
 * state transition table readable.  sIV and sIG are the two pseudo states
 * the table uses for packets regarded as invalid and for packets to be
 * ignored, respectively.
 */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
100
/*
 * Classification of a segment by which of the RST/SYN/FIN/ACK flags it
 * carries.  The numeric values select a row of the state transition table,
 * so they are spelled out explicitly.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
110
111 /*
112  * The TCP state transition table needs a few words...
113  *
114  * We are the man in the middle. All the packets go through us
115  * but might get lost in transit to the destination.
116  * It is assumed that the destinations can't receive segments
117  * we haven't seen.
118  *
119  * The checked segment is in window, but our windows are *not*
120  * equivalent with the ones of the sender/receiver. We always
121  * try to guess the state of the current sender.
122  *
123  * The meanings of the states are:
124  *
125  * NONE:        initial state
126  * SYN_SENT:    SYN-only packet seen
127  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
128  * SYN_RECV:    SYN-ACK packet seen
129  * ESTABLISHED: ACK packet seen
130  * FIN_WAIT:    FIN packet seen
131  * CLOSE_WAIT:  ACK seen (after FIN)
132  * LAST_ACK:    FIN seen (after FIN)
133  * TIME_WAIT:   last ACK seen
134  * CLOSE:       closed connection (RST)
135  *
136  * Packets marked as IGNORED (sIG):
137  *      if they may be either invalid or valid
138  *      and the receiver may send back a connection
139  *      closing RST or a SYN/ACK.
140  *
141  * Packets marked as INVALID (sIV):
142  *      if we regard them as truly invalid packets
143  */
144 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
145         {
146 /* ORIGINAL */
147 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
148 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
149 /*
150  *      sNO -> sSS      Initialize a new connection
151  *      sSS -> sSS      Retransmitted SYN
152  *      sS2 -> sS2      Late retransmitted SYN
153  *      sSR -> sIG
154  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
155  *                      are errors. Receiver will reply with RST
156  *                      and close the connection.
157  *                      Or we are not in sync and hold a dead connection.
158  *      sFW -> sIG
159  *      sCW -> sIG
160  *      sLA -> sIG
161  *      sTW -> sSS      Reopened connection (RFC 1122).
162  *      sCL -> sSS
163  */
164 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
165 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
166 /*
167  *      sNO -> sIV      Too late and no reason to do anything
168  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
169  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
170  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
171  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
172  *      sFW -> sIV
173  *      sCW -> sIV
174  *      sLA -> sIV
175  *      sTW -> sIV
176  *      sCL -> sIV
177  */
178 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
179 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
180 /*
181  *      sNO -> sIV      Too late and no reason to do anything...
182  *      sSS -> sIV      Client migth not send FIN in this state:
183  *                      we enforce waiting for a SYN/ACK reply first.
184  *      sS2 -> sIV
185  *      sSR -> sFW      Close started.
186  *      sES -> sFW
187  *      sFW -> sLA      FIN seen in both directions, waiting for
188  *                      the last ACK.
189  *                      Migth be a retransmitted FIN as well...
190  *      sCW -> sLA
191  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
192  *      sTW -> sTW
193  *      sCL -> sCL
194  */
195 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
196 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
197 /*
198  *      sNO -> sES      Assumed.
199  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
200  *      sS2 -> sIV
201  *      sSR -> sES      Established state is reached.
202  *      sES -> sES      :-)
203  *      sFW -> sCW      Normal close request answered by ACK.
204  *      sCW -> sCW
205  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
206  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
207  *      sCL -> sCL
208  */
209 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
210 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
211 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
212         },
213         {
214 /* REPLY */
215 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
216 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
217 /*
218  *      sNO -> sIV      Never reached.
219  *      sSS -> sS2      Simultaneous open
220  *      sS2 -> sS2      Retransmitted simultaneous SYN
221  *      sSR -> sIV      Invalid SYN packets sent by the server
222  *      sES -> sIV
223  *      sFW -> sIV
224  *      sCW -> sIV
225  *      sLA -> sIV
226  *      sTW -> sSS      Reopened connection, but server may have switched role
227  *      sCL -> sIV
228  */
229 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
230 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231 /*
232  *      sSS -> sSR      Standard open.
233  *      sS2 -> sSR      Simultaneous open
234  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
235  *      sES -> sIG      Late retransmitted SYN/ACK?
236  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
237  *      sCW -> sIG
238  *      sLA -> sIG
239  *      sTW -> sIG
240  *      sCL -> sIG
241  */
242 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
243 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
244 /*
245  *      sSS -> sIV      Server might not send FIN in this state.
246  *      sS2 -> sIV
247  *      sSR -> sFW      Close started.
248  *      sES -> sFW
249  *      sFW -> sLA      FIN seen in both directions.
250  *      sCW -> sLA
251  *      sLA -> sLA      Retransmitted FIN.
252  *      sTW -> sTW
253  *      sCL -> sCL
254  */
255 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
256 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
257 /*
258  *      sSS -> sIG      Might be a half-open connection.
259  *      sS2 -> sIG
260  *      sSR -> sSR      Might answer late resent SYN.
261  *      sES -> sES      :-)
262  *      sFW -> sCW      Normal close request answered by ACK.
263  *      sCW -> sCW
264  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
265  *      sTW -> sTW      Retransmitted last ACK.
266  *      sCL -> sCL
267  */
268 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
269 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
270 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
271         }
272 };
273
274 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275 {
276         return &net->ct.nf_ct_proto.tcp;
277 }
278
279 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280                              struct nf_conntrack_tuple *tuple)
281 {
282         const struct tcphdr *hp;
283         struct tcphdr _hdr;
284
285         /* Actually only need first 8 bytes. */
286         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287         if (hp == NULL)
288                 return false;
289
290         tuple->src.u.tcp.port = hp->source;
291         tuple->dst.u.tcp.port = hp->dest;
292
293         return true;
294 }
295
296 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
297                              const struct nf_conntrack_tuple *orig)
298 {
299         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
300         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
301         return true;
302 }
303
304 /* Print out the per-protocol part of the tuple. */
305 static void tcp_print_tuple(struct seq_file *s,
306                             const struct nf_conntrack_tuple *tuple)
307 {
308         seq_printf(s, "sport=%hu dport=%hu ",
309                    ntohs(tuple->src.u.tcp.port),
310                    ntohs(tuple->dst.u.tcp.port));
311 }
312
313 /* Print out the private part of the conntrack. */
314 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315 {
316         enum tcp_conntrack state;
317
318         spin_lock_bh(&ct->lock);
319         state = ct->proto.tcp.state;
320         spin_unlock_bh(&ct->lock);
321
322         seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 }
324
325 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326 {
327         if (tcph->rst) return TCP_RST_SET;
328         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329         else if (tcph->fin) return TCP_FIN_SET;
330         else if (tcph->ack) return TCP_ACK_SET;
331         else return TCP_NONE_SET;
332 }
333
334 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335    in IP Filter' by Guido van Rooij.
336
337    http://www.sane.nl/events/sane2000/papers.html
338    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
339
340    The boundaries and the conditions are changed according to RFC793:
341    the packet must intersect the window (i.e. segments may be
342    after the right or before the left edge) and thus receivers may ACK
343    segments after the right edge of the window.
344
345         td_maxend = max(sack + max(win,1)) seen in reply packets
346         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347         td_maxwin += seq + len - sender.td_maxend
348                         if seq + len > sender.td_maxend
349         td_end    = max(seq + len) seen in sent packets
350
351    I.   Upper bound for valid data:     seq <= sender.td_maxend
352    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
353    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
354    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
355
356    where sack is the highest right edge of sack block found in the packet
357    or ack in the case of packet without SACK option.
358
359    The upper bound limit for a valid (s)ack is not ignored -
360    we don't have to deal with fragments.
361 */
362
363 static inline __u32 segment_seq_plus_len(__u32 seq,
364                                          size_t len,
365                                          unsigned int dataoff,
366                                          const struct tcphdr *tcph)
367 {
368         /* XXX Should I use payload length field in IP/IPv6 header ?
369          * - YK */
370         return (seq + len - dataoff - tcph->doff*4
371                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
372 }
373
/*
 * Width of the window used for the lower bound of a valid (s)ack: the
 * sender's td_maxwin, but never less than MAXACKWINCONST.
 * Fixme: what about big packets?
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST	\
					       : (sender)->td_maxwin)
379
380 /*
381  * Simplified tcp_parse_options routine from tcp_input.c
382  */
383 static void tcp_options(const struct sk_buff *skb,
384                         unsigned int dataoff,
385                         const struct tcphdr *tcph,
386                         struct ip_ct_tcp_state *state)
387 {
388         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
389         const unsigned char *ptr;
390         int length = (tcph->doff*4) - sizeof(struct tcphdr);
391
392         if (!length)
393                 return;
394
395         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
396                                  length, buff);
397         BUG_ON(ptr == NULL);
398
399         state->td_scale =
400         state->flags = 0;
401
402         while (length > 0) {
403                 int opcode=*ptr++;
404                 int opsize;
405
406                 switch (opcode) {
407                 case TCPOPT_EOL:
408                         return;
409                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
410                         length--;
411                         continue;
412                 default:
413                         opsize=*ptr++;
414                         if (opsize < 2) /* "silly options" */
415                                 return;
416                         if (opsize > length)
417                                 return; /* don't parse partial options */
418
419                         if (opcode == TCPOPT_SACK_PERM
420                             && opsize == TCPOLEN_SACK_PERM)
421                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
422                         else if (opcode == TCPOPT_WINDOW
423                                  && opsize == TCPOLEN_WINDOW) {
424                                 state->td_scale = *(u_int8_t *)ptr;
425
426                                 if (state->td_scale > 14) {
427                                         /* See RFC1323 */
428                                         state->td_scale = 14;
429                                 }
430                                 state->flags |=
431                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
432                         }
433                         ptr += opsize - 2;
434                         length -= opsize;
435                 }
436         }
437 }
438
439 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
440                      const struct tcphdr *tcph, __u32 *sack)
441 {
442         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
443         const unsigned char *ptr;
444         int length = (tcph->doff*4) - sizeof(struct tcphdr);
445         __u32 tmp;
446
447         if (!length)
448                 return;
449
450         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
451                                  length, buff);
452         BUG_ON(ptr == NULL);
453
454         /* Fast path for timestamp-only option */
455         if (length == TCPOLEN_TSTAMP_ALIGNED
456             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
457                                        | (TCPOPT_NOP << 16)
458                                        | (TCPOPT_TIMESTAMP << 8)
459                                        | TCPOLEN_TIMESTAMP))
460                 return;
461
462         while (length > 0) {
463                 int opcode = *ptr++;
464                 int opsize, i;
465
466                 switch (opcode) {
467                 case TCPOPT_EOL:
468                         return;
469                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
470                         length--;
471                         continue;
472                 default:
473                         opsize = *ptr++;
474                         if (opsize < 2) /* "silly options" */
475                                 return;
476                         if (opsize > length)
477                                 return; /* don't parse partial options */
478
479                         if (opcode == TCPOPT_SACK
480                             && opsize >= (TCPOLEN_SACK_BASE
481                                           + TCPOLEN_SACK_PERBLOCK)
482                             && !((opsize - TCPOLEN_SACK_BASE)
483                                  % TCPOLEN_SACK_PERBLOCK)) {
484                                 for (i = 0;
485                                      i < (opsize - TCPOLEN_SACK_BASE);
486                                      i += TCPOLEN_SACK_PERBLOCK) {
487                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
488
489                                         if (after(tmp, *sack))
490                                                 *sack = tmp;
491                                 }
492                                 return;
493                         }
494                         ptr += opsize - 2;
495                         length -= opsize;
496                 }
497         }
498 }
499
/*
 * Decide whether a TCP segment is acceptable for the tracked connection
 * and, when it is, update the per-direction window tracking data.
 *
 * ct:      conntrack entry; used for the namespace lookup, the tuple dump
 *          and the NAT sequence offset
 * state:   TCP tracking state; seen[dir] is treated as the sender of this
 *          packet and seen[!dir] as the receiver
 * dir:     direction of the packet
 * index:   flag class of the packet; only TCP_ACK_SET packets take part
 *          in the retransmission counting
 * skb:     the packet
 * dataoff: passed on to segment_seq_plus_len(), tcp_sack() and
 *          tcp_options() as the position of the TCP header
 * tcph:    TCP header of the packet
 * pf:      protocol family, handed to nf_log_packet()
 *
 * Returns true when the four checks I-IV described above the helper
 * functions all pass.  When a check fails, the result is still true if the
 * sender has IP_CT_TCP_FLAG_BE_LIBERAL set or the per-net tcp_be_liberal
 * setting is non-zero; otherwise it is false and the packet may be logged.
 * A SYN without ACK that initializes the sender data returns true right
 * away, before any check is made.
 */
500 static bool tcp_in_window(const struct nf_conn *ct,
501                           struct ip_ct_tcp *state,
502                           enum ip_conntrack_dir dir,
503                           unsigned int index,
504                           const struct sk_buff *skb,
505                           unsigned int dataoff,
506                           const struct tcphdr *tcph,
507                           u_int8_t pf)
508 {
509         struct net *net = nf_ct_net(ct);
510         struct nf_tcp_net *tn = tcp_pernet(net);
511         struct ip_ct_tcp_state *sender = &state->seen[dir];
512         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
513         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
514         __u32 seq, ack, sack, end, win, swin;
515         s32 receiver_offset;
516         bool res, in_recv_win;
517
518         /*
519          * Get the required data from the packet.
520          */
521         seq = ntohl(tcph->seq);
522         ack = sack = ntohl(tcph->ack_seq);
523         win = ntohs(tcph->window);
524         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
525
            /*
             * sack starts out equal to ack; tcp_sack() only moves it forward
             * when the packet carries a SACK block with a later right edge.
             */
526         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
527                 tcp_sack(skb, dataoff, tcph, &sack);
528
529         /* Take into account NAT sequence number mangling */
530         receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
531         ack -= receiver_offset;
532         sack -= receiver_offset;
533
534         pr_debug("tcp_in_window: START\n");
535         pr_debug("tcp_in_window: ");
536         nf_ct_dump_tuple(tuple);
537         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
538                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
539         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
540                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
541                  sender->td_end, sender->td_maxend, sender->td_maxwin,
542                  sender->td_scale,
543                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
544                  receiver->td_scale);
545
            /*
             * td_maxwin == 0 is used as "no data recorded for this sender
             * yet": both branches below store a non-zero value in it.
             */
546         if (sender->td_maxwin == 0) {
547                 /*
548                  * Initialize sender data.
549                  */
550                 if (tcph->syn) {
551                         /*
552                          * SYN-ACK in reply to a SYN
553                          * or SYN from reply direction in simultaneous open.
554                          */
555                         sender->td_end =
556                         sender->td_maxend = end;
557                         sender->td_maxwin = (win == 0 ? 1 : win);
558
559                         tcp_options(skb, dataoff, tcph, sender);
560                         /*
561                          * RFC 1323:
562                          * Both sides must send the Window Scale option
563                          * to enable window scaling in either direction.
564                          */
565                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
566                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
567                                 sender->td_scale =
568                                 receiver->td_scale = 0;
569                         if (!tcph->ack)
570                                 /* Simultaneous open */
571                                 return true;
572                 } else {
573                         /*
574                          * We are in the middle of a connection,
575                          * its history is lost for us.
576                          * Let's try to use the data from the packet.
577                          */
578                         sender->td_end = end;
579                         swin = win << sender->td_scale;
580                         sender->td_maxwin = (swin == 0 ? 1 : swin);
581                         sender->td_maxend = end + sender->td_maxwin;
582                         /*
583                          * We haven't seen traffic in the other direction yet
584                          * but we have to tweak window tracking to pass III
585                          * and IV until that happens.
586                          */
587                         if (receiver->td_maxwin == 0)
588                                 receiver->td_end = receiver->td_maxend = sack;
589                 }
590         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
591                      && dir == IP_CT_DIR_ORIGINAL)
592                    || (state->state == TCP_CONNTRACK_SYN_RECV
593                      && dir == IP_CT_DIR_REPLY))
594                    && after(end, sender->td_end)) {
595                 /*
596                  * RFC 793: "if a TCP is reinitialized ... then it need
597                  * not wait at all; it must only be sure to use sequence
598                  * numbers larger than those recently used."
599                  */
600                 sender->td_end =
601                 sender->td_maxend = end;
602                 sender->td_maxwin = (win == 0 ? 1 : win);
603
604                 tcp_options(skb, dataoff, tcph, sender);
605         }
606
607         if (!(tcph->ack)) {
608                 /*
609                  * If there is no ACK, just pretend it was set and OK.
610                  */
611                 ack = sack = receiver->td_end;
612         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
613                     (TCP_FLAG_ACK|TCP_FLAG_RST))
614                    && (ack == 0)) {
615                 /*
616                  * Broken TCP stacks, that set ACK in RST packets as well
617                  * with zero ack value.
618                  */
619                 ack = sack = receiver->td_end;
620         }
621
622         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
623                 /*
624                  * RST sent answering SYN.
625                  */
626                 seq = end = sender->td_end;
627
628         pr_debug("tcp_in_window: ");
629         nf_ct_dump_tuple(tuple);
630         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
631                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
632         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
633                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
634                  sender->td_end, sender->td_maxend, sender->td_maxwin,
635                  sender->td_scale,
636                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
637                  receiver->td_scale);
638
639         /* Is the ending sequence in the receive window (if available)? */
640         in_recv_win = !receiver->td_maxwin ||
641                       after(end, sender->td_end - receiver->td_maxwin - 1);
642
643         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
644                  before(seq, sender->td_maxend + 1),
645                  (in_recv_win ? 1 : 0),
646                  before(sack, receiver->td_end + 1),
647                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
648
            /* Checks I, II, III and IV, in that order. */
649         if (before(seq, sender->td_maxend + 1) &&
650             in_recv_win &&
651             before(sack, receiver->td_end + 1) &&
652             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
653                 /*
654                  * Take into account window scaling (RFC 1323).
655                  */
656                 if (!tcph->syn)
657                         win <<= sender->td_scale;
658
659                 /*
660                  * Update sender data.
661                  */
                    /* (sack - ack) is zero unless tcp_sack() moved sack forward. */
662                 swin = win + (sack - ack);
663                 if (sender->td_maxwin < swin)
664                         sender->td_maxwin = swin;
665                 if (after(end, sender->td_end)) {
666                         sender->td_end = end;
667                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
668                 }
669                 if (tcph->ack) {
670                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
671                                 sender->td_maxack = ack;
672                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
673                         } else if (after(ack, sender->td_maxack))
674                                 sender->td_maxack = ack;
675                 }
676
677                 /*
678                  * Update receiver data.
679                  */
680                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
681                         receiver->td_maxwin += end - sender->td_maxend;
682                 if (after(sack + win, receiver->td_maxend - 1)) {
683                         receiver->td_maxend = sack + win;
                            /* a zero window still advances maxend by one */
684                         if (win == 0)
685                                 receiver->td_maxend++;
686                 }
687                 if (ack == receiver->td_end)
688                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
689
690                 /*
691                  * Check retransmissions.
692                  */
                    /*
                     * A pure ACK identical to the previously recorded one
                     * (same direction, seq, ack, end and window) counts as a
                     * retransmission; anything else restarts the counter.
                     */
693                 if (index == TCP_ACK_SET) {
694                         if (state->last_dir == dir
695                             && state->last_seq == seq
696                             && state->last_ack == ack
697                             && state->last_end == end
698                             && state->last_win == win)
699                                 state->retrans++;
700                         else {
701                                 state->last_dir = dir;
702                                 state->last_seq = seq;
703                                 state->last_ack = ack;
704                                 state->last_end = end;
705                                 state->last_win = win;
706                                 state->retrans = 0;
707                         }
708                 }
709                 res = true;
710         } else {
711                 res = false;
712                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
713                     tn->tcp_be_liberal)
714                         res = true;
                    /*
                     * The nested conditional below repeats checks I-IV in order
                     * and selects the message of the first one that failed.
                     * "BUG" would mean that all four passed, which contradicts
                     * being in this branch.
                     */
715                 if (!res && LOG_INVALID(net, IPPROTO_TCP))
716                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
717                         "nf_ct_tcp: %s ",
718                         before(seq, sender->td_maxend + 1) ?
719                         in_recv_win ?
720                         before(sack, receiver->td_end + 1) ?
721                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
722                         : "ACK is under the lower bound (possible overly delayed ACK)"
723                         : "ACK is over the upper bound (ACKed data not seen yet)"
724                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
725                         : "SEQ is over the upper bound (over the window of the receiver)");
726         }
727
728         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
729                  "receiver end=%u maxend=%u maxwin=%u\n",
730                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
731                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
732
733         return res;
734 }
735
736 /* Table of valid TCP flag combinations - PUSH, ECE and CWR are always valid.
 *
 * The table is indexed by the TCP flag byte with PSH, ECE and CWR masked
 * out; tcp_error() applies that mask before the lookup. Entries set to 1
 * are accepted. Every combination that is not listed is implicitly zero
 * and is therefore rejected. The array has (FIN|SYN|RST|ACK|URG) + 1
 * entries, so the OR of all five remaining flags is still a valid index.
 */
737 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738                                  TCPHDR_URG) + 1] =
739 {
740         [TCPHDR_SYN]                            = 1,
741         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
742         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
743         [TCPHDR_RST]                            = 1,
744         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
745         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
746         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
747         [TCPHDR_ACK]                            = 1,
748         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
749 };
750
751 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.
 *
 * Sanity-checks the TCP header found at offset dataoff in skb:
 *   - a full struct tcphdr must be readable at dataoff,
 *   - the data offset field (doff * 4) must cover at least a struct tcphdr
 *     and must not exceed the bytes available after dataoff,
 *   - the checksum must verify, but only when net->ct.sysctl_checksum is
 *     set and hooknum is NF_INET_PRE_ROUTING,
 *   - the flag combination must be marked valid in tcp_valid_flags[].
 *
 * Returns NF_ACCEPT when every check passes and -NF_ACCEPT otherwise. Each
 * failure is reported through nf_log_packet() if LOG_INVALID() is enabled
 * for IPPROTO_TCP. The tmpl and ctinfo parameters are not used here.
 */
752 static int tcp_error(struct net *net, struct nf_conn *tmpl,
753                      struct sk_buff *skb,
754                      unsigned int dataoff,
755                      enum ip_conntrack_info *ctinfo,
756                      u_int8_t pf,
757                      unsigned int hooknum)
758 {
759         const struct tcphdr *th;
760         struct tcphdr _tcph;
761         unsigned int tcplen = skb->len - dataoff;
762         u_int8_t tcpflags;
763
764         /* Smaller than minimal TCP header? */
765         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
766         if (th == NULL) {
767                 if (LOG_INVALID(net, IPPROTO_TCP))
768                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
769                                 "nf_ct_tcp: short packet ");
770                 return -NF_ACCEPT;
771         }
772
773         /* Not whole TCP header or malformed packet */
774         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
775                 if (LOG_INVALID(net, IPPROTO_TCP))
776                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
777                                 "nf_ct_tcp: truncated/malformed packet ");
778                 return -NF_ACCEPT;
779         }
780
781         /* Checksum invalid? Ignore.
782          * We skip checking packets on the outgoing path
783          * because the checksum is assumed to be correct.
784          */
785         /* FIXME: Source route IP option packets --RR */
786         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
787             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
788                 if (LOG_INVALID(net, IPPROTO_TCP))
789                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
790                                   "nf_ct_tcp: bad TCP checksum ");
791                 return -NF_ACCEPT;
792         }
793
794         /* Check TCP flags. PSH, ECE and CWR are masked out first because
             * tcp_valid_flags[] has no entries for them.
             */
795         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
796         if (!tcp_valid_flags[tcpflags]) {
797                 if (LOG_INVALID(net, IPPROTO_TCP))
798                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
799                                   "nf_ct_tcp: invalid TCP flag combination ");
800                 return -NF_ACCEPT;
801         }
802
803         return NF_ACCEPT;
804 }
805
/* Return the timeouts array stored in the TCP state that tcp_pernet()
 * yields for the given net.
 */
806 static unsigned int *tcp_get_timeouts(struct net *net)
807 {
808         return tcp_pernet(net)->timeouts;
809 }
810
811 /* Returns verdict for packet, or -1 for invalid.
 *
 * Per-packet TCP state tracking for the conntrack entry ct.
 *
 * The candidate new state is looked up in
 * tcp_conntracks[dir][index][old_state], where dir comes from ctinfo and
 * index from get_conntrack_index() on the TCP header. The switch below
 * then handles the special transitions, several of which return early:
 *   - SYN reopening a closed/aborted connection: the entry is killed and
 *     -NF_REPEAT is returned (NF_DROP if nf_ct_kill() fails),
 *   - ignored packets: the last_* fields are recorded, NF_ACCEPT,
 *   - TCP_CONNTRACK_MAX (invalid transition): -NF_ACCEPT, except for the
 *     SYN proxy keep-alive case which returns NF_ACCEPT,
 *   - RFC5961 challenge ACK seen in LAST_ACK: NF_ACCEPT, state unchanged,
 *   - RST with a sequence number before td_maxack: -NF_ACCEPT.
 * Packets that get past the switch must pass tcp_in_window(), unless they
 * jumped straight to the in_window label. After that the new state is
 * stored, a timeout is chosen from timeouts[], IPS_ASSURED_BIT may be set
 * and the entry is refreshed with nf_ct_refresh_acct().
 *
 * ct->lock is taken with spin_lock_bh() and released before every return.
 * The TCP header must be readable at dataoff (BUG_ON otherwise).
 */
812 static int tcp_packet(struct nf_conn *ct,
813                       const struct sk_buff *skb,
814                       unsigned int dataoff,
815                       enum ip_conntrack_info ctinfo,
816                       u_int8_t pf,
817                       unsigned int hooknum,
818                       unsigned int *timeouts)
819 {
820         struct net *net = nf_ct_net(ct);
821         struct nf_tcp_net *tn = tcp_pernet(net);
822         struct nf_conntrack_tuple *tuple;
823         enum tcp_conntrack new_state, old_state;
824         enum ip_conntrack_dir dir;
825         const struct tcphdr *th;
826         struct tcphdr _tcph;
827         unsigned long timeout;
828         unsigned int index;
829
830         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
831         BUG_ON(th == NULL);
832
833         spin_lock_bh(&ct->lock);
834         old_state = ct->proto.tcp.state;
835         dir = CTINFO2DIR(ctinfo);
836         index = get_conntrack_index(th);
837         new_state = tcp_conntracks[dir][index][old_state];
838         tuple = &ct->tuplehash[dir].tuple;
839
840         switch (new_state) {
841         case TCP_CONNTRACK_SYN_SENT:
842                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
843                         break;
844                 /* RFC 1122: "When a connection is closed actively,
845                  * it MUST linger in TIME-WAIT state for a time 2xMSL
846                  * (Maximum Segment Lifetime). However, it MAY accept
847                  * a new SYN from the remote TCP to reopen the connection
848                  * directly from TIME-WAIT state, if..."
849                  * We ignore the conditions because we are in the
850                  * TIME-WAIT state anyway.
851                  *
852                  * Handle aborted connections: we and the server
853                  * think there is an existing connection but the client
854                  * aborts it and starts a new one.
855                  */
856                 if (((ct->proto.tcp.seen[dir].flags
857                       | ct->proto.tcp.seen[!dir].flags)
858                      & IP_CT_TCP_FLAG_CLOSE_INIT)
859                     || (ct->proto.tcp.last_dir == dir
860                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
861                         /* Attempt to reopen a closed/aborted connection.
862                          * Delete this connection and look up again. */
863                         spin_unlock_bh(&ct->lock);
864
865                         /* Only repeat if we can actually remove the timer.
866                          * Destruction may already be in progress in process
867                          * context and we must give it a chance to terminate.
868                          */
869                         if (nf_ct_kill(ct))
870                                 return -NF_REPEAT;
871                         return NF_DROP;
872                 }
873                 /* Fall through */
874         case TCP_CONNTRACK_IGNORE:
875                 /* Ignored packets:
876                  *
877                  * Our connection entry may be out of sync, so ignore
878                  * packets which may signal the real connection between
879                  * the client and the server.
880                  *
881                  * a) SYN in ORIGINAL
882                  * b) SYN/ACK in REPLY
883                  * c) ACK in reply direction after initial SYN in original.
884                  *
885                  * If the ignored packet is invalid, the receiver will send
886                  * a RST we'll catch below.
887                  */
888                 if (index == TCP_SYNACK_SET
889                     && ct->proto.tcp.last_index == TCP_SYN_SET
890                     && ct->proto.tcp.last_dir != dir
891                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
892                         /* b) This SYN/ACK acknowledges a SYN that we earlier
893                          * ignored as invalid. This means that the client and
894                          * the server are both in sync, while the firewall is
895                          * not. We get in sync from the previously annotated
896                          * values.
897                          */
898                         old_state = TCP_CONNTRACK_SYN_SENT;
899                         new_state = TCP_CONNTRACK_SYN_RECV;
900                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
901                                 ct->proto.tcp.last_end;
902                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
903                                 ct->proto.tcp.last_end;
904                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
905                                 ct->proto.tcp.last_win == 0 ?
906                                         1 : ct->proto.tcp.last_win;
907                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
908                                 ct->proto.tcp.last_wscale;
909                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
910                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
911                                 ct->proto.tcp.last_flags;
912                         memset(&ct->proto.tcp.seen[dir], 0,
913                                sizeof(struct ip_ct_tcp_state));
914                         break;
915                 }
                    /* Remember this packet so that a later SYN/ACK, RST or
                     * challenge ACK can be matched against it.
                     */
916                 ct->proto.tcp.last_index = index;
917                 ct->proto.tcp.last_dir = dir;
918                 ct->proto.tcp.last_seq = ntohl(th->seq);
919                 ct->proto.tcp.last_end =
920                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
921                 ct->proto.tcp.last_win = ntohs(th->window);
922
923                 /* a) This is a SYN in ORIGINAL. The client and the server
924                  * may be in sync but we are not. In that case, we annotate
925                  * the TCP options and let the packet go through. If it is a
926                  * valid SYN packet, the server will reply with a SYN/ACK, and
927                  * then we'll get in sync. Otherwise, the server potentially
928                  * responds with a challenge ACK if implementing RFC5961.
929                  */
930                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
931                         struct ip_ct_tcp_state seen = {};
932
933                         ct->proto.tcp.last_flags =
934                         ct->proto.tcp.last_wscale = 0;
935                         tcp_options(skb, dataoff, th, &seen);
936                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
937                                 ct->proto.tcp.last_flags |=
938                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
939                                 ct->proto.tcp.last_wscale = seen.td_scale;
940                         }
941                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
942                                 ct->proto.tcp.last_flags |=
943                                         IP_CT_TCP_FLAG_SACK_PERM;
944                         }
945                         /* Mark the potential for RFC5961 challenge ACK,
946                          * this poses a special problem for LAST_ACK state
947                          * as ACK is interpreted as ACKing last FIN.
948                          */
949                         if (old_state == TCP_CONNTRACK_LAST_ACK)
950                                 ct->proto.tcp.last_flags |=
951                                         IP_CT_EXP_CHALLENGE_ACK;
952                 }
953                 spin_unlock_bh(&ct->lock);
954                 if (LOG_INVALID(net, IPPROTO_TCP))
955                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
956                                   "nf_ct_tcp: invalid packet ignored in "
957                                   "state %s ", tcp_conntrack_names[old_state]);
958                 return NF_ACCEPT;
959         case TCP_CONNTRACK_MAX:
960                 /* Special case for SYN proxy: when the SYN to the server or
961                  * the SYN/ACK from the server is lost, the client may transmit
962                  * a keep-alive packet while in SYN_SENT state. This needs to
963                  * be associated with the original conntrack entry in order to
964                  * generate a new SYN with the correct sequence number.
965                  */
966                 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
967                     index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
968                     ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
969                     ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
970                         pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
971                         spin_unlock_bh(&ct->lock);
972                         return NF_ACCEPT;
973                 }
974
975                 /* Invalid packet */
976                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
977                          dir, get_conntrack_index(th), old_state);
978                 spin_unlock_bh(&ct->lock);
979                 if (LOG_INVALID(net, IPPROTO_TCP))
980                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
981                                   "nf_ct_tcp: invalid state ");
982                 return -NF_ACCEPT;
983         case TCP_CONNTRACK_TIME_WAIT:
984                 /* RFC5961 compliance causes the stack to send "challenge-ACK"
985                  * e.g. in response to spurious SYNs.  Conntrack MUST
986                  * not believe this ACK is acking last FIN.
987                  */
988                 if (old_state == TCP_CONNTRACK_LAST_ACK &&
989                     index == TCP_ACK_SET &&
990                     ct->proto.tcp.last_dir != dir &&
991                     ct->proto.tcp.last_index == TCP_SYN_SET &&
992                     (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
993                         /* Detected RFC5961 challenge ACK */
994                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
995                         spin_unlock_bh(&ct->lock);
996                         if (LOG_INVALID(net, IPPROTO_TCP))
997                                 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
998                                       "nf_ct_tcp: challenge-ACK ignored ");
999                         return NF_ACCEPT; /* Don't change state */
1000                 }
1001                 break;
1002         case TCP_CONNTRACK_CLOSE:
                    /* A RST whose sequence number lies before the highest
                     * ACK recorded for the other direction is rejected.
                     */
1003                 if (index == TCP_RST_SET
1004                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
1005                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
1006                         /* Invalid RST  */
1007                         spin_unlock_bh(&ct->lock);
1008                         if (LOG_INVALID(net, IPPROTO_TCP))
1009                                 nf_log_packet(net, pf, 0, skb, NULL, NULL,
1010                                               NULL, "nf_ct_tcp: invalid RST ");
1011                         return -NF_ACCEPT;
1012                 }
1013                 if (index == TCP_RST_SET
1014                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1015                          && ct->proto.tcp.last_index == TCP_SYN_SET)
1016                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1017                             && ct->proto.tcp.last_index == TCP_ACK_SET))
1018                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1019                         /* RST sent to invalid SYN or ACK we had let through
1020                          * at a) and c) above:
1021                          *
1022                          * a) SYN was in window then
1023                          * c) we hold a half-open connection.
1024                          *
1025                          * Delete our connection entry.
1026                          * We skip window checking, because packet might ACK
1027                          * segments we ignored. */
1028                         goto in_window;
1029                 }
1030                 /* Just fall through */
1031         default:
1032                 /* Keep compilers happy. */
1033                 break;
1034         }
1035
            /* Everything that neither returned nor jumped to in_window above
             * has to pass the window check.
             */
1036         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1037                            skb, dataoff, th, pf)) {
1038                 spin_unlock_bh(&ct->lock);
1039                 return -NF_ACCEPT;
1040         }
1041      in_window:
1042         /* From now on we have got in-window packets */
1043         ct->proto.tcp.last_index = index;
1044         ct->proto.tcp.last_dir = dir;
1045
1046         pr_debug("tcp_conntracks: ");
1047         nf_ct_dump_tuple(tuple);
1048         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1049                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1050                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1051                  old_state, new_state);
1052
1053         ct->proto.tcp.state = new_state;
1054         if (old_state != new_state
1055             && new_state == TCP_CONNTRACK_FIN_WAIT)
1056                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1057
            /* Choose the timeout: the RETRANS or UNACK timeout replaces the
             * new state's own timeout when its condition holds and it is the
             * shorter of the two.
             */
1058         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1059             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1060                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1061         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1062                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1063                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1064                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1065         else
1066                 timeout = timeouts[new_state];
1067         spin_unlock_bh(&ct->lock);
1068
1069         if (new_state != old_state)
1070                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1071
1072         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1073                 /* If only reply is a RST, we can consider ourselves not to
1074                    have an established connection: this is a fairly common
1075                    problem case, so we can delete the conntrack
1076                    immediately.  --RR */
1077                 if (th->rst) {
1078                         nf_ct_kill_acct(ct, ctinfo, skb);
1079                         return NF_ACCEPT;
1080                 }
1081                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1082                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1083                  */
1084                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1085                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1086                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1087         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1088                    && (old_state == TCP_CONNTRACK_SYN_RECV
1089                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1090                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1091                 /* Set ASSURED if we see a valid ack in ESTABLISHED
1092                    after SYN_RECV or a valid answer for a picked up
1093                    connection. */
1094                 set_bit(IPS_ASSURED_BIT, &ct->status);
1095                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1096         }
1097         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1098
1099         return NF_ACCEPT;
1100 }
1101
1102 /* Called when a new connection for this protocol is found.
 *
 * Initialises ct->proto.tcp from the first packet seen. The initial state
 * is looked up in tcp_conntracks[0][index][TCP_CONNTRACK_NONE]:
 *   - a state >= TCP_CONNTRACK_MAX is invalid and false is returned,
 *   - TCP_CONNTRACK_SYN_SENT (a SYN): seen[0] is filled from the packet's
 *     sequence number, window and TCP options,
 *   - any other state is a mid-connection pickup. It is refused (false)
 *     when tn->tcp_loose is 0; otherwise seen[0] is seeded from the packet
 *     and both directions are flagged SACK_PERM | BE_LIBERAL.
 *
 * Returns true when the conntrack may be kept, false when it must be
 * dropped. The timeouts parameter is not used here. The TCP header must be
 * readable at dataoff (BUG_ON otherwise).
 */
1103 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1104                     unsigned int dataoff, unsigned int *timeouts)
1105 {
1106         enum tcp_conntrack new_state;
1107         const struct tcphdr *th;
1108         struct tcphdr _tcph;
1109         struct net *net = nf_ct_net(ct);
1110         struct nf_tcp_net *tn = tcp_pernet(net);
1111         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1112         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1113
1114         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1115         BUG_ON(th == NULL);
1116
1117         /* Don't need lock here: this conntrack not in circulation yet */
1118         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1119
1120         /* Invalid: delete conntrack */
1121         if (new_state >= TCP_CONNTRACK_MAX) {
1122                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1123                 return false;
1124         }
1125
1126         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1127                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1128                 /* SYN packet */
1129                 ct->proto.tcp.seen[0].td_end =
1130                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1131                                              dataoff, th);
1132                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
                    /* A zero window is stored as 1. */
1133                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1134                         ct->proto.tcp.seen[0].td_maxwin = 1;
1135                 ct->proto.tcp.seen[0].td_maxend =
1136                         ct->proto.tcp.seen[0].td_end;
1137
1138                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1139         } else if (tn->tcp_loose == 0) {
1140                 /* Don't try to pick up connections. */
1141                 return false;
1142         } else {
1143                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1144                 /*
1145                  * We are in the middle of a connection,
1146                  * its history is lost for us.
1147                  * Let's try to use the data from the packet.
1148                  */
1149                 ct->proto.tcp.seen[0].td_end =
1150                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1151                                              dataoff, th);
1152                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1153                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1154                         ct->proto.tcp.seen[0].td_maxwin = 1;
                    /* Unlike the SYN case, td_maxend here also includes the
                     * window.
                     */
1155                 ct->proto.tcp.seen[0].td_maxend =
1156                         ct->proto.tcp.seen[0].td_end +
1157                         ct->proto.tcp.seen[0].td_maxwin;
1158
1159                 /* We assume SACK and liberal window checking to handle
1160                  * window scaling */
1161                 ct->proto.tcp.seen[0].flags =
1162                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1163                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1164         }
1165
1166         /* tcp_packet will set them */
1167         ct->proto.tcp.last_index = TCP_NONE_SET;
1168
1169         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1170                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1171                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1172                  sender->td_scale,
1173                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1174                  receiver->td_scale);
1175         return true;
1176 }
1177
1178 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1179
1180 #include <linux/netfilter/nfnetlink.h>
1181 #include <linux/netfilter/nfnetlink_conntrack.h>
1182
/* Dump the TCP protocol info of ct into skb as a nested
 * CTA_PROTOINFO_TCP attribute containing the state, the window scale of
 * both directions and the flags of both directions.
 *
 * ct->lock is held while the fields are read and is released on both the
 * success and the failure path. Returns 0 on success and -1 when the nest
 * could not be started or an attribute could not be added. The nla
 * parameter is not used here.
 */
1183 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1184                          struct nf_conn *ct)
1185 {
1186         struct nlattr *nest_parms;
1187         struct nf_ct_tcp_flags tmp = {};
1188
1189         spin_lock_bh(&ct->lock);
1190         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1191         if (!nest_parms)
1192                 goto nla_put_failure;
1193
1194         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1195             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1196                        ct->proto.tcp.seen[0].td_scale) ||
1197             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1198                        ct->proto.tcp.seen[1].td_scale))
1199                 goto nla_put_failure;
1200
            /* Only tmp.flags is filled in; the rest of tmp stays zero from
             * its initialiser.
             */
1201         tmp.flags = ct->proto.tcp.seen[0].flags;
1202         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1203                     sizeof(struct nf_ct_tcp_flags), &tmp))
1204                 goto nla_put_failure;
1205
1206         tmp.flags = ct->proto.tcp.seen[1].flags;
1207         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1208                     sizeof(struct nf_ct_tcp_flags), &tmp))
1209                 goto nla_put_failure;
1210         spin_unlock_bh(&ct->lock);
1211
1212         nla_nest_end(skb, nest_parms);
1213
1214         return 0;
1215
1216 nla_put_failure:
1217         spin_unlock_bh(&ct->lock);
1218         return -1;
1219 }
1220
/* Netlink policy for the attributes nested inside CTA_PROTOINFO_TCP: the
 * state and the two window scales are u8 values, the two flags attributes
 * are given the length of a struct nf_ct_tcp_flags. Used by
 * nlattr_to_tcp() for parsing and by tcp_nlattr_size() for sizing.
 */
1221 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1222         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1223         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1224         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1225         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1226         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1227 };
1228
/* Apply the TCP protocol info carried in cda[CTA_PROTOINFO_TCP] to ct.
 *
 * The nested attribute is parsed against tcp_nla_policy. Under ct->lock:
 *   - the state is replaced if CTA_PROTOINFO_TCP_STATE is present,
 *   - for each direction's flags attribute, only the bits selected by
 *     attr->mask are changed; they take their value from attr->flags,
 *   - the window scales are updated only when both scale attributes are
 *     present and both directions have IP_CT_TCP_FLAG_WINDOW_SCALE set
 *     (evaluated after the flag updates above).
 *
 * Returns 0 on success or when the attribute is absent, the error from
 * nla_parse_nested() if parsing fails, and -EINVAL when the requested
 * state is >= TCP_CONNTRACK_MAX.
 */
1229 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1230 {
1231         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1232         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1233         int err;
1234
1235         /* updates could not contain anything about the private
1236          * protocol info, in that case skip the parsing */
1237         if (!pattr)
1238                 return 0;
1239
1240         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1241         if (err < 0)
1242                 return err;
1243
            /* Reject an out-of-range state before taking the lock or
             * modifying ct.
             */
1244         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1245             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1246                 return -EINVAL;
1247
1248         spin_lock_bh(&ct->lock);
1249         if (tb[CTA_PROTOINFO_TCP_STATE])
1250                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1251
1252         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1253                 struct nf_ct_tcp_flags *attr =
1254                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1255                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1256                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1257         }
1258
1259         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1260                 struct nf_ct_tcp_flags *attr =
1261                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1262                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1263                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1264         }
1265
1266         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1267             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1268             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1269             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1270                 ct->proto.tcp.seen[0].td_scale =
1271                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1272                 ct->proto.tcp.seen[1].td_scale =
1273                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1274         }
1275         spin_unlock_bh(&ct->lock);
1276
1277         return 0;
1278 }
1279
/* Return the netlink space accounted for the TCP protocol info: an
 * attribute with empty payload for the CTA_PROTOINFO_TCP nest itself plus
 * nla_policy_len() over tcp_nla_policy (presumably the space needed for
 * the nested attributes - confirm against nla_policy_len()).
 */
1280 static int tcp_nlattr_size(void)
1281 {
1282         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1283                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1284 }
1285
/* Return nla_policy_len() over nf_ct_port_nla_policy, which is defined
 * outside this file (presumably the space needed for the port attributes
 * of a tuple - confirm against its definition).
 */
1286 static int tcp_nlattr_tuple_size(void)
1287 {
1288         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1289 }
1290 #endif
1291
1292 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1293
1294 #include <linux/netfilter/nfnetlink.h>
1295 #include <linux/netfilter/nfnetlink_cttimeout.h>
1296
/* Fill a timeout array from netlink attributes.
 *
 * data is used as an array of unsigned int indexed by TCP conntrack state.
 * It is first loaded with the defaults from tcp_pernet(net)->timeouts.
 * Every CTA_TIMEOUT_TCP_* attribute present in tb[] then overrides its
 * entry: the attribute is read as a big-endian 32-bit value, converted to
 * host order and multiplied by HZ (i.e. taken as seconds and stored in
 * jiffies). Always returns 0.
 */
1297 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1298                                      struct net *net, void *data)
1299 {
1300         unsigned int *timeouts = data;
1301         struct nf_tcp_net *tn = tcp_pernet(net);
1302         int i;
1303
1304         /* set default TCP timeouts. */
1305         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1306                 timeouts[i] = tn->timeouts[i];
1307
1308         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1309                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1310                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1311         }
1312         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1313                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1314                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1315         }
1316         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1317                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1318                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1319         }
1320         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1321                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1322                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1323         }
1324         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1325                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1326                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1327         }
1328         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1329                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1330                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1331         }
1332         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1333                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1334                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1335         }
1336         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1337                 timeouts[TCP_CONNTRACK_CLOSE] =
1338                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1339         }
1340         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1341                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1342                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1343         }
1344         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1345                 timeouts[TCP_CONNTRACK_RETRANS] =
1346                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1347         }
1348         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1349                 timeouts[TCP_CONNTRACK_UNACK] =
1350                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1351         }
1352         return 0;
1353 }
1354
/* Dump a timeout array into skb as CTA_TIMEOUT_TCP_* attributes.
 *
 * data is read as an array of unsigned int indexed by TCP conntrack state.
 * Each entry is divided by HZ (the inverse of the scaling done in
 * tcp_timeout_nlattr_to_obj()) and emitted as a big-endian 32-bit value.
 * Returns 0 on success, or -ENOSPC as soon as one attribute cannot be
 * added.
 */
1355 static int
1356 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1357 {
1358         const unsigned int *timeouts = data;
1359
1360         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1361                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1362             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1363                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1364             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1365                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1366             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1367                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1368             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1369                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1370             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1371                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1372             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1373                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1374             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1375                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1376             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1377                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1378             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1379                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1380             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1381                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1382                 goto nla_put_failure;
1383         return 0;
1384
1385 nla_put_failure:
1386         return -ENOSPC;
1387 }
1388
/* Netlink policy for the CTA_TIMEOUT_TCP_* attributes: every timeout is
 * declared as a 32-bit value (NLA_U32). tcp_timeout_nlattr_to_obj() reads
 * them with nla_get_be32(), i.e. as big-endian values.
 */
1389 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1390         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1391         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1392         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1393         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1394         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1395         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1396         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1397         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1398         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1399         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1400         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1401 };
1402 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1403
1404 #ifdef CONFIG_SYSCTL
1405 static struct ctl_table tcp_sysctl_table[] = {
1406         {
1407                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1408                 .maxlen         = sizeof(unsigned int),
1409                 .mode           = 0644,
1410                 .proc_handler   = proc_dointvec_jiffies,
1411         },
1412         {
1413                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1414                 .maxlen         = sizeof(unsigned int),
1415                 .mode           = 0644,
1416                 .proc_handler   = proc_dointvec_jiffies,
1417         },
1418         {
1419                 .procname       = "nf_conntrack_tcp_timeout_established",
1420                 .maxlen         = sizeof(unsigned int),
1421                 .mode           = 0644,
1422                 .proc_handler   = proc_dointvec_jiffies,
1423         },
1424         {
1425                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1426                 .maxlen         = sizeof(unsigned int),
1427                 .mode           = 0644,
1428                 .proc_handler   = proc_dointvec_jiffies,
1429         },
1430         {
1431                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1432                 .maxlen         = sizeof(unsigned int),
1433                 .mode           = 0644,
1434                 .proc_handler   = proc_dointvec_jiffies,
1435         },
1436         {
1437                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1438                 .maxlen         = sizeof(unsigned int),
1439                 .mode           = 0644,
1440                 .proc_handler   = proc_dointvec_jiffies,
1441         },
1442         {
1443                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1444                 .maxlen         = sizeof(unsigned int),
1445                 .mode           = 0644,
1446                 .proc_handler   = proc_dointvec_jiffies,
1447         },
1448         {
1449                 .procname       = "nf_conntrack_tcp_timeout_close",
1450                 .maxlen         = sizeof(unsigned int),
1451                 .mode           = 0644,
1452                 .proc_handler   = proc_dointvec_jiffies,
1453         },
1454         {
1455                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1456                 .maxlen         = sizeof(unsigned int),
1457                 .mode           = 0644,
1458                 .proc_handler   = proc_dointvec_jiffies,
1459         },
1460         {
1461                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1462                 .maxlen         = sizeof(unsigned int),
1463                 .mode           = 0644,
1464                 .proc_handler   = proc_dointvec_jiffies,
1465         },
1466         {
1467                 .procname       = "nf_conntrack_tcp_loose",
1468                 .maxlen         = sizeof(unsigned int),
1469                 .mode           = 0644,
1470                 .proc_handler   = proc_dointvec,
1471         },
1472         {
1473                 .procname       = "nf_conntrack_tcp_be_liberal",
1474                 .maxlen         = sizeof(unsigned int),
1475                 .mode           = 0644,
1476                 .proc_handler   = proc_dointvec,
1477         },
1478         {
1479                 .procname       = "nf_conntrack_tcp_max_retrans",
1480                 .maxlen         = sizeof(unsigned int),
1481                 .mode           = 0644,
1482                 .proc_handler   = proc_dointvec,
1483         },
1484         { }
1485 };
1486
1487 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1488 static struct ctl_table tcp_compat_sysctl_table[] = {
1489         {
1490                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1491                 .maxlen         = sizeof(unsigned int),
1492                 .mode           = 0644,
1493                 .proc_handler   = proc_dointvec_jiffies,
1494         },
1495         {
1496                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1497                 .maxlen         = sizeof(unsigned int),
1498                 .mode           = 0644,
1499                 .proc_handler   = proc_dointvec_jiffies,
1500         },
1501         {
1502                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1503                 .maxlen         = sizeof(unsigned int),
1504                 .mode           = 0644,
1505                 .proc_handler   = proc_dointvec_jiffies,
1506         },
1507         {
1508                 .procname       = "ip_conntrack_tcp_timeout_established",
1509                 .maxlen         = sizeof(unsigned int),
1510                 .mode           = 0644,
1511                 .proc_handler   = proc_dointvec_jiffies,
1512         },
1513         {
1514                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1515                 .maxlen         = sizeof(unsigned int),
1516                 .mode           = 0644,
1517                 .proc_handler   = proc_dointvec_jiffies,
1518         },
1519         {
1520                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1521                 .maxlen         = sizeof(unsigned int),
1522                 .mode           = 0644,
1523                 .proc_handler   = proc_dointvec_jiffies,
1524         },
1525         {
1526                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1527                 .maxlen         = sizeof(unsigned int),
1528                 .mode           = 0644,
1529                 .proc_handler   = proc_dointvec_jiffies,
1530         },
1531         {
1532                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1533                 .maxlen         = sizeof(unsigned int),
1534                 .mode           = 0644,
1535                 .proc_handler   = proc_dointvec_jiffies,
1536         },
1537         {
1538                 .procname       = "ip_conntrack_tcp_timeout_close",
1539                 .maxlen         = sizeof(unsigned int),
1540                 .mode           = 0644,
1541                 .proc_handler   = proc_dointvec_jiffies,
1542         },
1543         {
1544                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1545                 .maxlen         = sizeof(unsigned int),
1546                 .mode           = 0644,
1547                 .proc_handler   = proc_dointvec_jiffies,
1548         },
1549         {
1550                 .procname       = "ip_conntrack_tcp_loose",
1551                 .maxlen         = sizeof(unsigned int),
1552                 .mode           = 0644,
1553                 .proc_handler   = proc_dointvec,
1554         },
1555         {
1556                 .procname       = "ip_conntrack_tcp_be_liberal",
1557                 .maxlen         = sizeof(unsigned int),
1558                 .mode           = 0644,
1559                 .proc_handler   = proc_dointvec,
1560         },
1561         {
1562                 .procname       = "ip_conntrack_tcp_max_retrans",
1563                 .maxlen         = sizeof(unsigned int),
1564                 .mode           = 0644,
1565                 .proc_handler   = proc_dointvec,
1566         },
1567         { }
1568 };
1569 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1570 #endif /* CONFIG_SYSCTL */
1571
/*
 * Create the sysctl table for @pn from tcp_sysctl_table and point each
 * entry at the matching field of @tn.
 *
 * Does nothing when @pn already has a table or when CONFIG_SYSCTL is
 * not set.  Returns 0 on success, -ENOMEM if the copy cannot be
 * allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	/* Timeout slot behind each leading table entry, in table order. */
	static const unsigned int slot_state[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
		TCP_CONNTRACK_UNACK,
	};
	const unsigned int nr_timeouts =
		sizeof(slot_state) / sizeof(slot_state[0]);
	struct ctl_table *table;
	unsigned int slot;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	for (slot = 0; slot < nr_timeouts; slot++)
		table[slot].data = &tn->timeouts[slot_state[slot]];

	/* The three non-timeout knobs follow the timeout entries. */
	table[nr_timeouts].data = &tn->tcp_loose;
	table[nr_timeouts + 1].data = &tn->tcp_be_liberal;
	table[nr_timeouts + 2].data = &tn->tcp_max_retrans;

	pn->ctl_table = table;
#endif
	return 0;
}
1601
/*
 * Create the compat sysctl table for @pn from tcp_compat_sysctl_table
 * and point each entry at the matching field of @tn.
 *
 * Compiled to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are set.  Returns 0 on success,
 * -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	/* Timeout slot behind each leading table entry, in table order. */
	static const unsigned int slot_state[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_SENT2,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
	};
	const unsigned int nr_timeouts =
		sizeof(slot_state) / sizeof(slot_state[0]);
	struct ctl_table *table;
	unsigned int slot;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = table;
	if (!table)
		return -ENOMEM;

	for (slot = 0; slot < nr_timeouts; slot++)
		table[slot].data = &tn->timeouts[slot_state[slot]];

	/* The three non-timeout knobs follow the timeout entries. */
	table[nr_timeouts].data = &tn->tcp_loose;
	table[nr_timeouts + 1].data = &tn->tcp_be_liberal;
	table[nr_timeouts + 2].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1630
1631 static int tcp_init_net(struct net *net, u_int16_t proto)
1632 {
1633         int ret;
1634         struct nf_tcp_net *tn = tcp_pernet(net);
1635         struct nf_proto_net *pn = &tn->pn;
1636
1637         if (!pn->users) {
1638                 int i;
1639
1640                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1641                         tn->timeouts[i] = tcp_timeouts[i];
1642
1643                 tn->tcp_loose = nf_ct_tcp_loose;
1644                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1645                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1646         }
1647
1648         if (proto == AF_INET) {
1649                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1650                 if (ret < 0)
1651                         return ret;
1652
1653                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1654                 if (ret < 0)
1655                         nf_ct_kfree_compat_sysctl_table(pn);
1656         } else
1657                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1658
1659         return ret;
1660 }
1661
1662 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1663 {
1664         return &net->ct.nf_ct_proto.tcp.pn;
1665 }
1666
1667 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1668 {
1669         .l3proto                = PF_INET,
1670         .l4proto                = IPPROTO_TCP,
1671         .name                   = "tcp",
1672         .pkt_to_tuple           = tcp_pkt_to_tuple,
1673         .invert_tuple           = tcp_invert_tuple,
1674         .print_tuple            = tcp_print_tuple,
1675         .print_conntrack        = tcp_print_conntrack,
1676         .packet                 = tcp_packet,
1677         .get_timeouts           = tcp_get_timeouts,
1678         .new                    = tcp_new,
1679         .error                  = tcp_error,
1680 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1681         .to_nlattr              = tcp_to_nlattr,
1682         .nlattr_size            = tcp_nlattr_size,
1683         .from_nlattr            = nlattr_to_tcp,
1684         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1685         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1686         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1687         .nla_policy             = nf_ct_port_nla_policy,
1688 #endif
1689 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1690         .ctnl_timeout           = {
1691                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1692                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1693                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1694                 .obj_size       = sizeof(unsigned int) *
1695                                         TCP_CONNTRACK_TIMEOUT_MAX,
1696                 .nla_policy     = tcp_timeout_nla_policy,
1697         },
1698 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1699         .init_net               = tcp_init_net,
1700         .get_net_proto          = tcp_get_net_proto,
1701 };
1702 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1703
1704 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1705 {
1706         .l3proto                = PF_INET6,
1707         .l4proto                = IPPROTO_TCP,
1708         .name                   = "tcp",
1709         .pkt_to_tuple           = tcp_pkt_to_tuple,
1710         .invert_tuple           = tcp_invert_tuple,
1711         .print_tuple            = tcp_print_tuple,
1712         .print_conntrack        = tcp_print_conntrack,
1713         .packet                 = tcp_packet,
1714         .get_timeouts           = tcp_get_timeouts,
1715         .new                    = tcp_new,
1716         .error                  = tcp_error,
1717 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1718         .to_nlattr              = tcp_to_nlattr,
1719         .nlattr_size            = tcp_nlattr_size,
1720         .from_nlattr            = nlattr_to_tcp,
1721         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1722         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1723         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1724         .nla_policy             = nf_ct_port_nla_policy,
1725 #endif
1726 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1727         .ctnl_timeout           = {
1728                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1729                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1730                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1731                 .obj_size       = sizeof(unsigned int) *
1732                                         TCP_CONNTRACK_TIMEOUT_MAX,
1733                 .nla_policy     = tcp_timeout_nla_policy,
1734         },
1735 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1736         .init_net               = tcp_init_net,
1737         .get_net_proto          = tcp_get_net_proto,
1738 };
1739 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);