/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and
 *	Alexey Kuznetsov	:	Support the IPV6_V6ONLY socket option,
 *					which allows both IPv4 and IPv6 sockets
 *					to bind to a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}
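
/* Active open: resolve the destination, pick a source address and route,
 * initialise sequence numbers and send the SYN.  A v4-mapped destination
 * is handed over to tcp_v4_connect() with the af_ops switched to
 * ipv6_mapped.
 */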
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl6.flowlabel);
	if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;

		flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
		fl6_sock_release(flowlabel);
	}

	/* connect() to INADDR_ANY means loopback (BSD'ism). */
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/* TCP over IPv4 (v4-mapped destination) */
	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (likely(!tp->repair)) {
		tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
						 sk->sk_v6_daddr.s6_addr32,
						 inet->inet_sport,
						 inet->inet_dport);
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
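
/* Invoked when an ICMPv6 Packet Too Big queued against this socket can be
 * acted upon: refresh the cached path MTU and retransmit if the current
 * MSS no longer fits.
 */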
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
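
/* ICMPv6 error handler for TCP: look up the socket owning the offending
 * segment and react to redirects, Packet Too Big and hard errors.
 */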
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex);
	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq, fatal);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */
			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
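
/* Transmit a SYN-ACK for a request socket, routing it with the listener's
 * flow information and IPv6 options.
 */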
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}
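
/* RFC 2385 TCP MD5 signature support (CONFIG_TCP_MD5SIG). */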
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
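
/* Illustrative sketch (not part of this file): a userspace application
 * would typically hand a key to the parsing code above with something like
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	a->sin6_addr = peer_addr;	// in6_addr of the peer (assumed)
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * which reaches tcp_v6_parse_md5_keys() via tcp_setsockopt().
 */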
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if (!sk_listener->sk_bound_dev_if &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		atomic_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.obj_size = sizeof(struct tcp6_request_sock),
	.rtx_syn_ack = tcp_rtx_synack,
	.send_ack = tcp_v6_reqsk_send_ack,
	.destructor = tcp_v6_reqsk_destructor,
	.send_reset = tcp_v6_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
		     sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup = tcp_v6_md5_lookup,
	.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
	.init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq = cookie_v6_init_sequence,
#endif
	.route_req = tcp_v6_route_req,
	.init_seq = tcp_v6_init_seq,
	.init_ts_off = tcp_v6_init_ts_off,
	.send_synack = tcp_v6_send_synack,
};
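
/* Build and send a bare ACK or RST on the per-netns control socket,
 * echoing the addressing of the packet that triggered it; used when no
 * full socket context is available (resets and timewait ACKs).
 */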
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST.
	 * Underlying function will use this to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
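
/* Send a RST in response to an unacceptable segment.  For MD5-protected
 * peers the reset is only generated when a matching key is found and the
 * incoming segment carries a valid signature.
 */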
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	int oif;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/* The MD5 key of the active side is lost.
		 * Try to find the listening socket through the source port,
		 * and then find the md5 key through the listening socket.
		 * We do not lose security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key, and no RST is generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), tcp_v6_iif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	oif = sk ? sk->sk_bound_dev_if : 0;
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));

	inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 *
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
			0, 0);
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
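
/* Create the child socket once the handshake completes: clone the
 * listener, fill in IPv6 addressing, routing and options, and inherit any
 * MD5 key.  v4-mapped connections are delegated to tcp_v4_syn_recv_sock().
 */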
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/* v6 mapped */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);
		if (!newsk)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;

		newnp->mcast_oif = tcp_v6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
		if (np->repflow)
			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

		/* No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		 * worked with IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/* No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we do
	 * one more thing here: reattach optmem to the new socket.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	 * goes to IPv4 receive handler and is backlogged.
	 * From backlog it always goes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (tcp_filter(sk, skb))
		goto discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code, where we
	 * may make it not affecting IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, all the idea behind IPV6_PKTOPTIONS does not
	 * look very well thought out. For now we latch options,
	 * received in the last packet, enqueued by tcp.
	 * Feel free to propose better solution.	--ANK
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to tail of read queue, rather than out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
}
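
/* Main receive entry point for IPv6 TCP segments: validate header and
 * checksum, look up the owning socket (established, request or timewait)
 * and hand the skb to the appropriate state machine.
 */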
static int tcp_v6_rcv(struct sk_buff *skb)
{
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb),
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		tcp_v6_fill_cb(skb, hdr, th);
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
		}
		nsk = tcp_check_req(sk, skb, req, false);
		if (!nsk)
			goto discard_and_relse;
		if (nsk == sk) {
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		}
	}
	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			tcp_v6_restore_cb(skb);
		}
	}
		/* Fall through to ACK */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_restore_cb(skb);
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
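
/* Early demux: on the receive path, try to match an established socket
 * before routing so that its cached rx dst can be reused via
 * skb_dst_set_noref().
 */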
static void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
	.twsk_unique = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit = inet6_csk_xmit,
	.send_check = tcp_v6_send_check,
	.rebuild_header = inet6_sk_rebuild_header,
	.sk_rx_dst_set = inet6_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup = tcp_v6_md5_lookup,
	.calc_md5_hash = tcp_v6_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit = ip_queue_xmit,
	.send_check = tcp_v4_send_check,
	.rebuild_header = inet_sk_rebuild_header,
	.sk_rx_dst_set = inet_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct iphdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *	 sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
1742 const struct in6_addr *dest, *src;
1745 unsigned long timer_expires;
1746 const struct inet_sock *inet = inet_sk(sp);
1747 const struct tcp_sock *tp = tcp_sk(sp);
1748 const struct inet_connection_sock *icsk = inet_csk(sp);
1749 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1753 dest = &sp->sk_v6_daddr;
1754 src = &sp->sk_v6_rcv_saddr;
1755 destp = ntohs(inet->inet_dport);
1756 srcp = ntohs(inet->inet_sport);
1758 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1759 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1760 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1762 timer_expires = icsk->icsk_timeout;
1763 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1765 timer_expires = icsk->icsk_timeout;
1766 } else if (timer_pending(&sp->sk_timer)) {
1768 timer_expires = sp->sk_timer.expires;
1771 timer_expires = jiffies;
1774 state = sk_state_load(sp);
1775 if (state == TCP_LISTEN)
1776 rx_queue = sp->sk_ack_backlog;
1778 /* Because we don't lock the socket,
1779 * we might find a transient negative value.
1781 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1784 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1785 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1787 src->s6_addr32[0], src->s6_addr32[1],
1788 src->s6_addr32[2], src->s6_addr32[3], srcp,
1789 dest->s6_addr32[0], dest->s6_addr32[1],
1790 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1792 tp->write_seq - tp->snd_una,
1795 jiffies_delta_to_clock_t(timer_expires - jiffies),
1796 icsk->icsk_retransmits,
1797 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1798 icsk->icsk_probes_out,
1800 atomic_read(&sp->sk_refcnt), sp,
1801 jiffies_to_clock_t(icsk->icsk_rto),
1802 jiffies_to_clock_t(icsk->icsk_ack.ato),
1803 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1805 state == TCP_LISTEN ?
1806 fastopenq->max_qlen :
1807 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner = THIS_MODULE,
	.open = tcp_seq_open,
	.llseek = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.seq_fops = &tcp6_afinfo_seq_fops,
	.seq_ops = {
		.show = tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.owner = THIS_MODULE,
	.connect = tcp_v6_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.init = tcp_v6_init_sock,
	.destroy = tcp_v6_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.keepalive = tcp_set_keepalive,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.sendpage = tcp_sendpage,
	.backlog_rcv = tcp_v6_do_rcv,
	.release_cb = tcp_release_cb,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.leave_memory_pressure = tcp_leave_memory_pressure,
	.stream_memory_free = tcp_stream_memory_free,
	.sockets_allocated = &tcp_sockets_allocated,
	.memory_allocated = &tcp_memory_allocated,
	.memory_pressure = &tcp_memory_pressure,
	.orphan_count = &tcp_orphan_count,
	.sysctl_mem = sysctl_tcp_mem,
	.sysctl_wmem = sysctl_tcp_wmem,
	.sysctl_rmem = sysctl_tcp_rmem,
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp6_sock),
	.slab_flags = SLAB_TYPESAFE_BY_RCU,
	.twsk_prot = &tcp6_timewait_sock_ops,
	.rsk_prot = &tcp6_request_sock_ops,
	.h.hashinfo = &tcp_hashinfo,
	.no_autobind = true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_tcp_setsockopt,
	.compat_getsockopt = compat_tcp_getsockopt,
#endif
	.diag_destroy = tcp_abort,
};
static struct inet6_protocol tcpv6_protocol = {
	.early_demux = tcp_v6_early_demux,
	.early_demux_handler = tcp_v6_early_demux,
	.handler = tcp_v6_rcv,
	.err_handler = tcp_v6_err,
	.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type = SOCK_STREAM,
	.protocol = IPPROTO_TCP,
	.prot = &tcpv6_prot,
	.ops = &inet6_stream_ops,
	.flags = INET_PROTOSW_PERMANENT |
		 INET_PROTOSW_ICSK,
};
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
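
/* Module init: register the IPPROTO_TCP handler with inet6, then the
 * SOCK_STREAM protosw, then the per-netns control sockets; undone in
 * reverse order on failure or at exit.
 */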
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}