/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr,
				const struct in6_addr *daddr);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif
static void tcp_v6_hash(struct sock *sk)
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
		__inet6_hash(sk, NULL);

static __inline__ __sum16 tcp_v6_check(int len,
				       const struct in6_addr *saddr,
				       const struct in6_addr *daddr,
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
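/*
 * csum_ipv6_magic() folds the IPv6 pseudo-header (source and destination
 * addresses, payload length and the next-header value, here IPPROTO_TCP)
 * into the partial checksum passed in 'base', so the helper above yields
 * the final TCP checksum for an IPv6 segment.
 */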
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->source);
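/*
 * The initial sequence number is derived from the connection 4-tuple
 * (both IPv6 addresses plus both ports) via secure_tcpv6_sequence_number(),
 * which mixes in a per-boot secret so that ISNs are hard to predict
 * off-path.
 */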
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct dst_entry *dst;

	if (addr_len < SIN6_LEN_RFC2133)

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl6.flowlabel);
	if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;
		flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
		if (flowlabel == NULL)
		usin->sin6_addr = flowlabel->dst;
		fl6_sock_release(flowlabel);

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the
			 * indices must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)

			sk->sk_bound_dev_if = usin->sin6_scope_id;

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;

	np->daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;

		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = np->daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;

	final_p = fl6_update_dst(&fl6, np->opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);

		np->rcv_saddr = *saddr;

	/* set the source address */
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
		struct inet_peer *peer = rt6_get_peer(rt);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table when entering the
		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
		 * when trying a new connection.
		 */
		inet_peer_refcheck(peer);
		if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;

	icsk->icsk_ext_hdr_len = 0;
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);

	tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,

	err = tcp_connect(sk);

	tcp_set_state(sk, TCP_CLOSE);

	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
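/*
 * For reference, the user-space view of this path is an ordinary connect()
 * on an AF_INET6 TCP socket; roughly (illustrative sketch only, the address
 * and port are made up):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port   = htons(80) };
 *	inet_pton(AF_INET6, "2001:db8::1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * A v4-mapped destination (::ffff:a.b.c.d) is handed off to tcp_v4_connect()
 * above unless the socket is IPV6_V6ONLY.
 */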
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			  th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));

	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);

	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst;

		if (sock_owned_by_user(sk))
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

			struct inet_sock *inet = inet_sk(sk);

			/* BUGGG_FUTURE: Again, it is not clear how
			 * to handle the rthdr case. Ignore this complexity
			 * for now.
			 */
			memset(&fl6, 0, sizeof(fl6));
			fl6.flowi6_proto = IPPROTO_TCP;
			fl6.daddr = np->daddr;
			fl6.saddr = np->saddr;
			fl6.flowi6_oif = sk->sk_bound_dev_if;
			fl6.flowi6_mark = sk->sk_mark;
			fl6.fl6_dport = inet->inet_dport;
			fl6.fl6_sport = inet->inet_sport;
			security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
				sk->sk_err_soft = -PTR_ERR(dst);

		dst->ops->update_pmtu(dst, ntohl(info));

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
		if (sock_owned_by_user(sk))

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);

		inet_csk_reqsk_queue_drop(sk, req, prev);

	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			sk->sk_err_soft = err;

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_error_report(sk);
		sk->sk_err_soft = err;
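/*
 * In outline, tcp_v6_err() maps an incoming ICMPv6 error onto the socket
 * identified by the embedded TCP header: PKT_TOOBIG triggers a path-MTU
 * update and tcp_sync_mss(), errors against a pending request_sock drop
 * that request, and other errors are reported via sk_err or sk_err_soft
 * depending on whether the socket is currently owned by user context.
 */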
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp,
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)

	skb = tcp_make_synack(sk, dst, req, rvp);

		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

		fl6->daddr = treq->rmt_addr;
		skb_set_queue_mapping(skb, queue_mapping);
		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
		err = net_xmit_eval(err);

static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);

static void tcp_v6_reqsk_destructor(struct request_sock *req)
	kfree_skb(inet6_rsk(req)->pktopts);
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))

	if (copy_from_user(&cmd, optval, sizeof(cmd)))

	if (sin6->sin6_family != AF_INET6)

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
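/*
 * The matching user-space operation is setsockopt(TCP_MD5SIG) with a
 * struct tcp_md5sig; roughly (illustrative sketch, the peer address and
 * key are made up):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 8 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *	memcpy(md5.tcpm_key, "secret!!", 8);
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::2", &a->sin6_addr);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key; v4-mapped addresses are stored as
 * AF_INET keys so they match traffic received over the mapped path.
 */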
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
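/*
 * The block hashed above is the RFC 2460-style TCP pseudo-header for IPv6:
 * both addresses, the segment length and the protocol number. RFC 2385
 * requires this to be covered by the TCP-MD5 signature before the TCP
 * header, payload and key are mixed in by the callers below.
 */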
static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))

	tcp_put_md5sig_pool();

	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);
static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct request_sock *req,
			       const struct sk_buff *skb)
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);

		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;

	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))

	tcp_put_md5sig_pool();

	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);
static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
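/*
 * Summary of the policy above: segments with neither a configured key nor
 * an MD5 option pass through; a missing or unexpected option, or a digest
 * that does not match the one recomputed over the segment, causes the
 * packet to be dropped and the relevant MIB counter to be bumped.
 */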
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.obj_size	 =	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	 =	tcp_v6_rtx_synack,
	.send_ack	 =	tcp_v6_reqsk_send_ack,
	.destructor	 =	tcp_v6_reqsk_destructor,
	.send_reset	 =	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
};
#endif
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr, const struct in6_addr *daddr)
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
		th->check = tcp_v6_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2,

static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
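/*
 * With CHECKSUM_PARTIAL the device fills in the final checksum, so only the
 * pseudo-header sum is stored and csum_start/csum_offset tell the hardware
 * where to write the result; otherwise the full checksum over the header
 * and payload is computed in software here.
 */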
static int tcp_v6_gso_send_check(struct sk_buff *skb)
	const struct ipv6hdr *ipv6h;

	if (!pskb_may_pull(skb, sizeof(*th)))

	ipv6h = ipv6_hdr(skb);

	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);

static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		NAPI_GRO_CB(skb)->flush = 1;

	return tcp_gro_receive(head, skb);

static int tcp6_gro_complete(struct sk_buff *skb)
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

	return tcp_gro_complete(skb);
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
	const struct tcphdr *th = tcp_hdr(skb);
	struct sk_buff *buff;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;

		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
		tot_len += TCPOLEN_MD5SIG_ALIGNED;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);

#ifdef CONFIG_TCP_MD5SIG
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.flowi6_oif = inet6_iif(skb);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even when it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
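/*
 * tcp_v6_send_response() builds a bare TCP segment (RST or ACK, optionally
 * carrying timestamp and MD5 options) addressed back to the sender of
 * 'skb', routes it with the per-netns control socket and transmits it via
 * ip6_xmit(); it is the common helper behind tcp_v6_send_reset() and
 * tcp_v6_send_ack() below.
 */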
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	struct sock *sk1 = NULL;

	if (!ipv6_unicast_destination(skb))

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/* The key owned by the active side may be gone. Try to find
		 * the listening socket via the source port, and then look the
		 * MD5 key up through that listening socket. We do not lose
		 * any security here: the incoming packet is verified against
		 * the key that is found, and no RST is generated if the MD5
		 * hash does not match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, &ipv6h->daddr,
					    ntohs(th->source), inet6_iif(skb));

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
	key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;

		seq = ntohl(th->ack_seq);
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -

	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);

#ifdef CONFIG_TCP_MD5SIG
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key, u8 tclass)
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
			req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
					 &ipv6_hdr(skb)->saddr, th->source,
					 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

		if (nsk->sk_state != TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(nsk));

#ifdef CONFIG_SYN_COOKIES
		sk = cookie_v6_check(sk, skb);
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
	bool want_cookie = false;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)

		/* Secret recipe starts with IP addresses */
		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];

		/* plus variable length Initiator Cookie */
			*c++ ^= *hash_location++;

		want_cookie = false;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	treq->rmt_addr = ipv6_hdr(skb)->saddr;
	treq->loc_addr = ipv6_hdr(skb)->daddr;
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);

	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

		struct inet_peer *peer = NULL;

		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;

		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table when entering
		 * the TIME-WAIT state, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL &&
		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
		    ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;

		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 !tcp_peer_is_proven(req, dst)) {
			/* Without syncookies the last quarter of the
			 * backlog is reserved for destinations proven to be
			 * alive. It means that we continue to communicate
			 * with destinations that were already known at the
			 * moment of the synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;

		isn = tcp_v6_init_sequence(skb);

	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_release;

	if (tcp_v6_send_synack(sk, dst, &fl6, req,
			       (struct request_values *)&tmp_ext,
			       skb_get_queue_mapping(skb)) ||

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);

	return 0; /* don't send reset */
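/*
 * So, in short: a SYN arriving on a listener allocates a request_sock,
 * records the peer's addresses, options and ISN, answers with a SYN|ACK via
 * tcp_v6_send_synack(), and parks the request in the SYN queue; under
 * pressure the same path can instead encode the state into a SYN cookie and
 * keep no per-request memory at all.
 */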
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;

	if (skb->protocol == htons(ETH_P_IP)) {

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		newnp->rcv_saddr = newnp->saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;

		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->mcast_oif  = inet6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 tcp
		 * socket worked with the IPv6 icsk.icsk_af_ops.
		 * Sync the MSS now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

	treq = inet6_rsk(req);

	if (sk_acceptq_is_full(sk))

		dst = inet6_csk_route_req(sk, &fl6, req);

	newsk = tcp_create_openreq_child(sk, req, skb);

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newnp->daddr = treq->rmt_addr;
	newnp->saddr = treq->loc_addr;
	newnp->rcv_saddr = treq->loc_addr;
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		consume_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);

	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we
	 * also do one more thing here: reattach optmem to the new socket.
	 */
		newnp->opt = ipv6_dup_options(newsk, np->opt);

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
				   tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
			       AF_INET6, key->key, key->keylen, GFP_ATOMIC);

	if (__inet_inherit_port(sk, newsk) < 0) {

	__inet6_hash(newsk, NULL);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
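/*
 * tcp_v6_syn_recv_sock() is the third-ACK counterpart of the code above: it
 * clones the listener into a full socket, copies addresses, options and (if
 * configured) the MD5 key from the request, sets up the cached route and
 * MSS, and inherits the bound port; v4-mapped connections are diverted to
 * tcp_v4_syn_recv_sock() and then re-dressed with the mapped-address af_ops.
 */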
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: the socket is IPv6, an IPv4 packet arrives,
	 * goes to the IPv4 receive handler and is backlogged.
	 * From the backlog it always comes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))

	if (sk_filter(sk, skb))

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we
	 * can make this not affect IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, the whole idea behind IPV6_PKTOPTIONS
	 * does not look very well thought out. For now we latch
	 * the options received in the last packet enqueued
	 * by tcp. Feel free to propose a better solution.
	 */
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto ipv6_pktoptions;

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb))
				__kfree_skb(opt_skb);

	sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
	goto ipv6_pktoptions;

	tcp_v6_send_reset(sk, skb);
	__kfree_skb(opt_skb);

	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);

	/* You may ask, what is all this for?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to the tail of the read queue, not out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which the user wants to receive.
	 */
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxtclass)
			np->rcv_tclass = ipv6_tclass(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
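/*
 * The block above implements the IPV6_PKTOPTIONS latching described in the
 * comment earlier in this function: the last in-order segment's ancillary
 * data (incoming interface, hop limit, traffic class, IPv6 options) is kept
 * in np->pktoptions so that recvmsg() can hand it to the application.
 */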
static int tcp_v6_rcv(struct sk_buff *skb)
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)

	/*
	 * Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))

	if (th->doff < sizeof(struct tcphdr) / 4)
	if (!pskb_may_pull(skb, th->doff * 4))

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))

	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);

	if (sk->sk_state == TCP_TIME_WAIT)

	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	bh_lock_sock_nested(sk);

	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);

			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;

	return ret ? -1 : 0;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);

	tcp_v6_send_reset(NULL, skb);

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);

		/* Fall through to ACK */
		tcp_v6_timewait_ack(sk, skb);

	case TCP_TW_SUCCESS:;
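/*
 * The TIME-WAIT cases above follow tcp_timewait_state_process(): a SYN that
 * legitimately reopens the pair is redirected to a current listener
 * (TCP_TW_SYN), duplicate segments are acknowledged (TCP_TW_ACK), bogus
 * ones are reset (TCP_TW_RST), and everything else is silently dropped.
 */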
static struct inet_peer *tcp_v6_get_peer(struct sock *sk)
	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);

	/* If we don't have a valid cached route, or we're doing IP
	 * options which make the IPv6 header destination address
	 * different from our peer's, do not bother with this.
	 */
	if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr))

	return rt6_get_peer_create(rt);
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	 = sizeof(struct tcp6_timewait_sock),
	.twsk_unique	 = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	     = inet6_csk_xmit,
	.send_check	     = tcp_v6_send_check,
	.rebuild_header	     = inet6_sk_rebuild_header,
	.conn_request	     = tcp_v6_conn_request,
	.syn_recv_sock	     = tcp_v6_syn_recv_sock,
	.get_peer	     = tcp_v6_get_peer,
	.net_header_len	     = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	     = ipv6_setsockopt,
	.getsockopt	     = ipv6_getsockopt,
	.addr2sockaddr	     = inet6_csk_addr2sockaddr,
	.sockaddr_len	     = sizeof(struct sockaddr_in6),
	.bind_conflict	     = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt   = compat_ipv6_setsockopt,
	.compat_getsockopt   = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif
/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;

static void tcp_v6_destroy_sock(struct sock *sk)
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct sock *sk, struct request_sock *req, int i, int uid)
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
	const struct in6_addr *dest, *src;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct ipv6_pinfo *np = inet6_sk(sp);

	src = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_expires = sp->sk_timer.expires;
		timer_expires = jiffies;

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tp->write_seq - tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   icsk->icsk_probes_out,
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
	const struct in6_addr *dest, *src;
	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
static int tcp6_seq_show(struct seq_file *seq, void *v)
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= tcp_seq_open,
	.llseek		= seq_lseek,
	.release	= seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.show		= tcp6_seq_show,
};

int __net_init tcp6_proc_init(struct net *net)
	return tcp_proc_register(net, &tcp6_seq_afinfo);

void tcp6_proc_exit(struct net *net)
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
struct proto tcpv6_prot = {
	.owner			= THIS_MODULE,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.gso_send_check	= tcp_v6_gso_send_check,
	.gso_segment	= tcp_tso_segment,
	.gro_receive	= tcp6_gro_receive,
	.gro_complete	= tcp6_gro_complete,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |

static int __net_init tcpv6_net_init(struct net *net)
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);

static void __net_exit tcpv6_net_exit(struct net *net)
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);

	inet6_unregister_protosw(&tcpv6_protosw);

void tcpv6_exit(void)
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
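/*
 * Teardown mirrors tcpv6_init() in reverse order: the per-netns control
 * sockets go first, then the protosw entry, and finally the inet6 protocol
 * handler, so no new segments can reach tcp_v6_rcv() once the supporting
 * state is gone.
 */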