2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58 #include <net/l3mdev.h>
59 #include <net/lwtunnel.h>
/*
 * Final transmit step of the IPv6 output path: loop multicast back to
 * local listeners when required, give a lightweight tunnel a chance to
 * take over transmission, then resolve the nexthop neighbour and hand
 * the skb to the link layer.
 * NOTE(review): this listing appears to be a partial extraction — some
 * original lines (braces, declarations, return paths) are not visible.
 */
61 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
63 struct dst_entry *dst = skb_dst(skb);
64 struct net_device *dev = dst->dev;
65 struct neighbour *neigh;
66 struct in6_addr *nexthop;
69 skb->protocol = htons(ETH_P_IPV6);
/* Multicast: possibly deliver a looped-back copy before transmitting. */
72 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
73 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
75 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
76 ((mroute6_socket(net, skb) &&
77 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
78 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
79 &ipv6_hdr(skb)->saddr))) {
80 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
82 /* Do not check for IFF_ALLMULTI; multicast routing
83 is not supported in any case.
/* Send the clone through POST_ROUTING for local loopback delivery. */
86 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
87 net, sk, newskb, NULL, newskb->dev,
/* A multicast packet with hop_limit 0 is counted as a discard. */
90 if (ipv6_hdr(skb)->hop_limit == 0) {
91 IP6_INC_STATS(net, idev,
92 IPSTATS_MIB_OUTDISCARDS);
98 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
/* Node-local scoped multicast must never leave a non-loopback device. */
100 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
101 IPV6_ADDR_SCOPE_NODELOCAL &&
102 !(dev->flags & IFF_LOOPBACK)) {
/* A lightweight tunnel may transmit the packet itself. */
108 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
109 int res = lwtunnel_xmit(skb);
111 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
/* Resolve (or create) the neighbour entry for the route's nexthop. */
116 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
117 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
118 if (unlikely(!neigh))
119 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
120 if (!IS_ERR(neigh)) {
121 ret = dst_neigh_output(dst, neigh, skb);
122 rcu_read_unlock_bh();
125 rcu_read_unlock_bh();
/* Neighbour creation failed: account as "no route" and drop. */
127 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
/*
 * Choose between direct transmit and fragmentation: fragment when the
 * packet exceeds the dst MTU (and is not GSO), when the dst requires
 * fragmentation on every packet (dst_allfrag), or when conntrack defrag
 * recorded a smaller frag_max_size on input.
 */
132 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
134 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
135 dst_allfrag(skb_dst(skb)) ||
136 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
137 return ip6_fragment(net, sk, skb, ip6_finish_output2);
139 return ip6_finish_output2(net, sk, skb);
/*
 * Output entry point for locally generated packets.  Discards the skb
 * if IPv6 is administratively disabled on the egress device; otherwise
 * runs the NF_INET_POST_ROUTING hook (skipped for rerouted skbs) on the
 * way to ip6_finish_output().
 */
142 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
144 struct net_device *dev = skb_dst(skb)->dev;
145 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
147 if (unlikely(idev->cnf.disable_ipv6)) {
148 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
153 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
154 net, sk, skb, NULL, dev,
156 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
160 * xmit an sk_buff (used by TCP, SCTP and DCCP)
161 * Note : socket lock is not held for SYNACK packets, but might be modified
162 * by calls to skb_set_owner_w() and ipv6_local_error(),
163 * which are using proper atomic operations or spinlocks.
/*
 * Build extension headers and the IPv6 header onto @skb and send it via
 * the NF_INET_LOCAL_OUT hook.  Reallocates headroom when the caller did
 * not reserve enough for exthdrs plus the link-layer header.  If the
 * packet exceeds the path MTU (and is neither ignore_df nor GSO),
 * reports EMSGSIZE to the socket and drops.
 * NOTE(review): declarations of hdr/mtu and some branches are not
 * visible in this extraction.
 */
165 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
166 struct ipv6_txoptions *opt, int tclass)
168 struct net *net = sock_net(sk);
169 const struct ipv6_pinfo *np = inet6_sk(sk);
170 struct in6_addr *first_hop = &fl6->daddr;
171 struct dst_entry *dst = skb_dst(skb);
173 u8 proto = fl6->flowi6_proto;
174 int seg_len = skb->len;
179 unsigned int head_room;
181 /* First: exthdrs may take lots of space (~8K for now)
182 MAX_HEADER is not enough.
184 head_room = opt->opt_nflen + opt->opt_flen;
185 seg_len += head_room;
186 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
/* Grow headroom if the caller's reservation is insufficient. */
188 if (skb_headroom(skb) < head_room) {
189 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
191 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
192 IPSTATS_MIB_OUTDISCARDS);
198 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
199 * it is safe to call in our context (socket lock not held)
201 skb_set_owner_w(skb, (struct sock *)sk);
/* Push fragmentable then non-fragmentable extension headers. */
204 ipv6_push_frag_opts(skb, opt, &proto);
206 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
209 skb_push(skb, sizeof(struct ipv6hdr));
210 skb_reset_network_header(skb);
214 * Fill in the IPv6 header
/* Hop limit: socket setting if set, else the route's default. */
217 hlimit = np->hop_limit;
219 hlimit = ip6_dst_hoplimit(dst);
221 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
222 np->autoflowlabel, fl6));
224 hdr->payload_len = htons(seg_len);
225 hdr->nexthdr = proto;
226 hdr->hop_limit = hlimit;
228 hdr->saddr = fl6->saddr;
229 hdr->daddr = *first_hop;
231 skb->protocol = htons(ETH_P_IPV6);
232 skb->priority = sk->sk_priority;
233 skb->mark = sk->sk_mark;
236 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
237 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
238 IPSTATS_MIB_OUT, skb->len);
240 /* if egress device is enslaved to an L3 master device pass the
241 * skb to its handler for processing
243 skb = l3mdev_ip6_out((struct sock *)sk, skb);
247 /* hooks should never assume socket lock is held.
248 * we promote our socket to non const
250 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
251 net, (struct sock *)sk, skb, NULL, dst->dev,
/* Oversize and not allowed to fragment locally: report EMSGSIZE. */
256 /* ipv6_local_error() does not require socket lock,
257 * we promote our socket to non const
259 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
261 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
265 EXPORT_SYMBOL(ip6_xmit);
/*
 * Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain whose selector matches @sel (respecting any device
 * binding).  Earlier matches receive clones; the last match consumes
 * the original skb.  Presumably the return value tells the caller
 * whether the packet was consumed — TODO confirm against ip6_forward().
 */
267 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
269 struct ip6_ra_chain *ra;
270 struct sock *last = NULL;
272 read_lock(&ip6_ra_lock);
273 for (ra = ip6_ra_chain; ra; ra = ra->next) {
274 struct sock *sk = ra->sk;
275 if (sk && ra->sel == sel &&
276 (!sk->sk_bound_dev_if ||
277 sk->sk_bound_dev_if == skb->dev->ifindex)) {
/* A previous match gets a clone so the original survives. */
279 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
281 rawv6_rcv(last, skb2);
/* Hand the original skb to the last matching socket. */
288 rawv6_rcv(last, skb);
289 read_unlock(&ip6_ra_lock);
292 read_unlock(&ip6_ra_lock);
/*
 * Classify a packet addressed to a proxied (pneigh) destination:
 * unicast neighbour-discovery ICMPv6 messages are passed up for local
 * handling, packets to link-local proxied addresses are rejected (the
 * router cannot forward them), and everything else may be forwarded.
 * NOTE(review): the exact return codes are not visible here — the
 * caller treats >0 as "input locally" and <0 as "discard".
 */
296 static int ip6_forward_proxy_check(struct sk_buff *skb)
298 struct ipv6hdr *hdr = ipv6_hdr(skb);
299 u8 nexthdr = hdr->nexthdr;
/* Skip extension headers to locate the upper-layer protocol. */
303 if (ipv6_ext_hdr(nexthdr)) {
304 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
308 offset = sizeof(struct ipv6hdr);
310 if (nexthdr == IPPROTO_ICMPV6) {
311 struct icmp6hdr *icmp6;
/* Make sure at least the ICMPv6 type octet is in the linear area. */
313 if (!pskb_may_pull(skb, (skb_network_header(skb) +
314 offset + 1 - skb->data)))
317 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
319 switch (icmp6->icmp6_type) {
320 case NDISC_ROUTER_SOLICITATION:
321 case NDISC_ROUTER_ADVERTISEMENT:
322 case NDISC_NEIGHBOUR_SOLICITATION:
323 case NDISC_NEIGHBOUR_ADVERTISEMENT:
325 /* For reaction involving unicast neighbor discovery
326 * message destined to the proxied address, pass it to
336 * The proxying router can't forward traffic sent to a link-local
337 * address, so signal the sender and discard the packet. This
338 * behavior is clarified by the MIPv6 specification.
340 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
341 dst_link_failure(skb);
/* Forwarding tail: hand the skb to the dst's output path. */
348 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
351 return dst_output(net, sk, skb);
/*
 * MTU to use when forwarding via @dst: a route-locked RTAX_MTU metric
 * wins; otherwise fall back to the egress device's IPv6 MTU (cnf.mtu6).
 */
354 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
357 struct inet6_dev *idev;
359 if (dst_metric_locked(dst, RTAX_MTU)) {
360 mtu = dst_metric_raw(dst, RTAX_MTU);
367 idev = __in6_dev_get(dst->dev);
369 mtu = idev->cnf.mtu6;
/*
 * Whether @skb is too large to forward at @mtu.  Honours the
 * frag_max_size recorded by conntrack defrag, and lets GSO packets
 * through when segmentation at @mtu is possible.
 */
375 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
380 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
381 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
387 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
/*
 * Forward an IPv6 packet: validate forwarding policy and packet type,
 * deliver Router Alert packets to interested sockets, enforce the hop
 * limit, handle NDISC proxying, optionally emit a redirect, perform
 * source-address sanity checks, enforce the forward-path MTU, then
 * decrement hop_limit and send via the NF_INET_FORWARD hook.
 * NOTE(review): several drop/error paths and labels are not visible in
 * this extraction.
 */
393 int ip6_forward(struct sk_buff *skb)
395 struct dst_entry *dst = skb_dst(skb);
396 struct ipv6hdr *hdr = ipv6_hdr(skb);
397 struct inet6_skb_parm *opt = IP6CB(skb);
398 struct net *net = dev_net(dst->dev);
/* Forwarding disabled, non-host packets, socket-owned or LRO skbs
 * are not forwarded. */
401 if (net->ipv6.devconf_all->forwarding == 0)
404 if (skb->pkt_type != PACKET_HOST)
407 if (unlikely(skb->sk))
410 if (skb_warn_if_lro(skb))
413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
414 __IP6_INC_STATS(net, ip6_dst_idev(dst),
415 IPSTATS_MIB_INDISCARDS);
419 skb_forward_csum(skb);
422 * We DO NOT make any processing on
423 * RA packets, pushing them to user level AS IS
424 * without any WARRANTY that application will be able
425 * to interpret them. The reason is that we
426 * cannot make anything clever here.
428 * We are not end-node, so that if packet contains
429 * AH/ESP, we cannot make anything.
430 * Defragmentation also would be mistake, RA packets
431 * cannot be fragmented, because there is no warranty
432 * that different fragments will go along one path. --ANK
434 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
435 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
440 * check and decrement ttl
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
446 __IP6_INC_STATS(net, ip6_dst_idev(dst),
447 IPSTATS_MIB_INHDRERRORS);
453 /* XXX: idev->cnf.proxy_ndp? */
454 if (net->ipv6.devconf_all->proxy_ndp &&
455 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
456 int proxied = ip6_forward_proxy_check(skb);
458 return ip6_input(skb);
459 else if (proxied < 0) {
460 __IP6_INC_STATS(net, ip6_dst_idev(dst),
461 IPSTATS_MIB_INDISCARDS);
466 if (!xfrm6_route_forward(skb)) {
467 __IP6_INC_STATS(net, ip6_dst_idev(dst),
468 IPSTATS_MIB_INDISCARDS);
473 /* IPv6 specs say nothing about it, but it is clear that we cannot
474 send redirects to source routed frames.
475 We don't send redirects to frames decapsulated from IPsec.
477 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
478 struct in6_addr *target = NULL;
479 struct inet_peer *peer;
483 * incoming and outgoing devices are the same
487 rt = (struct rt6_info *) dst;
488 if (rt->rt6i_flags & RTF_GATEWAY)
489 target = &rt->rt6i_gateway;
491 target = &hdr->daddr;
493 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
495 /* Limit redirects both by destination (here)
496 and by source (inside ndisc_send_redirect)
498 if (inet_peer_xrlim_allow(peer, 1*HZ))
499 ndisc_send_redirect(skb, target);
503 int addrtype = ipv6_addr_type(&hdr->saddr);
505 /* This check is security critical. */
506 if (addrtype == IPV6_ADDR_ANY ||
507 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
509 if (addrtype & IPV6_ADDR_LINKLOCAL) {
510 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
511 ICMPV6_NOT_NEIGHBOUR, 0);
/* Enforce forward-path MTU; too-big packets trigger PKT_TOOBIG. */
516 mtu = ip6_dst_mtu_forward(dst);
517 if (mtu < IPV6_MIN_MTU)
520 if (ip6_pkt_too_big(skb, mtu)) {
521 /* Again, force OUTPUT device used as source address */
523 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
524 __IP6_INC_STATS(net, ip6_dst_idev(dst),
525 IPSTATS_MIB_INTOOBIGERRORS);
526 __IP6_INC_STATS(net, ip6_dst_idev(dst),
527 IPSTATS_MIB_FRAGFAILS);
/* Ensure a private, writable header before mangling hop_limit. */
532 if (skb_cow(skb, dst->dev->hard_header_len)) {
533 __IP6_INC_STATS(net, ip6_dst_idev(dst),
534 IPSTATS_MIB_OUTDISCARDS);
540 /* Mangling hops number delayed to point after skb COW */
544 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
545 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
546 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
547 net, NULL, skb, skb->dev, dst->dev,
551 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
/*
 * Copy per-packet metadata (type, priority, protocol, dst reference,
 * mark, tc index, secmark) from @from to a freshly built fragment @to.
 */
557 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
559 to->pkt_type = from->pkt_type;
560 to->priority = from->priority;
561 to->protocol = from->protocol;
/* The fragment holds its own reference on the shared dst. */
563 skb_dst_set(to, dst_clone(skb_dst(from)));
565 to->mark = from->mark;
567 #ifdef CONFIG_NET_SCHED
568 to->tc_index = from->tc_index;
571 skb_copy_secmark(to, from);
/*
 * Fragment @skb to fit the path MTU and emit each fragment through
 * @output.  Fast path: when the skb already carries a well-shaped
 * frag_list, fragment headers are inserted in place and the existing
 * chain is sent.  Slow path: fresh skbs are allocated and payload is
 * copied into them.  On failure, FRAGFAILS is accounted and (on the
 * too-big path) an ICMPV6_PKT_TOOBIG is sent back.
 * NOTE(review): this listing is a partial extraction — labels, loop
 * headers and several error branches are not visible.
 */
574 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
575 int (*output)(struct net *, struct sock *, struct sk_buff *))
577 struct sk_buff *frag;
578 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
579 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
580 inet6_sk(skb->sk) : NULL;
581 struct ipv6hdr *tmp_hdr;
583 unsigned int mtu, hlen, left, len;
586 int ptr, offset = 0, err = 0;
587 u8 *prevhdr, nexthdr = 0;
/* hlen = length of the unfragmentable part (up to first frag option). */
589 hlen = ip6_find_1stfragopt(skb, &prevhdr);
592 mtu = ip6_skb_dst_mtu(skb);
594 /* We must not fragment if the socket is set to force MTU discovery
595 * or if the skb it not generated by a local socket.
597 if (unlikely(!skb->ignore_df && skb->len > mtu))
600 if (IP6CB(skb)->frag_max_size) {
601 if (IP6CB(skb)->frag_max_size > mtu)
604 /* don't send fragments larger than what we received */
605 mtu = IP6CB(skb)->frag_max_size;
606 if (mtu < IPV6_MIN_MTU)
610 if (np && np->frag_size < mtu) {
/* mtu now means: payload bytes available per fragment. */
614 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
616 mtu -= hlen + sizeof(struct frag_hdr);
618 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
619 &ipv6_hdr(skb)->saddr);
/* Checksum must be resolved before the payload is split up. */
621 if (skb->ip_summed == CHECKSUM_PARTIAL &&
622 (err = skb_checksum_help(skb)))
625 hroom = LL_RESERVED_SPACE(rt->dst.dev);
/* Fast path: reuse an existing, correctly shaped frag_list. */
626 if (skb_has_frag_list(skb)) {
627 int first_len = skb_pagelen(skb);
628 struct sk_buff *frag2;
630 if (first_len - hlen > mtu ||
631 ((first_len - hlen) & 7) ||
633 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
636 skb_walk_frags(skb, frag) {
637 /* Correct geometry. */
638 if (frag->len > mtu ||
639 ((frag->len & 7) && frag->next) ||
640 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
641 goto slow_path_clean;
643 /* Partially cloned skb? */
644 if (skb_shared(frag))
645 goto slow_path_clean;
/* Transfer wmem accounting from head skb to each fragment. */
650 frag->destructor = sock_wfree;
652 skb->truesize -= frag->truesize;
/* Insert the fragment header into the first packet in place. */
659 *prevhdr = NEXTHDR_FRAGMENT;
660 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
662 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
663 IPSTATS_MIB_FRAGFAILS);
667 frag = skb_shinfo(skb)->frag_list;
668 skb_frag_list_init(skb);
670 __skb_pull(skb, hlen);
671 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
672 __skb_push(skb, hlen);
673 skb_reset_network_header(skb);
674 memcpy(skb_network_header(skb), tmp_hdr, hlen);
676 fh->nexthdr = nexthdr;
678 fh->frag_off = htons(IP6_MF);
679 fh->identification = frag_id;
681 first_len = skb_pagelen(skb);
682 skb->data_len = first_len - skb_headlen(skb);
683 skb->len = first_len;
684 ipv6_hdr(skb)->payload_len = htons(first_len -
685 sizeof(struct ipv6hdr));
690 /* Prepare header of the next frame,
691 * before previous one went down. */
693 frag->ip_summed = CHECKSUM_NONE;
694 skb_reset_transport_header(frag);
695 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
696 __skb_push(frag, hlen);
697 skb_reset_network_header(frag);
698 memcpy(skb_network_header(frag), tmp_hdr,
700 offset += skb->len - hlen - sizeof(struct frag_hdr);
701 fh->nexthdr = nexthdr;
703 fh->frag_off = htons(offset);
705 fh->frag_off |= htons(IP6_MF);
706 fh->identification = frag_id;
707 ipv6_hdr(frag)->payload_len =
709 sizeof(struct ipv6hdr));
710 ip6_copy_metadata(frag, skb);
713 err = output(net, sk, skb);
715 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
716 IPSTATS_MIB_FRAGCREATES);
729 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
730 IPSTATS_MIB_FRAGOKS);
735 kfree_skb_list(frag);
737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
738 IPSTATS_MIB_FRAGFAILS);
/* Undo the wmem accounting transfer before taking the slow path. */
743 skb_walk_frags(skb, frag2) {
747 frag2->destructor = NULL;
748 skb->truesize += frag2->truesize;
/* Slow path: allocate fresh skbs and copy payload into them. */
753 left = skb->len - hlen; /* Space per frame */
754 ptr = hlen; /* Where to start from */
757 * Fragment the datagram.
760 *prevhdr = NEXTHDR_FRAGMENT;
761 troom = rt->dst.dev->needed_tailroom;
764 * Keep copying data until we run out.
768 /* IF: it doesn't fit, use 'mtu' - the data space left */
771 /* IF: we are not sending up to and including the packet end
772 then align the next start on an eight byte boundary */
777 /* Allocate buffer */
778 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
779 hroom + troom, GFP_ATOMIC);
781 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
782 IPSTATS_MIB_FRAGFAILS);
788 * Set up data on packet
791 ip6_copy_metadata(frag, skb);
792 skb_reserve(frag, hroom);
793 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
794 skb_reset_network_header(frag);
795 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
796 frag->transport_header = (frag->network_header + hlen +
797 sizeof(struct frag_hdr));
800 * Charge the memory for the fragment to any owner
804 skb_set_owner_w(frag, skb->sk);
807 * Copy the packet header into the new buffer.
809 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
812 * Build fragment header.
814 fh->nexthdr = nexthdr;
816 fh->identification = frag_id;
819 * Copy a block of the IP datagram.
821 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
825 fh->frag_off = htons(offset);
827 fh->frag_off |= htons(IP6_MF);
828 ipv6_hdr(frag)->payload_len = htons(frag->len -
829 sizeof(struct ipv6hdr));
835 * Put this fragment into the sending queue.
837 err = output(net, sk, frag);
841 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
842 IPSTATS_MIB_FRAGCREATES);
844 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
845 IPSTATS_MIB_FRAGOKS);
/* Too-big / cannot-fragment error path. */
850 if (skb->sk && dst_allfrag(skb_dst(skb)))
851 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
853 skb->dev = skb_dst(skb)->dev;
854 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
858 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
859 IPSTATS_MIB_FRAGFAILS);
/*
 * Nonzero when @fl_addr matches neither the host-route key (/128 with
 * equal address) nor the cached address — i.e. the cached route cannot
 * be trusted for this flow.
 */
864 static inline int ip6_rt_check(const struct rt6key *rt_key,
865 const struct in6_addr *fl_addr,
866 const struct in6_addr *addr_cache)
868 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
869 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
/*
 * Validate a socket-cached @dst against @fl6.  Rejects non-IPv6 dsts
 * and routes whose destination/source keys or oif no longer match the
 * flow.  NOTE(review): the release/return statements are not visible in
 * this extraction — presumably a stale dst is released and NULL
 * returned, the valid one returned as-is.
 */
872 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
873 struct dst_entry *dst,
874 const struct flowi6 *fl6)
876 struct ipv6_pinfo *np = inet6_sk(sk);
882 if (dst->ops->family != AF_INET6) {
887 rt = (struct rt6_info *)dst;
888 /* Yes, checking route validity in not connected
889 * case is not very simple. Take into account,
890 * that we do not support routing by source, TOS,
891 * and MSG_DONTROUTE --ANK (980726)
893 * 1. ip6_rt_check(): If route was host route,
894 * check that cached destination is current.
895 * If it is network route, we still may
896 * check its validity using saved pointer
897 * to the last used address: daddr_cache.
898 * We do not want to save whole address now,
899 * (because main consumer of this service
900 * is tcp, which has not this problem),
901 * so that the last trick works only on connected
903 * 2. oif also should be the same.
905 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
906 #ifdef CONFIG_IPV6_SUBTREES
907 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
909 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
910 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
/*
 * Core route lookup: fills *@dst for @fl6, selecting a source address
 * when the flow has none (via l3mdev and ip6_route_get_saddr), and with
 * optimistic-DAD handling that can redirect the lookup to the default
 * router when the chosen source is still tentative.  Returns 0 or a
 * negative errno.  NOTE(review): several declarations and branches are
 * missing from this extraction.
 */
919 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
920 struct dst_entry **dst, struct flowi6 *fl6)
922 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/* Let an L3 master device pick the source address first. */
929 if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
930 (!*dst || !(*dst)->error)) {
931 err = l3mdev_get_saddr6(net, sk, fl6);
936 /* The correct way to handle this would be to do
937 * ip6_route_get_saddr, and then ip6_route_output; however,
938 * the route-specific preferred source forces the
939 * ip6_route_output call _before_ ip6_route_get_saddr.
941 * In source specific routing (no src=any default route),
942 * ip6_route_output will fail given src=any saddr, though, so
943 * that's why we try it again later.
945 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
947 bool had_dst = *dst != NULL;
950 *dst = ip6_route_output(net, sk, fl6);
951 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
952 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
953 sk ? inet6_sk(sk)->srcprefs : 0,
956 goto out_err_release;
958 /* If we had an erroneous initial result, pretend it
959 * never existed and let the SA-enabled version take
962 if (!had_dst && (*dst)->error) {
968 flags |= RT6_LOOKUP_F_IFACE;
972 *dst = ip6_route_output_flags(net, sk, fl6, flags);
976 goto out_err_release;
978 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
980 * Here if the dst entry we've looked up
981 * has a neighbour entry that is in the INCOMPLETE
982 * state and the src address from the flow is
983 * marked as OPTIMISTIC, we release the found
984 * dst entry and replace it instead with the
985 * dst entry of the nexthop router
987 rt = (struct rt6_info *) *dst;
989 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
990 rt6_nexthop(rt, &fl6->daddr));
991 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
992 rcu_read_unlock_bh();
995 struct inet6_ifaddr *ifp;
996 struct flowi6 fl_gw6;
999 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1002 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1008 * We need to get the dst entry for the
1009 * default router instead
1012 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1013 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1014 *dst = ip6_route_output(net, sk, &fl_gw6);
1015 err = (*dst)->error;
1017 goto out_err_release;
1028 if (err == -ENETUNREACH)
1029 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1034 * ip6_dst_lookup - perform route lookup on flow
1035 * @sk: socket which provides route info
1036 * @dst: pointer to dst_entry * for result
1037 * @fl6: flow to lookup
1039 * This function performs a route lookup on the given flow.
1041 * It returns zero on success, or a standard errno code on error.
/* Thin public wrapper around ip6_dst_lookup_tail(); see comment above. */
1043 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1047 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1049 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1052 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1053 * @sk: socket which provides route info
1054 * @fl6: flow to lookup
1055 * @final_dst: final destination address for ipsec lookup
1057 * This function performs a route lookup on the given flow.
1059 * It returns a valid dst pointer on success, or a pointer encoded
/*
 * Route lookup followed by an xfrm (IPsec) lookup on the result.
 * Rewrites fl6->daddr to @final_dst before the xfrm pass, and fills in
 * flowi6_oif from the L3 master device when unset.  Returns a dst or
 * an ERR_PTR-encoded errno.
 */
1062 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1063 const struct in6_addr *final_dst)
1065 struct dst_entry *dst = NULL;
1068 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1070 return ERR_PTR(err);
1072 fl6->daddr = *final_dst;
1073 if (!fl6->flowi6_oif)
1074 fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
1076 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1078 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1081 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1082 * @sk: socket which provides the dst cache and route info
1083 * @fl6: flow to lookup
1084 * @final_dst: final destination address for ipsec lookup
1086 * This function performs a route lookup on the given flow with the
1087 * possibility of using the cached route in the socket if it is valid.
1088 * It will take the socket dst lock when operating on the dst cache.
1089 * As a result, this function can only be used in process context.
1091 * It returns a valid dst pointer on success, or a pointer encoded
/*
 * Like ip6_dst_lookup_flow() but tries the socket's cached dst first
 * (validated by ip6_sk_dst_check()) and only falls back to a fresh
 * lookup when the cache is unusable.
 */
1094 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1095 const struct in6_addr *final_dst)
1097 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1099 dst = ip6_sk_dst_check(sk, dst, fl6);
1101 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1105 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
/*
 * UDP fragmentation offload path of ip6_append_data(): build (or extend)
 * a single large GSO skb on @queue and let the device segment it.  The
 * first call allocates the head skb and sets up header offsets; later
 * data is appended as page frags.  gso_size is the per-fragment payload,
 * rounded down to a multiple of 8 as required for IPv6 fragments.
 */
1107 static inline int ip6_ufo_append_data(struct sock *sk,
1108 struct sk_buff_head *queue,
1109 int getfrag(void *from, char *to, int offset, int len,
1110 int odd, struct sk_buff *skb),
1111 void *from, int length, int hh_len, int fragheaderlen,
1112 int exthdrlen, int transhdrlen, int mtu,
1113 unsigned int flags, const struct flowi6 *fl6)
1116 struct sk_buff *skb;
1119 /* There is support for UDP large send offload by network
1120 * device, so create one single skb packet containing complete
1123 skb = skb_peek_tail(queue);
1125 skb = sock_alloc_send_skb(sk,
1126 hh_len + fragheaderlen + transhdrlen + 20,
1127 (flags & MSG_DONTWAIT), &err);
1131 /* reserve space for Hardware header */
1132 skb_reserve(skb, hh_len);
1134 /* create space for UDP/IP header */
1135 skb_put(skb, fragheaderlen + transhdrlen);
1137 /* initialize network header pointer */
1138 skb_set_network_header(skb, exthdrlen);
1140 /* initialize protocol header pointer */
1141 skb->transport_header = skb->network_header + fragheaderlen;
1143 skb->protocol = htons(ETH_P_IPV6);
1146 __skb_queue_tail(queue, skb);
1147 } else if (skb_is_gso(skb)) {
1151 skb->ip_summed = CHECKSUM_PARTIAL;
1152 /* Specify the length of each IPv6 datagram fragment.
1153 * It has to be a multiple of 8.
1155 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1156 sizeof(struct frag_hdr)) & ~7;
1157 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1158 skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
/* Append the payload (minus the transport header) as page frags. */
1163 return skb_append_datato_frags(sk, skb, getfrag, from,
1164 (length - transhdrlen));
/* Duplicate an extension-option header; hdrlen is in 8-octet units
 * excluding the first 8 octets, hence (hdrlen + 1) * 8 total bytes. */
1167 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1170 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/* Duplicate a routing header; same (hdrlen + 1) * 8 size rule. */
1173 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1176 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/*
 * Recompute *mtu and *maxfraglen while appending data: for non-XFRM
 * tunnel dsts, the first fragment reserves the dst header_len while
 * later fragments treat that space as payload.  maxfraglen is the
 * largest fragment length that keeps the payload 8-byte aligned.
 */
1179 static void ip6_append_data_mtu(unsigned int *mtu,
1181 unsigned int fragheaderlen,
1182 struct sk_buff *skb,
1183 struct rt6_info *rt,
1184 unsigned int orig_mtu)
1186 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1188 /* first fragment, reserve header_len */
1189 *mtu = orig_mtu - rt->dst.header_len;
1193 * this fragment is not first, the headers
1194 * space is regarded as data space.
1198 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1199 + fragheaderlen - sizeof(struct frag_hdr);
/*
 * Initialise cork state for a corked send: deep-copy the tx options
 * (each sub-option via ip6_opt_dup/ip6_rthdr_dup), stash the route and
 * flow, and compute the fragment size from pmtudisc policy, the route
 * MTU and the socket's frag_size.  NOTE(review): the error-unwind paths
 * for failed sub-option copies are not visible in this extraction.
 */
1203 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1204 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1205 struct rt6_info *rt, struct flowi6 *fl6)
1207 struct ipv6_pinfo *np = inet6_sk(sk);
1209 struct ipv6_txoptions *opt = ipc6->opt;
/* Options must not already be corked. */
1215 if (WARN_ON(v6_cork->opt))
1218 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1219 if (unlikely(!v6_cork->opt))
1222 v6_cork->opt->tot_len = opt->tot_len;
1223 v6_cork->opt->opt_flen = opt->opt_flen;
1224 v6_cork->opt->opt_nflen = opt->opt_nflen;
/* Deep-copy each present sub-option; NULL source stays NULL. */
1226 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1228 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1231 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1233 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1236 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1238 if (opt->hopopt && !v6_cork->opt->hopopt)
1241 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1243 if (opt->srcrt && !v6_cork->opt->srcrt)
1246 /* need source address above miyazawa*/
1249 cork->base.dst = &rt->dst;
1250 cork->fl.u.ip6 = *fl6;
1251 v6_cork->hop_limit = ipc6->hlimit;
1252 v6_cork->tclass = ipc6->tclass;
/* Pick the MTU: device MTU under PMTUDISC_PROBE, route MTU otherwise;
 * XFRM tunnels use the outer dst, others the innermost path dst. */
1253 if (rt->dst.flags & DST_XFRM_TUNNEL)
1254 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1255 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1257 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1258 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1259 if (np->frag_size < mtu) {
1261 mtu = np->frag_size;
1263 cork->base.fragsize = mtu;
1264 if (dst_allfrag(rt->dst.path))
1265 cork->base.flags |= IPCORK_ALLFRAG;
1266 cork->base.length = 0;
1271 static int __ip6_append_data(struct sock *sk,
1273 struct sk_buff_head *queue,
1274 struct inet_cork *cork,
1275 struct inet6_cork *v6_cork,
1276 struct page_frag *pfrag,
1277 int getfrag(void *from, char *to, int offset,
1278 int len, int odd, struct sk_buff *skb),
1279 void *from, int length, int transhdrlen,
1280 unsigned int flags, struct ipcm6_cookie *ipc6,
1281 const struct sockcm_cookie *sockc)
1283 struct sk_buff *skb, *skb_prev = NULL;
1284 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1286 int dst_exthdrlen = 0;
1293 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1294 struct ipv6_txoptions *opt = v6_cork->opt;
1295 int csummode = CHECKSUM_NONE;
1296 unsigned int maxnonfragsize, headersize;
1298 skb = skb_peek_tail(queue);
1300 exthdrlen = opt ? opt->opt_flen : 0;
1301 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1304 mtu = cork->fragsize;
1307 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1309 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1310 (opt ? opt->opt_nflen : 0);
1311 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1312 sizeof(struct frag_hdr);
1314 headersize = sizeof(struct ipv6hdr) +
1315 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1316 (dst_allfrag(&rt->dst) ?
1317 sizeof(struct frag_hdr) : 0) +
1318 rt->rt6i_nfheader_len;
1320 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1321 (sk->sk_protocol == IPPROTO_UDP ||
1322 sk->sk_protocol == IPPROTO_RAW)) {
1323 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1324 sizeof(struct ipv6hdr));
1328 if (ip6_sk_ignore_df(sk))
1329 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1331 maxnonfragsize = mtu;
1333 if (cork->length + length > maxnonfragsize - headersize) {
1335 ipv6_local_error(sk, EMSGSIZE, fl6,
1337 sizeof(struct ipv6hdr));
1341 /* CHECKSUM_PARTIAL only with no extension headers and when
1342 * we are not going to fragment
1344 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1345 headersize == sizeof(struct ipv6hdr) &&
1346 length < mtu - headersize &&
1347 !(flags & MSG_MORE) &&
1348 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1349 csummode = CHECKSUM_PARTIAL;
1351 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1352 sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1353 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1354 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1355 tskey = sk->sk_tskey++;
1359 * Let's try using as much space as possible.
1360 * Use MTU if total length of the message fits into the MTU.
1361 * Otherwise, we need to reserve fragment header and
1362 * fragment alignment (= 8-15 octects, in total).
1364 * Note that we may need to "move" the data from the tail of
1365 * of the buffer to the new fragment when we split
1368 * FIXME: It may be fragmented into multiple chunks
1369 * at once if non-fragmentable extension headers
1374 cork->length += length;
1375 if (((length > mtu) ||
1376 (skb && skb_is_gso(skb))) &&
1377 (sk->sk_protocol == IPPROTO_UDP) &&
1378 (rt->dst.dev->features & NETIF_F_UFO) &&
1379 (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
1380 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1381 hh_len, fragheaderlen, exthdrlen,
1382 transhdrlen, mtu, flags, fl6);
1391 while (length > 0) {
1392 /* Check if the remaining data fits into current packet. */
1393 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1395 copy = maxfraglen - skb->len;
1399 unsigned int datalen;
1400 unsigned int fraglen;
1401 unsigned int fraggap;
1402 unsigned int alloclen;
1404 /* There's no room in the current skb */
1406 fraggap = skb->len - maxfraglen;
1409 /* update mtu and maxfraglen if necessary */
1410 if (!skb || !skb_prev)
1411 ip6_append_data_mtu(&mtu, &maxfraglen,
1412 fragheaderlen, skb, rt,
1418 * If remaining data exceeds the mtu,
1419 * we know we need more fragment(s).
1421 datalen = length + fraggap;
1423 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1424 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1425 if ((flags & MSG_MORE) &&
1426 !(rt->dst.dev->features&NETIF_F_SG))
1429 alloclen = datalen + fragheaderlen;
1431 alloclen += dst_exthdrlen;
1433 if (datalen != length + fraggap) {
1435 * this is not the last fragment, the trailer
1436 * space is regarded as data space.
1438 datalen += rt->dst.trailer_len;
1441 alloclen += rt->dst.trailer_len;
1442 fraglen = datalen + fragheaderlen;
1445 * We just reserve space for fragment header.
1446 * Note: this may be overallocation if the message
1447 * (without MSG_MORE) fits into the MTU.
1449 alloclen += sizeof(struct frag_hdr);
1452 skb = sock_alloc_send_skb(sk,
1454 (flags & MSG_DONTWAIT), &err);
1457 if (atomic_read(&sk->sk_wmem_alloc) <=
1459 skb = sock_wmalloc(sk,
1460 alloclen + hh_len, 1,
1468 * Fill in the control structures
1470 skb->protocol = htons(ETH_P_IPV6);
1471 skb->ip_summed = csummode;
1473 /* reserve for fragmentation and ipsec header */
1474 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1477 /* Only the initial fragment is time stamped */
1478 skb_shinfo(skb)->tx_flags = tx_flags;
1480 skb_shinfo(skb)->tskey = tskey;
1484 * Find where to start putting bytes
1486 data = skb_put(skb, fraglen);
1487 skb_set_network_header(skb, exthdrlen);
1488 data += fragheaderlen;
1489 skb->transport_header = (skb->network_header +
1492 skb->csum = skb_copy_and_csum_bits(
1493 skb_prev, maxfraglen,
1494 data + transhdrlen, fraggap, 0);
1495 skb_prev->csum = csum_sub(skb_prev->csum,
1498 pskb_trim_unique(skb_prev, maxfraglen);
1500 copy = datalen - transhdrlen - fraggap;
1506 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1513 length -= datalen - fraggap;
1519 * Put the packet on the pending queue
1521 __skb_queue_tail(queue, skb);
1528 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1532 if (getfrag(from, skb_put(skb, copy),
1533 offset, copy, off, skb) < 0) {
1534 __skb_trim(skb, off);
1539 int i = skb_shinfo(skb)->nr_frags;
1542 if (!sk_page_frag_refill(sk, pfrag))
1545 if (!skb_can_coalesce(skb, i, pfrag->page,
1548 if (i == MAX_SKB_FRAGS)
1551 __skb_fill_page_desc(skb, i, pfrag->page,
1553 skb_shinfo(skb)->nr_frags = ++i;
1554 get_page(pfrag->page);
1556 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1558 page_address(pfrag->page) + pfrag->offset,
1559 offset, copy, skb->len, skb) < 0)
1562 pfrag->offset += copy;
1563 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1565 skb->data_len += copy;
1566 skb->truesize += copy;
1567 atomic_add(copy, &sk->sk_wmem_alloc);
1578 cork->length -= length;
1579 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_append_data - queue user data on sk->sk_write_queue for a later
 * ip6_push_pending_frames() (corked-send front end).
 *
 * On the first call (write queue empty) the cork is initialised from
 * @ipc6/@rt/@fl6 via ip6_setup_cork(), and the fragmentable
 * extension-header length (opt_flen) is accounted in both @length and
 * @transhdrlen so it travels with the first chunk.  On follow-up
 * calls the flow saved in the cork is reused instead of @fl6.
 *
 * @getfrag: caller-supplied copy routine handed down to
 *           __ip6_append_data() to pull payload from @from.
 *
 * Returns 0 or a negative errno from cork setup / __ip6_append_data().
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	/* MSG_PROBE: nothing gets queued (early-out path elided here) */
	if (flags&MSG_PROBE)
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/* First chunk of a corked send: record route/flow/options */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
		/* Fragmentable ext headers count toward the first chunk */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		/* Subsequent chunk: reuse the flow saved in the cork */
		fl6 = &inet->cork.fl.u.ip6;
	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
EXPORT_SYMBOL_GPL(ip6_append_data);
/*
 * ip6_cork_release - tear down per-cork state once a corked send
 * completes or is aborted: free the cached tx options, drop the held
 * dst reference and forget the saved flow.
 */
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
	/* Each extension-header buffer is allocated separately from the
	 * option block, so free them individually before the block.
	 * (These frees sit under a v6_cork->opt check elided from view.) */
	kfree(v6_cork->opt->dst0opt);
	kfree(v6_cork->opt->dst1opt);
	kfree(v6_cork->opt->hopopt);
	kfree(v6_cork->opt->srcrt);
	kfree(v6_cork->opt);
	v6_cork->opt = NULL;
	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		/* ALLFRAG referred to the dst we just dropped */
		cork->base.flags &= ~IPCORK_ALLFRAG;
	/* Clear the flow so the next corked send starts from scratch */
	memset(&cork->fl, 0, sizeof(cork->fl));
/*
 * __ip6_make_skb - turn everything queued on @queue into one finished
 * IPv6 packet: fold trailing queue entries onto the head skb's
 * frag_list, push extension headers, fill in the IPv6 header from the
 * corked flow, charge OUT stats, and release the cork.
 *
 * Returns the assembled skb (ownership passes to the caller, who
 * typically hands it to ip6_send_skb()).
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	skb = __skb_dequeue(queue);
	/* Remaining queue entries are chained as frag_list of the head */
	tail_skb = &(skb_shinfo(skb)->frag_list);
	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		/* Head skb now accounts the chained fragment's bytes */
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* truesize was moved to the head; drop the old destructor */
		tmp_skb->destructor = NULL;
	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	/* ipv6_push_nfrag_opts() may rewrite the destination (routing
	 * header); keep a private copy to write into the header below. */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);
	/* Version / traffic class / flow label word first ... */
	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	/* ... then the per-packet fields saved in the cork and flow */
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		/* ICMPv6 gets an extra per-message-type counter */
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	/* Cork state is done with regardless of outcome */
	ip6_cork_release(cork, v6_cork);
/*
 * ip6_send_skb - transmit one fully built packet (e.g. from
 * __ip6_make_skb()) through ip6_local_out().
 *
 * Positive qdisc return codes are translated to errnos via
 * net_xmit_errno(); a remaining failure is counted as OUTDISCARDS
 * against the route's device.  Returns 0 or a negative errno.
 */
int ip6_send_skb(struct sk_buff *skb)
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	err = ip6_local_out(net, skb->sk, skb);
	/* >0: congestion-style code from the queueing layer, remap it */
	err = net_xmit_errno(err);
	IP6_INC_STATS(net, rt->rt6i_idev,
		      IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - finish the corked send on this socket:
 * collapse everything queued by ip6_append_data() into one packet
 * (ip6_finish_skb()) and hand it to ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
	struct sk_buff *skb;
	skb = ip6_finish_skb(sk);
	return ip6_send_skb(skb);
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
/*
 * __ip6_flush_pending_frames - abort a corked send on @queue: drop
 * every queued skb, counting each as an OUTDISCARD against its dst's
 * device, then release the cork state.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
	struct sk_buff *skb;
	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUTDISCARDS);
	ip6_cork_release(cork, v6_cork);
/*
 * ip6_flush_pending_frames - public wrapper: discard whatever
 * ip6_append_data() has pending on the socket's own write queue and
 * release the socket's cork.
 */
void ip6_flush_pending_frames(struct sock *sk)
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1769 struct sk_buff *ip6_make_skb(struct sock *sk,
1770 int getfrag(void *from, char *to, int offset,
1771 int len, int odd, struct sk_buff *skb),
1772 void *from, int length, int transhdrlen,
1773 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1774 struct rt6_info *rt, unsigned int flags,
1775 const struct sockcm_cookie *sockc)
1777 struct inet_cork_full cork;
1778 struct inet6_cork v6_cork;
1779 struct sk_buff_head queue;
1780 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1783 if (flags & MSG_PROBE)
1786 __skb_queue_head_init(&queue);
1788 cork.base.flags = 0;
1790 cork.base.opt = NULL;
1792 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1794 return ERR_PTR(err);
1796 if (ipc6->dontfrag < 0)
1797 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1799 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1800 ¤t->task_frag, getfrag, from,
1801 length + exthdrlen, transhdrlen + exthdrlen,
1802 flags, ipc6, sockc);
1804 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1805 return ERR_PTR(err);
1808 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);