2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/kconfig.h>
20 #include <linux/slab.h>
21 #include <asm/uaccess.h>
22 #include <linux/skbuff.h>
23 #include <linux/netdevice.h>
24 #include <linux/netdev_features.h>
26 #include <linux/tcp.h>
27 #include <linux/udp.h>
28 #include <linux/if_arp.h>
29 #include <linux/mroute.h>
30 #include <linux/if_vlan.h>
31 #include <linux/init.h>
32 #include <linux/in6.h>
33 #include <linux/inetdevice.h>
34 #include <linux/igmp.h>
35 #include <linux/netfilter_ipv4.h>
36 #include <linux/etherdevice.h>
37 #include <linux/if_ether.h>
42 #include <net/protocol.h>
43 #include <net/ip_tunnels.h>
45 #include <net/checksum.h>
46 #include <net/dsfield.h>
47 #include <net/inet_ecn.h>
49 #include <net/net_namespace.h>
50 #include <net/netns/generic.h>
51 #include <net/rtnetlink.h>
53 #include <net/dst_metadata.h>
55 #ifndef USE_UPSTREAM_TUNNEL
56 #if IS_ENABLED(CONFIG_IPV6)
58 #include <net/ip6_fib.h>
59 #include <net/ip6_route.h>
63 #include "vport-netdev.h"
65 static int gre_tap_net_id __read_mostly;
67 #define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
68 static int ip_gre_calc_hlen(__be16 o_flags)
72 if (o_flags & TUNNEL_CSUM)
74 if (o_flags & TUNNEL_KEY)
76 if (o_flags & TUNNEL_SEQ)
81 #define tnl_flags_to_gre_flags rpl_tnl_flags_to_gre_flags
82 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
86 if (tflags & TUNNEL_CSUM)
88 if (tflags & TUNNEL_ROUTING)
90 if (tflags & TUNNEL_KEY)
92 if (tflags & TUNNEL_SEQ)
94 if (tflags & TUNNEL_STRICT)
96 if (tflags & TUNNEL_REC)
98 if (tflags & TUNNEL_VERSION)
104 static __be64 key_to_tunnel_id(__be32 key)
107 return (__force __be64)((__force u32)key);
109 return (__force __be64)((__force u64)key << 32);
113 /* Returns the least-significant 32 bits of a __be64. */
114 static __be32 tunnel_id_to_key(__be64 x)
117 return (__force __be32)x;
119 return (__force __be32)((__force u64)x >> 32);
123 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
125 struct net *net = dev_net(skb->dev);
126 struct metadata_dst tun_dst;
127 struct ip_tunnel_net *itn;
128 const struct iphdr *iph;
129 struct ip_tunnel *tunnel;
131 if (tpi->proto != htons(ETH_P_TEB))
132 return PACKET_REJECT;
134 itn = net_generic(net, gre_tap_net_id);
137 tunnel = rcu_dereference(itn->collect_md_tun);
144 skb_pop_mac_header(skb);
145 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
146 tun_id = key_to_tunnel_id(tpi->key);
147 ovs_ip_tun_rx_dst(&tun_dst.u.tun_info, skb, flags, tun_id, 0);
149 skb_reset_network_header(skb);
150 err = IP_ECN_decapsulate(iph, skb);
153 ++tunnel->dev->stats.rx_frame_errors;
154 ++tunnel->dev->stats.rx_errors;
155 return PACKET_REJECT;
159 ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
162 return PACKET_REJECT;
165 static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
167 if (ipgre_rcv(skb, tpi) == PACKET_RCVD)
174 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
175 /* gre_handle_offloads() has different return type on older kernels. */
176 static void gre_nop_fix(struct sk_buff *skb) { }
178 static void gre_csum_fix(struct sk_buff *skb)
180 struct gre_base_hdr *greh;
182 int gre_offset = skb_transport_offset(skb);
184 greh = (struct gre_base_hdr *)skb_transport_header(skb);
185 options = ((__be32 *)greh + 1);
188 *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
189 skb->len - gre_offset, 0));
192 static bool is_gre_gso(struct sk_buff *skb)
194 return skb_is_gso(skb);
197 static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
199 int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
200 gso_fix_segment_t fix_segment;
203 fix_segment = gre_csum_fix;
205 fix_segment = gre_nop_fix;
207 return ovs_iptunnel_handle_offloads(skb, gre_csum, type, fix_segment);
211 static bool is_gre_gso(struct sk_buff *skb)
213 return skb_shinfo(skb)->gso_type &
214 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM);
217 static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
219 if (skb_is_gso(skb) && skb_is_encapsulated(skb))
222 #undef gre_handle_offloads
223 return gre_handle_offloads(skb, gre_csum);
227 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
228 __be16 proto, __be32 key, __be32 seq)
230 struct gre_base_hdr *greh;
232 skb_push(skb, hdr_len);
234 skb_reset_transport_header(skb);
235 greh = (struct gre_base_hdr *)skb->data;
236 greh->flags = tnl_flags_to_gre_flags(flags);
237 greh->protocol = proto;
239 if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
240 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
242 if (flags & TUNNEL_SEQ) {
246 if (flags & TUNNEL_KEY) {
250 if (flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
252 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
256 ovs_skb_set_inner_protocol(skb, proto);
259 static struct rtable *gre_get_rt(struct sk_buff *skb,
260 struct net_device *dev,
262 const struct ip_tunnel_key *key)
264 struct net *net = dev_net(dev);
266 memset(fl, 0, sizeof(*fl));
267 fl->daddr = key->u.ipv4.dst;
268 fl->saddr = key->u.ipv4.src;
269 fl->flowi4_tos = RT_TOS(key->tos);
270 fl->flowi4_mark = skb->mark;
271 fl->flowi4_proto = IPPROTO_GRE;
273 return ip_route_output_key(net, fl);
276 netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
278 struct net_device *dev = skb->dev;
279 struct ip_tunnel_info *tun_info;
280 const struct ip_tunnel_key *key;
288 tun_info = skb_tunnel_info(skb);
289 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
290 ip_tunnel_info_af(tun_info) != AF_INET))
293 key = &tun_info->key;
295 rt = gre_get_rt(skb, dev, &fl, key);
299 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
301 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
302 + tunnel_hlen + sizeof(struct iphdr)
303 + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
304 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
305 int head_delta = SKB_DATA_ALIGN(min_headroom -
308 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
314 skb = vlan_hwaccel_push_inside(skb);
315 if (unlikely(!skb)) {
320 /* Push Tunnel header. */
321 err = rpl_gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
325 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
326 build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
327 tunnel_id_to_key(tun_info->key.tun_id), 0);
329 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
330 err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
331 key->u.ipv4.dst, IPPROTO_GRE,
332 key->tos, key->ttl, df, false);
333 iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *)dev->tstats);
340 dev->stats.tx_dropped++;
343 EXPORT_SYMBOL(rpl_gre_fb_xmit);
345 #define GRE_FEATURES (NETIF_F_SG | \
351 static void __gre_tunnel_init(struct net_device *dev)
353 struct ip_tunnel *tunnel;
356 tunnel = netdev_priv(dev);
357 tunnel->parms.iph.protocol = IPPROTO_GRE;
358 tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
360 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
362 t_hlen = tunnel->hlen + sizeof(struct iphdr);
364 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
365 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
367 dev->features |= GRE_FEATURES;
368 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
369 dev->hw_features |= GRE_FEATURES;
372 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
373 /* TCP offload with GRE SEQ is not supported. */
374 dev->features |= NETIF_F_GSO_SOFTWARE;
375 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
376 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
378 /* Can use a lockless transmit, unless we generate
381 dev->features |= NETIF_F_LLTX;
385 /* Called with rcu_read_lock and BH disabled. */
386 static int gre_err(struct sk_buff *skb, u32 info,
387 const struct tnl_ptk_info *tpi)
389 return PACKET_REJECT;
392 static struct gre_cisco_protocol ipgre_protocol = {
394 .err_handler = gre_err,
398 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
406 if (data[IFLA_GRE_IFLAGS])
407 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
408 if (data[IFLA_GRE_OFLAGS])
409 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
410 if (flags & (GRE_VERSION|GRE_ROUTING))
416 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
420 if (tb[IFLA_ADDRESS]) {
421 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
423 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
424 return -EADDRNOTAVAIL;
430 if (data[IFLA_GRE_REMOTE]) {
431 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
437 return ipgre_tunnel_validate(tb, data);
440 static void ipgre_netlink_parms(struct net_device *dev,
441 struct nlattr *data[],
443 struct ip_tunnel_parm *parms)
445 memset(parms, 0, sizeof(*parms));
447 parms->iph.protocol = IPPROTO_GRE;
450 static int gre_tap_init(struct net_device *dev)
452 __gre_tunnel_init(dev);
453 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
455 return ip_tunnel_init(dev);
458 static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
460 /* Drop all packets coming from the networking stack. OVS-CB is
461 * not initialized for these packets.
465 dev->stats.tx_dropped++;
469 int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
471 struct ip_tunnel_info *info = skb_tunnel_info(skb);
475 if (ip_tunnel_info_af(info) != AF_INET)
478 rt = gre_get_rt(skb, dev, &fl4, &info->key);
483 info->key.u.ipv4.src = fl4.saddr;
486 EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);
488 static const struct net_device_ops gre_tap_netdev_ops = {
489 .ndo_init = gre_tap_init,
490 .ndo_uninit = ip_tunnel_uninit,
491 .ndo_start_xmit = gre_dev_xmit,
492 .ndo_set_mac_address = eth_mac_addr,
493 .ndo_validate_addr = eth_validate_addr,
494 .ndo_change_mtu = ip_tunnel_change_mtu,
495 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
496 .ndo_get_stats64 = ip_tunnel_get_stats64,
498 #ifdef HAVE_NDO_GET_IFLINK
499 .ndo_get_iflink = ip_tunnel_get_iflink,
501 #ifdef HAVE_NDO_FILL_METADATA_DST
502 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
506 static void ipgre_tap_setup(struct net_device *dev)
509 dev->netdev_ops = &gre_tap_netdev_ops;
510 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
511 ip_tunnel_setup(dev, gre_tap_net_id);
514 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
515 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
516 struct nlattr *tb[], struct nlattr *data[])
518 static int ipgre_newlink(struct net_device *dev,
519 struct nlattr *tb[], struct nlattr *data[])
522 struct ip_tunnel_parm p;
525 ipgre_netlink_parms(dev, data, tb, &p);
526 err = ip_tunnel_newlink(dev, tb, &p);
531 static size_t ipgre_get_size(const struct net_device *dev)
536 /* IFLA_GRE_IFLAGS */
538 /* IFLA_GRE_OFLAGS */
546 /* IFLA_GRE_REMOTE */
552 /* IFLA_GRE_PMTUDISC */
554 /* IFLA_GRE_ENCAP_TYPE */
556 /* IFLA_GRE_ENCAP_FLAGS */
558 /* IFLA_GRE_ENCAP_SPORT */
560 /* IFLA_GRE_ENCAP_DPORT */
562 /* IFLA_GRE_COLLECT_METADATA */
567 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
569 struct ip_tunnel *t = netdev_priv(dev);
570 struct ip_tunnel_parm *p = &t->parms;
572 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
573 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
574 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
575 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
576 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
577 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
578 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
579 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
580 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
581 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
582 !!(p->iph.frag_off & htons(IP_DF))))
583 goto nla_put_failure;
591 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
592 [IFLA_GRE_LINK] = { .type = NLA_U32 },
593 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
594 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
595 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
596 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
597 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
598 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
599 [IFLA_GRE_TTL] = { .type = NLA_U8 },
600 [IFLA_GRE_TOS] = { .type = NLA_U8 },
601 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
604 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
605 .kind = "ovs_gretap",
606 .maxtype = IFLA_GRE_MAX,
607 .policy = ipgre_policy,
608 .priv_size = sizeof(struct ip_tunnel),
609 .setup = ipgre_tap_setup,
610 .validate = ipgre_tap_validate,
611 .newlink = ipgre_newlink,
612 .dellink = ip_tunnel_dellink,
613 .get_size = ipgre_get_size,
614 .fill_info = ipgre_fill_info,
615 #ifdef HAVE_GET_LINK_NET
616 .get_link_net = ip_tunnel_get_link_net,
620 struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
623 struct nlattr *tb[IFLA_MAX + 1];
624 struct net_device *dev;
628 memset(&tb, 0, sizeof(tb));
630 dev = rtnl_create_link(net, (char *)name, name_assign_type,
635 t = netdev_priv(dev);
636 t->collect_md = true;
637 /* Configure flow based GRE device. */
638 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
639 err = ipgre_newlink(net, dev, tb, NULL);
641 err = ipgre_newlink(dev, tb, NULL);
646 /* openvswitch users expect packet sizes to be unrestricted,
647 * so set the largest MTU we can.
649 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
658 EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
660 static int __net_init ipgre_tap_init_net(struct net *net)
662 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
665 static void __net_exit ipgre_tap_exit_net(struct net *net)
667 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
669 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
672 static struct pernet_operations ipgre_tap_net_ops = {
673 .init = ipgre_tap_init_net,
674 .exit = ipgre_tap_exit_net,
675 .id = &gre_tap_net_id,
676 .size = sizeof(struct ip_tunnel_net),
679 int rpl_ipgre_init(void)
683 err = register_pernet_device(&ipgre_tap_net_ops);
687 err = gre_cisco_register(&ipgre_protocol);
689 pr_info("%s: can't add protocol\n", __func__);
690 goto add_proto_failed;
693 err = rtnl_link_register(&ipgre_tap_ops);
697 pr_info("GRE over IPv4 tunneling driver\n");
701 gre_cisco_unregister(&ipgre_protocol);
703 unregister_pernet_device(&ipgre_tap_net_ops);
708 void rpl_ipgre_fini(void)
710 rtnl_link_unregister(&ipgre_tap_ops);
711 gre_cisco_unregister(&ipgre_protocol);
712 unregister_pernet_device(&ipgre_tap_net_ops);