2 * Copyright (c) 2015 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18 #include <linux/version.h>
20 #include <linux/etherdevice.h>
23 #include <linux/net.h>
24 #include <linux/module.h>
25 #include <linux/rculist.h>
26 #include <linux/udp.h>
31 #include <net/net_namespace.h>
32 #include <net/netns/generic.h>
33 #include <net/route.h>
35 #include <net/udp_tunnel.h>
42 #include "vport-netdev.h"
/* Default UDP destination port: lisp_newlink() falls back to it when no
 * IFLA_LISP_PORT attribute is supplied.
 */
44 #define LISP_UDP_PORT 4341
/* Version string reported through ethtool by lisp_get_drvinfo(). */
45 #define LISP_NETDEV_VER "0.1"
/* Key handed to net_generic() to find this module's struct lisp_net. */
46 static int lisp_net_id;
48 /* Pseudo network device: private state of one LISP tunnel netdev, obtained
 * through netdev_priv() throughout this file.
 * NOTE(review): the code also uses ->sock and ->dst_port; the struct header
 * and those two member declarations are not visible in this excerpt.
 */
50 struct net *net; /* netns for packet i/o */
51 struct net_device *dev; /* netdev for lisp tunnel */
/* Link in the per-netns lisp_list (see list_add() in lisp_configure()). */
54 struct list_head next;
57 /* per-network namespace private data for this module */
/* List of lisp devices; entries are linked through lisp_dev.next. */
59 struct list_head lisp_list;
63 * LISP encapsulation header:
65 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66 * |N|L|E|V|I|flags| Nonce/Map-Version |
67 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
68 * | Instance ID/Locator Status Bits |
69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
74 * struct lisphdr - LISP header
75 * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
76 * @locator_status_bits_present: Flag indicating the presence of Locator Status
 * Bits.
78 * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
79 * @map_version_present: Flag indicating the use of mapping versioning.
80 * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
81 * @reserved_flags: 3 bits reserved for future flags.
82 * @nonce: 24 bit nonce value.
83 * @map_version: 24 bit mapping version.
84 * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
85 * is not set, 8 bits when it is.
86 * @instance_id: 24 bit Instance ID
/* Flag bits of the first header byte. The same members are declared twice,
 * in opposite order, for the two bitfield endiannesses.
 * NOTE(review): the nonce_present member used by lisp_build_header(), the
 * preprocessor lines separating the two layouts and the u1/u2 union wrappers
 * are not visible in this excerpt.
 */
89 #ifdef __LITTLE_ENDIAN_BITFIELD
90 __u8 reserved_flags:3;
91 __u8 instance_id_present:1;
92 __u8 map_version_present:1;
93 __u8 solicit_echo_nonce:1;
94 __u8 locator_status_bits_present:1;
98 __u8 locator_status_bits_present:1;
99 __u8 solicit_echo_nonce:1;
100 __u8 map_version_present:1;
101 __u8 instance_id_present:1;
102 __u8 reserved_flags:3;
/* Two declarations of locator_status_bits: a 32-bit one and an 8-bit one.
 * lisp_build_header() writes the member reached through u2.word2, presumably
 * the 8-bit one that sits next to instance_id -- confirm.
 */
109 __be32 locator_status_bits;
112 __u8 locator_status_bits;
/* Size of the UDP header plus the LISP header; this is the amount
 * lisp_rcv() pulls from a received packet.
 */
117 #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
/* Largest MTU accepted by lisp_change_mtu(): IP_MAX_MTU minus LISP_HLEN and
 * one IPv4 header.
 */
118 #define LISP_MAX_MTU (IP_MAX_MTU - LISP_HLEN - sizeof(struct iphdr))
120 static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb)
122 return (struct lisphdr *)(udp_hdr(skb) + 1);
125 /* Convert 64 bit tunnel ID to 24 bit Instance ID.
 *
 * @tun_id: tunnel ID, typed __be64.
 * @iid: output buffer; exactly three bytes, iid[0]..iid[2], are written.
 *
 * Two alternative bodies follow: one shifts by 16/8/0, the other by 40/48/56.
 * NOTE(review): the preprocessor conditional selecting between them is not
 * visible in this excerpt; presumably a host byte-order test -- confirm.
 */
126 static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
130 iid[0] = (__force __u8)(tun_id >> 16);
131 iid[1] = (__force __u8)(tun_id >> 8);
132 iid[2] = (__force __u8)tun_id;
134 iid[0] = (__force __u8)((__force u64)tun_id >> 40);
135 iid[1] = (__force __u8)((__force u64)tun_id >> 48);
136 iid[2] = (__force __u8)((__force u64)tun_id >> 56);
140 /* Convert 24 bit Instance ID to 64 bit tunnel ID.
 *
 * @iid: three bytes, iid[0]..iid[2]; they are only read.
 *
 * Inverse of tunnel_id_to_instance_id(): the same shift amounts (16/8/0 and
 * 40/48/56) are used to place the three bytes.
 * NOTE(review): the conditional selecting between the two return statements
 * is not visible in this excerpt. The first return lacks the
 * (__force __be64) cast that the second one uses.
 */
141 static __be64 instance_id_to_tunnel_id(__u8 *iid)
144 return (iid[0] << 16) | (iid[1] << 8) | iid[2];
146 return (__force __be64)(((__force u64)iid[0] << 40) |
147 ((__force u64)iid[1] << 48) |
148 ((__force u64)iid[2] << 56));
152 /* Compute source UDP port for outgoing packet.
153 * Currently we use the flow hash.
/* Pick the outer UDP source port for @skb, in host byte order.
 *
 * The hash starts as skb_get_hash(). The visible branches recompute it with
 * jhash2() over the inner header, starting at saddr and covering twice the
 * size of one address (presumably saddr followed by daddr -- confirm), for
 * IPv4 and IPv6; any other protocol only triggers a one-time warning.
 * The 32-bit hash is then scaled into the local port range obtained from
 * inet_get_local_port_range().
 * NOTE(review): the declarations of low, high, range and iph, and the
 * condition guarding the recomputation, are not visible in this excerpt.
 */
155 static u16 get_src_port(struct net *net, struct sk_buff *skb)
157 u32 hash = skb_get_hash(skb);
163 if (skb->protocol == htons(ETH_P_IP)) {
165 int size = (sizeof(iph->saddr) * 2) / sizeof(u32);
167 iph = (struct iphdr *) skb_network_header(skb);
168 hash = jhash2((const u32 *)&iph->saddr, size, 0);
169 } else if (skb->protocol == htons(ETH_P_IPV6)) {
170 struct ipv6hdr *ipv6hdr;
172 ipv6hdr = (struct ipv6hdr *) skb_network_header(skb);
173 hash = jhash2((const u32 *)&ipv6hdr->saddr,
174 (sizeof(struct in6_addr) * 2) / sizeof(u32), 0);
176 pr_warn_once("LISP inner protocol is not IP when "
177 "calculating hash.\n");
181 inet_get_local_port_range(net, &low, &high);
/* Number of ports in [low, high]. */
182 range = (high - low) + 1;
/* Multiply-and-shift maps the 32-bit hash onto [low, low + range). */
183 return (((u64) hash * range) >> 32) + low;
/* Prepend a LISP header to @skb and fill it from @tun_key.
 *
 * Uses __skb_push(), so the caller must already have made room for
 * sizeof(struct lisphdr) in front of the data (rpl_lisp_xmit() computes
 * min_headroom including LISP_HLEN). Only the instance-ID and
 * locator-status-bits flags are set; the nonce bytes are zeroed.
 */
186 static void lisp_build_header(struct sk_buff *skb,
187 const struct ip_tunnel_key *tun_key)
189 struct lisphdr *lisph;
191 lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr));
192 lisph->nonce_present = 0; /* We don't support echo nonce algorithm */
193 lisph->locator_status_bits_present = 1; /* Set LSB */
194 lisph->solicit_echo_nonce = 0; /* No echo noncing */
195 lisph->map_version_present = 0; /* No mapping versioning, nonce instead */
196 lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */
197 lisph->reserved_flags = 0; /* Reserved flags, set to 0 */
/* nonce_present is 0, so the three nonce bytes are simply cleared. */
199 lisph->u1.nonce[0] = 0;
200 lisph->u1.nonce[1] = 0;
201 lisph->u1.nonce[2] = 0;
/* The tunnel ID from the key becomes the 24 bit Instance ID. */
203 tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
204 lisph->u2.word2.locator_status_bits = 1;
207 /* Called with rcu_read_lock and BH disabled.
 *
 * encap_rcv callback installed by lisp_open(). Visible steps: look up the
 * tunnel netdev stored as the socket's user data, pull the UDP + LISP
 * headers, derive the tunnel key from the Instance ID, build the tunnel
 * metadata, choose the inner protocol from the IP version nibble, prepend a
 * synthetic Ethernet header and hand the packet to ovs_ip_tunnel_rcv().
 * NOTE(review): the declarations of key, protocol and ethh, the case labels,
 * all error/drop paths and the return statements are not visible in this
 * excerpt.
 */
208 static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
210 struct lisp_dev *lisp_dev;
211 struct net_device *dev;
212 struct lisphdr *lisph;
213 struct iphdr *inner_iph;
214 struct metadata_dst *tun_dst;
215 #ifndef USE_UPSTREAM_TUNNEL
216 struct metadata_dst temp;
/* The netdev was stored as sk_user_data by lisp_open(). */
222 dev = rcu_dereference_sk_user_data(sk);
226 lisp_dev = netdev_priv(dev);
/* Last argument is true when the socket netns and the device netns differ. */
227 if (iptunnel_pull_header(skb, LISP_HLEN, 0,
228 !net_eq(lisp_dev->net, dev_net(lisp_dev->dev))))
231 lisph = lisp_hdr(skb);
/* Only packets that carry an Instance ID pass this test. */
233 if (lisph->instance_id_present != 1)
236 key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
238 /* Save outer tunnel values */
239 #ifndef USE_UPSTREAM_TUNNEL
241 ovs_udp_tun_rx_dst(tun_dst, skb, AF_INET, TUNNEL_KEY, key, 0);
243 tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0);
245 /* Drop non-IP inner packets */
/* NOTE(review): no check is visible here that the first byte of the inner
 * header lies within the linear data before ->version is read -- confirm.
 */
246 inner_iph = (struct iphdr *)(lisph + 1);
247 switch (inner_iph->version) {
249 protocol = htons(ETH_P_IP);
252 protocol = htons(ETH_P_IPV6);
257 skb->protocol = protocol;
259 /* Add Ethernet header */
/* The LISP payload has no L2 header, so a zeroed one is pushed in front;
 * only the first byte of each MAC address (0x02) and h_proto are set.
 */
260 ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
261 memset(ethh, 0, ETH_HLEN);
262 ethh->h_dest[0] = 0x02;
263 ethh->h_source[0] = 0x02;
264 ethh->h_proto = protocol;
266 ovs_ip_tunnel_rcv(dev, skb, tun_dst);
/* Look up the IPv4 route for the outer packet described by @key.
 *
 * Fills the flow @fl from the tunnel key (addresses, TOS), the skb mark and
 * IPPROTO_UDP, then returns the result of ip_route_output_key() in the
 * netns of @dev. Callers read fl->saddr afterwards (see rpl_lisp_xmit() and
 * egress_ipv4_tun_info()).
 * NOTE(review): the parameter line declaring fl is not visible in this
 * excerpt; it is used below as a pointer to a flow with flowi4_* members.
 */
275 static struct rtable *lisp_get_rt(struct sk_buff *skb,
276 struct net_device *dev,
278 const struct ip_tunnel_key *key)
280 struct net *net = dev_net(dev);
283 memset(fl, 0, sizeof(*fl));
284 fl->daddr = key->u.ipv4.dst;
285 fl->saddr = key->u.ipv4.src;
286 fl->flowi4_tos = RT_TOS(key->tos);
287 fl->flowi4_mark = skb->mark;
288 fl->flowi4_proto = IPPROTO_UDP;
290 return ip_route_output_key(net, fl);
293 /* this is to handle the return type change in handle-offload
/* Give udp_tunnel_handle_offloads() a uniform sk_buff-pointer interface.
 *
 * When the #if condition holds, a local wrapper returning struct sk_buff *
 * calls udp_tunnel_handle_offloads() and stores its result in err; otherwise
 * the wrapper name is defined as a plain alias of that function.
 * NOTE(review): the declaration of err, the conversion of err into the
 * returned pointer and the #else/#endif lines are not visible in this
 * excerpt.
 */
296 #if !defined(HAVE_UDP_TUNNEL_HANDLE_OFFLOAD_RET_SKB) || !defined(USE_UPSTREAM_TUNNEL)
297 static struct sk_buff *
298 __udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
302 err = udp_tunnel_handle_offloads(skb, udp_csum);
310 #define __udp_tunnel_handle_offloads udp_tunnel_handle_offloads
/* Encapsulate @skb in LISP over UDP over IPv4 and transmit it.
 *
 * Visible steps: fetch the tunnel metadata attached to the skb, accept only
 * IPv4/IPv6 payloads, route the outer packet, guarantee enough headroom,
 * strip the inner L2 header, prepare offloads, pick the UDP ports, build the
 * LISP header and pass the packet to udp_tunnel_xmit_skb().
 * NOTE(review): the declarations of rt, fl, df, err and min_headroom, every
 * error/drop path and the return statements are not visible in this excerpt.
 */
313 netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb)
315 struct net_device *dev = skb->dev;
316 struct lisp_dev *lisp_dev = netdev_priv(dev);
317 struct net *net = lisp_dev->net;
/* Sampled before any header is pushed; used further down to strip L2. */
318 int network_offset = skb_network_offset(skb);
319 struct ip_tunnel_info *info;
320 struct ip_tunnel_key *tun_key;
323 __be16 src_port, dst_port;
328 info = skb_tunnel_info(skb);
329 if (unlikely(!info)) {
/* Only IPv4 and IPv6 payloads pass this test. */
334 if (skb->protocol != htons(ETH_P_IP) &&
335 skb->protocol != htons(ETH_P_IPV6)) {
340 tun_key = &info->key;
342 rt = lisp_get_rt(skb, dev, &fl, tun_key);
/* Room needed in front of the payload: link-layer reserve of the output
 * device, the route's header_len, an IPv4 header and UDP + LISP headers.
 */
348 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
349 + sizeof(struct iphdr) + LISP_HLEN;
/* Reallocate the head when it is too small or shared with a clone. */
351 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
352 int head_delta = SKB_DATA_ALIGN(min_headroom -
356 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
362 /* Reset l2 headers. */
363 skb_pull(skb, network_offset);
364 skb_reset_mac_header(skb);
367 if (skb_is_gso(skb) && skb_is_encapsulated(skb))
/* false: no UDP checksum is requested for the offload setup. */
370 skb = __udp_tunnel_handle_offloads(skb, false);
/* get_src_port() returns host byte order; dst_port is already __be16. */
374 src_port = htons(get_src_port(net, skb));
375 dst_port = lisp_dev->dst_port;
377 lisp_build_header(skb, tun_key);
381 ovs_skb_set_inner_protocol(skb, skb->protocol);
/* Set DF on the outer header only if the tunnel flags ask for it. */
383 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
/* Source address comes from the route lookup (fl.saddr). */
384 udp_tunnel_xmit_skb(rt, lisp_dev->sock->sk, skb,
385 fl.saddr, tun_key->u.ipv4.dst,
386 tun_key->tos, tun_key->ttl,
387 df, src_port, dst_port, false, true);
397 EXPORT_SYMBOL(rpl_lisp_xmit);
399 /* Setup stats when device is created
 *
 * ndo_init handler (see lisp_netdev_ops): allocates per-CPU
 * struct pcpu_sw_netstats and stores them in dev->tstats; lisp_uninit()
 * frees them again.
 * NOTE(review): the allocation-failure check and the return statements are
 * not visible in this excerpt.
 */
400 static int lisp_init(struct net_device *dev)
402 dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
409 static void lisp_uninit(struct net_device *dev)
411 free_percpu(dev->tstats);
/* Create the kernel UDP socket used by the tunnel.
 *
 * @net: namespace the socket is created in.
 * @ipv6: selects the address family (lisp_open() passes false).
 * The port argument is stored unchanged in udp_conf.local_udp_port;
 * lisp_open() passes the device's __be16 dst_port. For AF_INET the socket is
 * bound to INADDR_ANY. lisp_open() tests the result with IS_ERR().
 * NOTE(review): the parameter line declaring port, the branch on ipv6, the
 * declarations of sock and err and the error handling after
 * udp_sock_create() are not visible in this excerpt.
 */
414 static struct socket *create_sock(struct net *net, bool ipv6,
418 struct udp_port_cfg udp_conf;
421 memset(&udp_conf, 0, sizeof(udp_conf));
424 udp_conf.family = AF_INET6;
426 udp_conf.family = AF_INET;
427 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
430 udp_conf.local_udp_port = port;
432 /* Open UDP socket */
433 err = udp_sock_create(net, &udp_conf, &sock);
440 static int lisp_open(struct net_device *dev)
442 struct lisp_dev *lisp = netdev_priv(dev);
443 struct udp_tunnel_sock_cfg tunnel_cfg;
444 struct net *net = lisp->net;
446 lisp->sock = create_sock(net, false, lisp->dst_port);
447 if (IS_ERR(lisp->sock))
448 return PTR_ERR(lisp->sock);
450 /* Mark socket as an encapsulation socket */
451 tunnel_cfg.sk_user_data = dev;
452 tunnel_cfg.encap_type = 1;
453 tunnel_cfg.encap_rcv = lisp_rcv;
454 tunnel_cfg.encap_destroy = NULL;
455 setup_udp_tunnel_sock(net, lisp->sock, &tunnel_cfg);
/* ndo_stop handler: release the UDP tunnel socket created by lisp_open().
 * NOTE(review): anything done after the release and the return statement
 * are not visible in this excerpt.
 */
459 static int lisp_stop(struct net_device *dev)
461 struct lisp_dev *lisp = netdev_priv(dev);
463 udp_tunnel_sock_release(lisp->sock);
/* ndo_start_xmit handler. With USE_UPSTREAM_TUNNEL the packet is handed to
 * rpl_lisp_xmit(); the other variant counts the packet in
 * dev->stats.tx_dropped.
 * NOTE(review): the preprocessor lines separating the two variants and the
 * remaining statements of the drop path are not visible in this excerpt.
 */
468 static netdev_tx_t lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
470 #ifdef USE_UPSTREAM_TUNNEL
471 return rpl_lisp_xmit(skb);
473 /* Drop All packets coming from networking stack. OVS-CB is
474 * not initialized for these packets.
478 dev->stats.tx_dropped++;
/* ndo_change_mtu handler; also called by lisp_configure() with LISP_MAX_MTU.
 * Values below 68 or above LISP_MAX_MTU take the branch under the check.
 * NOTE(review): the statement under the range check, the MTU assignment and
 * the return statements are not visible in this excerpt.
 */
483 static int lisp_change_mtu(struct net_device *dev, int new_mtu)
485 if (new_mtu < 68 || new_mtu > LISP_MAX_MTU)
/* Fill the egress side of @info for an IPv4 tunnel.
 *
 * Performs the same route lookup as the transmit path, then records the
 * source address chosen by routing (fl4.saddr) and the given UDP ports in
 * the tunnel key.
 * NOTE(review): the declarations of rt and fl4, the handling of a failed
 * lookup, the release of rt and the return statements are not visible in
 * this excerpt.
 */
492 static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
493 struct ip_tunnel_info *info,
494 __be16 sport, __be16 dport)
499 rt = lisp_get_rt(skb, dev, &fl4, &info->key);
504 info->key.u.ipv4.src = fl4.saddr;
505 info->key.tp_src = sport;
506 info->key.tp_dst = dport;
/* Fill the egress tunnel metadata attached to @skb for the LISP device @dev.
 *
 * Computes the source port exactly as the transmit path does
 * (get_src_port()), takes the destination port from the device and, for
 * AF_INET tunnel info, delegates to egress_ipv4_tun_info().
 * NOTE(review): the declarations of sport and dport and the return value
 * for other address families are not visible in this excerpt.
 */
510 int ovs_lisp_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
512 struct lisp_dev *lisp = netdev_priv(dev);
513 struct net *net = lisp->net;
514 struct ip_tunnel_info *info = skb_tunnel_info(skb);
517 sport = htons(get_src_port(net, skb));
518 dport = lisp->dst_port;
520 if (ip_tunnel_info_af(info) == AF_INET)
521 return egress_ipv4_tun_info(dev, skb, info, sport, dport);
524 EXPORT_SYMBOL_GPL(ovs_lisp_fill_metadata_dst);
/* net_device callbacks of the LISP tunnel device; installed on the device
 * by lisp_setup().
 */
526 static const struct net_device_ops lisp_netdev_ops = {
527 .ndo_init = lisp_init,
528 .ndo_uninit = lisp_uninit,
529 .ndo_get_stats64 = ip_tunnel_get_stats64,
530 .ndo_open = lisp_open,
531 .ndo_stop = lisp_stop,
532 .ndo_start_xmit = lisp_dev_xmit,
533 .ndo_change_mtu = lisp_change_mtu,
534 .ndo_validate_addr = eth_validate_addr,
535 .ndo_set_mac_address = eth_mac_addr,
/* NOTE(review): the only fill-metadata function visible in this file is
 * ovs_lisp_fill_metadata_dst(); confirm that lisp_fill_metadata_dst resolves
 * to it (for example through a macro defined elsewhere).
 */
536 #ifdef USE_UPSTREAM_TUNNEL
537 #ifdef HAVE_NDO_FILL_METADATA_DST
538 .ndo_fill_metadata_dst = lisp_fill_metadata_dst,
543 static void lisp_get_drvinfo(struct net_device *dev,
544 struct ethtool_drvinfo *drvinfo)
546 strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version));
547 strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver));
/* ethtool callbacks: driver info from lisp_get_drvinfo() and the generic
 * ethtool_op_get_link() helper; installed on the device by lisp_setup().
 */
550 static const struct ethtool_ops lisp_ethtool_ops = {
551 .get_drvinfo = lisp_get_drvinfo,
552 .get_link = ethtool_op_get_link,
555 /* Info for udev, that this is a virtual tunnel endpoint.
 * Attached to the device with SET_NETDEV_DEVTYPE() in lisp_setup().
 * NOTE(review): the initializer contents are not visible in this excerpt.
 */
556 static struct device_type lisp_type = {
560 /* Initialize the device structure.
 *
 * Installs the netdev and ethtool callbacks, makes free_netdev() the
 * destructor, tags the device type, advertises the feature flags below and
 * assigns a random Ethernet address.
 * NOTE(review): the first statement(s) of the body, the #endif lines and the
 * content guarded by USE_UPSTREAM_TUNNEL are not visible in this excerpt.
 */
561 static void lisp_setup(struct net_device *dev)
565 dev->netdev_ops = &lisp_netdev_ops;
566 dev->ethtool_ops = &lisp_ethtool_ops;
567 dev->destructor = free_netdev;
569 SET_NETDEV_DEVTYPE(dev, &lisp_type);
/* Lockless transmit, netns-local, scatter/gather, checksum offload and
 * software GSO are advertised.
 */
571 dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
572 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
573 dev->features |= NETIF_F_RXCSUM;
574 dev->features |= NETIF_F_GSO_SOFTWARE;
/* hw_features is only touched on kernels >= 2.6.39. */
576 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
577 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
578 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
580 #ifdef USE_UPSTREAM_TUNNEL
583 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
584 eth_hw_addr_random(dev);
/* Netlink attribute policy for the link type: IFLA_LISP_PORT is a 16-bit
 * value (read with nla_get_be16() in lisp_newlink()).
 */
587 static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
588 [IFLA_LISP_PORT] = { .type = NLA_U16 },
/* rtnl_link_ops.validate handler: when an IFLA_ADDRESS attribute is given
 * it must be exactly ETH_ALEN bytes long and a valid Ethernet address
 * (-EADDRNOTAVAIL otherwise).
 * NOTE(review): the value returned for a wrong length and the final return
 * are not visible in this excerpt; @data is not examined by any visible
 * line.
 */
591 static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
593 if (tb[IFLA_ADDRESS]) {
594 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
597 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
598 return -EADDRNOTAVAIL;
/* Search the per-netns device list of @net for a LISP device that uses
 * @dst_port (network byte order). lisp_configure() treats a non-NULL result
 * as "port already in use".
 * NOTE(review): the return statements are not visible in this excerpt.
 */
604 static struct lisp_dev *find_dev(struct net *net, __be16 dst_port)
606 struct lisp_net *ln = net_generic(net, lisp_net_id);
607 struct lisp_dev *dev;
609 list_for_each_entry(dev, &ln->lisp_list, next) {
610 if (dev->dst_port == dst_port)
/* Finish setting up a freshly allocated LISP netdev and register it.
 *
 * Visible steps: store the destination port, check via find_dev() whether
 * another device in @net already uses that port, set the MTU to
 * LISP_MAX_MTU, register the netdev and link it into the per-netns list.
 * NOTE(review): the parameter line declaring dst_port, the declaration of
 * err, further initialisation of the private data and every error return
 * are not visible in this excerpt.
 */
616 static int lisp_configure(struct net *net, struct net_device *dev,
619 struct lisp_net *ln = net_generic(net, lisp_net_id);
620 struct lisp_dev *lisp = netdev_priv(dev);
626 lisp->dst_port = dst_port;
628 if (find_dev(net, dst_port))
631 err = lisp_change_mtu(dev, LISP_MAX_MTU);
635 err = register_netdevice(dev);
/* The device is added to the list only after registration. */
639 list_add(&lisp->next, &ln->lisp_list);
/* rtnl_link_ops.newlink handler. Two signatures are provided: kernels
 * >= 2.6.39 pass the target netns, the other variant uses &init_net.
 * The destination port defaults to LISP_UDP_PORT and can be overridden with
 * the IFLA_LISP_PORT attribute; the rest is done by lisp_configure().
 * NOTE(review): @data is indexed without a visible NULL check -- confirm
 * that callers always supply the attribute array. The #else/#endif lines
 * are not visible in this excerpt.
 */
643 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
644 static int lisp_newlink(struct net *net, struct net_device *dev,
645 struct nlattr *tb[], struct nlattr *data[])
648 static int lisp_newlink(struct net_device *dev,
649 struct nlattr *tb[], struct nlattr *data[])
652 struct net *net = &init_net;
654 __be16 dst_port = htons(LISP_UDP_PORT);
656 if (data[IFLA_LISP_PORT])
657 dst_port = nla_get_be16(data[IFLA_LISP_PORT]);
659 return lisp_configure(net, dev, dst_port);
/* rtnl_link_ops.dellink handler: unlink the device from the per-netns list
 * and queue it for unregistration.
 * NOTE(review): the one-argument signature has no head parameter although
 * the call below uses head; the lines reconciling this, and the #else/#endif
 * lines, are not visible in this excerpt.
 */
662 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
663 static void lisp_dellink(struct net_device *dev, struct list_head *head)
665 static void lisp_dellink(struct net_device *dev)
668 struct lisp_dev *lisp = netdev_priv(dev);
670 list_del(&lisp->next);
671 unregister_netdevice_queue(dev, head);
674 static size_t lisp_get_size(const struct net_device *dev)
676 return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */
/* rtnl_link_ops.fill_info handler: emit the device's destination port as a
 * big-endian 16-bit IFLA_LISP_PORT attribute.
 * NOTE(review): the success return and the nla_put_failure label with its
 * return value are not visible in this excerpt.
 */
679 static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
681 struct lisp_dev *lisp = netdev_priv(dev);
683 if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port))
684 goto nla_put_failure;
/* rtnetlink link type description; registered in rpl_lisp_init_module() and
 * used by lisp_exit_net() to recognise LISP devices.
 * priv_size makes netdev_priv() return a struct lisp_dev.
 * NOTE(review): some initializer lines (numbering gaps at 693 and 697) are
 * not visible in this excerpt.
 */
692 static struct rtnl_link_ops lisp_link_ops __read_mostly = {
694 .maxtype = IFLA_LISP_MAX,
695 .policy = lisp_policy,
696 .priv_size = sizeof(struct lisp_dev),
698 .validate = lisp_validate,
699 .newlink = lisp_newlink,
700 .dellink = lisp_dellink,
701 .get_size = lisp_get_size,
702 .fill_info = lisp_fill_info,
/* Create a LISP tunnel netdev from inside the kernel.
 *
 * @net: namespace to create the device in.
 * @name / @name_assign_type: passed on to rtnl_create_link().
 * @dst_port: UDP destination port in host byte order; converted with
 *            htons() before lisp_configure() is called.
 * NOTE(review): the remaining rtnl_create_link() arguments, the declaration
 * of err, all error handling and the return statements are not visible in
 * this excerpt.
 */
705 struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
706 u8 name_assign_type, u16 dst_port)
708 struct nlattr *tb[IFLA_MAX + 1];
709 struct net_device *dev;
/* An all-NULL attribute table: no link attributes are supplied. */
712 memset(tb, 0, sizeof(tb));
713 dev = rtnl_create_link(net, (char *) name, name_assign_type,
718 err = lisp_configure(net, dev, htons(dst_port));
725 EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb);
/* pernet .init handler: start the namespace with an empty device list.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
727 static int lisp_init_net(struct net *net)
729 struct lisp_net *ln = net_generic(net, lisp_net_id);
731 INIT_LIST_HEAD(&ln->lisp_list);
/* pernet .exit handler: unregister every LISP device tied to @net.
 *
 * Devices are collected in two passes -- netdevs currently living in @net
 * whose link ops are lisp_link_ops, then devices on this namespace's
 * lisp_list that live in a different namespace -- and unregistered in one
 * batch.
 * NOTE(review): the declaration of list and any locking around the loops
 * are not visible in this excerpt.
 */
735 static void lisp_exit_net(struct net *net)
737 struct lisp_net *ln = net_generic(net, lisp_net_id);
738 struct lisp_dev *lisp, *next;
739 struct net_device *dev, *aux;
744 /* gather any lisp devices that were moved into this ns */
745 for_each_netdev_safe(net, dev, aux)
746 if (dev->rtnl_link_ops == &lisp_link_ops)
747 unregister_netdevice_queue(dev, &list);
749 list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) {
750 /* If lisp->dev is in the same netns, it was already added
751 * to the list by the previous loop.
753 if (!net_eq(dev_net(lisp->dev), net))
754 unregister_netdevice_queue(lisp->dev, &list);
757 /* unregister the devices gathered above */
758 unregister_netdevice_many(&list);
/* Per-network-namespace hooks; .size makes net_generic() hand out a
 * struct lisp_net for each namespace.
 * NOTE(review): one initializer line (numbering gap at 765) is not visible
 * in this excerpt.
 */
762 static struct pernet_operations lisp_net_ops = {
763 .init = lisp_init_net,
764 .exit = lisp_exit_net,
766 .size = sizeof(struct lisp_net),
/* Module initialisation: register the per-netns operations first, then the
 * rtnetlink link type, and log a banner. The pernet registration is undone
 * by the unregister_pernet_subsys() call at the end.
 * NOTE(review): the declaration of rc, the checks of rc, the labels and the
 * return statements are not visible in this excerpt.
 */
769 int rpl_lisp_init_module(void)
773 rc = register_pernet_subsys(&lisp_net_ops);
777 rc = rtnl_link_register(&lisp_link_ops);
781 pr_info("LISP tunneling driver\n");
784 unregister_pernet_subsys(&lisp_net_ops);
/* Module teardown: undo rpl_lisp_init_module() in reverse order -- the link
 * type first, then the per-netns operations.
 */
789 void rpl_lisp_cleanup_module(void)
791 rtnl_link_unregister(&lisp_link_ops);
792 unregister_pernet_subsys(&lisp_net_ops);