2 * Copyright (c) 2015 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18 #include <linux/version.h>
20 #include <linux/etherdevice.h>
23 #include <linux/net.h>
24 #include <linux/module.h>
25 #include <linux/rculist.h>
26 #include <linux/udp.h>
31 #include <net/net_namespace.h>
32 #include <net/netns/generic.h>
33 #include <net/route.h>
35 #include <net/udp_tunnel.h>
42 #include "vport-netdev.h"
/* Default UDP destination port, used by lisp_newlink() when no
 * IFLA_LISP_PORT attribute is supplied.
 */
44 #define LISP_UDP_PORT 4341
/* Version string reported through ethtool drvinfo. */
45 #define LISP_NETDEV_VER "0.1"
/* Key passed to net_generic() to look up this module's per-netns data. */
46 static int lisp_net_id;
48 /* Pseudo network device */
/* Per-device private state, obtained with netdev_priv() on a LISP netdev.
 * NOTE(review): only part of the definition is visible here; other code in
 * this file also uses ->sock and ->dst_port members.
 */
50 struct net *net; /* netns for packet i/o */
51 struct net_device *dev; /* netdev for lisp tunnel */
54 struct list_head next; /* linkage on lisp_net.lisp_list */
57 /* per-network namespace private data for this module */
59 struct list_head lisp_list; /* LISP devices added by lisp_configure() */
/* NOTE(review): the comment delimiters and parts of the structure definition
 * below are not visible in this view; the visible lines are kept unchanged.
 */
63 * LISP encapsulation header:
65 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66 * |N|L|E|V|I|flags| Nonce/Map-Version |
67 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
68 * | Instance ID/Locator Status Bits |
69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
74 * struct lisphdr - LISP header
75 * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
76 * @locator_status_bits_present: Flag indicating the presence of Locator Status
78 * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
79 * @map_version_present: Flag indicating the use of mapping versioning.
80 * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
81 * @reserved_flags: 3 bits reserved for future flags.
82 * @nonce: 24 bit nonce value.
83 * @map_version: 24 bit mapping version.
84 * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
85 * is not set, 8 bits when it is.
86 * @instance_id: 24 bit Instance ID
/* Flag bits of the first header byte.  The same fields are declared twice,
 * in opposite order, so that the layout matches for either bitfield
 * endianness.
 */
89 #ifdef __LITTLE_ENDIAN_BITFIELD
90 __u8 reserved_flags:3;
91 __u8 instance_id_present:1;
92 __u8 map_version_present:1;
93 __u8 solicit_echo_nonce:1;
94 __u8 locator_status_bits_present:1;
98 __u8 locator_status_bits_present:1;
99 __u8 solicit_echo_nonce:1;
100 __u8 map_version_present:1;
101 __u8 instance_id_present:1;
102 __u8 reserved_flags:3;
109 __be32 locator_status_bits; /* 32 bit form: no Instance ID present */
112 __u8 locator_status_bits; /* 8 bit form: shares the word with the Instance ID */
/* Encapsulation bytes between the outer IP header and the inner packet:
 * the UDP header followed by the LISP header.
 */
117 #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
/* Largest MTU accepted by lisp_change_mtu(): IP_MAX_MTU minus the UDP/LISP
 * headers and one outer IPv4 header.
 */
118 #define LISP_MAX_MTU (IP_MAX_MTU - LISP_HLEN - sizeof(struct iphdr))
120 static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb)
122 return (struct lisphdr *)(udp_hdr(skb) + 1);
/* Convert 64 bit tunnel ID to 24 bit Instance ID.
 *
 * @tun_id is in network byte order, so its three least significant bytes
 * are the last three bytes of its in-memory representation on any host.
 * They are copied, most significant first, into @iid[0..2].
 */
static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
{
	const __u8 *wire = (const __u8 *)&tun_id;

	/* bytes 5..7 of the 8 byte big-endian value hold the low 24 bits */
	memcpy(iid, wire + 5, 3);
}
/* Convert 24 bit Instance ID to 64 bit tunnel ID.
 *
 * The three bytes @iid[0..2] become the three least significant bytes of
 * the returned network byte order value; the upper five bytes are zero.
 */
static __be64 instance_id_to_tunnel_id(__u8 *iid)
{
	__be64 tun_id = 0;

	/* bytes 5..7 of the 8 byte big-endian value hold the low 24 bits */
	memcpy((__u8 *)&tun_id + 5, iid, 3);
	return tun_id;
}
152 /* Compute source UDP port for outgoing packet.
153 * Currently we use the flow hash.
155 static u16 get_src_port(struct net *net, struct sk_buff *skb)
157 u32 hash = skb_get_hash(skb);
163 if (skb->protocol == htons(ETH_P_IP)) {
165 int size = (sizeof(iph->saddr) * 2) / sizeof(u32);
167 iph = (struct iphdr *) skb_network_header(skb);
168 hash = jhash2((const u32 *)&iph->saddr, size, 0);
169 } else if (skb->protocol == htons(ETH_P_IPV6)) {
170 struct ipv6hdr *ipv6hdr;
172 ipv6hdr = (struct ipv6hdr *) skb_network_header(skb);
173 hash = jhash2((const u32 *)&ipv6hdr->saddr,
174 (sizeof(struct in6_addr) * 2) / sizeof(u32), 0);
176 pr_warn_once("LISP inner protocol is not IP when "
177 "calculating hash.\n");
181 inet_get_local_port_range(net, &low, &high);
182 range = (high - low) + 1;
183 return (((u64) hash * range) >> 32) + low;
186 static void lisp_build_header(struct sk_buff *skb,
187 const struct ip_tunnel_key *tun_key)
189 struct lisphdr *lisph;
191 lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr));
192 lisph->nonce_present = 0; /* We don't support echo nonce algorithm */
193 lisph->locator_status_bits_present = 1; /* Set LSB */
194 lisph->solicit_echo_nonce = 0; /* No echo noncing */
195 lisph->map_version_present = 0; /* No mapping versioning, nonce instead */
196 lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */
197 lisph->reserved_flags = 0; /* Reserved flags, set to 0 */
199 lisph->u1.nonce[0] = 0;
200 lisph->u1.nonce[1] = 0;
201 lisph->u1.nonce[2] = 0;
203 tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
204 lisph->u2.word2.locator_status_bits = 1;
207 /* Called with rcu_read_lock and BH disabled. */
/* UDP encapsulation receive handler (installed as encap_rcv by lisp_open()).
 * Strips the UDP/LISP headers, derives the tunnel key from the Instance ID,
 * prepends a synthetic Ethernet header and passes the packet to
 * ovs_ip_tunnel_rcv().
 */
208 static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
210 struct lisp_dev *lisp_dev;
211 struct net_device *dev;
212 struct lisphdr *lisph;
213 struct iphdr *inner_iph;
214 struct metadata_dst *tun_dst;
215 #ifndef USE_UPSTREAM_TUNNEL
216 struct metadata_dst temp;
/* The socket user data is the net_device stored by lisp_open(). */
222 dev = rcu_dereference_sk_user_data(sk);
226 lisp_dev = netdev_priv(dev);
/* Pull the UDP + LISP headers; the last argument is true when the device
 * lives in a different netns than the one used for packet I/O.
 */
227 if (iptunnel_pull_header(skb, LISP_HLEN, 0,
228 !net_eq(lisp_dev->net, dev_net(lisp_dev->dev))))
231 lisph = lisp_hdr(skb);
233 if (lisph->instance_id_present != 1)
236 key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
238 /* Save outer tunnel values */
239 #ifndef USE_UPSTREAM_TUNNEL
241 ovs_udp_tun_rx_dst(tun_dst, skb, AF_INET, TUNNEL_KEY, key, 0);
243 tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0);
245 /* Drop non-IP inner packets */
/* NOTE(review): only LISP_HLEN bytes were pulled above; confirm that the
 * first byte of the inner packet is guaranteed to be present and linear
 * before ->version is read.
 */
246 inner_iph = (struct iphdr *)(lisph + 1);
247 switch (inner_iph->version) {
249 protocol = htons(ETH_P_IP);
252 protocol = htons(ETH_P_IPV6);
257 skb->protocol = protocol;
259 /* Add Ethernet header */
/* LISP carries no inner L2 header, so a placeholder one is built: all-zero
 * addresses except for 0x02 in the first octet of source and destination.
 */
260 ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
261 memset(ethh, 0, ETH_HLEN);
262 ethh->h_dest[0] = 0x02;
263 ethh->h_source[0] = 0x02;
264 ethh->h_proto = protocol;
266 ovs_ip_tunnel_rcv(dev, skb, tun_dst);
275 static struct rtable *lisp_get_rt(struct sk_buff *skb,
276 struct net_device *dev,
278 const struct ip_tunnel_key *key)
280 struct net *net = dev_net(dev);
283 memset(fl, 0, sizeof(*fl));
284 fl->daddr = key->u.ipv4.dst;
285 fl->saddr = key->u.ipv4.src;
286 fl->flowi4_tos = RT_TOS(key->tos);
287 fl->flowi4_mark = skb->mark;
288 fl->flowi4_proto = IPPROTO_UDP;
290 return ip_route_output_key(net, fl);
/* Transmit @skb through the LISP tunnel described by the tunnel metadata
 * attached to it: route on the tunnel key, make headroom, strip the L2
 * header, push the LISP header and hand the packet to udp_tunnel_xmit_skb().
 */
293 netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb)
295 struct net_device *dev = skb->dev;
296 struct lisp_dev *lisp_dev = netdev_priv(dev);
297 struct net *net = lisp_dev->net;
298 int network_offset = skb_network_offset(skb);
299 struct ip_tunnel_info *info;
300 struct ip_tunnel_key *tun_key;
303 __be16 src_port, dst_port;
/* Tunnel metadata is required: its key supplies the outer addresses. */
308 info = skb_tunnel_info(skb);
309 if (unlikely(!info)) {
/* Only IPv4 and IPv6 payloads continue past this check. */
314 if (skb->protocol != htons(ETH_P_IP) &&
315 skb->protocol != htons(ETH_P_IPV6)) {
320 tun_key = &info->key;
322 rt = lisp_get_rt(skb, dev, &fl, tun_key);
/* Space needed in front of the payload: link-layer reserve, any dst header,
 * the outer IPv4 header and the UDP + LISP headers.
 */
328 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
329 + sizeof(struct iphdr) + LISP_HLEN;
/* Reallocate the head when it is too small or shared with a clone. */
331 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
332 int head_delta = SKB_DATA_ALIGN(min_headroom -
336 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
342 /* Reset l2 headers. */
343 skb_pull(skb, network_offset);
344 skb_reset_mac_header(skb);
347 err = udp_tunnel_handle_offloads(skb, false, false);
/* Source port comes from the flow hash; destination is the device's port. */
351 src_port = htons(get_src_port(net, skb));
352 dst_port = lisp_dev->dst_port;
354 lisp_build_header(skb, tun_key);
358 ovs_skb_set_inner_protocol(skb, skb->protocol);
/* DF is set on the outer header only when the tunnel key requests it. */
360 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
361 udp_tunnel_xmit_skb(rt, lisp_dev->sock->sk, skb,
362 fl.saddr, tun_key->u.ipv4.dst,
363 tun_key->tos, tun_key->ttl,
364 df, src_port, dst_port, false, true);
374 EXPORT_SYMBOL(rpl_lisp_xmit);
376 /* Setup stats when device is created */
377 static int lisp_init(struct net_device *dev)
379 dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
386 static void lisp_uninit(struct net_device *dev)
388 free_percpu(dev->tstats);
391 static struct socket *create_sock(struct net *net, bool ipv6,
395 struct udp_port_cfg udp_conf;
398 memset(&udp_conf, 0, sizeof(udp_conf));
401 udp_conf.family = AF_INET6;
403 udp_conf.family = AF_INET;
404 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
407 udp_conf.local_udp_port = port;
409 /* Open UDP socket */
410 err = udp_sock_create(net, &udp_conf, &sock);
417 static int lisp_open(struct net_device *dev)
419 struct lisp_dev *lisp = netdev_priv(dev);
420 struct udp_tunnel_sock_cfg tunnel_cfg;
421 struct net *net = lisp->net;
423 lisp->sock = create_sock(net, false, lisp->dst_port);
424 if (IS_ERR(lisp->sock))
425 return PTR_ERR(lisp->sock);
427 /* Mark socket as an encapsulation socket */
428 tunnel_cfg.sk_user_data = dev;
429 tunnel_cfg.encap_type = 1;
430 tunnel_cfg.encap_rcv = lisp_rcv;
431 tunnel_cfg.encap_destroy = NULL;
432 setup_udp_tunnel_sock(net, lisp->sock, &tunnel_cfg);
/* ndo_stop handler: releases the UDP tunnel socket created by lisp_open(). */
436 static int lisp_stop(struct net_device *dev)
438 struct lisp_dev *lisp = netdev_priv(dev);
440 udp_tunnel_sock_release(lisp->sock);
/* ndo_start_xmit handler.  With USE_UPSTREAM_TUNNEL the packet is handed to
 * rpl_lisp_xmit(); otherwise packets arriving from the networking stack are
 * counted in tx_dropped (see the comment below).
 */
445 static netdev_tx_t lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
447 #ifdef USE_UPSTREAM_TUNNEL
448 return rpl_lisp_xmit(skb);
450 /* Drop All packets coming from networking stack. OVS-CB is
451 * not initialized for these packets.
455 dev->stats.tx_dropped++;
460 static int lisp_change_mtu(struct net_device *dev, int new_mtu)
462 if (new_mtu < 68 || new_mtu > LISP_MAX_MTU)
469 static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
470 struct ip_tunnel_info *info,
471 __be16 sport, __be16 dport)
476 rt = lisp_get_rt(skb, dev, &fl4, &info->key);
481 info->key.u.ipv4.src = fl4.saddr;
482 info->key.tp_src = sport;
483 info->key.tp_dst = dport;
487 int ovs_lisp_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
489 struct lisp_dev *lisp = netdev_priv(dev);
490 struct net *net = lisp->net;
491 struct ip_tunnel_info *info = skb_tunnel_info(skb);
494 sport = htons(get_src_port(net, skb));
495 dport = lisp->dst_port;
497 if (ip_tunnel_info_af(info) == AF_INET)
498 return egress_ipv4_tun_info(dev, skb, info, sport, dport);
501 EXPORT_SYMBOL_GPL(ovs_lisp_fill_metadata_dst);
/* net_device operations for LISP devices; installed by lisp_setup(). */
503 static const struct net_device_ops lisp_netdev_ops = {
504 .ndo_init = lisp_init,
505 .ndo_uninit = lisp_uninit,
506 .ndo_get_stats64 = ip_tunnel_get_stats64,
507 .ndo_open = lisp_open,
508 .ndo_stop = lisp_stop,
509 .ndo_start_xmit = lisp_dev_xmit,
510 .ndo_change_mtu = lisp_change_mtu,
511 .ndo_validate_addr = eth_validate_addr,
512 .ndo_set_mac_address = eth_mac_addr,
513 #ifdef USE_UPSTREAM_TUNNEL
514 #ifdef HAVE_NDO_FILL_METADATA_DST
/* NOTE(review): the function defined in this file is named
 * ovs_lisp_fill_metadata_dst; confirm lisp_fill_metadata_dst is provided
 * elsewhere (e.g. by a header macro).
 */
515 .ndo_fill_metadata_dst = lisp_fill_metadata_dst,
520 static void lisp_get_drvinfo(struct net_device *dev,
521 struct ethtool_drvinfo *drvinfo)
523 strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version));
524 strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver));
/* ethtool operations for LISP devices; installed by lisp_setup(). */
527 static const struct ethtool_ops lisp_ethtool_ops = {
528 .get_drvinfo = lisp_get_drvinfo,
529 .get_link = ethtool_op_get_link,
532 /* Info for udev, that this is a virtual tunnel endpoint */
/* Attached to each device with SET_NETDEV_DEVTYPE() in lisp_setup(). */
533 static struct device_type lisp_type = {
537 /* Initialize the device structure. */
/* Installs the LISP netdev and ethtool operations, the device type, the
 * feature flags below and a random MAC address on @dev.
 */
538 static void lisp_setup(struct net_device *dev)
542 dev->netdev_ops = &lisp_netdev_ops;
543 dev->ethtool_ops = &lisp_ethtool_ops;
/* The net_device is freed through free_netdev() when it is destroyed. */
544 dev->destructor = free_netdev;
546 SET_NETDEV_DEVTYPE(dev, &lisp_type);
/* Lockless transmit, device pinned to its netns, plus scatter-gather,
 * checksum and software GSO features.
 */
548 dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
549 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
550 dev->features |= NETIF_F_RXCSUM;
551 dev->features |= NETIF_F_GSO_SOFTWARE;
/* hw_features is only set when building for kernel 2.6.39 or later. */
553 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
554 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
555 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
557 #ifdef USE_UPSTREAM_TUNNEL
560 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
561 eth_hw_addr_random(dev);
/* Netlink attribute policy: IFLA_LISP_PORT is a 16 bit value (read with
 * nla_get_be16() in lisp_newlink()).
 */
564 static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
565 [IFLA_LISP_PORT] = { .type = NLA_U16 },
568 static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
570 if (tb[IFLA_ADDRESS]) {
571 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
574 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
575 return -EADDRNOTAVAIL;
581 static struct lisp_dev *find_dev(struct net *net, __be16 dst_port)
583 struct lisp_net *ln = net_generic(net, lisp_net_id);
584 struct lisp_dev *dev;
586 list_for_each_entry(dev, &ln->lisp_list, next) {
587 if (dev->dst_port == dst_port)
/* Configure and register a new LISP device in @net: record the port, check
 * that no existing device in @net already uses it, set the MTU to
 * LISP_MAX_MTU, register the netdev and add it to the per-netns list.
 */
593 static int lisp_configure(struct net *net, struct net_device *dev,
596 struct lisp_net *ln = net_generic(net, lisp_net_id);
597 struct lisp_dev *lisp = netdev_priv(dev);
603 lisp->dst_port = dst_port;
/* The new device is not on the list yet, so this only matches others. */
605 if (find_dev(net, dst_port))
608 err = lisp_change_mtu(dev, LISP_MAX_MTU);
612 err = register_netdevice(dev);
616 list_add(&lisp->next, &ln->lisp_list);
/* rtnl_link_ops newlink handler.  Two signatures are provided, selected by
 * kernel version; the variant without a @net parameter uses init_net.  The
 * UDP port defaults to LISP_UDP_PORT unless IFLA_LISP_PORT is supplied.
 */
620 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
621 static int lisp_newlink(struct net *net, struct net_device *dev,
622 struct nlattr *tb[], struct nlattr *data[])
625 static int lisp_newlink(struct net_device *dev,
626 struct nlattr *tb[], struct nlattr *data[])
629 struct net *net = &init_net;
631 __be16 dst_port = htons(LISP_UDP_PORT);
/* The attribute value is read as network byte order, no conversion needed. */
633 if (data[IFLA_LISP_PORT])
634 dst_port = nla_get_be16(data[IFLA_LISP_PORT]);
636 return lisp_configure(net, dev, dst_port);
/* rtnl_link_ops dellink handler: removes the device from the per-netns list
 * and queues it for unregistration.
 * NOTE(review): the pre-2.6.39 signature has no @head parameter, yet @head
 * is used below; confirm that variant still builds.
 */
639 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
640 static void lisp_dellink(struct net_device *dev, struct list_head *head)
642 static void lisp_dellink(struct net_device *dev)
645 struct lisp_dev *lisp = netdev_priv(dev);
647 list_del(&lisp->next);
648 unregister_netdevice_queue(dev, head);
651 static size_t lisp_get_size(const struct net_device *dev)
653 return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */
656 static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
658 struct lisp_dev *lisp = netdev_priv(dev);
660 if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port))
661 goto nla_put_failure;
/* rtnetlink link operations for LISP devices; registered in
 * rpl_lisp_init_module() and used by lisp_exit_net() to recognise LISP
 * devices.
 */
669 static struct rtnl_link_ops lisp_link_ops __read_mostly = {
671 .maxtype = IFLA_LISP_MAX,
672 .policy = lisp_policy,
673 .priv_size = sizeof(struct lisp_dev),
675 .validate = lisp_validate,
676 .newlink = lisp_newlink,
677 .dellink = lisp_dellink,
678 .get_size = lisp_get_size,
679 .fill_info = lisp_fill_info,
/* Create a LISP device named @name in @net without a netlink request: a
 * link is created with an empty attribute table and then configured with
 * @dst_port, which is given in host byte order and converted with htons().
 */
682 struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
683 u8 name_assign_type, u16 dst_port)
685 struct nlattr *tb[IFLA_MAX + 1];
686 struct net_device *dev;
/* No attributes are supplied, so the table is all NULL. */
689 memset(tb, 0, sizeof(tb));
690 dev = rtnl_create_link(net, (char *) name, name_assign_type,
695 err = lisp_configure(net, dev, htons(dst_port));
702 EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb);
704 static int lisp_init_net(struct net *net)
706 struct lisp_net *ln = net_generic(net, lisp_net_id);
708 INIT_LIST_HEAD(&ln->lisp_list);
/* Per-netns exit hook: collects every LISP device associated with @net,
 * both devices currently in it and devices on its lisp_list that now live
 * in another netns, and unregisters them in one batch.
 */
712 static void lisp_exit_net(struct net *net)
714 struct lisp_net *ln = net_generic(net, lisp_net_id);
715 struct lisp_dev *lisp, *next;
716 struct net_device *dev, *aux;
721 /* gather any lisp devices that were moved into this ns */
722 for_each_netdev_safe(net, dev, aux)
723 if (dev->rtnl_link_ops == &lisp_link_ops)
724 unregister_netdevice_queue(dev, &list);
726 list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) {
727 /* If lisp->dev is in the same netns, it was already added
728 * to the list by the previous loop.
730 if (!net_eq(dev_net(lisp->dev), net))
731 unregister_netdevice_queue(lisp->dev, &list);
734 /* unregister the devices gathered above */
735 unregister_netdevice_many(&list);
/* Per-network-namespace hooks; .size reserves a struct lisp_net per netns,
 * which the code retrieves with net_generic(net, lisp_net_id).
 */
739 static struct pernet_operations lisp_net_ops = {
740 .init = lisp_init_net,
741 .exit = lisp_exit_net,
743 .size = sizeof(struct lisp_net),
/* Module init: registers the per-netns operations and then the rtnetlink
 * link operations.  The unregister_pernet_subsys() call below is the unwind
 * step for the first registration.
 */
746 int rpl_lisp_init_module(void)
750 rc = register_pernet_subsys(&lisp_net_ops);
754 rc = rtnl_link_register(&lisp_link_ops);
758 pr_info("LISP tunneling driver\n");
761 unregister_pernet_subsys(&lisp_net_ops);
/* Module exit: undoes rpl_lisp_init_module() in reverse order, link
 * operations first and then the per-netns operations.
 */
766 void rpl_lisp_cleanup_module(void)
768 rtnl_link_unregister(&lisp_link_ops);
769 unregister_pernet_subsys(&lisp_net_ops);