2 * Copyright (c) 2015 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18 #include <linux/version.h>
22 #include <linux/net.h>
23 #include <linux/module.h>
24 #include <linux/rculist.h>
25 #include <linux/udp.h>
30 #include <net/net_namespace.h>
31 #include <net/netns/generic.h>
32 #include <net/route.h>
34 #include <net/udp_tunnel.h>
41 #include "vport-netdev.h"
43 #define LISP_UDP_PORT 4341
44 #define LISP_NETDEV_VER "0.1"
45 static int lisp_net_id;
/* Private per-device state; the functions below obtain it with netdev_priv(). */
47 /* Pseudo network device */
49 struct net *net; /* netns for packet i/o */
50 struct net_device *dev; /* netdev for lisp tunnel */
/* Linkage on lisp_net.lisp_list (see list_add() in lisp_configure()). */
53 struct list_head next;
56 /* per-network namespace private data for this module */
/* List of LISP devices; find_dev() walks it comparing destination ports. */
58 struct list_head lisp_list;
62 * LISP encapsulation header:
64 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
65 * |N|L|E|V|I|flags| Nonce/Map-Version |
66 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67 * | Instance ID/Locator Status Bits |
68 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
73 * struct lisphdr - LISP header
74 * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
75 * @locator_status_bits_present: Flag indicating the presence of Locator Status Bits (LSB).
77 * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
78 * @map_version_present: Flag indicating the use of mapping versioning.
79 * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
80 * @reserved_flags: 3 bits reserved for future flags.
81 * @nonce: 24 bit nonce value.
82 * @map_version: 24 bit mapping version.
83 * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
84 * is not set, 8 bits when it is.
85 * @instance_id: 24 bit Instance ID
/*
 * Flag bits of the first header octet. The second listing below repeats the
 * same fields in the opposite declaration order, one listing per bit-field
 * endianness.
 */
88 #ifdef __LITTLE_ENDIAN_BITFIELD
89 __u8 reserved_flags:3;
90 __u8 instance_id_present:1;
91 __u8 map_version_present:1;
92 __u8 solicit_echo_nonce:1;
93 __u8 locator_status_bits_present:1;
97 __u8 locator_status_bits_present:1;
98 __u8 solicit_echo_nonce:1;
99 __u8 map_version_present:1;
100 __u8 instance_id_present:1;
101 __u8 reserved_flags:3;
/* Full-width Locator Status Bits (the form used without an Instance ID). */
108 __be32 locator_status_bits;
/* 8-bit Locator Status Bits (the form used alongside an Instance ID). */
111 __u8 locator_status_bits;
/* Combined size of the UDP header and the LISP header. */
116 #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
/* Upper MTU bound: IP_MAX_MTU minus LISP_HLEN and one IPv4 header. */
117 #define LISP_MAX_MTU (IP_MAX_MTU - LISP_HLEN - sizeof(struct iphdr))
119 static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb)
121 return (struct lisphdr *)(udp_hdr(skb) + 1);
124 /* Convert 64 bit tunnel ID to 24 bit Instance ID. */
125 static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
129 iid[0] = (__force __u8)(tun_id >> 16);
130 iid[1] = (__force __u8)(tun_id >> 8);
131 iid[2] = (__force __u8)tun_id;
133 iid[0] = (__force __u8)((__force u64)tun_id >> 40);
134 iid[1] = (__force __u8)((__force u64)tun_id >> 48);
135 iid[2] = (__force __u8)((__force u64)tun_id >> 56);
139 /* Convert 24 bit Instance ID to 64 bit tunnel ID. */
140 static __be64 instance_id_to_tunnel_id(__u8 *iid)
143 return (iid[0] << 16) | (iid[1] << 8) | iid[2];
145 return (__force __be64)(((__force u64)iid[0] << 40) |
146 ((__force u64)iid[1] << 48) |
147 ((__force u64)iid[2] << 56));
/*
 * get_src_port - choose the outer UDP source port for @skb.
 *
 * The starting value is skb_get_hash(). The visible code can replace it
 * with a jhash2() over two addresses' worth of words beginning at the inner
 * IPv4 or IPv6 source address, and warns once for any other protocol; the
 * condition guarding that fallback is not visible in this excerpt.
 * The 32-bit hash is finally scaled into the local port range reported by
 * inet_get_local_port_range() for @net, and the result is returned in host
 * byte order (the caller applies htons()).
 */
151 /* Compute source UDP port for outgoing packet.
152 * Currently we use the flow hash.
154 static u16 get_src_port(struct net *net, struct sk_buff *skb)
156 u32 hash = skb_get_hash(skb);
162 if (skb->protocol == htons(ETH_P_IP)) {
164 int size = (sizeof(iph->saddr) * 2) / sizeof(u32);
166 iph = (struct iphdr *) skb_network_header(skb);
167 hash = jhash2((const u32 *)&iph->saddr, size, 0);
168 } else if (skb->protocol == htons(ETH_P_IPV6)) {
169 struct ipv6hdr *ipv6hdr;
171 ipv6hdr = (struct ipv6hdr *) skb_network_header(skb);
172 hash = jhash2((const u32 *)&ipv6hdr->saddr,
173 (sizeof(struct in6_addr) * 2) / sizeof(u32), 0);
175 pr_warn_once("LISP inner protocol is not IP when "
176 "calculating hash.\n");
180 inet_get_local_port_range(net, &low, &high);
181 range = (high - low) + 1;
182 return (((u64) hash * range) >> 32) + low;
185 static void lisp_build_header(struct sk_buff *skb,
186 const struct ip_tunnel_key *tun_key)
188 struct lisphdr *lisph;
190 lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr));
191 lisph->nonce_present = 0; /* We don't support echo nonce algorithm */
192 lisph->locator_status_bits_present = 1; /* Set LSB */
193 lisph->solicit_echo_nonce = 0; /* No echo noncing */
194 lisph->map_version_present = 0; /* No mapping versioning, nonce instead */
195 lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */
196 lisph->reserved_flags = 0; /* Reserved flags, set to 0 */
198 lisph->u1.nonce[0] = 0;
199 lisph->u1.nonce[1] = 0;
200 lisph->u1.nonce[2] = 0;
202 tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
203 lisph->u2.word2.locator_status_bits = 1;
/*
 * lisp_rcv - receive hook for LISP-encapsulated UDP packets.
 *
 * Installed as tunnel_cfg.encap_rcv by lisp_open(). The visible steps are:
 * fetch the owning net_device from the socket's user data, pull the
 * UDP + LISP headers, require the Instance ID flag, turn the Instance ID
 * into a tunnel key, record the outer tunnel metadata, classify the inner
 * packet by its IP version field, prepend a synthetic Ethernet header and
 * pass the skb to ovs_ip_tunnel_rcv().
 * Several declarations and all error/return paths are not visible in this
 * excerpt.
 */
206 /* Called with rcu_read_lock and BH disabled. */
207 static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
209 struct net_device *dev;
210 struct lisphdr *lisph;
211 struct iphdr *inner_iph;
212 struct metadata_dst *tun_dst;
213 #ifndef HAVE_METADATA_DST
214 struct metadata_dst temp;
220 dev = rcu_dereference_sk_user_data(sk);
224 if (iptunnel_pull_header(skb, LISP_HLEN, 0))
227 lisph = lisp_hdr(skb);
/* Only packets that carry an Instance ID are handled further. */
229 if (lisph->instance_id_present != 1)
232 key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
234 /* Save outer tunnel values */
235 #ifndef HAVE_METADATA_DST
237 ovs_udp_tun_rx_dst(&tun_dst->u.tun_info, skb, AF_INET, TUNNEL_KEY, key, 0);
239 tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0);
241 /* Drop non-IP inner packets */
/* NOTE(review): only LISP_HLEN was requested from iptunnel_pull_header()
 * above; confirm the inner IP version byte is guaranteed to be readable.
 */
242 inner_iph = (struct iphdr *)(lisph + 1);
243 switch (inner_iph->version) {
245 protocol = htons(ETH_P_IP);
248 protocol = htons(ETH_P_IPV6);
253 skb->protocol = protocol;
255 /* Add Ethernet header */
256 ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
257 memset(ethh, 0, ETH_HLEN);
/* Both MAC addresses are all-zero apart from a first octet of 0x02. */
258 ethh->h_dest[0] = 0x02;
259 ethh->h_source[0] = 0x02;
260 ethh->h_proto = protocol;
262 ovs_ip_tunnel_rcv(dev, skb, tun_dst);
/*
 * rpl_lisp_xmit - encapsulate @skb in LISP/UDP/IPv4 and transmit it.
 *
 * Visible steps: read the tunnel metadata attached to the skb, accept only
 * IPv4/IPv6 payloads, route on the tunnel key's IPv4 addresses, make sure
 * there is enough writable headroom, strip the L2 header, prepare
 * offloads, build the LISP header and hand the packet to
 * udp_tunnel_xmit_skb(). Statistics are updated from its result.
 * Some declarations and all error paths are not visible in this excerpt.
 */
271 netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb)
273 struct net_device *dev = skb->dev;
274 struct lisp_dev *lisp_dev = netdev_priv(dev);
275 struct net *net = lisp_dev->net;
276 int network_offset = skb_network_offset(skb);
277 struct ip_tunnel_info *info;
278 struct ip_tunnel_key *tun_key;
281 __be16 src_port, dst_port;
286 info = skb_tunnel_info(skb);
287 if (unlikely(!info)) {
292 if (skb->protocol != htons(ETH_P_IP) &&
293 skb->protocol != htons(ETH_P_IPV6)) {
298 tun_key = &info->key;
/* Route lookup keyed on the outer IPv4 addresses, TOS and skb mark. */
301 memset(&fl, 0, sizeof(fl));
302 fl.daddr = tun_key->u.ipv4.dst;
303 fl.saddr = tun_key->u.ipv4.src;
304 fl.flowi4_tos = RT_TOS(tun_key->tos);
305 fl.flowi4_mark = skb->mark;
306 fl.flowi4_proto = IPPROTO_UDP;
307 rt = ip_route_output_key(net, &fl);
/* Headroom for link layer, route header_len, outer IPv4 and UDP + LISP. */
313 min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
314 + sizeof(struct iphdr) + LISP_HLEN;
316 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
317 int head_delta = SKB_DATA_ALIGN(min_headroom -
321 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
327 /* Reset l2 headers. */
328 skb_pull(skb, network_offset);
329 skb_reset_mac_header(skb);
330 vlan_set_tci(skb, 0);
332 skb = udp_tunnel_handle_offloads(skb, false, 0, false);
/* Source port comes from the flow hash; destination is the device port. */
339 src_port = htons(get_src_port(net, skb));
340 dst_port = lisp_dev->dst_port;
342 lisp_build_header(skb, tun_key);
346 ovs_skb_set_inner_protocol(skb, skb->protocol);
/* Set IP_DF on the outer header only when the tunnel flags ask for it. */
348 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
349 err = udp_tunnel_xmit_skb(rt, lisp_dev->sock->sk, skb,
350 fl.saddr, tun_key->u.ipv4.dst,
351 tun_key->tos, tun_key->ttl,
352 df, src_port, dst_port, false, true);
354 iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *)dev->tstats);
363 EXPORT_SYMBOL(rpl_lisp_xmit);
365 #ifdef HAVE_DEV_TSTATS
/*
 * lisp_init - ndo_init handler (only built when HAVE_DEV_TSTATS is set).
 * Allocates per-CPU pcpu_sw_netstats and stores them in dev->tstats.
 * NOTE(review): the allocation-failure check and return value are not
 * visible in this excerpt.
 */
366 /* Setup stats when device is created */
367 static int lisp_init(struct net_device *dev)
369 dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
376 static void lisp_uninit(struct net_device *dev)
378 free_percpu(dev->tstats);
/*
 * create_sock - open the kernel UDP socket used for LISP traffic in @net.
 *
 * Fills a zeroed udp_port_cfg with either AF_INET6 or AF_INET (the latter
 * bound to INADDR_ANY) and the requested local port, then calls
 * udp_sock_create(). The remaining parameters, the branch condition and
 * the return statements are not visible in this excerpt; lisp_open()
 * treats the result as an ERR_PTR-style pointer.
 */
382 static struct socket *create_sock(struct net *net, bool ipv6,
386 struct udp_port_cfg udp_conf;
389 memset(&udp_conf, 0, sizeof(udp_conf));
392 udp_conf.family = AF_INET6;
394 udp_conf.family = AF_INET;
395 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
398 udp_conf.local_udp_port = port;
400 /* Open UDP socket */
401 err = udp_sock_create(net, &udp_conf, &sock);
408 static int lisp_open(struct net_device *dev)
410 struct lisp_dev *lisp = netdev_priv(dev);
411 struct udp_tunnel_sock_cfg tunnel_cfg;
412 struct net *net = lisp->net;
414 lisp->sock = create_sock(net, false, lisp->dst_port);
415 if (IS_ERR(lisp->sock))
416 return PTR_ERR(lisp->sock);
418 /* Mark socket as an encapsulation socket */
419 tunnel_cfg.sk_user_data = dev;
420 tunnel_cfg.encap_type = 1;
421 tunnel_cfg.encap_rcv = lisp_rcv;
422 tunnel_cfg.encap_destroy = NULL;
423 setup_udp_tunnel_sock(net, lisp->sock, &tunnel_cfg);
427 static int lisp_stop(struct net_device *dev)
429 struct lisp_dev *lisp = netdev_priv(dev);
431 udp_tunnel_sock_release(lisp->sock);
/*
 * lisp_dev_xmit - ndo_start_xmit handler.
 * With HAVE_METADATA_DST the skb is forwarded to rpl_lisp_xmit(). The other
 * configuration counts the packet in dev->stats.tx_dropped; the rest of
 * that branch is not visible in this excerpt.
 */
436 static netdev_tx_t lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
438 #ifdef HAVE_METADATA_DST
439 return rpl_lisp_xmit(skb);
441 /* Drop All packets coming from networking stack. OVS-CB is
442 * not initialized for these packets.
446 dev->stats.tx_dropped++;
/*
 * lisp_change_mtu - ndo_change_mtu handler.
 * Range-checks @new_mtu against [68, LISP_MAX_MTU]; what happens on either
 * side of the check is not visible in this excerpt.
 * NOTE(review): 68 is presumably the minimum IPv4 MTU - confirm.
 */
451 static int lisp_change_mtu(struct net_device *dev, int new_mtu)
453 if (new_mtu < 68 || new_mtu > LISP_MAX_MTU)
/*
 * net_device callbacks for LISP devices; installed by lisp_setup().
 * The init/uninit/stats entries are only present with HAVE_DEV_TSTATS.
 */
460 static const struct net_device_ops lisp_netdev_ops = {
461 #ifdef HAVE_DEV_TSTATS
462 .ndo_init = lisp_init,
463 .ndo_uninit = lisp_uninit,
464 .ndo_get_stats64 = ip_tunnel_get_stats64,
466 .ndo_open = lisp_open,
467 .ndo_stop = lisp_stop,
468 .ndo_start_xmit = lisp_dev_xmit,
469 .ndo_change_mtu = lisp_change_mtu,
470 .ndo_validate_addr = eth_validate_addr,
471 .ndo_set_mac_address = eth_mac_addr,
474 static void lisp_get_drvinfo(struct net_device *dev,
475 struct ethtool_drvinfo *drvinfo)
477 strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version));
478 strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver));
/* ethtool callbacks for LISP devices; installed by lisp_setup(). */
481 static const struct ethtool_ops lisp_ethtool_ops = {
482 .get_drvinfo = lisp_get_drvinfo,
483 .get_link = ethtool_op_get_link,
/* Attached to each device via SET_NETDEV_DEVTYPE() in lisp_setup(); the
 * initializer contents are not visible in this excerpt.
 */
486 /* Info for udev, that this is a virtual tunnel endpoint */
487 static struct device_type lisp_type = {
/*
 * lisp_setup - install the ops tables, destructor, device type, offload
 * feature flags and a random MAC address on a new LISP net_device.
 * Some statements of this function are not visible in this excerpt.
 */
491 /* Initialize the device structure. */
492 static void lisp_setup(struct net_device *dev)
496 dev->netdev_ops = &lisp_netdev_ops;
497 dev->ethtool_ops = &lisp_ethtool_ops;
498 dev->destructor = free_netdev;
500 SET_NETDEV_DEVTYPE(dev, &lisp_type);
/* Lockless TX, pinned to its netns, with SG/checksum/software-GSO offloads. */
502 dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
503 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
504 dev->features |= NETIF_F_RXCSUM;
505 dev->features |= NETIF_F_GSO_SOFTWARE;
/* hw_features is only touched on kernels >= 2.6.39. */
507 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
508 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
509 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
511 #ifdef HAVE_METADATA_DST
514 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
515 eth_hw_addr_random(dev);
/* Netlink attribute policy: IFLA_LISP_PORT is a 16-bit value. */
518 static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
519 [IFLA_LISP_PORT] = { .type = NLA_U16 },
/*
 * lisp_validate - rtnl_link_ops.validate handler.
 * When IFLA_ADDRESS is supplied it must be ETH_ALEN bytes long and a valid
 * Ethernet address; an invalid address yields -EADDRNOTAVAIL. The other
 * return statements are not visible in this excerpt.
 */
522 static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
524 if (tb[IFLA_ADDRESS]) {
525 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
528 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
529 return -EADDRNOTAVAIL;
/*
 * find_dev - search @net's LISP device list for an entry whose dst_port
 * equals @dst_port. The return statements are not visible in this excerpt;
 * lisp_configure() treats a non-NULL result as "port already in use".
 */
535 static struct lisp_dev *find_dev(struct net *net, __be16 dst_port)
537 struct lisp_net *ln = net_generic(net, lisp_net_id);
538 struct lisp_dev *dev;
540 list_for_each_entry(dev, &ln->lisp_list, next) {
541 if (dev->dst_port == dst_port)
/*
 * lisp_configure - finish setting up and register a LISP device.
 *
 * Records the destination port, checks that no other device in @net already
 * uses it, applies LISP_MAX_MTU through lisp_change_mtu(), registers the
 * net_device and links it on the per-namespace list.
 * The error returns are not visible in this excerpt.
 */
547 static int lisp_configure(struct net *net, struct net_device *dev,
550 struct lisp_net *ln = net_generic(net, lisp_net_id);
551 struct lisp_dev *lisp = netdev_priv(dev);
557 lisp->dst_port = dst_port;
/* This device is not on the list yet, so any hit is another device. */
559 if (find_dev(net, dst_port))
562 err = lisp_change_mtu(dev, LISP_MAX_MTU);
566 err = register_netdevice(dev);
570 list_add(&lisp->next, &ln->lisp_list);
/*
 * lisp_newlink - rtnl_link_ops.newlink handler.
 *
 * Two signatures are provided: kernels >= 2.6.39 pass the namespace in,
 * while the older variant has no such argument and uses init_net.
 * The destination port defaults to LISP_UDP_PORT and may be overridden by
 * IFLA_LISP_PORT; the work is then delegated to lisp_configure().
 */
574 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
575 static int lisp_newlink(struct net *net, struct net_device *dev,
576 struct nlattr *tb[], struct nlattr *data[])
579 static int lisp_newlink(struct net_device *dev,
580 struct nlattr *tb[], struct nlattr *data[])
583 struct net *net = &init_net;
585 __be16 dst_port = htons(LISP_UDP_PORT);
587 if (data[IFLA_LISP_PORT])
588 dst_port = nla_get_be16(data[IFLA_LISP_PORT]);
590 return lisp_configure(net, dev, dst_port);
593 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
594 static void lisp_dellink(struct net_device *dev, struct list_head *head)
596 static void lisp_dellink(struct net_device *dev)
599 struct lisp_dev *lisp = netdev_priv(dev);
601 list_del(&lisp->next);
602 unregister_netdevice_queue(dev, head);
605 static size_t lisp_get_size(const struct net_device *dev)
607 return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */
/*
 * lisp_fill_info - rtnl_link_ops.fill_info handler.
 * Emits the device's destination port as IFLA_LISP_PORT and jumps to
 * nla_put_failure if the attribute cannot be added. The label and the
 * return statements are not visible in this excerpt.
 */
610 static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
612 struct lisp_dev *lisp = netdev_priv(dev);
614 if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port))
615 goto nla_put_failure;
/*
 * rtnetlink link operations for LISP devices; registered by
 * rpl_lisp_init_module() and used in lisp_exit_net() to recognise LISP
 * devices. Some initializer entries are not visible in this excerpt.
 */
623 static struct rtnl_link_ops lisp_link_ops __read_mostly = {
625 .maxtype = IFLA_LISP_MAX,
626 .policy = lisp_policy,
627 .priv_size = sizeof(struct lisp_dev),
629 .validate = lisp_validate,
630 .newlink = lisp_newlink,
631 .dellink = lisp_dellink,
632 .get_size = lisp_get_size,
633 .fill_info = lisp_fill_info,
/*
 * rpl_lisp_dev_create_fb - create a LISP device from inside the kernel.
 * @net: namespace that will own the device.
 * @name: interface name.
 * @name_assign_type: passed through to rtnl_create_link().
 * @dst_port: UDP port in host byte order (converted with htons() here).
 *
 * Builds the link with an empty attribute table and configures it through
 * lisp_configure(). Error handling and the return statements are not
 * visible in this excerpt.
 */
636 struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
637 u8 name_assign_type, u16 dst_port)
639 struct nlattr *tb[IFLA_MAX + 1];
640 struct net_device *dev;
643 memset(tb, 0, sizeof(tb));
644 dev = rtnl_create_link(net, (char *) name, name_assign_type,
649 err = lisp_configure(net, dev, htons(dst_port));
656 EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb);
658 static int lisp_init_net(struct net *net)
660 struct lisp_net *ln = net_generic(net, lisp_net_id);
662 INIT_LIST_HEAD(&ln->lisp_list);
/*
 * lisp_exit_net - pernet .exit handler.
 *
 * Collects every LISP device tied to @net on one list and unregisters them
 * in a single batch: first devices currently living in @net, then devices
 * on this namespace's LISP list that now live in another namespace.
 * The list declaration and any locking are not visible in this excerpt.
 */
666 static void lisp_exit_net(struct net *net)
668 struct lisp_net *ln = net_generic(net, lisp_net_id);
669 struct lisp_dev *lisp, *next;
670 struct net_device *dev, *aux;
675 /* gather any lisp devices that were moved into this ns */
676 for_each_netdev_safe(net, dev, aux)
677 if (dev->rtnl_link_ops == &lisp_link_ops)
678 unregister_netdevice_queue(dev, &list);
680 list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) {
681 /* If lisp->dev is in the same netns, it was already added
682 * to the list by the previous loop.
684 if (!net_eq(dev_net(lisp->dev), net))
685 unregister_netdevice_queue(lisp->dev, &list);
688 /* unregister the devices gathered above */
689 unregister_netdevice_many(&list);
/*
 * Per-network-namespace hooks; .size makes the core allocate one
 * struct lisp_net per namespace. Some initializer entries are not visible
 * in this excerpt.
 */
693 static struct pernet_operations lisp_net_ops = {
694 .init = lisp_init_net,
695 .exit = lisp_exit_net,
697 .size = sizeof(struct lisp_net),
/*
 * rpl_lisp_init_module - module initialisation.
 *
 * Registers the per-namespace operations first and the rtnetlink link
 * operations second, then logs a banner. The visible
 * unregister_pernet_subsys() call belongs to an unwind path whose labels
 * and return statements are not visible in this excerpt.
 */
700 DEFINE_COMPAT_PNET_REG_FUNC(device)
701 int rpl_lisp_init_module(void)
705 rc = register_pernet_subsys(&lisp_net_ops);
709 rc = rtnl_link_register(&lisp_link_ops);
713 pr_info("LISP tunneling driver\n");
716 unregister_pernet_subsys(&lisp_net_ops);
721 void rpl_lisp_cleanup_module(void)
723 rtnl_link_unregister(&lisp_link_ops);
724 unregister_pernet_subsys(&lisp_net_ops);