2 * Copyright (c) 2015 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18 #include <linux/version.h>
22 #include <linux/net.h>
23 #include <linux/module.h>
24 #include <linux/rculist.h>
25 #include <linux/udp.h>
30 #include <net/net_namespace.h>
31 #include <net/netns/generic.h>
32 #include <net/route.h>
34 #include <net/udp_tunnel.h>
41 #include "vport-netdev.h"
43 #define LISP_UDP_PORT 4341
44 #define LISP_NETDEV_VER "0.1"
45 static int lisp_net_id;
47 /* Pseudo network device */
49 struct net *net; /* netns for packet i/o */
50 struct net_device *dev; /* netdev for lisp tunnel */
53 struct list_head next;
56 /* per-network namespace private data for this module */
58 struct list_head lisp_list;
62 * LISP encapsulation header:
64 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
65 * |N|L|E|V|I|flags| Nonce/Map-Version |
66 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67 * | Instance ID/Locator Status Bits |
68 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
73 * struct lisphdr - LISP header
74 * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
75 * @locator_status_bits_present: Flag indicating the presence of Locator Status
77 * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
78 * @map_version_present: Flag indicating the use of mapping versioning.
79 * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
80 * @reserved_flags: 3 bits reserved for future flags.
81 * @nonce: 24 bit nonce value.
82 * @map_version: 24 bit mapping version.
83 * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
84 * is not set, 8 bits when it is.
85 * @instance_id: 24 bit Instance ID
88 #ifdef __LITTLE_ENDIAN_BITFIELD
89 __u8 reserved_flags:3;
90 __u8 instance_id_present:1;
91 __u8 map_version_present:1;
92 __u8 solicit_echo_nonce:1;
93 __u8 locator_status_bits_present:1;
97 __u8 locator_status_bits_present:1;
98 __u8 solicit_echo_nonce:1;
99 __u8 map_version_present:1;
100 __u8 instance_id_present:1;
101 __u8 reserved_flags:3;
108 __be32 locator_status_bits;
111 __u8 locator_status_bits;
116 #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
118 static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb)
120 return (struct lisphdr *)(udp_hdr(skb) + 1);
123 /* Convert 64 bit tunnel ID to 24 bit Instance ID. */
124 static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
128 iid[0] = (__force __u8)(tun_id >> 16);
129 iid[1] = (__force __u8)(tun_id >> 8);
130 iid[2] = (__force __u8)tun_id;
132 iid[0] = (__force __u8)((__force u64)tun_id >> 40);
133 iid[1] = (__force __u8)((__force u64)tun_id >> 48);
134 iid[2] = (__force __u8)((__force u64)tun_id >> 56);
138 /* Convert 24 bit Instance ID to 64 bit tunnel ID. */
139 static __be64 instance_id_to_tunnel_id(__u8 *iid)
142 return (iid[0] << 16) | (iid[1] << 8) | iid[2];
144 return (__force __be64)(((__force u64)iid[0] << 40) |
145 ((__force u64)iid[1] << 48) |
146 ((__force u64)iid[2] << 56));
150 /* Compute source UDP port for outgoing packet.
151 * Currently we use the flow hash.
153 static u16 get_src_port(struct net *net, struct sk_buff *skb)
155 u32 hash = skb_get_hash(skb);
161 if (skb->protocol == htons(ETH_P_IP)) {
163 int size = (sizeof(iph->saddr) * 2) / sizeof(u32);
165 iph = (struct iphdr *) skb_network_header(skb);
166 hash = jhash2((const u32 *)&iph->saddr, size, 0);
167 } else if (skb->protocol == htons(ETH_P_IPV6)) {
168 struct ipv6hdr *ipv6hdr;
170 ipv6hdr = (struct ipv6hdr *) skb_network_header(skb);
171 hash = jhash2((const u32 *)&ipv6hdr->saddr,
172 (sizeof(struct in6_addr) * 2) / sizeof(u32), 0);
174 pr_warn_once("LISP inner protocol is not IP when "
175 "calculating hash.\n");
179 inet_get_local_port_range(net, &low, &high);
180 range = (high - low) + 1;
181 return (((u64) hash * range) >> 32) + low;
184 static void lisp_build_header(struct sk_buff *skb,
185 const struct ip_tunnel_key *tun_key)
187 struct lisphdr *lisph;
189 lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr));
190 lisph->nonce_present = 0; /* We don't support echo nonce algorithm */
191 lisph->locator_status_bits_present = 1; /* Set LSB */
192 lisph->solicit_echo_nonce = 0; /* No echo noncing */
193 lisph->map_version_present = 0; /* No mapping versioning, nonce instead */
194 lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */
195 lisph->reserved_flags = 0; /* Reserved flags, set to 0 */
197 lisph->u1.nonce[0] = 0;
198 lisph->u1.nonce[1] = 0;
199 lisph->u1.nonce[2] = 0;
201 tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
202 lisph->u2.word2.locator_status_bits = 1;
/* UDP encapsulation receive handler; lisp_open() installs it as encap_rcv
 * on the LISP socket. Removes the outer UDP and LISP headers, takes the
 * tunnel key from the LISP Instance ID, prepends a synthetic Ethernet
 * header and hands the packet to ovs_ip_tunnel_rcv().
 */
205 /* Called with rcu_read_lock and BH disabled. */
206 static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
208 struct net_device *dev;
209 struct lisphdr *lisph;
210 struct iphdr *inner_iph;
211 struct metadata_dst *tun_dst;
212 #ifndef HAVE_METADATA_DST
213 struct metadata_dst temp;
/* lisp_open() stored the tunnel net_device as the socket's user data. */
219 dev = rcu_dereference_sk_user_data(sk);
/* Pull the outer UDP + LISP headers (LISP_HLEN bytes) off the packet. */
223 if (iptunnel_pull_header(skb, LISP_HLEN, 0))
226 lisph = lisp_hdr(skb);
/* The Instance ID is converted to a tunnel key; the handling of packets
 * without the I flag is not visible here.
 */
228 if (lisph->instance_id_present != 1)
231 key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
233 /* Save outer tunnel values */
234 #ifndef HAVE_METADATA_DST
236 ovs_udp_tun_rx_dst(&tun_dst->u.tun_info, skb, AF_INET, TUNNEL_KEY, key, 0);
238 tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0);
/* The inner packet starts right after the LISP header; its IP version
 * field selects the Ethernet protocol used below.
 * NOTE(review): inner_iph->version is read without a visible check that
 * the inner header lies within the linear data - confirm.
 */
240 /* Drop non-IP inner packets */
241 inner_iph = (struct iphdr *)(lisph + 1);
242 switch (inner_iph->version) {
244 protocol = htons(ETH_P_IP);
247 protocol = htons(ETH_P_IPV6);
252 skb->protocol = protocol;
/* The synthetic header is zero-filled except for 0x02 in the first octet
 * of both addresses and the protocol chosen above.
 */
254 /* Add Ethernet header */
255 ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
256 memset(ethh, 0, ETH_HLEN);
257 ethh->h_dest[0] = 0x02;
258 ethh->h_source[0] = 0x02;
259 ethh->h_proto = protocol;
261 ovs_ip_tunnel_rcv(dev, skb, tun_dst);
/* Encapsulate @skb in LISP/UDP/IPv4 according to the tunnel metadata
 * attached to it and transmit it through the UDP socket of the LISP
 * device found in skb->dev.
 */
270 netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb)
272 struct net_device *dev = skb->dev;
273 struct lisp_dev *lisp_dev = netdev_priv(dev);
274 struct net *net = lisp_dev->net;
275 int network_offset = skb_network_offset(skb);
276 struct ip_tunnel_info *info;
277 struct ip_tunnel_key *tun_key;
280 __be16 src_port, dst_port;
/* Tunnel metadata is mandatory: it supplies the outer addresses, TOS, TTL
 * and the tunnel ID used below.
 */
285 info = skb_tunnel_info(skb);
286 if (unlikely(!info)) {
/* Only IPv4 and IPv6 payloads are handled specially here; the action taken
 * for other protocols is not visible in this view.
 */
291 if (skb->protocol != htons(ETH_P_IP) &&
292 skb->protocol != htons(ETH_P_IPV6)) {
297 tun_key = &info->key;
/* Look up the IPv4 route towards the tunnel destination. */
300 memset(&fl, 0, sizeof(fl));
301 fl.daddr = tun_key->u.ipv4.dst;
302 fl.saddr = tun_key->u.ipv4.src;
303 fl.flowi4_tos = RT_TOS(tun_key->tos);
304 fl.flowi4_mark = skb->mark;
305 fl.flowi4_proto = IPPROTO_UDP;
306 rt = ip_route_output_key(net, &fl);
/* Headroom needed for the link-layer header, the route's header_len, the
 * outer IPv4 header and the UDP + LISP headers.
 */
312 min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
313 + sizeof(struct iphdr) + LISP_HLEN;
/* Reallocate the head when it is too small or shared with a clone. */
315 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
316 int head_delta = SKB_DATA_ALIGN(min_headroom -
320 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
326 /* Reset l2 headers. */
327 skb_pull(skb, network_offset);
328 skb_reset_mac_header(skb);
329 vlan_set_tci(skb, 0);
331 skb = udp_tunnel_handle_offloads(skb, false, 0, false);
/* Source port comes from get_src_port(); the destination port is the one
 * configured on the device.
 */
338 src_port = htons(get_src_port(net, skb));
339 dst_port = lisp_dev->dst_port;
341 lisp_build_header(skb, tun_key);
345 ovs_skb_set_inner_protocol(skb, skb->protocol);
/* Set DF on the outer header only when the tunnel flags request it. */
347 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
348 err = udp_tunnel_xmit_skb(rt, lisp_dev->sock->sk, skb,
349 fl.saddr, tun_key->u.ipv4.dst,
350 tun_key->tos, tun_key->ttl,
351 df, src_port, dst_port, false, true);
353 iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *)dev->tstats);
362 EXPORT_SYMBOL(rpl_lisp_xmit);
364 #ifdef HAVE_DEV_TSTATS
365 /* Setup stats when device is created */
366 static int lisp_init(struct net_device *dev)
368 dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
375 static void lisp_uninit(struct net_device *dev)
377 free_percpu(dev->tstats);
/* Create the UDP socket of a LISP device in @net, with the local UDP port
 * taken from the port argument.
 * NOTE(review): the branch that selects AF_INET6 vs. AF_INET is not
 * visible here; presumably it is keyed on @ipv6 - confirm.
 */
381 static struct socket *create_sock(struct net *net, bool ipv6,
385 struct udp_port_cfg udp_conf;
/* Start from an all-zero config so that unset fields have a defined value. */
388 memset(&udp_conf, 0, sizeof(udp_conf));
391 udp_conf.family = AF_INET6;
393 udp_conf.family = AF_INET;
/* IPv4: bind to the wildcard local address. */
394 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
397 udp_conf.local_udp_port = port;
399 /* Open UDP socket */
400 err = udp_sock_create(net, &udp_conf, &sock);
407 static int lisp_open(struct net_device *dev)
409 struct lisp_dev *lisp = netdev_priv(dev);
410 struct udp_tunnel_sock_cfg tunnel_cfg;
411 struct net *net = lisp->net;
413 lisp->sock = create_sock(net, false, lisp->dst_port);
414 if (IS_ERR(lisp->sock))
415 return PTR_ERR(lisp->sock);
417 /* Mark socket as an encapsulation socket */
418 tunnel_cfg.sk_user_data = dev;
419 tunnel_cfg.encap_type = 1;
420 tunnel_cfg.encap_rcv = lisp_rcv;
421 tunnel_cfg.encap_destroy = NULL;
422 setup_udp_tunnel_sock(net, lisp->sock, &tunnel_cfg);
426 static int lisp_stop(struct net_device *dev)
428 struct lisp_dev *lisp = netdev_priv(dev);
430 udp_tunnel_sock_release(lisp->sock);
435 static netdev_tx_t lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
437 #ifdef HAVE_METADATA_DST
438 return rpl_lisp_xmit(skb);
440 /* Drop All packets coming from networking stack. OVS-CB is
441 * not initialized for these packets.
445 dev->stats.tx_dropped++;
450 static const struct net_device_ops lisp_netdev_ops = {
451 #ifdef HAVE_DEV_TSTATS
452 .ndo_init = lisp_init,
453 .ndo_uninit = lisp_uninit,
454 .ndo_get_stats64 = ip_tunnel_get_stats64,
456 .ndo_open = lisp_open,
457 .ndo_stop = lisp_stop,
458 .ndo_start_xmit = lisp_dev_xmit,
459 .ndo_change_mtu = eth_change_mtu,
460 .ndo_validate_addr = eth_validate_addr,
461 .ndo_set_mac_address = eth_mac_addr,
464 static void lisp_get_drvinfo(struct net_device *dev,
465 struct ethtool_drvinfo *drvinfo)
467 strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version));
468 strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver));
471 static const struct ethtool_ops lisp_ethtool_ops = {
472 .get_drvinfo = lisp_get_drvinfo,
473 .get_link = ethtool_op_get_link,
476 /* Info for udev, that this is a virtual tunnel endpoint */
477 static struct device_type lisp_type = {
/* rtnl_link_ops setup helper: installs the netdev and ethtool operation
 * tables, the device type, the feature flags and a random MAC address.
 */
481 /* Initialize the device structure. */
482 static void lisp_setup(struct net_device *dev)
486 dev->netdev_ops = &lisp_netdev_ops;
487 dev->ethtool_ops = &lisp_ethtool_ops;
/* free_netdev() is installed as the device destructor. */
488 dev->destructor = free_netdev;
490 SET_NETDEV_DEVTYPE(dev, &lisp_type);
/* Lockless transmit; the device may not be moved to another netns. */
492 dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
493 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
494 dev->features |= NETIF_F_RXCSUM;
495 dev->features |= NETIF_F_GSO_SOFTWARE;
/* hw_features is only populated on kernels 2.6.39 and later. */
497 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
498 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
499 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
501 #ifdef HAVE_METADATA_DST
504 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
505 eth_hw_addr_random(dev);
508 static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
509 [IFLA_LISP_PORT] = { .type = NLA_U16 },
/* rtnl_link_ops validate hook: a supplied link-layer address must be
 * exactly ETH_ALEN bytes long and a valid Ethernet address.
 * NOTE(review): @data is not inspected here - confirm whether it can be
 * NULL when no device-specific attributes are sent.
 */
512 static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
514 if (tb[IFLA_ADDRESS]) {
515 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
518 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
519 return -EADDRNOTAVAIL;
525 static struct lisp_dev *find_dev(struct net *net, __be16 dst_port)
527 struct lisp_net *ln = net_generic(net, lisp_net_id);
528 struct lisp_dev *dev;
530 list_for_each_entry(dev, &ln->lisp_list, next) {
531 if (dev->dst_port == dst_port)
/* Bind @dev to @dst_port, register it and add it to the per-netns list of
 * LISP devices.
 */
537 static int lisp_configure(struct net *net, struct net_device *dev,
540 struct lisp_net *ln = net_generic(net, lisp_net_id);
541 struct lisp_dev *lisp = netdev_priv(dev);
547 lisp->dst_port = dst_port;
/* Checks whether another LISP device in this netns already uses the port;
 * the resulting error path is not visible in this view.
 */
549 if (find_dev(net, dst_port))
552 err = register_netdevice(dev);
/* Makes the device discoverable through find_dev(). */
556 list_add(&lisp->next, &ln->lisp_list);
560 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
561 static int lisp_newlink(struct net *net, struct net_device *dev,
562 struct nlattr *tb[], struct nlattr *data[])
565 static int lisp_newlink(struct net_device *dev,
566 struct nlattr *tb[], struct nlattr *data[])
569 struct net *net = &init_net;
571 __be16 dst_port = htons(LISP_UDP_PORT);
573 if (data[IFLA_LISP_PORT])
574 dst_port = nla_get_be16(data[IFLA_LISP_PORT]);
576 return lisp_configure(net, dev, dst_port);
579 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
580 static void lisp_dellink(struct net_device *dev, struct list_head *head)
582 static void lisp_dellink(struct net_device *dev)
585 struct lisp_dev *lisp = netdev_priv(dev);
587 list_del(&lisp->next);
588 unregister_netdevice_queue(dev, head);
591 static size_t lisp_get_size(const struct net_device *dev)
593 return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */
/* rtnl_link_ops fill_info hook: report the device's destination UDP port
 * as the IFLA_LISP_PORT attribute of @skb.
 */
596 static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
598 struct lisp_dev *lisp = netdev_priv(dev);
/* dst_port is kept in network byte order, hence the be16 put. */
600 if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port))
601 goto nla_put_failure;
609 static struct rtnl_link_ops lisp_link_ops __read_mostly = {
611 .maxtype = IFLA_LISP_MAX,
612 .policy = lisp_policy,
613 .priv_size = sizeof(struct lisp_dev),
615 .validate = lisp_validate,
616 .newlink = lisp_newlink,
617 .dellink = lisp_dellink,
618 .get_size = lisp_get_size,
619 .fill_info = lisp_fill_info,
/* Create and configure a LISP net_device named @name in @net directly,
 * using rtnl_create_link() and lisp_configure().  @dst_port is given in
 * host byte order and converted with htons() before it is passed on.
 */
622 struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
623 u8 name_assign_type, u16 dst_port)
625 struct nlattr *tb[IFLA_MAX + 1];
626 struct net_device *dev;
/* No link attributes are supplied: the attribute table is all NULL. */
629 memset(tb, 0, sizeof(tb));
630 dev = rtnl_create_link(net, (char *) name, name_assign_type,
635 err = lisp_configure(net, dev, htons(dst_port));
642 EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb);
644 static int lisp_init_net(struct net *net)
646 struct lisp_net *ln = net_generic(net, lisp_net_id);
648 INIT_LIST_HEAD(&ln->lisp_list);
/* Per-netns exit hook: queue every LISP device that lives in @net or is on
 * @net's LISP list for unregistration, then unregister them in one batch.
 */
652 static void lisp_exit_net(struct net *net)
654 struct lisp_net *ln = net_generic(net, lisp_net_id);
655 struct lisp_dev *lisp, *next;
656 struct net_device *dev, *aux;
661 /* gather any lisp devices that were moved into this ns */
662 for_each_netdev_safe(net, dev, aux)
663 if (dev->rtnl_link_ops == &lisp_link_ops)
664 unregister_netdevice_queue(dev, &list);
666 list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) {
667 /* If lisp->dev is in the same netns, it was already added
668 * to the list by the previous loop.
670 if (!net_eq(dev_net(lisp->dev), net))
671 unregister_netdevice_queue(lisp->dev, &list);
674 /* unregister the devices gathered above */
675 unregister_netdevice_many(&list);
679 static struct pernet_operations lisp_net_ops = {
680 .init = lisp_init_net,
681 .exit = lisp_exit_net,
683 .size = sizeof(struct lisp_net),
686 DEFINE_COMPAT_PNET_REG_FUNC(device)
/* Module init: register the per-netns operations first, then the rtnl
 * link type.
 * NOTE(review): the unregister_pernet_subsys() call below is presumably
 * the unwind path for a failed rtnl_link_register() - the labels are not
 * visible here.
 */
687 int rpl_lisp_init_module(void)
691 rc = register_pernet_subsys(&lisp_net_ops);
695 rc = rtnl_link_register(&lisp_link_ops);
699 pr_info("LISP tunneling driver\n");
702 unregister_pernet_subsys(&lisp_net_ops);
707 void rpl_lisp_cleanup_module(void)
709 rtnl_link_unregister(&lisp_link_ops);
710 unregister_pernet_subsys(&lisp_net_ops);