datapath: compat: Use dst-cache for Geneve and VxLAN tunnels.
author: Pravin B Shelar <pshelar@ovn.org>
Fri, 8 Jul 2016 23:24:24 +0000 (16:24 -0700)
committer: Pravin B Shelar <pshelar@ovn.org>
Sat, 9 Jul 2016 02:27:49 +0000 (19:27 -0700)
This partially backports commit:
    commit d71785ffc7e7cae3fbdc4ea8a9d05b7a1c59f7b8
    Author: Paolo Abeni <pabeni@redhat.com>

    net: add dst_cache to ovs vxlan lwtunnel

    In case of UDP traffic with datagram length
    below MTU this give about 2% performance increase
    when tunneling over ipv4 and about 60% when tunneling
    over ipv6

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Suggested-and-acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Bug fix commit db3c6139e6e ("bpf, vxlan, geneve, gre: fix usage of
dst_cache on xmit") is also included. Geneve changes
were added in 468dfffcd762cbb2777ec5a76bc21e3748ebf47e ("geneve: add
dst caching support").

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>
datapath/flow_netlink.c
datapath/linux/compat/geneve.c
datapath/linux/compat/gso.h
datapath/linux/compat/include/net/ip_tunnels.h
datapath/linux/compat/include/net/vxlan.h
datapath/linux/compat/vxlan.c

index cbfa233..b6020ab 100644 (file)
@@ -1964,6 +1964,11 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
        if (!tun_dst)
                return -ENOMEM;
 
+       err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
+       if (err) {
+               dst_release((struct dst_entry *)tun_dst);
+               return err;
+       }
        a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
                         sizeof(*ovs_tun), log);
        if (IS_ERR(a)) {
index f5daefb..061ceb5 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/if_vlan.h>
 
 #include <net/addrconf.h>
+#include <net/dst_cache.h>
 #include <net/dst_metadata.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -88,6 +89,7 @@ struct geneve_dev {
        __be16             dst_port;
        bool               collect_md;
        u32                flags;
+       struct dst_cache   dst_cache;
 };
 
 /* Geneve device flags */
@@ -301,15 +303,27 @@ drop:
 /* Setup stats when device is created */
 static int geneve_init(struct net_device *dev)
 {
+       struct geneve_dev *geneve = netdev_priv(dev);
+       int err;
+
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;
 
+       err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
+       if (err) {
+               free_percpu(dev->tstats);
+               return err;
+       }
+
        return 0;
 }
 
 static void geneve_uninit(struct net_device *dev)
 {
+       struct geneve_dev *geneve = netdev_priv(dev);
+
+       dst_cache_destroy(&geneve->dst_cache);
        free_percpu(dev->tstats);
 }
 
@@ -799,7 +813,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
                                       struct flowi4 *fl4,
                                       struct ip_tunnel_info *info)
 {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct geneve_dev *geneve = netdev_priv(dev);
+       struct dst_cache *dst_cache;
        struct rtable *rt = NULL;
        __u8 tos;
 
@@ -811,16 +827,25 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
                fl4->daddr = info->key.u.ipv4.dst;
                fl4->saddr = info->key.u.ipv4.src;
                fl4->flowi4_tos = RT_TOS(info->key.tos);
+               dst_cache = &info->dst_cache;
        } else {
                tos = geneve->tos;
                if (tos == 1) {
                        const struct iphdr *iip = ip_hdr(skb);
 
                        tos = ip_tunnel_get_dsfield(iip, skb);
+                       use_cache = false;
                }
 
                fl4->flowi4_tos = RT_TOS(tos);
                fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
+               dst_cache = &geneve->dst_cache;
+       }
+
+       if (use_cache) {
+               rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
+               if (rt)
+                       return rt;
        }
 
        rt = ip_route_output_key(geneve->net, fl4);
@@ -833,6 +858,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
                ip_rt_put(rt);
                return ERR_PTR(-ELOOP);
        }
+       if (use_cache)
+               dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
        return rt;
 }
 
@@ -842,9 +869,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
                                           struct flowi6 *fl6,
                                           struct ip_tunnel_info *info)
 {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct geneve_dev *geneve = netdev_priv(dev);
        struct geneve_sock *gs6 = geneve->sock6;
        struct dst_entry *dst = NULL;
+       struct dst_cache *dst_cache;
        __u8 prio;
 
        memset(fl6, 0, sizeof(*fl6));
@@ -856,17 +885,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
                fl6->saddr = info->key.u.ipv6.src;
                fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
                                                   info->key.label);
+               dst_cache = &info->dst_cache;
        } else {
                prio = geneve->tos;
                if (prio == 1) {
                        const struct iphdr *iip = ip_hdr(skb);
 
                        prio = ip_tunnel_get_dsfield(iip, skb);
+                       use_cache = false;
                }
 
                fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                                   geneve->label);
                fl6->daddr = geneve->remote.sin6.sin6_addr;
+               dst_cache = &geneve->dst_cache;
+       }
+
+       if (use_cache) {
+               dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
+               if (dst)
+                       return dst;
        }
 
 #ifdef HAVE_IPV6_DST_LOOKUP_NET
@@ -887,6 +925,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
                return ERR_PTR(-ELOOP);
        }
 
+       if (use_cache)
+               dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
        return dst;
 }
 #endif
@@ -1374,6 +1414,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
                        return -EPERM;
        }
 
+       dst_cache_reset(&geneve->dst_cache);
+
        err = register_netdevice(dev);
        if (err)
                return err;
index e24b3a6..c5357cc 100644 (file)
@@ -195,6 +195,9 @@ static inline void ovs_dst_hold(void *dst)
 
 static inline void ovs_dst_release(struct dst_entry *dst)
 {
+       struct metadata_dst *tun_dst = (struct metadata_dst *) dst;
+
+       dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
        kfree(dst);
 }
 
index 7fe6a04..e3f9b60 100644 (file)
@@ -197,6 +197,24 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 
 #define ip_tunnel_collect_metadata() true
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+#define TUNNEL_NOCACHE 0
+
+static inline bool
+ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
+                          const struct ip_tunnel_info *info)
+{
+       if (skb->mark)
+               return false;
+       if (!info)
+               return true;
+       if (info->key.tun_flags & TUNNEL_NOCACHE)
+               return false;
+
+       return true;
+}
+#endif
+
 #define ip_tunnel rpl_ip_tunnel
 
 struct ip_tunnel {
index a6a5f30..8212d3a 100644 (file)
@@ -25,6 +25,7 @@ static inline void rpl_vxlan_cleanup_module(void)
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/dst_cache.h>
 #include <net/dst_metadata.h>
 
 #include "compat.h"
@@ -227,6 +228,7 @@ struct vxlan_rdst {
        u32                      remote_ifindex;
        struct list_head         list;
        struct rcu_head          rcu;
+       struct dst_cache         dst_cache;
 };
 
 struct vxlan_config {
index 5d05047..bb230cb 100644 (file)
@@ -907,11 +907,21 @@ out_free:
 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
                                      struct sk_buff *skb, int oif, u8 tos,
                                      __be32 daddr, __be32 *saddr,
+                                     struct dst_cache *dst_cache,
                                      const struct ip_tunnel_info *info)
 {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct rtable *rt = NULL;
        struct flowi4 fl4;
 
+       if (tos && !info)
+               use_cache = false;
+       if (use_cache) {
+               rt = dst_cache_get_ip4(dst_cache, saddr);
+               if (rt)
+                       return rt;
+       }
+
        memset(&fl4, 0, sizeof(fl4));
        fl4.flowi4_oif = oif;
        fl4.flowi4_tos = RT_TOS(tos);
@@ -923,6 +933,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
        rt = ip_route_output_key(vxlan->net, &fl4);
        if (!IS_ERR(rt)) {
                *saddr = fl4.saddr;
+               if (use_cache)
+                       dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
        }
        return rt;
 }
@@ -933,12 +945,22 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
                                          __be32 label,
                                          const struct in6_addr *daddr,
                                          struct in6_addr *saddr,
+                                         struct dst_cache *dst_cache,
                                          const struct ip_tunnel_info *info)
 {
+       bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct dst_entry *ndst;
        struct flowi6 fl6;
        int err;
 
+       if (tos && !info)
+               use_cache = false;
+       if (use_cache) {
+               ndst = dst_cache_get_ip6(dst_cache, saddr);
+               if (ndst)
+                       return ndst;
+       }
+
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_oif = oif;
        fl6.daddr = *daddr;
@@ -963,6 +985,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
                return ERR_PTR(err);
 
        *saddr = fl6.saddr;
+       if (use_cache)
+               dst_cache_set_ip6(dst_cache, ndst, saddr);
        return ndst;
 }
 #endif
@@ -978,6 +1002,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                           struct vxlan_rdst *rdst, bool did_rsc)
 {
+       struct dst_cache *dst_cache;
        struct ip_tunnel_info *info;
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct sock *sk;
@@ -1002,6 +1027,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
                vni = rdst->remote_vni;
                dst = &rdst->remote_ip;
+               dst_cache = &rdst->dst_cache;
        } else {
                if (!info) {
                        WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -1016,6 +1042,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                else
                        remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
                dst = &remote_ip;
+               dst_cache = &info->dst_cache;
        }
 
        if (vxlan_addr_any(dst)) {
@@ -1063,7 +1090,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                rt = vxlan_get_route(vxlan, skb,
                                     rdst ? rdst->remote_ifindex : 0, tos,
                                     dst->sin.sin_addr.s_addr, &saddr,
-                                    info);
+                                    dst_cache, info);
                if (IS_ERR(rt)) {
                        netdev_dbg(dev, "no route to %pI4\n",
                                   &dst->sin.sin_addr.s_addr);
@@ -1121,7 +1148,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                ndst = vxlan6_get_route(vxlan, skb,
                                        rdst ? rdst->remote_ifindex : 0, tos,
                                        label, &dst->sin6.sin6_addr, &saddr,
-                                       info);
+                                       dst_cache, info);
                if (IS_ERR(ndst)) {
                        netdev_dbg(dev, "no route to %pI6\n",
                                   &dst->sin6.sin6_addr);
@@ -1409,7 +1436,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
                        return -EINVAL;
                rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
                                     info->key.u.ipv4.dst,
-                                    &info->key.u.ipv4.src, info);
+                                    &info->key.u.ipv4.src, NULL, info);
                if (IS_ERR(rt))
                        return PTR_ERR(rt);
                ip_rt_put(rt);
@@ -1421,7 +1448,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
                        return -EINVAL;
                ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
                                        info->key.label, &info->key.u.ipv6.dst,
-                                       &info->key.u.ipv6.src, info);
+                                       &info->key.u.ipv6.src, NULL, info);
                if (IS_ERR(ndst))
                        return PTR_ERR(ndst);
                dst_release(ndst);