datapath: backport: ip_tunnel: add support for setting flow label via collect metadata
author Pravin B Shelar <pshelar@ovn.org>
Fri, 8 Jul 2016 04:49:10 +0000 (21:49 -0700)
committer Pravin B Shelar <pshelar@ovn.org>
Sat, 9 Jul 2016 02:27:49 +0000 (19:27 -0700)
Update udp_tunnel6_xmit_skb(). Specifically, the changes are
related to setting the IPv6 flow label.

Upstream commit:
    commit 134611446dc657e1bbc73ca0e4e6b599df687db0
    Author: Daniel Borkmann <daniel@iogearbox.net>

    ip_tunnel: add support for setting flow label via collect metadata

    This patch extends udp_tunnel6_xmit_skb() to pass in the IPv6 flow label
    from call sites. Currently, there's no such option and it's always set to
    zero when writing ip6_flow_hdr(). Add a label member to ip_tunnel_key, so
    that flow-based tunnels via collect metadata frontends can make use of it.
    vxlan and geneve will be converted to add flow label support separately.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>
acinclude.m4
datapath/linux/compat/include/net/dst_metadata.h
datapath/linux/compat/include/net/ip_tunnels.h
datapath/linux/compat/include/net/ipv6.h
datapath/linux/compat/include/net/udp_tunnel.h
datapath/linux/compat/include/net/vxlan.h
datapath/linux/compat/udp_tunnel.c
datapath/linux/compat/vxlan.c

index 263c31d..a596266 100644 (file)
@@ -432,8 +432,11 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
   OVS_GREP_IFELSE([$KSRC/include/net/inetpeer.h], [vif],
                   [OVS_DEFINE([HAVE_INETPEER_VIF_SUPPORT])])
 
-  OVS_GREP_IFELSE([$KSRC/include/net/ip_tunnels.h], [iptunnel_pull_offloads],
-                  [OVS_DEFINE([HAVE_METADATA_DST])])
+  OVS_FIND_FIELD_IFELSE([$KSRC/include/net/ip_tunnels.h], [ip_tunnel_key],
+                        [label],
+                        [OVS_GREP_IFELSE([$KSRC/include/net/ip_tunnels.h],
+                                         [iptunnel_pull_offloads],
+                                         [OVS_DEFINE([HAVE_METADATA_DST])])])
 
   OVS_GREP_IFELSE([$KSRC/include/linux/net.h], [sock_create_kern.*net],
                   [OVS_DEFINE([HAVE_SOCK_CREATE_KERN_NET])])
@@ -610,8 +613,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
   OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_v4_check])
   OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_set_csum])
   OVS_GREP_IFELSE([$KSRC/include/net/udp_tunnel.h], [udp_tunnel_gro_complete])
-  OVS_GREP_IFELSE([$KSRC/include/net/udp_tunnel.h], [ipv6_v6only],
-                  [OVS_DEFINE([HAVE_UDP_TUNNEL_IPV6])])
 
   OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [ignore_df],
                   [OVS_DEFINE([HAVE_IGNORE_DF_RENAME])])
index 90230c4..310cfa6 100644 (file)
@@ -73,7 +73,7 @@ static inline void ovs_ip_tun_rx_dst(struct ip_tunnel_info *tun_info,
 
        ovs_tun_rx_dst(tun_info, md_size);
        ip_tunnel_key_init(&tun_info->key,
-                          iph->saddr, iph->daddr, iph->tos, iph->ttl,
+                          iph->saddr, iph->daddr, iph->tos, iph->ttl, 0,
                           0, 0, tunnel_id, flags);
 }
 
@@ -97,6 +97,7 @@ static inline void ovs_ipv6_tun_rx_dst(struct ip_tunnel_info *info,
 
        info->key.tos = ipv6_get_dsfield(ip6h);
        info->key.ttl = ip6h->hop_limit;
+       info->key.label = ip6_flowlabel(ip6h);
 }
 
 void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb,
index b9fa76f..7ee0875 100644 (file)
@@ -126,6 +126,7 @@ struct ip_tunnel_key {
        __be16                  tun_flags;
        u8                      tos;            /* TOS for IPv4, TC for IPv6 */
        u8                      ttl;            /* TTL for IPv4, HL for IPv6 */
+       __be32                  label;          /* Flow Label for IPv6 */
        __be16                  tp_src;
        __be16                  tp_dst;
 };
@@ -165,7 +166,7 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 
 static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
                                      __be32 saddr, __be32 daddr,
-                                     u8 tos, u8 ttl,
+                                     u8 tos, u8 ttl, __be32 label,
                                      __be16 tp_src, __be16 tp_dst,
                                      __be64 tun_id, __be16 tun_flags)
 {
@@ -176,6 +177,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
               0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
        key->tos = tos;
        key->ttl = ttl;
+       key->label = label;
        key->tun_flags = tun_flags;
 
        /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
index ac1564b..dbb66e1 100644 (file)
@@ -54,4 +54,27 @@ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 unused)
 }
 #endif
 
+#define ip6_flowlabel rpl_ip6_flowlabel
+static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
+{
+       return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
+}
+
+#ifndef IPV6_TCLASS_SHIFT
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT      20
+#endif
+
+#define ip6_tclass rpl_ip6_tclass
+static inline u8 ip6_tclass(__be32 flowinfo)
+{
+       return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
+}
+
+#define ip6_make_flowinfo rpl_ip6_make_flowinfo
+static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
+{
+       return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
+}
+
 #endif
index e027508..a50555f 100644 (file)
@@ -81,7 +81,7 @@ int rpl_udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
                         struct sk_buff *skb,
                         struct net_device *dev, struct in6_addr *saddr,
                         struct in6_addr *daddr,
-                        __u8 prio, __u8 ttl, __be16 src_port,
+                        __u8 prio, __u8 ttl, __be32 label, __be16 src_port,
                         __be16 dst_port, bool nocheck);
 #endif
 
index 589e6f2..fa64e38 100644 (file)
@@ -167,6 +167,7 @@ struct vxlan_config {
        __u16                   port_max;
        __u8                    tos;
        __u8                    ttl;
+       __be32                  label;
        u32                     flags;
        unsigned long           age_interval;
        unsigned int            addrmax;
index f72e645..af606a9 100644 (file)
@@ -228,7 +228,7 @@ int rpl_udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
                         struct sk_buff *skb,
                         struct net_device *dev, struct in6_addr *saddr,
                         struct in6_addr *daddr,
-                        __u8 prio, __u8 ttl, __be16 src_port,
+                        __u8 prio, __u8 ttl, __be32 label, __be16 src_port,
                         __be16 dst_port, bool nocheck)
 {
        struct udphdr *uh;
@@ -253,7 +253,7 @@ int rpl_udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
        __skb_push(skb, sizeof(*ip6h));
        skb_reset_network_header(skb);
        ip6h              = ipv6_hdr(skb);
-       ip6_flow_hdr(ip6h, prio, htonl(0));
+       ip6_flow_hdr(ip6h, prio, label);
        ip6h->payload_len = htons(skb->len);
        ip6h->nexthdr     = IPPROTO_UDP;
        ip6h->hop_limit   = ttl;
index 4e65b43..86ab5a4 100644 (file)
@@ -1026,7 +1026,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
 static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
                           struct sk_buff *skb,
                           struct net_device *dev, struct in6_addr *saddr,
-                          struct in6_addr *daddr, __u8 prio, __u8 ttl,
+                          struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label,
                           __be16 src_port, __be16 dst_port, __be32 vni,
                           struct vxlan_metadata *md, bool xnet, u32 vxflags)
 {
@@ -1112,7 +1112,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
        ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
        udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
-                            ttl, src_port, dst_port,
+                            ttl, label, src_port, dst_port,
                             !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
        return 0;
 err:
@@ -1220,7 +1220,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        struct vxlan_metadata _md;
        struct vxlan_metadata *md = &_md;
        __be16 src_port = 0, dst_port;
-       u32 vni;
+       u32 vni, label;
        __be16 df = 0;
        __u8 tos, ttl;
        int err;
@@ -1270,6 +1270,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        if (tos == 1)
                tos = ip_tunnel_get_dsfield(old_iph, skb);
 
+       label = vxlan->cfg.label;
        src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
                                     vxlan->cfg.port_max, true);
 
@@ -1281,6 +1282,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
                ttl = info->key.ttl;
                tos = info->key.tos;
+               label = info->key.label;
 
                if (info->options_len)
                        md = ip_tunnel_info_opts(info);
@@ -1357,6 +1359,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
                fl6.flowi6_mark = skb->mark;
                fl6.flowi6_proto = IPPROTO_UDP;
+               fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 
 #ifdef HAVE_IPV6_DST_LOOKUP_NET
                if (ipv6_stub->ipv6_dst_lookup(vxlan->net, sk, &ndst, &fl6)) {
@@ -1401,7 +1404,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
-                                     0, ttl, src_port, dst_port, htonl(vni << 8), md,
+                                     0, ttl, label, src_port, dst_port, htonl(vni << 8), md,
                                      !net_eq(vxlan->net, dev_net(vxlan->dev)),
                                      flags);
 #endif