datapath: Support VXLAN Group Policy extension
author Thomas Graf <tgraf@noironetworks.com>
Fri, 6 Feb 2015 20:10:44 +0000 (21:10 +0100)
committer Thomas Graf <tgraf@noironetworks.com>
Fri, 6 Feb 2015 20:10:44 +0000 (21:10 +0100)
Upstream commit:
    openvswitch: Support VXLAN Group Policy extension

    Introduces support for the group policy extension to the VXLAN virtual
    port. The extension is disabled by default and only enabled if the user
    has provided the respective configuration.

      ovs-vsctl add-port br0 vxlan0 -- \
         set Interface vxlan0 type=vxlan options:exts=gbp

    The configuration interface to enable the extension is based on a new
    attribute OVS_VXLAN_EXT_GBP nested inside OVS_TUNNEL_ATTR_EXTENSION
    which can carry additional extensions as needed in the future.

    The group policy metadata is stored as a binary blob (struct
    ovs_vxlan_opts) internally, just like Geneve options, but is transported
    to user space as nested Netlink attributes.

    Renames the existing TUNNEL_OPTIONS_PRESENT to TUNNEL_GENEVE_OPT with the
    binary value kept intact, a new flag TUNNEL_VXLAN_OPT is introduced.

    The attributes OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and the existing
    OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS are implemented as mutually exclusive.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Upstream: 1dd144 ("openvswitch: Support VXLAN Group Policy extension")
Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
datapath/Modules.mk
datapath/flow_netlink.c
datapath/linux/compat/include/linux/openvswitch.h
datapath/linux/compat/include/net/ip_tunnels.h
datapath/vport-geneve.c
datapath/vport-vxlan.c
datapath/vport-vxlan.h [new file with mode: 0644]

index cca4887..33f9dd9 100644 (file)
@@ -30,7 +30,8 @@ openvswitch_headers = \
        vlan.h \
        vport.h \
        vport-internal_dev.h \
-       vport-netdev.h
+       vport-netdev.h \
+       vport-vxlan.h
 
 openvswitch_extras = \
        README.md
index 6cd5391..34edcfe 100644 (file)
@@ -49,6 +49,7 @@
 #include "datapath.h"
 #include "flow.h"
 #include "flow_netlink.h"
+#include "vport-vxlan.h"
 
 struct ovs_len_tbl {
        int len;
@@ -268,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
                + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+               /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+                * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
+                */
                + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
                + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
@@ -308,6 +312,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
        [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
        [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_NESTED },
+       [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED },
 };
 
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
@@ -459,6 +464,41 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
        return 0;
 }
 
+static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
+       [OVS_VXLAN_EXT_GBP]     = { .type = NLA_U32 }, /* u32 in tunnel key */
+};
+
+/* Parse a nested OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS attribute into @match.
+ *
+ * The nested OVS_VXLAN_EXT_* attributes are validated against
+ * vxlan_opt_policy and collected in a zero-initialised struct
+ * ovs_vxlan_opts, which is then copied into the match at the offset given
+ * by TUN_METADATA_OFFSET() (into the mask when @is_mask is true).  If
+ * OVS_VXLAN_EXT_GBP is absent the gbp field stays 0.
+ *
+ * Returns 0 on success or the negative error from nla_parse_nested().
+ * @log is not used here.
+ */
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
+                                    struct sw_flow_match *match, bool is_mask,
+                                    bool log)
+{
+       struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+       struct ovs_vxlan_opts opts;
+       unsigned long offset;
+       int err;
+
+       BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+       err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
+       if (err < 0)
+               return err;
+
+       memset(&opts, 0, sizeof(opts));
+       if (tb[OVS_VXLAN_EXT_GBP])
+               opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
+
+       /* The key stores the option length; the mask stores 0xff for the
+        * same field.
+        */
+       if (is_mask)
+               SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+       else
+               SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
+
+       offset = TUN_METADATA_OFFSET(sizeof(opts));
+       SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, &opts, sizeof(opts), is_mask);
+       return 0;
+}
+
 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
                                struct sw_flow_match *match, bool is_mask,
                                bool log)
@@ -467,6 +507,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
        int rem;
        bool ttl = false;
        __be16 tun_flags = 0;
+       int opts_type = 0;
 
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
@@ -526,11 +567,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
                        tun_flags |= TUNNEL_OAM;
                        break;
                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+                       if (opts_type) {
+                               OVS_NLERR(log, "Multiple metadata blocks provided");
+                               return -EINVAL;
+                       }
+
                        err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
                        if (err)
                                return err;
 
-                       tun_flags |= TUNNEL_OPTIONS_PRESENT;
+                       tun_flags |= TUNNEL_GENEVE_OPT;
+                       opts_type = type;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+                       if (opts_type) {
+                               OVS_NLERR(log, "Multiple metadata blocks provided");
+                               return -EINVAL;
+                       }
+
+                       err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
+                       if (err)
+                               return err;
+
+                       tun_flags |= TUNNEL_VXLAN_OPT;
+                       opts_type = type;
                        break;
                default:
                        OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
@@ -559,6 +619,23 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
                }
        }
 
+       return opts_type;
+}
+
+/* Emit the VXLAN tunnel options as a nested OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
+ * attribute that carries the group policy value as OVS_VXLAN_EXT_GBP.
+ *
+ * @tun_opts is read as a struct ovs_vxlan_opts.  @swkey_tun_opts_len is
+ * not consulted.
+ *
+ * Returns 0 on success or -EMSGSIZE if @skb has no room; in that case the
+ * partially written nest is removed from @skb again.
+ */
+static int vxlan_opt_to_nlattr(struct sk_buff *skb,
+                              const void *tun_opts, int swkey_tun_opts_len)
+{
+       const struct ovs_vxlan_opts *opts = tun_opts;
+       struct nlattr *nla;
+
+       nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+       if (!nla)
+               return -EMSGSIZE;
+
+       if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) {
+               /* Do not leave a half-built nest behind in the message. */
+               nla_nest_cancel(skb, nla);
+               return -EMSGSIZE;
+       }
+
+       nla_nest_end(skb, nla);
        return 0;
 }
 
@@ -595,10 +672,15 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
        if ((output->tun_flags & TUNNEL_OAM) &&
            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
                return -EMSGSIZE;
-       if (tun_opts &&
-           nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
-                   swkey_tun_opts_len, tun_opts))
-               return -EMSGSIZE;
+       if (tun_opts) {
+               if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+                   nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+                           swkey_tun_opts_len, tun_opts))
+                       return -EMSGSIZE;
+               else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+                       vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+                       return -EMSGSIZE;
+       }
 
        return 0;
 }
@@ -679,7 +761,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
        }
        if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
                if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
-                                        is_mask, log))
+                                        is_mask, log) < 0)
                        return -EINVAL;
                *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
        }
@@ -1588,17 +1670,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
        struct sw_flow_key key;
        struct ovs_tunnel_info *tun_info;
        struct nlattr *a;
-       int err, start;
+       int err, start, opts_type;
 
        ovs_match_init(&match, &key, NULL);
-       err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
-       if (err)
-               return err;
+       opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+       if (opts_type < 0)
+               return opts_type;
 
        if (key.tun_opts_len) {
-               err = validate_geneve_opts(&key);
-               if (err < 0)
-                       return err;
+               switch (opts_type) {
+               case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+                       err = validate_geneve_opts(&key);
+                       if (err < 0)
+                               return err;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+                       break;
+               }
        };
 
        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
index a59e109..f1094b1 100644 (file)
@@ -278,11 +278,20 @@ enum ovs_vport_attr {
 
 #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
 
+enum {
+       OVS_VXLAN_EXT_UNSPEC,
+       OVS_VXLAN_EXT_GBP,      /* Flag (vport option) or __u32 (tunnel key) */
+       __OVS_VXLAN_EXT_MAX,
+};
+
+#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
+
 /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
  */
 enum {
        OVS_TUNNEL_ATTR_UNSPEC,
        OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
+       OVS_TUNNEL_ATTR_EXTENSION,
        __OVS_TUNNEL_ATTR_MAX
 };
 
@@ -355,6 +364,7 @@ enum ovs_tunnel_key_attr {
        OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
        OVS_TUNNEL_KEY_ATTR_TP_SRC,             /* be16 src Transport Port. */
        OVS_TUNNEL_KEY_ATTR_TP_DST,             /* be16 dst Transport Port. */
+       OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,         /* Nested OVS_VXLAN_EXT_* */
        __OVS_TUNNEL_KEY_ATTR_MAX
 };
 
index 8fb9527..74c6a2c 100644 (file)
@@ -70,10 +70,22 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
 
 #endif
 
-/* Not yet upstream */
+#ifndef TUNNEL_OAM
 #define TUNNEL_OAM     __cpu_to_be16(0x0200)
 #define TUNNEL_CRIT_OPT        __cpu_to_be16(0x0400)
-#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
+#endif
+
+#ifndef TUNNEL_GENEVE_OPT
+#define TUNNEL_GENEVE_OPT      __cpu_to_be16(0x0800)
+#endif
+
+#ifndef TUNNEL_VXLAN_OPT
+#define TUNNEL_VXLAN_OPT       __cpu_to_be16(0x1000)
+#endif
+
+/* Older kernels defined TUNNEL_OPTIONS_PRESENT to GENEVE only */
+#undef TUNNEL_OPTIONS_PRESENT
+#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
 
 bool skb_is_encapsulated(struct sk_buff *skb);
 
index 6c6ec75..56306bb 100644 (file)
@@ -189,7 +189,7 @@ static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
 
        geneveh = geneve_hdr(skb);
 
-       flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+       flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
                (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
                (geneveh->oam ? TUNNEL_OAM : 0) |
                (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
@@ -432,6 +432,10 @@ static int geneve_send(struct vport *vport, struct sk_buff *skb)
 
        df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
 
+       /* NOTE: If geneve_xmit_skb() is backported, opts may only be passed
+        * in if TUNNEL_GENEVE_OPT is set, see upstream.
+        */
+
        sent_len = iptunnel_xmit(skb->sk, rt, skb,
                             saddr, tun_key->ipv4_dst,
                             IPPROTO_UDP, tun_key->ipv4_tos,
index 45ea911..3dcd69c 100644 (file)
@@ -41,6 +41,7 @@
 
 #include "datapath.h"
 #include "vport.h"
+#include "vport-vxlan.h"
 
 /**
  * struct vxlan_port - Keeps track of open UDP ports
@@ -50,6 +51,7 @@
 struct vxlan_port {
        struct vxlan_sock *vs;
        char name[IFNAMSIZ];
+       u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
 };
 
 static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
@@ -61,16 +63,26 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
                      struct vxlan_metadata *md)
 {
        struct ovs_tunnel_info tun_info;
+       struct vxlan_port *vxlan_port;
        struct vport *vport = vs->data;
        struct iphdr *iph;
+       struct ovs_vxlan_opts opts = {
+               .gbp = md->gbp,
+       };
        __be64 key;
+       __be16 flags;
+
+       flags = TUNNEL_KEY;
+       vxlan_port = vxlan_vport(vport);
+       if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)
+               flags |= TUNNEL_VXLAN_OPT;
 
        /* Save outer tunnel values */
        iph = ip_hdr(skb);
        key = cpu_to_be64(ntohl(md->vni) >> 8);
        ovs_flow_tun_info_init(&tun_info, iph,
                               udp_hdr(skb)->source, udp_hdr(skb)->dest,
-                              key, TUNNEL_KEY, NULL, 0);
+                              key, flags, &opts, sizeof(opts));
 
        ovs_vport_receive(vport, skb, &tun_info);
 }
@@ -82,6 +94,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
 
        if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
                return -EMSGSIZE;
+
+       if (vxlan_port->exts) {
+               struct nlattr *exts;
+
+               exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+               if (!exts)
+                       return -EMSGSIZE;
+
+               if (vxlan_port->exts & VXLAN_F_GBP &&
+                   nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
+                       return -EMSGSIZE;
+
+               nla_nest_end(skb, exts);
+       }
+
        return 0;
 }
 
@@ -94,6 +121,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
        ovs_vport_deferred_free(vport);
 }
 
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+       [OVS_VXLAN_EXT_GBP]     = { .type = NLA_FLAG, }, /* presence enables GBP */
+};
+
+/* Apply the extensions listed in a nested OVS_TUNNEL_ATTR_EXTENSION
+ * attribute to @vport.
+ *
+ * The nested attributes are validated against exts_policy; when
+ * OVS_VXLAN_EXT_GBP is present, VXLAN_F_GBP is set in the port's exts
+ * field.  Bits that are already set are left untouched.
+ *
+ * Returns 0 on success, -EINVAL if @attr's payload is shorter than one
+ * attribute header, or the negative error from nla_parse_nested().
+ */
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+{
+       struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+       struct vxlan_port *port;
+       int err;
+
+       if (nla_len(attr) < sizeof(struct nlattr))
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+       if (err < 0)
+               return err;
+
+       port = vxlan_vport(vport);
+       if (tb[OVS_VXLAN_EXT_GBP])
+               port->exts |= VXLAN_F_GBP;
+
+       return 0;
+}
+
 static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 {
        struct net *net = ovs_dp_get_net(parms->dp);
@@ -126,7 +178,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
        vxlan_port = vxlan_vport(vport);
        strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 
-       vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+       a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
+       if (a) {
+               err = vxlan_configure_exts(vport, a);
+               if (err) {
+                       ovs_vport_free(vport);
+                       goto error;
+               }
+       }
+
+       vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
+                           vxlan_port->exts);
        if (IS_ERR(vs)) {
                ovs_vport_free(vport);
                return (void *)vs;
@@ -139,6 +201,21 @@ error:
        return ERR_PTR(err);
 }
 
+/* Return the group policy value to transmit with @skb.
+ *
+ * The value comes from the options of the skb's egress tunnel info, but
+ * only when TUNNEL_VXLAN_OPT is set in the tunnel flags and the options
+ * are at least as large as struct ovs_vxlan_opts; otherwise 0 is returned.
+ */
+static int vxlan_ext_gbp(struct sk_buff *skb)
+{
+       const struct ovs_tunnel_info *info = OVS_CB(skb)->egress_tun_info;
+       const struct ovs_vxlan_opts *vxlan_opts = info->options;
+
+       if (!(info->tunnel.tun_flags & TUNNEL_VXLAN_OPT))
+               return 0;
+
+       if (info->options_len < sizeof(*vxlan_opts))
+               return 0;
+
+       return vxlan_opts->gbp;
+}
+
 static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
        struct ovs_key_ipv4_tunnel *tun_key;
@@ -175,6 +252,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 
        src_port = udp_flow_src_port(net, skb, 0, 0, true);
        md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
+       md.gbp = vxlan_ext_gbp(skb);
 
        err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
                             saddr, tun_key->ipv4_dst,
diff --git a/datapath/vport-vxlan.h b/datapath/vport-vxlan.h
new file mode 100644 (file)
index 0000000..4b08233
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef VPORT_VXLAN_H
+#define VPORT_VXLAN_H 1
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct ovs_vxlan_opts {
+       __u32 gbp;      /* Group policy value (OVS_VXLAN_EXT_GBP) */
+};
+
+#endif