tunneling: Add userspace tunnel support for Geneve.
author Jesse Gross <jesse@nicira.com>
Thu, 26 Mar 2015 20:51:06 +0000 (13:51 -0700)
committer Jesse Gross <jesse@nicira.com>
Tue, 7 Apr 2015 23:51:43 +0000 (16:51 -0700)
This adds basic userspace dataplane support for the Geneve
tunneling protocol. The rest of userspace only has the ability
to handle Geneve without options and this follows that pattern
for the time being. However, when the rest of userspace is updated
it should be easy to extend the dataplane as well.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
NEWS
lib/netdev-vport.c
lib/odp-util.c
lib/packets.h
tests/odp.at
tests/tunnel-push-pop.at

diff --git a/NEWS b/NEWS
index 9f9dc4c..87460a7 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -62,8 +62,8 @@ Post-v2.3.0
    - A simple wrapper script, 'ovs-docker', to integrate OVS with Docker
      containers. If and when there is a native integration of Open vSwitch
      with Docker, the wrapper script will be retired.
-   - Added support for DPDK Tunneling. VXLAN and GRE are supported protocols.
-     This is generic tunneling mechanism for userspace datapath.
+   - Added support for DPDK Tunneling. VXLAN, GRE, and Geneve are supported
+     protocols. This is a generic tunneling mechanism for userspace datapath.
    - Support for multicast snooping (IGMPv1 and IGMPv2)
    - Support for Linux kernels up to 3.19.x
    - The documentation now use the term 'destination' to mean one of syslog,
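diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 4d0c3fb..ed407dc 100644 (file)
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c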
@@ -61,6 +61,11 @@ static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
                       sizeof(struct udp_header) +         \
                       sizeof(struct vxlanhdr))
 
+#define GENEVE_BASE_HLEN   (sizeof(struct eth_header) +         \
+                            sizeof(struct ip_header)  +         \
+                            sizeof(struct udp_header) +         \
+                            sizeof(struct genevehdr))
+
 #define DEFAULT_TTL 64
 
 struct netdev_vport {
@@ -1203,6 +1208,112 @@ netdev_vxlan_push_header(const struct netdev *netdev OVS_UNUSED,
     return 0;
 }
 
+static void
+geneve_extract_md(struct dp_packet *packet)
+{
+    struct pkt_metadata *md = &packet->md;
+    struct flow_tnl *tnl = &md->tunnel;
+    struct genevehdr *gnh;
+    unsigned int hlen;
+
+    memset(md, 0, sizeof *md);
+    if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
+        VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
+                     (unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
+        return;
+    }
+
+    gnh = udp_extract_tnl_md(packet, tnl);
+    if (!gnh) {
+        return;
+    }
+
+    hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
+    if (hlen > dp_packet_size(packet)) {
+        VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
+                     hlen, dp_packet_size(packet));
+        reset_tnl_md(md);
+        return;
+    }
+
+    if (gnh->ver != 0) {
+        VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
+        reset_tnl_md(md);
+        return;
+    }
+
+    if (gnh->opt_len && gnh->critical) {
+        VLOG_WARN_RL(&err_rl, "unknown geneve critical options: %"PRIu8" bytes\n",
+                     gnh->opt_len * 4);
+        reset_tnl_md(md);
+        return;
+    }
+
+    if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
+        VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
+                     ntohs(gnh->proto_type));
+        reset_tnl_md(md);
+        return;
+    }
+
+    tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
+    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
+    tnl->flags |= FLOW_TNL_F_KEY;
+
+    dp_packet_reset_packet(packet, hlen);
+}
+
+static int
+netdev_geneve_pop_header(struct netdev *netdev_ OVS_UNUSED,
+                         struct dp_packet **pkt, int cnt)
+{
+    int i;
+
+    for (i = 0; i < cnt; i++) {
+        geneve_extract_md(pkt[i]);
+    }
+    return 0;
+}
+
+static int
+netdev_geneve_build_header(const struct netdev *netdev,
+                           struct ovs_action_push_tnl *data,
+                           const struct flow *tnl_flow)
+{
+    struct netdev_vport *dev = netdev_vport_cast(netdev);
+    struct netdev_tunnel_config *tnl_cfg;
+    struct genevehdr *gnh;
+
+    /* XXX: RCUfy tnl_cfg. */
+    ovs_mutex_lock(&dev->mutex);
+    tnl_cfg = &dev->tnl_cfg;
+
+    gnh = udp_build_header(tnl_cfg, data);
+
+    gnh->oam = !!(tnl_flow->tunnel.flags & FLOW_TNL_F_OAM);
+    gnh->proto_type = htons(ETH_TYPE_TEB);
+    put_16aligned_be32(&gnh->vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
+
+    ovs_mutex_unlock(&dev->mutex);
+    data->header_len = GENEVE_BASE_HLEN;
+    data->tnl_type = OVS_VPORT_TYPE_GENEVE;
+    return 0;
+}
+
+static int
+netdev_geneve_push_header(const struct netdev *netdev OVS_UNUSED,
+                          struct dp_packet **packets, int cnt,
+                          const struct ovs_action_push_tnl *data)
+{
+    int i;
+
+    for (i = 0; i < cnt; i++) {
+        push_udp_header(packets[i], data->header, data->header_len);
+        packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
+    }
+    return 0;
+}
+
 static void
 netdev_vport_range(struct unixctl_conn *conn, int argc,
                    const char *argv[], void *aux OVS_UNUSED)
@@ -1332,7 +1443,9 @@ netdev_vport_tunnel_register(void)
     /* The name of the dpif_port should be short enough to accomodate adding
      * a port number to the end if one is necessary. */
     static const struct vport_class vport_classes[] = {
-        TUNNEL_CLASS("geneve", "genev_sys", NULL, NULL, NULL),
+        TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
+                                            netdev_geneve_push_header,
+                                            netdev_geneve_pop_header),
         TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
                                        netdev_gre_push_header,
                                        netdev_gre_pop_header),
diff --git a/lib/odp-util.c b/lib/odp-util.c
index e944c85..4e6a06d 100644 (file)
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -559,6 +559,14 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
         ds_put_format(ds, "vxlan(flags=0x%"PRIx32",vni=0x%"PRIx32")",
                       ntohl(get_16aligned_be32(&vxh->vx_flags)),
                       ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
+    } else if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
+        const struct genevehdr *gnh;
+
+        gnh = format_udp_tnl_push_header(ds, ip);
+
+        ds_put_format(ds, "geneve(%svni=0x%"PRIx32")",
+                      gnh->oam ? "oam," : "",
+                      ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
     } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
         const struct gre_base_hdr *greh;
         ovs_16aligned_be32 *options;
@@ -893,7 +901,7 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
     greh = (struct gre_base_hdr *) l4;
     if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16"),",
                          &udp_src, &udp_dst)) {
-        uint32_t vx_flags, vx_vni;
+        uint32_t vx_flags, vni;
 
         udp->udp_src = htons(udp_src);
         udp->udp_dst = htons(udp_dst);
@@ -901,14 +909,28 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
         udp->udp_csum = 0;
 
         if (ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))",
-                            &vx_flags, &vx_vni)) {
+                            &vx_flags, &vni)) {
             struct vxlanhdr *vxh = (struct vxlanhdr *) (udp + 1);
 
             put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags));
-            put_16aligned_be32(&vxh->vx_vni, htonl(vx_vni << 8));
+            put_16aligned_be32(&vxh->vx_vni, htonl(vni << 8));
             tnl_type = OVS_VPORT_TYPE_VXLAN;
             header_len = sizeof *eth + sizeof *ip +
                          sizeof *udp + sizeof *vxh;
+        } else if (ovs_scan_len(s, &n, "geneve(")) {
+            struct genevehdr *gnh = (struct genevehdr *) (udp + 1);
+
+            if (ovs_scan_len(s, &n, "oam,")) {
+                gnh->oam = 1;
+            }
+            if (!ovs_scan_len(s, &n, "vni=0x%"SCNx32"))", &vni)) {
+                return -EINVAL;
+            }
+            gnh->proto_type = htons(ETH_TYPE_TEB);
+            put_16aligned_be32(&gnh->vni, htonl(vni << 8));
+            tnl_type = OVS_VPORT_TYPE_GENEVE;
+            header_len = sizeof *eth + sizeof *ip +
+                         sizeof *udp + sizeof *gnh;
         } else {
             return -EINVAL;
         }
diff --git a/lib/packets.h b/lib/packets.h
index e80de6b..2bbe6d9 100644 (file)
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -733,6 +733,25 @@ struct geneve_opt {
     uint8_t   opt_data[];
 };
 
+struct genevehdr {
+#ifdef WORDS_BIGENDIAN
+    uint8_t ver:2;
+    uint8_t opt_len:6;
+    uint8_t oam:1;
+    uint8_t critical:1;
+    uint8_t rsvd1:6;
+#else
+    uint8_t opt_len:6;
+    uint8_t ver:2;
+    uint8_t rsvd1:6;
+    uint8_t critical:1;
+    uint8_t oam:1;
+#endif
+    ovs_be16 proto_type;
+    ovs_16aligned_be32 vni;
+    struct geneve_opt options[];
+};
+
 /* GRE protocol header */
 struct gre_base_hdr {
     ovs_be16 flags;
diff --git a/tests/odp.at b/tests/odp.at
index bc68d35..7fda449 100644 (file)
--- a/tests/odp.at
+++ b/tests/odp.at
@@ -282,6 +282,7 @@ tnl_pop(4)
 tnl_push(tnl_port(4),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0x20,proto=0x6558),key=0x1e241)),out_port(1))
 tnl_push(tnl_port(4),header(size=46,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0xa0,proto=0x6558),csum=0x0,key=0x1e241)),out_port(1))
 tnl_push(tnl_port(6),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=4789),vxlan(flags=0x8000000,vni=0x1c7)),out_port(1))
+tnl_push(tnl_port(6),header(size=50,type=5,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=6081),geneve(oam,vni=0x1c7)),out_port(1))
 ])
 AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0],
   [`cat actions.txt`
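diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index 3c3f5d8..77dde69 100644 (file)
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at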
@@ -10,6 +10,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \
                        options:remote_ip=1.1.2.92 options:key=456 ofport_request=3\
                     -- add-port int-br t3 -- set Interface t3 type=vxlan \
                        options:remote_ip=1.1.2.93 options:out_key=flow ofport_request=4\
+                    -- add-port int-br t4 -- set Interface t4 type=geneve \
+                       options:remote_ip=1.1.2.92 options:key=123 ofport_request=5\
                        ], [0])
 
 AT_CHECK([ovs-appctl dpif/show], [0], [dnl
@@ -22,6 +24,7 @@ dummy@ovs-dummy: hit:0 missed:0
                t1 3/3: (gre: key=456, remote_ip=1.1.2.92)
                t2 2/4789: (vxlan: key=123, remote_ip=1.1.2.92)
                t3 4/4789: (vxlan: out_key=flow, remote_ip=1.1.2.93)
+               t4 5/6081: (geneve: key=123, remote_ip=1.1.2.92)
 ])
 
 AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
@@ -54,6 +57,12 @@ AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: tnl_pop(3)
 ])
 
+dnl Check Geneve tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=51283,dst=6081)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_pop(6081)
+])
+
 dnl Check VXLAN tunnel push
 AT_CHECK([ovs-ofctl add-flow int-br action=2])
 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
@@ -75,6 +84,13 @@ AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0x20,proto=0x6558),key=0x1c8)),out_port(100))
 ])
 
+dnl Check Geneve tunnel push: base header only (size=50), VNI 0x7b from t4's key=123
+AT_CHECK([ovs-ofctl add-flow int-br action=5])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_push(tnl_port(6081),header(size=50,type=5,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=6081),geneve(vni=0x7b)),out_port(100))
+])
+
 dnl Check decapsulation of GRE packet
 AT_CHECK([ovs-appctl netdev-dummy/receive p0 '001b213cac30001b213cab6408004500007e79464000402f99080101025c0101025820006558000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
 ovs-appctl time/warp 1000