mcast-snooping: Add Multicast Listener Discovery support
[cascardo/ovs.git] / ofproto / ofproto-dpif-xlate.c
index 59cd088..5417087 100644 (file)
@@ -159,6 +159,9 @@ struct xlate_ctx {
 
     const struct xbridge *xbridge;
 
+    /* Flow tables version at the beginning of the translation. */
+    cls_version_t tables_version;
+
     /* Flow at the last commit. */
     struct flow base_flow;
 
@@ -433,7 +436,8 @@ static bool may_receive(const struct xport *, struct xlate_ctx *);
 static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
                              struct xlate_ctx *);
 static void xlate_normal(struct xlate_ctx *);
-static inline void xlate_report(struct xlate_ctx *, const char *);
+static inline void xlate_report(struct xlate_ctx *, const char *, ...)
+    OVS_PRINTF_FORMAT(2, 3);
 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
                                uint8_t table_id, bool may_packet_in,
                                bool honor_table_miss);
@@ -502,10 +506,14 @@ static void xlate_xport_copy(struct xbridge *, struct xbundle *,
 static void xlate_xcfg_free(struct xlate_cfg *);
 
 static inline void
-xlate_report(struct xlate_ctx *ctx, const char *s)
+xlate_report(struct xlate_ctx *ctx, const char *format, ...)
 {
     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
-        ctx->xin->report_hook(ctx->xin, s, ctx->recurse);
+        va_list args;
+
+        va_start(args, format);
+        ctx->xin->report_hook(ctx->xin, ctx->recurse, format, args);
+        va_end(args);
     }
 }
 
@@ -1990,26 +1998,28 @@ update_learning_table(const struct xbridge *xbridge,
 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
  * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
 static void
-update_mcast_snooping_table__(const struct xbridge *xbridge,
-                              const struct flow *flow,
-                              struct mcast_snooping *ms,
-                              ovs_be32 ip4, int vlan,
-                              struct xbundle *in_xbundle)
+update_mcast_snooping_table4__(const struct xbridge *xbridge,
+                               const struct flow *flow,
+                               struct mcast_snooping *ms, int vlan,
+                               struct xbundle *in_xbundle,
+                               const struct dp_packet *packet)
     OVS_REQ_WRLOCK(ms->rwlock)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
+    int count;
+    ovs_be32 ip4 = flow->igmp_group_ip4;
 
     switch (ntohs(flow->tp_src)) {
     case IGMP_HOST_MEMBERSHIP_REPORT:
     case IGMPV2_HOST_MEMBERSHIP_REPORT:
-        if (mcast_snooping_add_group(ms, ip4, vlan, in_xbundle->ofbundle)) {
+        if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
                         IP_FMT" is on port %s in VLAN %d",
                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
         }
         break;
     case IGMP_HOST_LEAVE_MESSAGE:
-        if (mcast_snooping_leave_group(ms, ip4, vlan, in_xbundle->ofbundle)) {
+        if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
                         IP_FMT" is on port %s in VLAN %d",
                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
@@ -2024,6 +2034,47 @@ update_mcast_snooping_table__(const struct xbridge *xbridge,
                         in_xbundle->name, vlan);
         }
         break;
+    case IGMPV3_HOST_MEMBERSHIP_REPORT:
+        if ((count = mcast_snooping_add_report(ms, packet, vlan,
+                                               in_xbundle->ofbundle))) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
+                        "addresses on port %s in VLAN %d",
+                        xbridge->name, count, in_xbundle->name, vlan);
+        }
+        break;
+    }
+}
+
+static void
+update_mcast_snooping_table6__(const struct xbridge *xbridge,
+                               const struct flow *flow,
+                               struct mcast_snooping *ms, int vlan,
+                               struct xbundle *in_xbundle,
+                               const struct dp_packet *packet)
+    OVS_REQ_WRLOCK(ms->rwlock)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
+    int count;
+
+    switch (ntohs(flow->tp_src)) {
+    case MLD_QUERY:
+        if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
+            && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
+                        "in VLAN %d",
+                        xbridge->name, in_xbundle->name, vlan);
+        }
+        break;
+    case MLD_REPORT:
+    case MLD_DONE:
+    case MLD2_REPORT:
+        count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
+        if (count) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
+                        "addresses on port %s in VLAN %d",
+                        xbridge->name, count, in_xbundle->name, vlan);
+        }
+        break;
     }
 }
 
@@ -2032,7 +2083,8 @@ update_mcast_snooping_table__(const struct xbridge *xbridge,
 static void
 update_mcast_snooping_table(const struct xbridge *xbridge,
                             const struct flow *flow, int vlan,
-                            struct xbundle *in_xbundle)
+                            struct xbundle *in_xbundle,
+                            const struct dp_packet *packet)
 {
     struct mcast_snooping *ms = xbridge->ms;
     struct xlate_cfg *xcfg;
@@ -2056,8 +2108,13 @@ update_mcast_snooping_table(const struct xbridge *xbridge,
     }
 
     if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
-        update_mcast_snooping_table__(xbridge, flow, ms, flow->igmp_group_ip4,
-                                      vlan, in_xbundle);
+        if (flow->dl_type == htons(ETH_TYPE_IP)) {
+            update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
+                                           in_xbundle, packet);
+        } else {
+            update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
+                                           in_xbundle, packet);
+        }
     }
     ovs_rwlock_unlock(&ms->rwlock);
 }
@@ -2261,17 +2318,23 @@ xlate_normal(struct xlate_ctx *ctx)
     if (mcast_snooping_enabled(ctx->xbridge->ms)
         && !eth_addr_is_broadcast(flow->dl_dst)
         && eth_addr_is_multicast(flow->dl_dst)
-        && flow->dl_type == htons(ETH_TYPE_IP)) {
+        && is_ip_any(flow)) {
         struct mcast_snooping *ms = ctx->xbridge->ms;
-        struct mcast_group *grp;
+        struct mcast_group *grp = NULL;
 
-        if (flow->nw_proto == IPPROTO_IGMP) {
-            if (ctx->xin->may_learn) {
-                if (mcast_snooping_is_membership(flow->tp_src) ||
-                    mcast_snooping_is_query(flow->tp_src)) {
+        if (is_igmp(flow)) {
+            if (mcast_snooping_is_membership(flow->tp_src) ||
+                mcast_snooping_is_query(flow->tp_src)) {
+                if (ctx->xin->may_learn) {
                     update_mcast_snooping_table(ctx->xbridge, flow, vlan,
-                                                in_xbundle);
-                    }
+                                                in_xbundle, ctx->xin->packet);
+                }
+                /*
+                 * IGMP packets need to take the slow path, in order to be
+                 * processed for mdb updates. That will prevent expires
+                 * firing off even after hosts have sent reports.
+                 */
+                ctx->xout->slow |= SLOW_ACTION;
             }
 
             if (mcast_snooping_is_membership(flow->tp_src)) {
@@ -2292,8 +2355,26 @@ xlate_normal(struct xlate_ctx *ctx)
                 xlate_normal_flood(ctx, in_xbundle, vlan);
             }
             return;
+        } else if (is_mld(flow)) {
+            ctx->xout->slow |= SLOW_ACTION;
+            if (ctx->xin->may_learn) {
+                update_mcast_snooping_table(ctx->xbridge, flow, vlan,
+                                            in_xbundle, ctx->xin->packet);
+            }
+            if (is_mld_report(flow)) {
+                ovs_rwlock_rdlock(&ms->rwlock);
+                xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
+                xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
+                ovs_rwlock_unlock(&ms->rwlock);
+            } else {
+                xlate_report(ctx, "MLD query, flooding");
+                xlate_normal_flood(ctx, in_xbundle, vlan);
+            }
         } else {
-            if (ip_is_local_multicast(flow->nw_dst)) {
+            if ((flow->dl_type == htons(ETH_TYPE_IP)
+                 && ip_is_local_multicast(flow->nw_dst))
+                || (flow->dl_type == htons(ETH_TYPE_IPV6)
+                    && ipv6_is_all_hosts(&flow->ipv6_dst))) {
                 /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
                  * address in the 224.0.0.x range which are not IGMP must
                  * be forwarded on all ports */
@@ -2305,7 +2386,11 @@ xlate_normal(struct xlate_ctx *ctx)
 
         /* forwarding to group base ports */
         ovs_rwlock_rdlock(&ms->rwlock);
-        grp = mcast_snooping_lookup(ms, flow->nw_dst, vlan);
+        if (flow->dl_type == htons(ETH_TYPE_IP)) {
+            grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
+        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
+            grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
+        }
         if (grp) {
             xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
             xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
@@ -2656,14 +2741,15 @@ tnl_send_arp_request(const struct xport *out_dev, const uint8_t eth_src[ETH_ADDR
     struct dp_packet packet;
 
     dp_packet_init(&packet, 0);
-    compose_arp(&packet, eth_src, ip_src, ip_dst);
+    compose_arp(&packet, ARP_OP_REQUEST,
+                eth_src, eth_addr_zero, true, ip_src, ip_dst);
 
     xlate_flood_packet(xbridge, &packet);
     dp_packet_uninit(&packet);
 }
 
 static int
-build_tunnel_send(const struct xlate_ctx *ctx, const struct xport *xport,
+build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
                   const struct flow *flow, odp_port_t tunnel_odp_port)
 {
     struct ovs_action_push_tnl tnl_push_data;
@@ -2675,22 +2761,30 @@ build_tunnel_send(const struct xlate_ctx *ctx, const struct xport *xport,
 
     err = tnl_route_lookup_flow(flow, &d_ip, &out_dev);
     if (err) {
+        xlate_report(ctx, "native tunnel routing failed");
         return err;
     }
+    xlate_report(ctx, "tunneling to "IP_FMT" via %s",
+                 IP_ARGS(d_ip), netdev_get_name(out_dev->netdev));
 
     /* Use mac addr of bridge port of the peer. */
     err = netdev_get_etheraddr(out_dev->netdev, smac);
     if (err) {
+        xlate_report(ctx, "tunnel output device lacks Ethernet address");
         return err;
     }
 
     err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
     if (err) {
+        xlate_report(ctx, "tunnel output device lacks IPv4 address");
         return err;
     }
 
     err = tnl_arp_lookup(out_dev->xbridge->name, d_ip, dmac);
     if (err) {
+        xlate_report(ctx, "ARP cache miss for "IP_FMT" on bridge %s, "
+                     "sending ARP request",
+                     IP_ARGS(d_ip), out_dev->xbridge->name);
         tnl_send_arp_request(out_dev, smac, s_ip, d_ip);
         return err;
     }
@@ -2702,6 +2796,11 @@ build_tunnel_send(const struct xlate_ctx *ctx, const struct xport *xport,
                     sizeof entry->u.tnl_arp_cache.br_name);
         entry->u.tnl_arp_cache.d_ip = d_ip;
     }
+
+    xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" "IP_FMT
+                 " to "ETH_ADDR_FMT" "IP_FMT,
+                 ETH_ADDR_ARGS(smac), IP_ARGS(s_ip),
+                 ETH_ADDR_ARGS(dmac), IP_ARGS(d_ip));
     err = tnl_port_build_header(xport->ofport, flow,
                                 dmac, smac, s_ip, &tnl_push_data);
     if (err) {
@@ -2730,7 +2829,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
 
     /* If 'struct flow' gets additional metadata, we'll need to zero it out
      * before traversing a patch port. */
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 31);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 32);
     memset(&flow_tnl, 0, sizeof flow_tnl);
 
     if (!xport) {
@@ -2774,6 +2873,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         const struct xport *peer = xport->peer;
         struct flow old_flow = ctx->xin->flow;
         bool old_was_mpls = ctx->was_mpls;
+        cls_version_t old_version = ctx->tables_version;
         enum slow_path_reason special;
         struct ofpbuf old_stack = ctx->stack;
         union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
@@ -2789,6 +2889,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         memset(flow->regs, 0, sizeof flow->regs);
         flow->actset_output = OFPP_UNSET;
 
+        /* The bridge is now known so obtain its table version. */
+        ctx->tables_version
+            = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
+
         special = process_special(ctx, &ctx->xin->flow, peer,
                                   ctx->xin->packet);
         if (special) {
@@ -2835,6 +2939,9 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         ofpbuf_uninit(&ctx->stack);
         ctx->stack = old_stack;
 
+        /* Restore calling bridge's lookup version. */
+        ctx->tables_version = old_version;
+
         /* The peer bridge popping MPLS should have no effect on the original
          * bridge. */
         ctx->was_mpls = old_was_mpls;
@@ -2903,8 +3010,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         }
         out_port = odp_port;
         if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+            xlate_report(ctx, "output to native tunnel");
             tnl_push_pop_send = true;
         } else {
+            xlate_report(ctx, "output to kernel tunnel");
             commit_odp_tunnel_action(flow, &ctx->base_flow,
                                      ctx->xout->odp_actions);
             flow->tunnel = flow_tnl; /* Restore tunnel metadata */
@@ -3056,6 +3165,7 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
         wc = (ctx->xin->skip_wildcards) ? NULL : &ctx->xout->wc;
 
         rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
+                                           ctx->tables_version,
                                            &ctx->xin->flow, wc,
                                            ctx->xin->xcache != NULL,
                                            ctx->xin->resubmit_stats,
@@ -3280,6 +3390,7 @@ xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 static void
 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
 {
+    bool was_in_group = ctx->in_group;
     ctx->in_group = true;
 
     switch (group_dpif_get_type(group)) {
@@ -3298,37 +3409,13 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
     }
     group_dpif_unref(group);
 
-    ctx->in_group = false;
-}
-
-static bool
-xlate_group_resource_check(struct xlate_ctx *ctx)
-{
-    if (!xlate_resubmit_resource_check(ctx)) {
-        return false;
-    } else if (ctx->in_group) {
-        /* Prevent nested translation of OpenFlow groups.
-         *
-         * OpenFlow allows this restriction.  We enforce this restriction only
-         * because, with the current architecture, we would otherwise have to
-         * take a possibly recursive read lock on the ofgroup rwlock, which is
-         * unsafe given that POSIX allows taking a read lock to block if there
-         * is a thread blocked on taking the write lock.  Other solutions
-         * without this restriction are also possible, but seem unwarranted
-         * given the current limited use of groups. */
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-
-        VLOG_ERR_RL(&rl, "cannot recursively translate OpenFlow group");
-        return false;
-    } else {
-        return true;
-    }
+    ctx->in_group = was_in_group;
 }
 
 static bool
 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
 {
-    if (xlate_group_resource_check(ctx)) {
+    if (xlate_resubmit_resource_check(ctx)) {
         struct group_dpif *group;
         bool got_group;
 
@@ -4826,9 +4913,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
                      flow->recirc_id);
         return;
     }
+    /* The bridge is now known so obtain its table version. */
+    ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
 
     if (!xin->ofpacts && !ctx.rule) {
-        rule = rule_dpif_lookup_from_table(ctx.xbridge->ofproto, flow, wc,
+        rule = rule_dpif_lookup_from_table(ctx.xbridge->ofproto,
+                                           ctx.tables_version, flow, wc,
                                            ctx.xin->xcache != NULL,
                                            ctx.xin->resubmit_stats,
                                            &ctx.table_id,