datapath: Use skb_postpull_rcsum().

[cascardo/ovs.git] / ofproto / ofproto-dpif-xlate.c
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c

index 7ccaaea..125d4c6 100644 (file)
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -160,7 +160,7 @@ struct xlate_ctx {
      const struct xbridge *xbridge;
  
      /* Flow tables version at the beginning of the translation. */
-    long long tables_version;
+    cls_version_t tables_version;
  
      /* Flow at the last commit. */
      struct flow base_flow;
@@ -1564,7 +1564,7 @@ add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow)
      while (mirrors) {
          mirror_mask_t dup_mirrors;
          struct ofbundle *out;
-        unsigned long *vlans;
+        const unsigned long *vlans;
          bool vlan_mirrored;
          bool has_mirror;
          int out_vlan;
@@ -1577,7 +1577,6 @@ add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow)
              ctx->xout->wc.masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
          }
          vlan_mirrored = !vlans || bitmap_is_set(vlans, vlan);
-        free(vlans);
  
          if (!vlan_mirrored) {
              mirrors = zero_rightmost_1bit(mirrors);
@@ -1735,7 +1734,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
          struct flow_wildcards *wc = &ctx->xout->wc;
          struct ofport_dpif *ofport;
  
-        if (ctx->xbridge->support.recirc) {
+        if (ctx->xbridge->support.odp.recirc) {
              use_recirc = bond_may_recirc(
                  out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
  
@@ -1998,26 +1997,28 @@ update_learning_table(const struct xbridge *xbridge,
  /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
   * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
  static void
-update_mcast_snooping_table__(const struct xbridge *xbridge,
-                              const struct flow *flow,
-                              struct mcast_snooping *ms,
-                              ovs_be32 ip4, int vlan,
-                              struct xbundle *in_xbundle)
+update_mcast_snooping_table4__(const struct xbridge *xbridge,
+                               const struct flow *flow,
+                               struct mcast_snooping *ms, int vlan,
+                               struct xbundle *in_xbundle,
+                               const struct dp_packet *packet)
      OVS_REQ_WRLOCK(ms->rwlock)
  {
      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
+    int count;
+    ovs_be32 ip4 = flow->igmp_group_ip4;
  
      switch (ntohs(flow->tp_src)) {
      case IGMP_HOST_MEMBERSHIP_REPORT:
      case IGMPV2_HOST_MEMBERSHIP_REPORT:
-        if (mcast_snooping_add_group(ms, ip4, vlan, in_xbundle->ofbundle)) {
+        if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
              VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
                          IP_FMT" is on port %s in VLAN %d",
                          xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
          }
          break;
      case IGMP_HOST_LEAVE_MESSAGE:
-        if (mcast_snooping_leave_group(ms, ip4, vlan, in_xbundle->ofbundle)) {
+        if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
              VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
                          IP_FMT" is on port %s in VLAN %d",
                          xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
@@ -2032,6 +2033,47 @@ update_mcast_snooping_table__(const struct xbridge *xbridge,
                          in_xbundle->name, vlan);
          }
          break;
+    case IGMPV3_HOST_MEMBERSHIP_REPORT:
+        if ((count = mcast_snooping_add_report(ms, packet, vlan,
+                                               in_xbundle->ofbundle))) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
+                        "addresses on port %s in VLAN %d",
+                        xbridge->name, count, in_xbundle->name, vlan);
+        }
+        break;
+    }
+}
+
+static void
+update_mcast_snooping_table6__(const struct xbridge *xbridge,
+                               const struct flow *flow,
+                               struct mcast_snooping *ms, int vlan,
+                               struct xbundle *in_xbundle,
+                               const struct dp_packet *packet)
+    OVS_REQ_WRLOCK(ms->rwlock)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
+    int count;
+
+    switch (ntohs(flow->tp_src)) {
+    case MLD_QUERY:
+        if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
+            && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
+                        "in VLAN %d",
+                        xbridge->name, in_xbundle->name, vlan);
+        }
+        break;
+    case MLD_REPORT:
+    case MLD_DONE:
+    case MLD2_REPORT:
+        count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
+        if (count) {
+            VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
+                        "addresses on port %s in VLAN %d",
+                        xbridge->name, count, in_xbundle->name, vlan);
+        }
+        break;
      }
  }
  
@@ -2040,7 +2082,8 @@ update_mcast_snooping_table__(const struct xbridge *xbridge,
  static void
  update_mcast_snooping_table(const struct xbridge *xbridge,
                              const struct flow *flow, int vlan,
-                            struct xbundle *in_xbundle)
+                            struct xbundle *in_xbundle,
+                            const struct dp_packet *packet)
  {
      struct mcast_snooping *ms = xbridge->ms;
      struct xlate_cfg *xcfg;
@@ -2064,8 +2107,13 @@ update_mcast_snooping_table(const struct xbridge *xbridge,
      }
  
      if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
-        update_mcast_snooping_table__(xbridge, flow, ms, flow->igmp_group_ip4,
-                                      vlan, in_xbundle);
+        if (flow->dl_type == htons(ETH_TYPE_IP)) {
+            update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
+                                           in_xbundle, packet);
+        } else {
+            update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
+                                           in_xbundle, packet);
+        }
      }
      ovs_rwlock_unlock(&ms->rwlock);
  }
@@ -2269,17 +2317,23 @@ xlate_normal(struct xlate_ctx *ctx)
      if (mcast_snooping_enabled(ctx->xbridge->ms)
          && !eth_addr_is_broadcast(flow->dl_dst)
          && eth_addr_is_multicast(flow->dl_dst)
-        && flow->dl_type == htons(ETH_TYPE_IP)) {
+        && is_ip_any(flow)) {
          struct mcast_snooping *ms = ctx->xbridge->ms;
-        struct mcast_group *grp;
+        struct mcast_group *grp = NULL;
  
-        if (flow->nw_proto == IPPROTO_IGMP) {
-            if (ctx->xin->may_learn) {
-                if (mcast_snooping_is_membership(flow->tp_src) ||
-                    mcast_snooping_is_query(flow->tp_src)) {
+        if (is_igmp(flow)) {
+            if (mcast_snooping_is_membership(flow->tp_src) ||
+                mcast_snooping_is_query(flow->tp_src)) {
+                if (ctx->xin->may_learn) {
                      update_mcast_snooping_table(ctx->xbridge, flow, vlan,
-                                                in_xbundle);
-                    }
+                                                in_xbundle, ctx->xin->packet);
+                }
+                /*
+                 * IGMP packets need to take the slow path, in order to be
+                 * processed for mdb updates. That will prevent expires
+                 * firing off even after hosts have sent reports.
+                 */
+                ctx->xout->slow |= SLOW_ACTION;
              }
  
              if (mcast_snooping_is_membership(flow->tp_src)) {
@@ -2300,8 +2354,26 @@ xlate_normal(struct xlate_ctx *ctx)
                  xlate_normal_flood(ctx, in_xbundle, vlan);
              }
              return;
+        } else if (is_mld(flow)) {
+            ctx->xout->slow |= SLOW_ACTION;
+            if (ctx->xin->may_learn) {
+                update_mcast_snooping_table(ctx->xbridge, flow, vlan,
+                                            in_xbundle, ctx->xin->packet);
+            }
+            if (is_mld_report(flow)) {
+                ovs_rwlock_rdlock(&ms->rwlock);
+                xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
+                xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
+                ovs_rwlock_unlock(&ms->rwlock);
+            } else {
+                xlate_report(ctx, "MLD query, flooding");
+                xlate_normal_flood(ctx, in_xbundle, vlan);
+            }
          } else {
-            if (ip_is_local_multicast(flow->nw_dst)) {
+            if ((flow->dl_type == htons(ETH_TYPE_IP)
+                 && ip_is_local_multicast(flow->nw_dst))
+                || (flow->dl_type == htons(ETH_TYPE_IPV6)
+                    && ipv6_is_all_hosts(&flow->ipv6_dst))) {
                  /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
                   * address in the 224.0.0.x range which are not IGMP must
                   * be forwarded on all ports */
@@ -2313,7 +2385,11 @@ xlate_normal(struct xlate_ctx *ctx)
  
          /* forwarding to group base ports */
          ovs_rwlock_rdlock(&ms->rwlock);
-        grp = mcast_snooping_lookup(ms, flow->nw_dst, vlan);
+        if (flow->dl_type == htons(ETH_TYPE_IP)) {
+            grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
+        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
+            grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
+        }
          if (grp) {
              xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
              xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
@@ -2363,7 +2439,8 @@ compose_sample_action(const struct xbridge *xbridge,
                        const uint32_t probability,
                        const union user_action_cookie *cookie,
                        const size_t cookie_size,
-                      const odp_port_t tunnel_out_port)
+                      const odp_port_t tunnel_out_port,
+                      bool include_actions)
  {
      size_t sample_offset, actions_offset;
      odp_port_t odp_port;
@@ -2380,7 +2457,9 @@ compose_sample_action(const struct xbridge *xbridge,
      pid = dpif_port_get_pid(xbridge->dpif, odp_port,
                              flow_hash_5tuple(flow, 0));
      cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
-                                             tunnel_out_port, odp_actions);
+                                             tunnel_out_port,
+                                             include_actions,
+                                             odp_actions);
  
      nl_msg_end_nested(odp_actions, actions_offset);
      nl_msg_end_nested(odp_actions, sample_offset);
@@ -2438,7 +2517,8 @@ compose_sflow_action(const struct xbridge *xbridge,
                           odp_port == ODPP_NONE ? 0 : 1, &cookie);
  
      return compose_sample_action(xbridge, odp_actions, flow,  probability,
-                                 &cookie, sizeof cookie.sflow, ODPP_NONE);
+                                 &cookie, sizeof cookie.sflow, ODPP_NONE,
+                                 true);
  }
  
  static void
@@ -2501,7 +2581,8 @@ compose_ipfix_action(const struct xbridge *xbridge,
      compose_ipfix_cookie(&cookie, output_odp_port);
  
      compose_sample_action(xbridge, odp_actions, flow,  probability,
-                          &cookie, sizeof cookie.ipfix, tunnel_out_port);
+                          &cookie, sizeof cookie.ipfix, tunnel_out_port,
+                          false);
  }
  
  /* SAMPLE action for sFlow must be first action in any given list of
@@ -2752,7 +2833,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
  
      /* If 'struct flow' gets additional metadata, we'll need to zero it out
       * before traversing a patch port. */
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 31);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
      memset(&flow_tnl, 0, sizeof flow_tnl);
  
      if (!xport) {
@@ -2796,7 +2877,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
          const struct xport *peer = xport->peer;
          struct flow old_flow = ctx->xin->flow;
          bool old_was_mpls = ctx->was_mpls;
-        long long old_version = ctx->tables_version;
+        cls_version_t old_version = ctx->tables_version;
          enum slow_path_reason special;
          struct ofpbuf old_stack = ctx->stack;
          union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
@@ -3313,6 +3394,7 @@ xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
  static void
  xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
  {
+    bool was_in_group = ctx->in_group;
      ctx->in_group = true;
  
      switch (group_dpif_get_type(group)) {
@@ -3331,37 +3413,13 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
      }
      group_dpif_unref(group);
  
-    ctx->in_group = false;
-}
-
-static bool
-xlate_group_resource_check(struct xlate_ctx *ctx)
-{
-    if (!xlate_resubmit_resource_check(ctx)) {
-        return false;
-    } else if (ctx->in_group) {
-        /* Prevent nested translation of OpenFlow groups.
-         *
-         * OpenFlow allows this restriction.  We enforce this restriction only
-         * because, with the current architecture, we would otherwise have to
-         * take a possibly recursive read lock on the ofgroup rwlock, which is
-         * unsafe given that POSIX allows taking a read lock to block if there
-         * is a thread blocked on taking the write lock.  Other solutions
-         * without this restriction are also possible, but seem unwarranted
-         * given the current limited use of groups. */
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-
-        VLOG_ERR_RL(&rl, "cannot recursively translate OpenFlow group");
-        return false;
-    } else {
-        return true;
-    }
+    ctx->in_group = was_in_group;
  }
  
  static bool
  xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
  {
-    if (xlate_group_resource_check(ctx)) {
+    if (xlate_resubmit_resource_check(ctx)) {
          struct group_dpif *group;
          bool got_group;
  
@@ -3571,7 +3629,7 @@ compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
      int n = flow_count_mpls_labels(flow, wc);
  
      if (flow_pop_mpls(flow, n, eth_type, wc)) {
-        if (ctx->xbridge->support.recirc) {
+        if (ctx->xbridge->support.odp.recirc) {
              ctx->was_mpls = true;
          }
      } else if (n >= FLOW_MAX_MPLS_LABELS) {
@@ -3915,7 +3973,8 @@ xlate_sample_action(struct xlate_ctx *ctx,
                                 os->obs_domain_id, os->obs_point_id, &cookie);
      compose_sample_action(ctx->xbridge, ctx->xout->odp_actions,
                            &ctx->xin->flow, probability, &cookie,
-                          sizeof cookie.flow_sample, ODPP_NONE);
+                          sizeof cookie.flow_sample, ODPP_NONE,
+                          false);
  }
  
  static bool
@@ -3962,6 +4021,7 @@ xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a)
              } else if (inner->type == OFPACT_GROUP) {
                  ctx->xin->flow.actset_output = OFPP_UNSET;
                  ctx->action_set_has_group = true;
+                break;
              }
          }
      }
@@ -4522,23 +4582,6 @@ xlate_actions_for_side_effects(struct xlate_in *xin)
      xlate_actions(xin, &xout);
      xlate_out_uninit(&xout);
  }
-
-void
-xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
-{
-    dst->wc = src->wc;
-    dst->slow = src->slow;
-    dst->has_learn = src->has_learn;
-    dst->has_normal = src->has_normal;
-    dst->has_fin_timeout = src->has_fin_timeout;
-    dst->nf_output_iface = src->nf_output_iface;
-    dst->mirrors = src->mirrors;
-
-    dst->odp_actions = &dst->odp_actions_buf;
-    ofpbuf_use_stub(dst->odp_actions, dst->odp_actions_stub,
-                    sizeof dst->odp_actions_stub);
-    ofpbuf_put(dst->odp_actions, src->odp_actions->data, src->odp_actions->size);
-}
  \f
  static struct skb_priority_to_dscp *
  get_skb_priority(const struct xport *xport, uint32_t skb_priority)
@@ -4754,7 +4797,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
          if (is_ip_any(flow)) {
              wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
          }
-        if (xbridge->support.recirc) {
+        if (xbridge->support.odp.recirc) {
              /* Always exactly match recirc_id when datapath supports
               * recirculation.  */
              wc->masks.recirc_id = UINT32_MAX;
@@ -4765,7 +4808,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
      }
      is_icmp = is_icmpv4(flow) || is_icmpv6(flow);
  
-    tnl_may_send = tnl_xlate_init(&ctx.base_flow, flow, wc);
+    tnl_may_send = tnl_xlate_init(flow, wc);
  
      ctx.recurse = 0;
      ctx.resubmits = 0;