ofproto-dpif: Unset DPIF_FP_MODIFY flag when creating a new flow
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4cb4db6..aa087b2 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -66,6 +66,7 @@ COVERAGE_DEFINE(facet_changed_rule);
 COVERAGE_DEFINE(facet_revalidate);
 COVERAGE_DEFINE(facet_unexpected);
 COVERAGE_DEFINE(facet_suppress);
+COVERAGE_DEFINE(subfacet_install_fail);
 
 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  * flow translation. */
@@ -791,6 +792,10 @@ static void update_max_subfacet_count(struct ofproto_dpif *ofproto);
  * for debugging the asynchronous flow_mod implementation.) */
 static bool clogged;
 
+/* By default, flows in the datapath are wildcarded (megaflows).  They
+ * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
+static bool enable_megaflows = true;
+
 /* All existing ofproto_dpif instances, indexed by ->up.name. */
 static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
 
@@ -2288,10 +2293,12 @@ stp_wait(struct ofproto_dpif *ofproto)
     }
 }
 
-/* Returns true if STP should process 'flow'. */
+/* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
+ * were used to make the determination. */
 static bool
-stp_should_process_flow(const struct flow *flow)
+stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
 {
+    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
     return eth_addr_equals(flow->dl_dst, eth_addr_stp);
 }
 
@@ -3546,6 +3553,13 @@ struct flow_miss_op {
     uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */
     struct xlate_out xout;
     bool xout_garbage;           /* 'xout' needs to be uninitialized? */
+
+    struct ofpbuf mask;          /* Flow mask for "put" ops. */
+    struct odputil_keybuf maskbuf; /* Stack storage backing 'mask'. */
+
+    /* If this is a "put" op, then a pointer to the subfacet that should
+     * be marked as uninstalled if the operation fails. */
+    struct subfacet *subfacet;
 };
 
 /* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each
@@ -3573,12 +3587,15 @@ send_packet_in_miss(struct ofproto_dpif *ofproto, const struct ofpbuf *packet,
 }
 
 static enum slow_path_reason
-process_special(struct ofproto_dpif *ofproto, const struct flow *flow,
+process_special(struct xlate_ctx *ctx, const struct flow *flow,
                 const struct ofport_dpif *ofport, const struct ofpbuf *packet)
 {
+    struct ofproto_dpif *ofproto = ctx->ofproto;
+    struct flow_wildcards *wc = &ctx->xout->wc;
+
     if (!ofport) {
         return 0;
-    } else if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow)) {
+    } else if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow, wc)) {
         if (packet) {
             cfm_process_heartbeat(ofport->cfm, packet);
         }
@@ -3589,7 +3606,7 @@ process_special(struct ofproto_dpif *ofproto, const struct flow *flow,
             lacp_process_packet(ofport->bundle->lacp, ofport, packet);
         }
         return SLOW_LACP;
-    } else if (ofproto->stp && stp_should_process_flow(flow)) {
+    } else if (ofproto->stp && stp_should_process_flow(flow, wc)) {
         if (packet) {
             stp_process_packet(ofport, packet);
         }
@@ -3633,11 +3650,13 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet,
         eth_pop_vlan(packet);
     }
 
+    op->subfacet = NULL;
     op->xout_garbage = false;
     op->dpif_op.type = DPIF_OP_EXECUTE;
     op->dpif_op.u.execute.key = miss->key;
     op->dpif_op.u.execute.key_len = miss->key_len;
     op->dpif_op.u.execute.packet = packet;
+    ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf);
 }
 
 /* Helper for handle_flow_miss_without_facet() and
@@ -3752,11 +3771,7 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
     struct subfacet *subfacet;
     struct ofpbuf *packet;
 
-    subfacet = subfacet_create(facet, miss, now);
     want_path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
-    if (stats) {
-        subfacet_update_stats(subfacet, stats);
-    }
 
     LIST_FOR_EACH (packet, list_node, &miss->packets) {
         struct flow_miss_op *op = &ops[*n_ops];
@@ -3781,17 +3796,48 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
         }
     }
 
+    /* Don't install the flow if it's the result of the "userspace"
+     * action for an already installed facet.  This can occur when a
+     * datapath flow with wildcards has a "userspace" action: the packets
+     * sent to userspace would create a different subfacet, whose
+     * installation the datapath would then reject as overlapping. */
+    if (miss->upcall_type == DPIF_UC_ACTION
+        && !list_is_empty(&facet->subfacets)) {
+        if (stats) {
+            facet->used = MAX(facet->used, stats->used);
+            facet->packet_count += stats->n_packets;
+            facet->byte_count += stats->n_bytes;
+            facet->tcp_flags |= stats->tcp_flags;
+        }
+        return;
+    }
+
+    subfacet = subfacet_create(facet, miss, now);
+    if (stats) {
+        subfacet_update_stats(subfacet, stats);
+    }
+
     if (miss->upcall_type == DPIF_UC_MISS || subfacet->path != want_path) {
         struct flow_miss_op *op = &ops[(*n_ops)++];
         struct dpif_flow_put *put = &op->dpif_op.u.flow_put;
 
         subfacet->path = want_path;
 
+        ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf);
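+        /* Build the datapath flow mask from the wildcards accumulated
+         * during translation.  With megaflows disabled, the mask is left
+         * empty and the flow is installed as an exact match. */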
+        if (enable_megaflows) {
+            odp_flow_key_from_mask(&op->mask, &facet->xout.wc.masks,
+                                   &miss->flow, UINT32_MAX);
+        }
+
         op->xout_garbage = false;
         op->dpif_op.type = DPIF_OP_FLOW_PUT;
-        put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
+        op->subfacet = subfacet;
+        put->flags = DPIF_FP_CREATE;
         put->key = miss->key;
         put->key_len = miss->key_len;
+        put->mask = op->mask.data;
+        put->mask_len = op->mask.size;
+
         if (want_path == SF_FAST_PATH) {
             put->actions = facet->xout.odp_actions.data;
             put->actions_len = facet->xout.odp_actions.size;
@@ -4077,7 +4123,8 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
                 hmap_insert(&backer->drop_keys, &drop_key->hmap_node,
                             hash_bytes(drop_key->key, drop_key->key_len, 0));
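+                /* Drop keys carry no mask, so the datapath installs them as
+                 * exact-match flows. */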
                 dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
-                              drop_key->key, drop_key->key_len, NULL, 0, NULL);
+                              drop_key->key, drop_key->key_len,
+                              NULL, 0, NULL, 0, NULL);
             }
             continue;
         }
@@ -4122,8 +4169,30 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
     }
     dpif_operate(backer->dpif, dpif_ops, n_ops);
 
-    /* Free memory. */
     for (i = 0; i < n_ops; i++) {
+        if (dpif_ops[i]->error != 0
+            && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT
+            && flow_miss_ops[i].subfacet) {
+            struct subfacet *subfacet = flow_miss_ops[i].subfacet;
+
+            COVERAGE_INC(subfacet_install_fail);
+
+            /* Zero out the subfacet counters when installation failed but
+             * the datapath reported hits.  This should not happen and
+             * indicates a bug: if the datapath flow already existed, we
+             * should not have been attempting to create a new subfacet.
+             * A buggy datapath could trigger it, though, so just zero out
+             * the counters and log an error. */
+            if (subfacet->dp_packet_count || subfacet->dp_byte_count) {
+                VLOG_ERR_RL(&rl, "failed to install subfacet for which "
+                            "datapath reported hits");
+                subfacet->dp_packet_count = subfacet->dp_byte_count = 0;
+            }
+
+            subfacet->path = SF_NOT_INSTALLED;
+        }
+
+        /* Free memory. */
         if (flow_miss_ops[i].xout_garbage) {
             xlate_out_uninit(&flow_miss_ops[i].xout);
         }
@@ -4455,12 +4524,13 @@ update_stats(struct dpif_backer *backer)
 {
     const struct dpif_flow_stats *stats;
     struct dpif_flow_dump dump;
-    const struct nlattr *key;
     struct ofproto_dpif *ofproto;
-    size_t key_len;
+    const struct nlattr *key, *mask;
+    size_t key_len, mask_len;
 
     dpif_flow_dump_start(&dump, backer->dpif);
-    while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) {
+    while (dpif_flow_dump_next(&dump, &key, &key_len,
+                               &mask, &mask_len, NULL, NULL, &stats)) {
         struct flow flow;
         struct subfacet *subfacet;
         uint32_t key_hash;
@@ -5369,12 +5439,15 @@ subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
     enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
     const struct nlattr *actions = odp_actions->data;
     size_t actions_len = odp_actions->size;
+    struct odputil_keybuf maskbuf;
+    struct ofpbuf mask;
 
     uint64_t slow_path_stub[128 / 8];
     enum dpif_flow_put_flags flags;
     int ret;
 
-    flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
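+    /* Only ask for the operation that matches the subfacet's state.  In
+     * particular, a new flow must be a pure create, so that the datapath
+     * reports an overlap with an existing flow as an error instead of
+     * silently modifying that flow. */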
+    flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE
+                                               : DPIF_FP_MODIFY;
     if (stats) {
         flags |= DPIF_FP_ZERO_STATS;
     }
@@ -5385,14 +5458,23 @@ subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
                           &actions, &actions_len);
     }
 
+    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
+    if (enable_megaflows) {
+        odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
+                               &facet->flow, UINT32_MAX);
+    }
+
     ret = dpif_flow_put(ofproto->backer->dpif, flags, subfacet->key,
-                        subfacet->key_len, actions, actions_len, stats);
+                        subfacet->key_len, mask.data, mask.size,
+                        actions, actions_len, stats);
 
     if (stats) {
         subfacet_reset_dp_stats(subfacet, stats);
     }
 
-    if (!ret) {
+    if (ret) {
+        COVERAGE_INC(subfacet_install_fail);
+    } else {
         subfacet->path = path;
     }
     return ret;
@@ -5492,6 +5574,11 @@ rule_dpif_lookup__(struct ofproto_dpif *ofproto, const struct flow *flow,
         return NULL;
     }
 
+    if (wc) {
+        memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
+        wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
+    }
+
     cls = &ofproto->up.tables[table_id].cls;
     frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
     if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
@@ -5988,6 +6075,7 @@ compose_output_action__(struct xlate_ctx *ctx, uint16_t ofp_port,
                         bool check_stp)
 {
     const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
+    struct flow_wildcards *wc = &ctx->xout->wc;
     ovs_be16 flow_vlan_tci;
     uint32_t flow_skb_mark;
     uint8_t flow_nw_tos;
@@ -6034,7 +6122,7 @@ compose_output_action__(struct xlate_ctx *ctx, uint16_t ofp_port,
         memset(ctx->xin->flow.regs, 0, sizeof ctx->xin->flow.regs);
 
         in_port = get_ofp_port(ctx->ofproto, ctx->xin->flow.in_port);
-        special = process_special(ctx->ofproto, &ctx->xin->flow, in_port,
+        special = process_special(ctx, &ctx->xin->flow, in_port,
                                   ctx->xin->packet);
         if (special) {
             ctx->xout->slow = special;
@@ -6069,6 +6157,7 @@ compose_output_action__(struct xlate_ctx *ctx, uint16_t ofp_port,
 
     pdscp = get_priority(ofport, ctx->xin->flow.skb_priority);
     if (pdscp) {
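+        /* Rewriting the DSCP bits leaves the packet's original ECN bits in
+         * place, so they are part of the match. */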
+        wc->masks.nw_tos |= IP_ECN_MASK;
         ctx->xin->flow.nw_tos &= ~IP_DSCP_MASK;
         ctx->xin->flow.nw_tos |= pdscp->dscp;
     }
@@ -6095,7 +6184,11 @@ compose_output_action__(struct xlate_ctx *ctx, uint16_t ofp_port,
         ctx->xin->flow.tunnel = flow_tnl; /* Restore tunnel metadata */
     } else {
         uint16_t vlandev_port;
+
         odp_port = ofport->odp_port;
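+        /* With VLAN splinters in use, the chosen output port depends on the
+         * packet's VLAN, so it cannot be wildcarded. */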
+        if (!hmap_is_empty(&ctx->ofproto->realdev_vid_map)) {
+            wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
+        }
         vlandev_port = vsp_realdev_to_vlandev(ctx->ofproto, ofp_port,
                                               ctx->xin->flow.vlan_tci);
         if (vlandev_port == ofp_port) {
@@ -6107,7 +6200,7 @@ compose_output_action__(struct xlate_ctx *ctx, uint16_t ofp_port,
         ctx->xin->flow.skb_mark &= ~IPSEC_MARK;
     }
     commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
-                       &ctx->xout->odp_actions);
+                       &ctx->xout->odp_actions, &ctx->xout->wc);
     nl_msg_put_u32(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port);
 
     ctx->sflow_odp_port = odp_port;
@@ -6331,14 +6424,11 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
 static void
 execute_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 {
+    struct flow_wildcards *wc = &ctx->xout->wc;
     ovs_assert(eth_type_mpls(eth_type));
 
-    memset(&ctx->xout->wc.masks.dl_type, 0xff,
-               sizeof ctx->xout->wc.masks.dl_type);
-    memset(&ctx->xout->wc.masks.mpls_lse, 0xff,
-               sizeof ctx->xout->wc.masks.mpls_lse);
-    memset(&ctx->xout->wc.masks.mpls_depth, 0xff,
-               sizeof ctx->xout->wc.masks.mpls_depth);
+    memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
+    memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth);
 
     if (ctx->base_flow.mpls_depth) {
         ctx->xin->flow.mpls_lse &= ~htonl(MPLS_BOS_MASK);
@@ -6352,6 +6442,8 @@ execute_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
         } else {
             label = htonl(0x0); /* IPV4 Explicit Null. */
         }
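+        /* The new LSE's traffic class and TTL are taken from the IP header,
+         * so those fields must be part of the match. */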
+        wc->masks.nw_tos |= IP_DSCP_MASK;
+        wc->masks.nw_ttl = 0xff;
         tc = (ctx->xin->flow.nw_tos & IP_DSCP_MASK) >> 2;
         ttl = ctx->xin->flow.nw_ttl ? ctx->xin->flow.nw_ttl : 0x40;
         ctx->xin->flow.mpls_lse = set_mpls_lse_values(ttl, tc, 1, label);
@@ -6363,15 +6455,13 @@ execute_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 static void
 execute_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 {
+    struct flow_wildcards *wc = &ctx->xout->wc;
+
     ovs_assert(eth_type_mpls(ctx->xin->flow.dl_type));
     ovs_assert(!eth_type_mpls(eth_type));
 
-    memset(&ctx->xout->wc.masks.dl_type, 0xff,
-               sizeof ctx->xout->wc.masks.dl_type);
-    memset(&ctx->xout->wc.masks.mpls_lse, 0xff,
-               sizeof ctx->xout->wc.masks.mpls_lse);
-    memset(&ctx->xout->wc.masks.mpls_depth, 0xff,
-               sizeof ctx->xout->wc.masks.mpls_depth);
+    memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
+    memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth);
 
     if (ctx->xin->flow.mpls_depth) {
         ctx->xin->flow.mpls_depth--;
@@ -6390,6 +6480,7 @@ compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
         return false;
     }
 
+    ctx->xout->wc.masks.nw_ttl = 0xff;
     if (ctx->xin->flow.nw_ttl > 1) {
         ctx->xin->flow.nw_ttl--;
         return false;
@@ -6421,6 +6512,9 @@ static bool
 execute_dec_mpls_ttl_action(struct xlate_ctx *ctx)
 {
     uint8_t ttl = mpls_lse_to_ttl(ctx->xin->flow.mpls_lse);
+    struct flow_wildcards *wc = &ctx->xout->wc;
+
+    memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
 
     if (!eth_type_mpls(ctx->xin->flow.dl_type)) {
         return false;
@@ -6638,6 +6732,10 @@ xlate_fin_timeout(struct xlate_ctx *ctx,
     if (ctx->xin->tcp_flags & (TCP_FIN | TCP_RST) && ctx->rule) {
         struct rule_dpif *rule = ctx->rule;
 
+        if (list_is_empty(&rule->up.expirable)) {
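+            /* The rule is being given a finite timeout, so put it on the
+             * expirable list where rule expiration can find it. */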
+            list_insert(&ctx->ofproto->up.expirable, &rule->up.expirable);
+        }
+
         reduce_timeout(oft->fin_idle_timeout, &rule->up.idle_timeout);
         reduce_timeout(oft->fin_hard_timeout, &rule->up.hard_timeout);
     }
@@ -6653,7 +6751,7 @@ xlate_sample_action(struct xlate_ctx *ctx,
   uint32_t probability = (os->probability << 16) | os->probability;
 
   commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
-                     &ctx->xout->odp_actions);
+                     &ctx->xout->odp_actions, &ctx->xout->wc);
 
   compose_flow_sample_cookie(os->probability, os->collector_set_id,
                              os->obs_domain_id, os->obs_point_id, &cookie);
@@ -6774,16 +6872,12 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_SET_IPV4_SRC:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             if (ctx->xin->flow.dl_type == htons(ETH_TYPE_IP)) {
                 ctx->xin->flow.nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
             }
             break;
 
         case OFPACT_SET_IPV4_DST:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             if (ctx->xin->flow.dl_type == htons(ETH_TYPE_IP)) {
                 ctx->xin->flow.nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
             }
@@ -6791,8 +6885,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
         case OFPACT_SET_IPV4_DSCP:
             /* OpenFlow 1.0 only supports IPv4. */
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             if (ctx->xin->flow.dl_type == htons(ETH_TYPE_IP)) {
                 ctx->xin->flow.nw_tos &= ~IP_DSCP_MASK;
                 ctx->xin->flow.nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp;
@@ -6800,8 +6892,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_SET_L4_SRC_PORT:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             memset(&ctx->xout->wc.masks.nw_proto, 0xff,
                     sizeof ctx->xout->wc.masks.nw_proto);
             if (is_ip_any(&ctx->xin->flow)) {
@@ -6811,8 +6901,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_SET_L4_DST_PORT:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             memset(&ctx->xout->wc.masks.nw_proto, 0xff,
                     sizeof ctx->xout->wc.masks.nw_proto);
             if (is_ip_any(&ctx->xin->flow)) {
@@ -6835,9 +6923,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_POP_QUEUE:
-            memset(&ctx->xout->wc.masks.skb_priority, 0xff,
-                   sizeof ctx->xout->wc.masks.skb_priority);
-
             ctx->xin->flow.skb_priority = ctx->orig_skb_priority;
             break;
 
@@ -6882,8 +6967,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_DEC_TTL:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
                 goto out;
             }
@@ -6916,8 +6999,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_FIN_TIMEOUT:
-            memset(&ctx->xout->wc.masks.dl_type, 0xff,
-                   sizeof ctx->xout->wc.masks.dl_type);
             memset(&ctx->xout->wc.masks.nw_proto, 0xff,
                    sizeof ctx->xout->wc.masks.nw_proto);
             ctx->xout->has_fin_timeout = true;
@@ -7027,6 +7108,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
      * that in the future we always keep a copy of the original flow for
      * tracing purposes. */
     static bool hit_resubmit_limit;
+    struct flow_wildcards *wc = &xout->wc;
 
     enum slow_path_reason special;
     const struct ofpact *ofpacts;
@@ -7048,12 +7130,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
      *   to another device without any modifications this will cause us to
      *   insert a new tag since the original one was stripped off by the
      *   VLAN device.
-     * - Tunnel 'flow' is largely cleared when transitioning between
-     *   the input and output stages since it does not make sense to output
-     *   a packet with the exact headers that it was received with (i.e.
-     *   the destination IP is us).  The one exception is the tun_id, which
-     *   is preserved to allow use in later resubmit lookups and loads into
-     *   registers.
+     * - Tunnel metadata is retained in 'flow' as it was received, which
+     *   allows matching on it in later tables as well.
+     *   Since a kernel action to set the tunnel metadata is only generated
+     *   for an actual tunnel output, changing the tunnel metadata values
+     *   in 'flow' (such as tun_id) only takes effect if a tunnel output
+     *   action follows.
      * - Tunnel 'base_flow' is completely cleared since that is what the
      *   kernel does.  If we wish to maintain the original values an action
      *   needs to be generated. */
@@ -7070,34 +7152,18 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
     ctx.base_flow.tunnel.ip_tos = xin->initial_vals.tunnel_ip_tos;
 
     flow_wildcards_init_catchall(&ctx.xout->wc);
-    memset(&ctx.xout->wc.masks.in_port, 0xff,
-           sizeof ctx.xout->wc.masks.in_port);
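+    /* These fields are consulted during every translation, so unwildcard
+     * them up front rather than at each point of use. */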
+    memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port);
+    memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
+    memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
+    wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
 
     if (tnl_port_should_receive(&ctx.xin->flow)) {
-        memset(&ctx.xout->wc.masks.tunnel, 0xff,
-               sizeof ctx.xout->wc.masks.tunnel);
+        memset(&wc->masks.tunnel, 0xff, sizeof wc->masks.tunnel);
     }
 
     /* Disable most wildcarding for NetFlow. */
     if (xin->ofproto->netflow) {
-        memset(&ctx.xout->wc.masks.dl_src, 0xff,
-               sizeof ctx.xout->wc.masks.dl_src);
-        memset(&ctx.xout->wc.masks.dl_dst, 0xff,
-               sizeof ctx.xout->wc.masks.dl_dst);
-        memset(&ctx.xout->wc.masks.dl_type, 0xff,
-               sizeof ctx.xout->wc.masks.dl_type);
-        memset(&ctx.xout->wc.masks.vlan_tci, 0xff,
-               sizeof ctx.xout->wc.masks.vlan_tci);
-        memset(&ctx.xout->wc.masks.nw_proto, 0xff,
-               sizeof ctx.xout->wc.masks.nw_proto);
-        memset(&ctx.xout->wc.masks.nw_src, 0xff,
-               sizeof ctx.xout->wc.masks.nw_src);
-        memset(&ctx.xout->wc.masks.nw_dst, 0xff,
-               sizeof ctx.xout->wc.masks.nw_dst);
-        memset(&ctx.xout->wc.masks.tp_src, 0xff,
-               sizeof ctx.xout->wc.masks.tp_src);
-        memset(&ctx.xout->wc.masks.tp_dst, 0xff,
-               sizeof ctx.xout->wc.masks.tp_dst);
+        netflow_mask_wc(&ctx.xin->flow, wc);
     }
 
     ctx.xout->tags = 0;
@@ -7160,7 +7226,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
     }
 
     in_port = get_ofp_port(ctx.ofproto, ctx.xin->flow.in_port);
-    special = process_special(ctx.ofproto, &ctx.xin->flow, in_port,
+    special = process_special(&ctx, &ctx.xin->flow, in_port,
                               ctx.xin->packet);
     if (special) {
         ctx.xout->slow = special;
@@ -7736,6 +7802,7 @@ is_admissible(struct xlate_ctx *ctx, struct ofport_dpif *in_port,
 static void
 xlate_normal(struct xlate_ctx *ctx)
 {
+    struct flow_wildcards *wc = &ctx->xout->wc;
     struct ofport_dpif *in_port;
     struct ofbundle *in_bundle;
     struct mac_entry *mac;
@@ -7744,16 +7811,9 @@ xlate_normal(struct xlate_ctx *ctx)
 
     ctx->xout->has_normal = true;
 
-    /* Check the dl_type, since we may check for gratuituous ARP. */
-    memset(&ctx->xout->wc.masks.dl_type, 0xff,
-           sizeof ctx->xout->wc.masks.dl_type);
-
-    memset(&ctx->xout->wc.masks.dl_src, 0xff,
-           sizeof ctx->xout->wc.masks.dl_src);
-    memset(&ctx->xout->wc.masks.dl_dst, 0xff,
-           sizeof ctx->xout->wc.masks.dl_dst);
-    memset(&ctx->xout->wc.masks.vlan_tci, 0xff,
-           sizeof ctx->xout->wc.masks.vlan_tci);
+    memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
+    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
 
     in_bundle = lookup_input_bundle(ctx->ofproto, ctx->xin->flow.in_port,
                                     ctx->xin->packet != NULL, &in_port);
@@ -7802,7 +7862,7 @@ xlate_normal(struct xlate_ctx *ctx)
 
     /* Learn source MAC. */
     if (ctx->xin->may_learn) {
-        update_learning_table(ctx->ofproto, &ctx->xin->flow, &ctx->xout->wc,
+        update_learning_table(ctx->ofproto, &ctx->xin->flow, wc,
                               vlan, in_bundle);
     }
 
@@ -8222,7 +8282,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
 {
     const char *dpname = argv[1];
     struct ofproto_dpif *ofproto;
-    struct ofpbuf odp_key;
+    struct ofpbuf odp_key, odp_mask;
     struct ofpbuf *packet;
     struct initial_vals initial_vals;
     struct ds result;
@@ -8232,6 +8292,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
     packet = NULL;
     ofpbuf_init(&odp_key, 0);
     ds_init(&result);
+    ofpbuf_init(&odp_mask, 0);
 
     ofproto = ofproto_dpif_lookup(dpname);
     if (!ofproto) {
@@ -8259,7 +8320,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
 
             /* Convert string to datapath key. */
             ofpbuf_init(&odp_key, 0);
-            error = odp_flow_key_from_string(flow_s, NULL, &odp_key);
+            error = odp_flow_from_string(flow_s, NULL, &odp_key, &odp_mask);
             if (error) {
                 unixctl_command_reply_error(conn, "Bad flow syntax");
                 goto exit;
@@ -8334,6 +8395,7 @@ exit:
     ds_destroy(&result);
     ofpbuf_delete(packet);
     ofpbuf_uninit(&odp_key);
+    ofpbuf_uninit(&odp_mask);
 }
 
 static void
@@ -8711,6 +8773,48 @@ ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn,
     ds_destroy(&ds);
 }
 
+/* Disable the use of megaflows.
+ *
+ * This command is only needed for advanced debugging, so it's not
+ * documented in the man page. */
+static void
+ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn *conn,
+                                       int argc OVS_UNUSED,
+                                       const char *argv[] OVS_UNUSED,
+                                       void *aux OVS_UNUSED)
+{
+    struct ofproto_dpif *ofproto;
+
+    enable_megaflows = false;
+
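+    /* Flush the existing facets so that subsequent datapath flows are set
+     * up without masks. */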
+    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+        flush(&ofproto->up);
+    }
+
+    unixctl_command_reply(conn, "megaflows disabled");
+}
+
+/* Re-enable the use of megaflows.
+ *
+ * This command is only needed for advanced debugging, so it's not
+ * documented in the man page. */
+static void
+ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn *conn,
+                                      int argc OVS_UNUSED,
+                                      const char *argv[] OVS_UNUSED,
+                                      void *aux OVS_UNUSED)
+{
+    struct ofproto_dpif *ofproto;
+
+    enable_megaflows = true;
+
+    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+        flush(&ofproto->up);
+    }
+
+    unixctl_command_reply(conn, "megaflows enabled");
+}
+
 static void
 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
                                 int argc OVS_UNUSED, const char *argv[],
@@ -8730,8 +8834,17 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
 
     HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->subfacets) {
         struct facet *facet = subfacet->facet;
+        struct odputil_keybuf maskbuf;
+        struct ofpbuf mask;
+
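+        /* Reconstruct the flow mask from the facet's wildcards, as is done
+         * when the flow is installed. */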
+        ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
+        if (enable_megaflows) {
+            odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
+                                   &facet->flow, UINT32_MAX);
+        }
 
-        odp_flow_key_format(subfacet->key, subfacet->key_len, &ds);
+        odp_flow_format(subfacet->key, subfacet->key_len,
+                        mask.data, mask.size, &ds);
 
         ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:",
                       subfacet->dp_packet_count, subfacet->dp_byte_count);
@@ -8820,6 +8933,10 @@ ofproto_dpif_unixctl_init(void)
                              ofproto_unixctl_dpif_del_flows, NULL);
     unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1,
                              ofproto_unixctl_dpif_dump_megaflows, NULL);
+    unixctl_command_register("dpif/disable-megaflows", "", 0, 0,
+                             ofproto_unixctl_dpif_disable_megaflows, NULL);
+    unixctl_command_register("dpif/enable-megaflows", "", 0, 0,
+                             ofproto_unixctl_dpif_enable_megaflows, NULL);
 }
 \f
 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)