ofproto-dpif: Introduce multicast snooping handler
[cascardo/ovs.git] / ofproto / ofproto-dpif.c
index a3fd792..27233cb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,6 +37,7 @@
 #include "lacp.h"
 #include "learn.h"
 #include "mac-learning.h"
+#include "mcast-snooping.h"
 #include "meta-flow.h"
 #include "multipath.h"
 #include "netdev-vport.h"
@@ -53,6 +54,7 @@
 #include "ofproto-dpif-ipfix.h"
 #include "ofproto-dpif-mirror.h"
 #include "ofproto-dpif-monitor.h"
+#include "ofproto-dpif-rid.h"
 #include "ofproto-dpif-sflow.h"
 #include "ofproto-dpif-upcall.h"
 #include "ofproto-dpif-xlate.h"
@@ -72,11 +74,6 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
 COVERAGE_DEFINE(ofproto_dpif_expired);
 COVERAGE_DEFINE(packet_in_overflow);
 
-/* Number of implemented OpenFlow tables. */
-enum { N_TABLES = 255 };
-enum { TBL_INTERNAL = N_TABLES - 1 };    /* Used for internal hidden rules. */
-BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
-
 struct flow_miss;
 
 struct rule_dpif {
@@ -87,11 +84,20 @@ struct rule_dpif {
      *   - Do include packets and bytes from datapath flows which have not
      *   recently been processed by a revalidator. */
     struct ovs_mutex stats_mutex;
-    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
-    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
+    struct dpif_flow_stats stats OVS_GUARDED;
+
+    /* If non-zero then the recirculation id that has
+     * been allocated for use with this rule.
+     * The recirculation id and associated internal flow should
+     * be freed when the rule is freed */
+    uint32_t recirc_id;
 };
 
-static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
+/* RULE_CAST() depends on this. */
+BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
+
+static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
+                           long long int *used);
 static struct rule_dpif *rule_dpif_cast(const struct rule *);
 static void rule_expire(struct rule_dpif *);
 
@@ -105,7 +111,6 @@ struct group_dpif {
     struct ovs_mutex stats_mutex;
     uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
     uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
-    struct bucket_counter *bucket_stats OVS_GUARDED;  /* Bucket statistics. */
 };
 
 struct ofbundle {
@@ -208,11 +213,6 @@ static int set_bfd(struct ofport *, const struct smap *);
 static int set_cfm(struct ofport *, const struct cfm_settings *);
 static void ofport_update_peer(struct ofport_dpif *);
 
-struct dpif_completion {
-    struct list list_node;
-    struct ofoperation *op;
-};
-
 /* Reasons that we might need to revalidate every datapath flow, and
  * corresponding coverage counters.
  *
@@ -228,6 +228,7 @@ enum revalidate_reason {
     REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
     REV_FLOW_TABLE,            /* Flow table changed. */
     REV_MAC_LEARNING,          /* Mac learning changed. */
+    REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
 };
 COVERAGE_DEFINE(rev_reconfigure);
 COVERAGE_DEFINE(rev_stp);
@@ -235,6 +236,7 @@ COVERAGE_DEFINE(rev_bond);
 COVERAGE_DEFINE(rev_port_toggled);
 COVERAGE_DEFINE(rev_flow_table);
 COVERAGE_DEFINE(rev_mac_learning);
+COVERAGE_DEFINE(rev_mcast_snooping);
 
 /* All datapaths of a given type share a single dpif backer instance. */
 struct dpif_backer {
@@ -252,10 +254,18 @@ struct dpif_backer {
 
     bool recv_set_enable; /* Enables or disables receiving packets. */
 
+    /* Recirculation. */
+    struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
+    bool enable_recirc;   /* True if the datapath supports recirculation */
+
     /* True if the datapath supports variable-length
      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
      * False if the datapath supports only 8-byte (or shorter) userdata. */
     bool variable_length_userdata;
+
+    /* Maximum number of MPLS label stack entries that the datapath supports
+     * in a match */
+    size_t max_mpls_depth;
 };
 
 /* All existing ofproto_backer instances, indexed by ofproto->up.type. */
@@ -279,6 +289,7 @@ struct ofproto_dpif {
     struct dpif_ipfix *ipfix;
     struct hmap bundles;        /* Contains "struct ofbundle"s. */
     struct mac_learning *ml;
+    struct mcast_snooping *ms;
     bool has_bonded_bundles;
     bool lacp_enabled;
     struct mbridge *mbridge;
@@ -305,6 +316,8 @@ struct ofproto_dpif {
 
     /* Work queues. */
     struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
+    struct seq *pins_seq;          /* For notifying 'pins' reception. */
+    uint64_t pins_seqno;
 };
 
 /* All existing ofproto_dpif instances, indexed by ->up.name. */
@@ -319,9 +332,21 @@ ofproto_dpif_cast(const struct ofproto *ofproto)
     return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
 }
 
+size_t
+ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->max_mpls_depth;
+}
+
+bool
+ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->enable_recirc;
+}
+
 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                         ofp_port_t ofp_port);
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
+static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                           const struct ofpbuf *packet,
                           const struct ofpact[], size_t ofpacts_len,
                           struct ds *);
@@ -352,6 +377,21 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
         free(CONST_CAST(void *, pin->up.packet));
         free(pin);
     }
+
+    /* Wakes up main thread for packet-in I/O. */
+    seq_change(ofproto->pins_seq);
+}
+
+/* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the
+ * packet rather than to send the packet to the controller.
+ *
+ * This function returns false to indicate that a packet_in message
+ * for a "table-miss" should be sent to at least one controller.
+ * False otherwise. */
+bool
+ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
+{
+    return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
 }
 \f
 /* Factory functions. */
@@ -532,12 +572,13 @@ type_run(const char *type)
         simap_destroy(&tmp_backers);
 
         switch (backer->need_revalidate) {
-        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
-        case REV_STP:           COVERAGE_INC(rev_stp);           break;
-        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
-        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
-        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
-        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
+        case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
+        case REV_STP:            COVERAGE_INC(rev_stp);            break;
+        case REV_BOND:           COVERAGE_INC(rev_bond);           break;
+        case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
+        case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
+        case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
+        case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
         }
         backer->need_revalidate = 0;
 
@@ -549,16 +590,18 @@ type_run(const char *type)
                 continue;
             }
 
-            ovs_rwlock_wrlock(&xlate_rwlock);
+            xlate_txn_start();
             xlate_ofproto_set(ofproto, ofproto->up.name,
                               ofproto->backer->dpif, ofproto->miss_rule,
                               ofproto->no_packet_in_rule, ofproto->ml,
-                              ofproto->stp, ofproto->mbridge,
+                              ofproto->stp, ofproto->ms, ofproto->mbridge,
                               ofproto->sflow, ofproto->ipfix,
                               ofproto->netflow, ofproto->up.frag_handling,
                               ofproto->up.forward_bpdu,
                               connmgr_has_in_band(ofproto->up.connmgr),
-                              ofproto->backer->variable_length_userdata);
+                              ofproto->backer->enable_recirc,
+                              ofproto->backer->variable_length_userdata,
+                              ofproto->backer->max_mpls_depth);
 
             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                 xlate_bundle_set(ofproto, bundle, bundle->name,
@@ -580,7 +623,7 @@ type_run(const char *type)
                                  ofport->up.pp.config, ofport->up.pp.state,
                                  ofport->is_tunnel, ofport->may_enable);
             }
-            ovs_rwlock_unlock(&xlate_rwlock);
+            xlate_txn_commit();
         }
 
         udpif_revalidate(backer->udpif);
@@ -768,9 +811,9 @@ close_dpif_backer(struct dpif_backer *backer)
     ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
     hmap_destroy(&backer->odp_to_ofport_map);
     shash_find_and_delete(&all_dpif_backers, backer->type);
+    recirc_id_pool_destroy(backer->rid_pool);
     free(backer->type);
     dpif_close(backer->dpif);
-
     free(backer);
 }
 
@@ -781,6 +824,8 @@ struct odp_garbage {
 };
 
 static bool check_variable_length_userdata(struct dpif_backer *backer);
+static size_t check_max_mpls_depth(struct dpif_backer *backer);
+static bool check_recirc(struct dpif_backer *backer);
 
 static int
 open_dpif_backer(const char *type, struct dpif_backer **backerp)
@@ -791,6 +836,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     struct shash_node *node;
     struct list garbage_list;
     struct odp_garbage *garbage, *next;
+
     struct sset names;
     char *backer_name;
     const char *name;
@@ -880,7 +926,10 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
         close_dpif_backer(backer);
         return error;
     }
+    backer->enable_recirc = check_recirc(backer);
     backer->variable_length_userdata = check_variable_length_userdata(backer);
+    backer->max_mpls_depth = check_max_mpls_depth(backer);
+    backer->rid_pool = recirc_id_pool_create();
 
     if (backer->recv_set_enable) {
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
@@ -889,6 +938,61 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
     return error;
 }
 
+/* Tests whether 'backer''s datapath supports recirculation.  Only newer
+ * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
+ * features on older datapaths that don't support this feature.
+ *
+ * Returns false if 'backer' definitely does not support recirculation, true if
+ * it seems to support recirculation or if at least the error we get is
+ * ambiguous. */
+static bool
+check_recirc(struct dpif_backer *backer)
+{
+    struct flow flow;
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    int error;
+    bool enable_recirc = false;
+
+    memset(&flow, 0, sizeof flow);
+    flow.recirc_id = 1;
+    flow.dp_hash = 1;
+
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
+
+    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
+                          ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
+                          0, NULL);
+    if (error && error != EEXIST) {
+        if (error != EINVAL) {
+            VLOG_WARN("%s: Reciculation flow probe failed (%s)",
+                      dpif_name(backer->dpif), ovs_strerror(error));
+        }
+        goto done;
+    }
+
+    error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
+                          NULL);
+    if (error) {
+        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
+                  dpif_name(backer->dpif));
+    }
+
+    enable_recirc = true;
+
+done:
+    if (enable_recirc) {
+        VLOG_INFO("%s: Datapath supports recirculation",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath does not support recirculation",
+                  dpif_name(backer->dpif));
+    }
+
+    return enable_recirc;
+}
+
 /* Tests whether 'backer''s datapath supports variable-length
  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
  * to disable some features on older datapaths that don't support this
@@ -917,7 +1021,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
     ofpbuf_init(&actions, 64);
     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
-                   dpif_port_get_pid(backer->dpif, ODPP_NONE));
+                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
     nl_msg_end_nested(&actions, start);
 
@@ -928,8 +1032,8 @@ check_variable_length_userdata(struct dpif_backer *backer)
 
     /* Execute the actions.  On older datapaths this fails with ERANGE, on
      * newer datapaths it succeeds. */
-    execute.actions = actions.data;
-    execute.actions_len = actions.size;
+    execute.actions = ofpbuf_data(&actions);
+    execute.actions_len = ofpbuf_size(&actions);
     execute.packet = &packet;
     execute.md = PKT_METADATA_INITIALIZER(0);
     execute.needs_help = false;
@@ -965,6 +1069,53 @@ check_variable_length_userdata(struct dpif_backer *backer)
     }
 }
 
+/* Tests the MPLS label stack depth supported by 'backer''s datapath.
+ *
+ * Returns the number of elements in a struct flow's mpls_lse field
+ * if the datapath supports at least that many entries in an
+ * MPLS label stack.
+ * Otherwise returns the number of MPLS push actions supported by
+ * the datapath. */
+static size_t
+check_max_mpls_depth(struct dpif_backer *backer)
+{
+    struct flow flow;
+    int n;
+
+    for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
+        struct odputil_keybuf keybuf;
+        struct ofpbuf key;
+        int error;
+
+        memset(&flow, 0, sizeof flow);
+        flow.dl_type = htons(ETH_TYPE_MPLS);
+        flow_set_mpls_bos(&flow, n, 1);
+
+        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&key, &flow, NULL, 0, false);
+
+        error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
+                              ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL, 0, NULL);
+        if (error && error != EEXIST) {
+            if (error != EINVAL) {
+                VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
+                          dpif_name(backer->dpif), ovs_strerror(error));
+            }
+            break;
+        }
+
+        error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key), NULL);
+        if (error) {
+            VLOG_WARN("%s: failed to delete MPLS feature probe flow",
+                      dpif_name(backer->dpif));
+        }
+    }
+
+    VLOG_INFO("%s: MPLS label stack length probed as %d",
+              dpif_name(backer->dpif), n);
+    return n;
+}
+
 static int
 construct(struct ofproto *ofproto_)
 {
@@ -984,10 +1135,11 @@ construct(struct ofproto *ofproto_)
     ofproto->dump_seq = 0;
     hmap_init(&ofproto->bundles);
     ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
+    ofproto->ms = NULL;
     ofproto->mbridge = mbridge_create();
     ofproto->has_bonded_bundles = false;
     ofproto->lacp_enabled = false;
-    ovs_mutex_init(&ofproto->stats_mutex);
+    ovs_mutex_init_adaptive(&ofproto->stats_mutex);
     ovs_mutex_init(&ofproto->vsp_mutex);
 
     guarded_list_init(&ofproto->pins);
@@ -1002,6 +1154,9 @@ construct(struct ofproto *ofproto_)
     sset_init(&ofproto->port_poll_set);
     ofproto->port_poll_errno = 0;
     ofproto->change_seq = 0;
+    ofproto->pins_seq = seq_create();
+    ofproto->pins_seqno = seq_read(ofproto->pins_seq);
+
 
     SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
         struct iface_hint *iface_hint = node->data;
@@ -1025,50 +1180,28 @@ construct(struct ofproto *ofproto_)
 
     ofproto_init_tables(ofproto_, N_TABLES);
     error = add_internal_flows(ofproto);
+
     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
 
     return error;
 }
 
 static int
-add_internal_flow(struct ofproto_dpif *ofproto, int id,
+add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
 {
-    struct ofputil_flow_mod fm;
+    struct match match;
     int error;
+    struct rule *rule;
 
-    match_init_catchall(&fm.match);
-    fm.priority = 0;
-    match_set_reg(&fm.match, 0, id);
-    fm.new_cookie = htonll(0);
-    fm.cookie = htonll(0);
-    fm.cookie_mask = htonll(0);
-    fm.modify_cookie = false;
-    fm.table_id = TBL_INTERNAL;
-    fm.command = OFPFC_ADD;
-    fm.idle_timeout = 0;
-    fm.hard_timeout = 0;
-    fm.buffer_id = 0;
-    fm.out_port = 0;
-    fm.flags = 0;
-    fm.ofpacts = ofpacts->data;
-    fm.ofpacts_len = ofpacts->size;
-
-    error = ofproto_flow_mod(&ofproto->up, &fm);
-    if (error) {
-        VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
-                    id, ofperr_to_string(error));
-        return error;
-    }
+    match_init_catchall(&match);
+    match_set_reg(&match, 0, id);
 
-    if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
-                                  rulep)) {
-        rule_dpif_unref(*rulep);
-    } else {
-        OVS_NOT_REACHED();
-    }
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
+                                           &rule);
+    *rulep = error ? NULL : rule_dpif_cast(rule);
 
-    return 0;
+    return error;
 }
 
 static int
@@ -1077,6 +1210,9 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     struct ofpact_controller *controller;
     uint64_t ofpacts_stub[128 / 8];
     struct ofpbuf ofpacts;
+    struct rule *unused_rulep OVS_UNUSED;
+    struct ofpact_resubmit *resubmit;
+    struct match match;
     int error;
     int id;
 
@@ -1089,20 +1225,53 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     controller->reason = OFPR_NO_MATCH;
     ofpact_pad(&ofpacts);
 
-    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->miss_rule);
     if (error) {
         return error;
     }
 
     ofpbuf_clear(&ofpacts);
-    error = add_internal_flow(ofproto, id++, &ofpacts,
-                              &ofproto->no_packet_in_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->no_packet_in_rule);
+    if (error) {
+        return error;
+    }
+
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->drop_frags_rule);
     if (error) {
         return error;
     }
 
-    error = add_internal_flow(ofproto, id++, &ofpacts,
-                              &ofproto->drop_frags_rule);
+    /* Continue non-recirculation rule lookups from table 0.
+     *
+     * (priority=2), recirc=0, actions=resubmit(, 0)
+     */
+    resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->ofpact.compat = 0;
+    resubmit->in_port = OFPP_IN_PORT;
+    resubmit->table_id = 0;
+
+    match_init_catchall(&match);
+    match_set_recirc_id(&match, 0);
+
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
+                                           &unused_rulep);
+    if (error) {
+        return error;
+    }
+
+    /* Drop any run away recirc rule lookups. Recirc_id has to be
+     * non-zero when reaching this rule.
+     *
+     * (priority=1), *, actions=drop
+     */
+    ofpbuf_clear(&ofpacts);
+    match_init_catchall(&match);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
+                                           &unused_rulep);
+
     return error;
 }
 
@@ -1116,22 +1285,22 @@ destruct(struct ofproto *ofproto_)
     struct list pins;
 
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_remove_ofproto(ofproto);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
-    /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
-     * use-after-free error. */
-    udpif_revalidate(ofproto->backer->udpif);
+    /* Ensure that the upcall processing threads have no remaining references
+     * to the ofproto or anything in it. */
+    udpif_synchronize(ofproto->backer->udpif);
 
     hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
 
     OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
         struct cls_cursor cursor;
 
-        ovs_rwlock_rdlock(&table->cls.rwlock);
+        fat_rwlock_rdlock(&table->cls.rwlock);
         cls_cursor_init(&cursor, &table->cls, NULL);
-        ovs_rwlock_unlock(&table->cls.rwlock);
+        fat_rwlock_unlock(&table->cls.rwlock);
         CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
             ofproto_rule_delete(&ofproto->up, &rule->up);
         }
@@ -1151,6 +1320,7 @@ destruct(struct ofproto *ofproto_)
     dpif_sflow_unref(ofproto->sflow);
     hmap_destroy(&ofproto->bundles);
     mac_learning_unref(ofproto->ml);
+    mcast_snooping_unref(ofproto->ms);
 
     hmap_destroy(&ofproto->vlandev_map);
     hmap_destroy(&ofproto->realdev_vid_map);
@@ -1162,6 +1332,8 @@ destruct(struct ofproto *ofproto_)
     ovs_mutex_destroy(&ofproto->stats_mutex);
     ovs_mutex_destroy(&ofproto->vsp_mutex);
 
+    seq_destroy(ofproto->pins_seq);
+
     close_dpif_backer(ofproto->backer);
 }
 
@@ -1176,8 +1348,15 @@ run(struct ofproto *ofproto_)
         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
         mac_learning_flush(ofproto->ml);
         ovs_rwlock_unlock(&ofproto->ml->rwlock);
+        mcast_snooping_mdb_flush(ofproto->ms);
     }
 
+    /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
+     * flow restore.  Even though nothing is processed during flow restore,
+     * all queued 'pins' will be handled immediately when flow restore
+     * completes. */
+    ofproto->pins_seqno = seq_read(ofproto->pins_seq);
+
     /* Do not perform any periodic activity required by 'ofproto' while
      * waiting for flow restore to complete. */
     if (!ofproto_get_flow_restore_wait()) {
@@ -1228,6 +1407,10 @@ run(struct ofproto *ofproto_)
     }
     ovs_rwlock_unlock(&ofproto->ml->rwlock);
 
+    if (mcast_snooping_run(ofproto->ms)) {
+        ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
+    }
+
     new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
     if (ofproto->dump_seq != new_dump_seq) {
         struct rule *rule, *next_rule;
@@ -1289,6 +1472,7 @@ wait(struct ofproto *ofproto_)
     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
     mac_learning_wait(ofproto->ml);
     ovs_rwlock_unlock(&ofproto->ml->rwlock);
+    mcast_snooping_wait(ofproto->ms);
     stp_wait(ofproto);
     if (ofproto->backer->need_revalidate) {
         /* Shouldn't happen, but if it does just go around again. */
@@ -1297,6 +1481,7 @@ wait(struct ofproto *ofproto_)
     }
 
     seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
+    seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
 }
 
 static void
@@ -1311,9 +1496,14 @@ type_get_memory_usage(const char *type, struct simap *usage)
 }
 
 static void
-flush(struct ofproto *ofproto OVS_UNUSED)
+flush(struct ofproto *ofproto_)
 {
-    udpif_flush();
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+    struct dpif_backer *backer = ofproto->backer;
+
+    if (backer) {
+        udpif_flush(backer->udpif);
+    }
 }
 
 static void
@@ -1336,23 +1526,20 @@ get_features(struct ofproto *ofproto_ OVS_UNUSED,
 }
 
 static void
-get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots)
+get_tables(struct ofproto *ofproto, struct ofp12_table_stats *ots)
 {
-    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-    struct dpif_dp_stats s;
-    uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags;
-    uint64_t n_lookup;
+    int i;
 
     strcpy(ots->name, "classifier");
 
-    dpif_get_dp_stats(ofproto->backer->dpif, &s);
-    rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes);
-    rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes);
-    rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes);
+    for (i = 0; i < ofproto->n_tables; i++) {
+        unsigned long missed, matched;
 
-    n_lookup = s.n_hit + s.n_missed - n_dropped_frags;
-    ots->lookup_count = htonll(n_lookup);
-    ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in);
+        atomic_read(&ofproto->tables[i].n_matched, &matched);
+        ots[i].matched_count = htonll(matched);
+        atomic_read(&ofproto->tables[i].n_missed, &missed);
+        ots[i].lookup_count = htonll(matched + missed);
+    }
 }
 
 static struct ofport *
@@ -1453,9 +1640,9 @@ port_destruct(struct ofport *port_)
     const char *dp_port_name;
 
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_ofport_remove(port);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
                                               sizeof namebuf);
@@ -1486,6 +1673,9 @@ port_destruct(struct ofport *port_)
     bundle_remove(port_);
     set_cfm(port_, NULL);
     set_bfd(port_, NULL);
+    if (port->stp_port) {
+        stp_port_disable(port->stp_port);
+    }
     if (ofproto->sflow) {
         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
     }
@@ -1630,21 +1820,27 @@ out:
 }
 
 static bool
+cfm_status_changed(struct ofport *ofport_)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+    return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
+}
+
+static int
 get_cfm_status(const struct ofport *ofport_,
-               struct ofproto_cfm_status *status)
+               struct cfm_status *status)
 {
     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    int ret = 0;
 
     if (ofport->cfm) {
-        status->faults = cfm_get_fault(ofport->cfm);
-        status->flap_count = cfm_get_flap_count(ofport->cfm);
-        status->remote_opstate = cfm_get_opup(ofport->cfm);
-        status->health = cfm_get_health(ofport->cfm);
-        cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps);
-        return true;
+        cfm_get_status(ofport->cfm, status);
     } else {
-        return false;
+        ret = ENOENT;
     }
+
+    return ret;
 }
 
 static int
@@ -1665,17 +1861,27 @@ set_bfd(struct ofport *ofport_, const struct smap *cfg)
     return 0;
 }
 
+static bool
+bfd_status_changed(struct ofport *ofport_)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+    return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
+}
+
 static int
 get_bfd_status(struct ofport *ofport_, struct smap *smap)
 {
     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    int ret = 0;
 
     if (ofport->bfd) {
         bfd_get_status(ofport->bfd, smap);
-        return 0;
     } else {
-        return ENOENT;
+        ret = ENOENT;
     }
+
+    return ret;
 }
 \f
 /* Spanning Tree. */
@@ -1692,7 +1898,7 @@ send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
                      ofproto->up.name, port_num);
     } else {
-        struct eth_header *eth = pkt->l2;
+        struct eth_header *eth = ofpbuf_l2(pkt);
 
         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
         if (eth_addr_is_zero(eth->eth_src)) {
@@ -1784,6 +1990,7 @@ update_stp_port_state(struct ofport_dpif *ofport)
             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
             mac_learning_flush(ofproto->ml);
             ovs_rwlock_unlock(&ofproto->ml->rwlock);
+            mcast_snooping_mdb_flush(ofproto->ms);
         }
         fwd_change = stp_forward_in_state(ofport->stp_state)
                         != stp_forward_in_state(state);
@@ -1909,6 +2116,7 @@ stp_run(struct ofproto_dpif *ofproto)
             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
             mac_learning_flush(ofproto->ml);
             ovs_rwlock_unlock(&ofproto->ml->rwlock);
+            mcast_snooping_mdb_flush(ofproto->ms);
         }
     }
 }
@@ -2083,9 +2291,9 @@ bundle_destroy(struct ofbundle *bundle)
     ofproto = bundle->ofproto;
     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
 
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_bundle_remove(bundle);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
         bundle_del_port(port);
@@ -2260,12 +2468,13 @@ bundle_set(struct ofproto *ofproto_, void *aux,
                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
             }
         } else {
-            bundle->bond = bond_create(s->bond);
+            bundle->bond = bond_create(s->bond, ofproto);
             ofproto->backer->need_revalidate = REV_RECONFIGURE;
         }
 
         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
-            bond_slave_register(bundle->bond, port, port->up.netdev);
+            bond_slave_register(bundle->bond, port,
+                                port->up.ofp_port, port->up.netdev);
         }
     } else {
         bond_unref(bundle->bond);
@@ -2343,7 +2552,9 @@ bundle_send_learning_packets(struct ofbundle *bundle)
             learning_packet = bond_compose_learning_packet(bundle->bond,
                                                            e->mac, e->vlan,
                                                            &port_void);
-            learning_packet->private_p = port_void;
+            /* Temporarily use 'frame' as a private pointer (see below). */
+            ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
+            learning_packet->frame = port_void;
             list_push_back(&packets, &learning_packet->list_node);
         }
     }
@@ -2352,8 +2563,11 @@ bundle_send_learning_packets(struct ofbundle *bundle)
     error = n_packets = n_errors = 0;
     LIST_FOR_EACH (learning_packet, list_node, &packets) {
         int ret;
+        void *port_void = learning_packet->frame;
 
-        ret = ofproto_dpif_send_packet(learning_packet->private_p, learning_packet);
+        /* Restore 'frame'. */
+        learning_packet->frame = ofpbuf_data(learning_packet);
+        ret = ofproto_dpif_send_packet(port_void, learning_packet);
         if (ret) {
             error = ret;
             n_errors++;
@@ -2851,24 +3065,37 @@ static void
 rule_expire(struct rule_dpif *rule)
     OVS_REQUIRES(ofproto_mutex)
 {
-    uint16_t idle_timeout, hard_timeout;
+    uint16_t hard_timeout, idle_timeout;
     long long int now = time_msec();
-    int reason;
+    int reason = -1;
 
-    ovs_assert(!rule->up.pending);
-
-    /* Has 'rule' expired? */
-    ovs_mutex_lock(&rule->up.mutex);
     hard_timeout = rule->up.hard_timeout;
     idle_timeout = rule->up.idle_timeout;
-    if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) {
-        reason = OFPRR_HARD_TIMEOUT;
-    } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) {
-        reason = OFPRR_IDLE_TIMEOUT;
-    } else {
-        reason = -1;
+
+    /* Has 'rule' expired? */
+    if (hard_timeout) {
+        long long int modified;
+
+        ovs_mutex_lock(&rule->up.mutex);
+        modified = rule->up.modified;
+        ovs_mutex_unlock(&rule->up.mutex);
+
+        if (now > modified + hard_timeout * 1000) {
+            reason = OFPRR_HARD_TIMEOUT;
+        }
+    }
+
+    if (reason < 0 && idle_timeout) {
+        long long int used;
+
+        ovs_mutex_lock(&rule->stats_mutex);
+        used = rule->stats.used;
+        ovs_mutex_unlock(&rule->stats_mutex);
+
+        if (now > used + idle_timeout * 1000) {
+            reason = OFPRR_IDLE_TIMEOUT;
+        }
     }
-    ovs_mutex_unlock(&rule->up.mutex);
 
     if (reason >= 0) {
         COVERAGE_INC(ofproto_dpif_expired);
@@ -2895,6 +3122,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
     ovs_assert((rule != NULL) != (ofpacts != NULL));
 
     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
+
     if (rule) {
         rule_dpif_credit_stats(rule, &stats);
     }
@@ -2905,18 +3133,18 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
     xin.resubmit_stats = &stats;
     xlate_actions(&xin, &xout);
 
+    execute.actions = ofpbuf_data(&xout.odp_actions);
+    execute.actions_len = ofpbuf_size(&xout.odp_actions);
+    execute.packet = packet;
+    execute.md = pkt_metadata_from_flow(flow);
+    execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
+
+    /* Fix up in_port. */
     in_port = flow->in_port.ofp_port;
     if (in_port == OFPP_NONE) {
         in_port = OFPP_LOCAL;
     }
-    execute.actions = xout.odp_actions.data;
-    execute.actions_len = xout.odp_actions.size;
-    execute.packet = packet;
-    execute.md.tunnel = flow->tunnel;
-    execute.md.skb_priority = flow->skb_priority;
-    execute.md.pkt_mark = flow->pkt_mark;
-    execute.md.in_port = ofp_port_to_odp_port(ofproto, in_port);
-    execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
+    execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
 
     error = dpif_execute(ofproto->backer->dpif, &execute);
 
@@ -2930,24 +3158,12 @@ rule_dpif_credit_stats(struct rule_dpif *rule,
                        const struct dpif_flow_stats *stats)
 {
     ovs_mutex_lock(&rule->stats_mutex);
-    rule->packet_count += stats->n_packets;
-    rule->byte_count += stats->n_bytes;
-    rule->up.used = MAX(rule->up.used, stats->used);
+    rule->stats.n_packets += stats->n_packets;
+    rule->stats.n_bytes += stats->n_bytes;
+    rule->stats.used = MAX(rule->stats.used, stats->used);
     ovs_mutex_unlock(&rule->stats_mutex);
 }
 
-bool
-rule_dpif_is_fail_open(const struct rule_dpif *rule)
-{
-    return is_fail_open_rule(&rule->up);
-}
-
-bool
-rule_dpif_is_table_miss(const struct rule_dpif *rule)
-{
-    return rule_is_table_miss(&rule->up);
-}
-
 ovs_be64
 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
     OVS_REQUIRES(rule->up.mutex)
@@ -2965,99 +3181,266 @@ rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
 /* Returns 'rule''s actions.  The caller owns a reference on the returned
  * actions and must eventually release it (with rule_actions_unref()) to avoid
  * a memory leak. */
-struct rule_actions *
+const struct rule_actions *
 rule_dpif_get_actions(const struct rule_dpif *rule)
 {
     return rule_get_actions(&rule->up);
 }
 
-/* Lookup 'flow' in 'ofproto''s classifier.  If 'wc' is non-null, sets
- * the fields that were relevant as part of the lookup. */
+/* Sets 'rule''s recirculation id. */
+static void
+rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
+    OVS_REQUIRES(rule->up.mutex)
+{
+    ovs_assert(!rule->recirc_id);
+    rule->recirc_id = id;
+}
+
+/* Returns 'rule''s recirculation id. */
+uint32_t
+rule_dpif_get_recirc_id(struct rule_dpif *rule)
+    OVS_REQUIRES(rule->up.mutex)
+{
+    if (!rule->recirc_id) {
+        struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
+
+        rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
+    }
+    return rule->recirc_id;
+}
+
+/* Sets 'rule''s recirculation id. */
 void
-rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
-                 struct flow_wildcards *wc, struct rule_dpif **rule)
+rule_set_recirc_id(struct rule *rule_, uint32_t id)
 {
-    struct ofport_dpif *port;
+    struct rule_dpif *rule = rule_dpif_cast(rule_);
 
-    if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
-        return;
+    ovs_mutex_lock(&rule->up.mutex);
+    rule_dpif_set_recirc_id(rule, id);
+    ovs_mutex_unlock(&rule->up.mutex);
+}
+
+/* Lookup 'flow' in table 0 of 'ofproto''s classifier.
+ * If 'wc' is non-null, sets the fields that were relevant as part of
+ * the lookup. Returns the table_id where a match or miss occurred.
+ *
+ * The return value will be zero unless there was a miss and
+ * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
+ * where misses occur.
+ *
+ * The rule is returned in '*rule', which is valid at least until the next
+ * RCU quiescent period.  If the '*rule' needs to stay around longer,
+ * a non-zero 'take_ref' must be passed in to cause a reference to be taken
+ * on it before this returns. */
+uint8_t
+rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
+                 struct flow_wildcards *wc, struct rule_dpif **rule,
+                 bool take_ref, const struct dpif_flow_stats *stats)
+{
+    enum rule_dpif_lookup_verdict verdict;
+    enum ofputil_port_config config = 0;
+    uint8_t table_id;
+
+    if (ofproto_dpif_get_enable_recirc(ofproto)) {
+        /* Always exactly match recirc_id since datapath supports
+         * recirculation.  */
+        if (wc) {
+            wc->masks.recirc_id = UINT32_MAX;
+        }
+
+        /* Start looking up from internal table for post recirculation flows
+         * or packets. We can also simply send all, including normal flows
+         * or packets to the internal table. They will not match any post
+         * recirculation rules except the 'catch all' rule that resubmit
+         * them to table 0.
+         *
+         * As an optimization, we send normal flows and packets to table 0
+         * directly, saving one table lookup.  */
+        table_id = flow->recirc_id ? TBL_INTERNAL : 0;
+    } else {
+        table_id = 0;
     }
-    port = get_ofp_port(ofproto, flow->in_port.ofp_port);
-    if (!port) {
-        VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
-                     flow->in_port.ofp_port);
+
+    verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true,
+                                          &table_id, rule, take_ref, stats);
+
+    switch (verdict) {
+    case RULE_DPIF_LOOKUP_VERDICT_MATCH:
+        return table_id;
+    case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: {
+        struct ofport_dpif *port;
+
+        port = get_ofp_port(ofproto, flow->in_port.ofp_port);
+        if (!port) {
+            VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
+                         flow->in_port.ofp_port);
+        }
+        config = port ? port->up.pp.config : 0;
+        break;
+    }
+    case RULE_DPIF_LOOKUP_VERDICT_DROP:
+        config = OFPUTIL_PC_NO_PACKET_IN;
+        break;
+    case RULE_DPIF_LOOKUP_VERDICT_DEFAULT:
+        if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
+            config = OFPUTIL_PC_NO_PACKET_IN;
+        }
+        break;
+    default:
+        OVS_NOT_REACHED();
     }
 
-    choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
-                     ofproto->no_packet_in_rule, rule);
+    choose_miss_rule(config, ofproto->miss_rule,
+                     ofproto->no_packet_in_rule, rule, take_ref);
+    return table_id;
 }
 
-bool
-rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
+/* The returned rule is valid at least until the next RCU quiescent period.
+ * If the '*rule' needs to stay around longer, a non-zero 'take_ref' must be
+ * passed in to cause a reference to be taken on it before this returns. */
+static struct rule_dpif *
+rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
                           const struct flow *flow, struct flow_wildcards *wc,
-                          uint8_t table_id, struct rule_dpif **rule)
+                          bool take_ref)
 {
+    struct classifier *cls = &ofproto->up.tables[table_id].cls;
     const struct cls_rule *cls_rule;
-    struct classifier *cls;
-    bool frag;
-
-    *rule = NULL;
-    if (table_id >= N_TABLES) {
-        return false;
-    }
+    struct rule_dpif *rule;
 
-    if (wc) {
-        memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
-        wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
-    }
+    fat_rwlock_rdlock(&cls->rwlock);
+    if (ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
+        if (wc) {
+            memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
+            if (is_ip_any(flow)) {
+                wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
+            }
+        }
 
-    cls = &ofproto->up.tables[table_id].cls;
-    ovs_rwlock_rdlock(&cls->rwlock);
-    frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
-    if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
-        /* We must pretend that transport ports are unavailable. */
-        struct flow ofpc_normal_flow = *flow;
-        ofpc_normal_flow.tp_src = htons(0);
-        ofpc_normal_flow.tp_dst = htons(0);
-        cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
-    } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
-        cls_rule = &ofproto->drop_frags_rule->up.cr;
-        /* Frag mask in wc already set above. */
+        if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
+            if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
+                /* We must pretend that transport ports are unavailable. */
+                struct flow ofpc_normal_flow = *flow;
+                ofpc_normal_flow.tp_src = htons(0);
+                ofpc_normal_flow.tp_dst = htons(0);
+                cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
+            } else {
+                /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM). */
+                cls_rule = &ofproto->drop_frags_rule->up.cr;
+            }
+        } else {
+            cls_rule = classifier_lookup(cls, flow, wc);
+        }
     } else {
         cls_rule = classifier_lookup(cls, flow, wc);
     }
 
-    *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
-    rule_dpif_ref(*rule);
-    ovs_rwlock_unlock(&cls->rwlock);
+    rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
+    if (take_ref) {
+        rule_dpif_ref(rule);
+    }
+    fat_rwlock_unlock(&cls->rwlock);
 
-    return *rule != NULL;
+    return rule;
 }
 
-/* Given a port configuration (specified as zero if there's no port), chooses
- * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
- * flow table miss. */
-void
-choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
-                 struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
-{
-    *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
-    rule_dpif_ref(*rule);
-}
+/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
+ * Stores the rule that was found in '*rule', or NULL if none was found.
+ * Updates 'wc', if nonnull, to reflect the fields that were used during the
+ * lookup.
+ *
+ * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
+ * if none is found then the table miss configuration for that table is
+ * honored, which can result in additional lookups in other OpenFlow tables.
+ * In this case the function updates '*table_id' to reflect the final OpenFlow
+ * table that was searched.
+ *
+ * If 'honor_table_miss' is false, then only one table lookup occurs, in
+ * '*table_id'.
+ *
+ * Returns:
+ *
+ *    - RULE_DPIF_LOOKUP_VERDICT_MATCH if a rule (in '*rule') was found.
+ *
+ *    - RULE_OFPTC_TABLE_MISS_CONTROLLER if no rule was found and either:
+ *      + 'honor_table_miss' is false
+ *      + a table miss configuration specified that the packet should be
+ *        sent to the controller in this case.
+ *
+ *    - RULE_DPIF_LOOKUP_VERDICT_DROP if no rule was found, 'honor_table_miss'
+ *      is true and a table miss configuration specified that the packet
+ *      should be dropped in this case.
+ *
+ *    - RULE_DPIF_LOOKUP_VERDICT_DEFAULT if no rule was found,
+ *      'honor_table_miss' is true and a table miss configuration has
+ *      not been specified in this case.
+ *
+ * The rule is returned in '*rule', which is valid at least until the next
+ * RCU quiescent period.  If the '*rule' needs to stay around longer,
+ * a non-zero 'take_ref' must be passed in to cause a reference to be taken
+ * on it before this returns. */
+enum rule_dpif_lookup_verdict
+rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
+                            const struct flow *flow,
+                            struct flow_wildcards *wc,
+                            bool honor_table_miss,
+                            uint8_t *table_id, struct rule_dpif **rule,
+                            bool take_ref, const struct dpif_flow_stats *stats)
+{
+    uint8_t next_id;
+
+    for (next_id = *table_id;
+         next_id < ofproto->up.n_tables;
+         next_id++, next_id += (next_id == TBL_INTERNAL))
+    {
+        *table_id = next_id;
+        *rule = rule_dpif_lookup_in_table(ofproto, *table_id, flow, wc,
+                                          take_ref);
+        if (stats) {
+            struct oftable *tbl = &ofproto->up.tables[next_id];
+            atomic_ulong *stat = *rule ? &tbl->n_matched : &tbl->n_missed;
+            unsigned long orig;
+            atomic_add(stat, stats->n_packets, &orig);
+        }
+        if (*rule) {
+            return RULE_DPIF_LOOKUP_VERDICT_MATCH;
+        } else if (!honor_table_miss) {
+            return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
+        } else {
+            switch (ofproto_table_get_config(&ofproto->up, *table_id)) {
+            case OFPROTO_TABLE_MISS_CONTINUE:
+                break;
 
-void
-rule_dpif_ref(struct rule_dpif *rule)
-{
-    if (rule) {
-        ofproto_rule_ref(&rule->up);
+            case OFPROTO_TABLE_MISS_CONTROLLER:
+                return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
+
+            case OFPROTO_TABLE_MISS_DROP:
+                return RULE_DPIF_LOOKUP_VERDICT_DROP;
+
+            case OFPROTO_TABLE_MISS_DEFAULT:
+                return RULE_DPIF_LOOKUP_VERDICT_DEFAULT;
+            }
+        }
     }
+
+    return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
 }
 
+/* Given a port configuration (specified as zero if there's no port), chooses
+ * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
+ * flow table miss.
+ *
+ * The rule is returned in '*rule', which is valid at least until the next
+ * RCU quiescent period.  If the '*rule' needs to stay around longer,
+ * a reference must be taken on it (rule_dpif_ref()).
+ */
 void
-rule_dpif_unref(struct rule_dpif *rule)
+choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
+                 struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule,
+                 bool take_ref)
 {
-    if (rule) {
-        ofproto_rule_unref(&rule->up);
+    *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
+    if (take_ref) {
+        rule_dpif_ref(*rule);
     }
 }
 
@@ -3068,7 +3451,6 @@ complete_operation(struct rule_dpif *rule)
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
 
     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
-    ofoperation_complete(rule->up.pending, 0);
 }
 
 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
@@ -3092,22 +3474,25 @@ rule_dealloc(struct rule *rule_)
 
 static enum ofperr
 rule_construct(struct rule *rule_)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     struct rule_dpif *rule = rule_dpif_cast(rule_);
-    ovs_mutex_init(&rule->stats_mutex);
-    ovs_mutex_lock(&rule->stats_mutex);
-    rule->packet_count = 0;
-    rule->byte_count = 0;
-    ovs_mutex_unlock(&rule->stats_mutex);
+    ovs_mutex_init_adaptive(&rule->stats_mutex);
+    rule->stats.n_packets = 0;
+    rule->stats.n_bytes = 0;
+    rule->stats.used = rule->up.modified;
+    rule->recirc_id = 0;
+
     return 0;
 }
 
-static void
+static enum ofperr
 rule_insert(struct rule *rule_)
     OVS_REQUIRES(ofproto_mutex)
 {
     struct rule_dpif *rule = rule_dpif_cast(rule_);
     complete_operation(rule);
+    return 0;
 }
 
 static void
@@ -3122,17 +3507,25 @@ static void
 rule_destruct(struct rule *rule_)
 {
     struct rule_dpif *rule = rule_dpif_cast(rule_);
+
     ovs_mutex_destroy(&rule->stats_mutex);
+    if (rule->recirc_id) {
+        struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
+
+        ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
+    }
 }
 
 static void
-rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
+rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
+               long long int *used)
 {
     struct rule_dpif *rule = rule_dpif_cast(rule_);
 
     ovs_mutex_lock(&rule->stats_mutex);
-    *packets = rule->packet_count;
-    *bytes = rule->byte_count;
+    *packets = rule->stats.n_packets;
+    *bytes = rule->stats.n_bytes;
+    *used = rule->stats.used;
     ovs_mutex_unlock(&rule->stats_mutex);
 }
 
@@ -3162,8 +3555,8 @@ rule_modify_actions(struct rule *rule_, bool reset_counters)
 
     if (reset_counters) {
         ovs_mutex_lock(&rule->stats_mutex);
-        rule->packet_count = 0;
-        rule->byte_count = 0;
+        rule->stats.n_packets = 0;
+        rule->stats.n_bytes = 0;
         ovs_mutex_unlock(&rule->stats_mutex);
     }
 
@@ -3193,58 +3586,80 @@ static void
 group_construct_stats(struct group_dpif *group)
     OVS_REQUIRES(group->stats_mutex)
 {
+    struct ofputil_bucket *bucket;
+    const struct list *buckets;
+
     group->packet_count = 0;
     group->byte_count = 0;
-    if (!group->bucket_stats) {
-        group->bucket_stats = xcalloc(group->up.n_buckets,
-                                      sizeof *group->bucket_stats);
-    } else {
-        memset(group->bucket_stats, 0, group->up.n_buckets *
-               sizeof *group->bucket_stats);
+
+    group_dpif_get_buckets(group, &buckets);
+    LIST_FOR_EACH (bucket, list_node, buckets) {
+        bucket->stats.packet_count = 0;
+        bucket->stats.byte_count = 0;
+    }
+}
+
+void
+group_dpif_credit_stats(struct group_dpif *group,
+                        struct ofputil_bucket *bucket,
+                        const struct dpif_flow_stats *stats)
+{
+    ovs_mutex_lock(&group->stats_mutex);
+    group->packet_count += stats->n_packets;
+    group->byte_count += stats->n_bytes;
+    if (bucket) {
+        bucket->stats.packet_count += stats->n_packets;
+        bucket->stats.byte_count += stats->n_bytes;
+    } else { /* Credit to all buckets */
+        const struct list *buckets;
+
+        group_dpif_get_buckets(group, &buckets);
+        LIST_FOR_EACH (bucket, list_node, buckets) {
+            bucket->stats.packet_count += stats->n_packets;
+            bucket->stats.byte_count += stats->n_bytes;
+        }
     }
+    ovs_mutex_unlock(&group->stats_mutex);
 }
 
 static enum ofperr
 group_construct(struct ofgroup *group_)
 {
     struct group_dpif *group = group_dpif_cast(group_);
-    ovs_mutex_init(&group->stats_mutex);
+    const struct ofputil_bucket *bucket;
+
+    /* Prevent group chaining because our locking structure makes it hard to
+     * implement deadlock-free.  (See xlate_group_resource_check().) */
+    LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
+        const struct ofpact *a;
+
+        OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
+            if (a->type == OFPACT_GROUP) {
+                return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
+            }
+        }
+    }
+
+    ovs_mutex_init_adaptive(&group->stats_mutex);
     ovs_mutex_lock(&group->stats_mutex);
     group_construct_stats(group);
     ovs_mutex_unlock(&group->stats_mutex);
     return 0;
 }
 
-static void
-group_destruct__(struct group_dpif *group)
-    OVS_REQUIRES(group->stats_mutex)
-{
-    free(group->bucket_stats);
-    group->bucket_stats = NULL;
-}
-
 static void
 group_destruct(struct ofgroup *group_)
 {
     struct group_dpif *group = group_dpif_cast(group_);
-    ovs_mutex_lock(&group->stats_mutex);
-    group_destruct__(group);
-    ovs_mutex_unlock(&group->stats_mutex);
     ovs_mutex_destroy(&group->stats_mutex);
 }
 
 static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
 {
-    struct group_dpif *group = group_dpif_cast(group_);
-    struct group_dpif *victim = group_dpif_cast(victim_);
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
 
-    ovs_mutex_lock(&group->stats_mutex);
-    if (victim->up.n_buckets < group->up.n_buckets) {
-        group_destruct__(group);
-    }
-    group_construct_stats(group);
-    ovs_mutex_unlock(&group->stats_mutex);
+    ofproto->backer->need_revalidate = REV_FLOW_TABLE;
 
     return 0;
 }
@@ -3253,39 +3668,43 @@ static enum ofperr
 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
 {
     struct group_dpif *group = group_dpif_cast(group_);
+    struct ofputil_bucket *bucket;
+    const struct list *buckets;
+    struct bucket_counter *bucket_stats;
 
     ovs_mutex_lock(&group->stats_mutex);
     ogs->packet_count = group->packet_count;
     ogs->byte_count = group->byte_count;
-    memcpy(ogs->bucket_stats, group->bucket_stats,
-           group->up.n_buckets * sizeof *group->bucket_stats);
+
+    group_dpif_get_buckets(group, &buckets);
+    bucket_stats = ogs->bucket_stats;
+    LIST_FOR_EACH (bucket, list_node, buckets) {
+        bucket_stats->packet_count = bucket->stats.packet_count;
+        bucket_stats->byte_count = bucket->stats.byte_count;
+        bucket_stats++;
+    }
     ovs_mutex_unlock(&group->stats_mutex);
 
     return 0;
 }
 
+/* If the group exists, this function increments the groups's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                   struct group_dpif **group)
-    OVS_TRY_RDLOCK(true, (*group)->up.rwlock)
 {
     struct ofgroup *ofgroup;
     bool found;
 
-    *group = NULL;
     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
     *group = found ?  group_dpif_cast(ofgroup) : NULL;
 
     return found;
 }
 
-void
-group_dpif_release(struct group_dpif *group)
-    OVS_RELEASES(group->up.rwlock)
-{
-    ofproto_group_release(&group->up);
-}
-
 void
 group_dpif_get_buckets(const struct group_dpif *group,
                        const struct list **buckets)
@@ -3312,7 +3731,7 @@ ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet
 
     ovs_mutex_lock(&ofproto->stats_mutex);
     ofproto->stats.tx_packets++;
-    ofproto->stats.tx_bytes += packet->size;
+    ofproto->stats.tx_bytes += ofpbuf_size(packet);
     ovs_mutex_unlock(&ofproto->stats_mutex);
     return error;
 }
@@ -3455,14 +3874,16 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
 struct trace_ctx {
     struct xlate_out xout;
     struct xlate_in xin;
+    const struct flow *key;
     struct flow flow;
+    struct flow_wildcards wc;
     struct ds *result;
 };
 
 static void
 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
 {
-    struct rule_actions *actions;
+    const struct rule_actions *actions;
     ovs_be64 cookie;
 
     ds_put_char_multiple(result, '\t', level);
@@ -3486,8 +3907,6 @@ trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
     ds_put_cstr(result, "OpenFlow actions=");
     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
     ds_put_char(result, '\n');
-
-    rule_actions_unref(actions);
 }
 
 static void
@@ -3496,7 +3915,9 @@ trace_format_flow(struct ds *result, int level, const char *title,
 {
     ds_put_char_multiple(result, '\t', level);
     ds_put_format(result, "%s: ", title);
-    if (flow_equal(&trace->xin.flow, &trace->flow)) {
+    /* Do not report unchanged flows for resubmits. */
+    if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
+        || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
         ds_put_cstr(result, "unchanged");
     } else {
         flow_format(result, &trace->xin.flow);
@@ -3527,7 +3948,22 @@ trace_format_odp(struct ds *result, int level, const char *title,
 
     ds_put_char_multiple(result, '\t', level);
     ds_put_format(result, "%s: ", title);
-    format_odp_actions(result, odp_actions->data, odp_actions->size);
+    format_odp_actions(result, ofpbuf_data(odp_actions),
+                               ofpbuf_size(odp_actions));
+    ds_put_char(result, '\n');
+}
+
+static void
+trace_format_megaflow(struct ds *result, int level, const char *title,
+                      struct trace_ctx *trace)
+{
+    struct match match;
+
+    ds_put_char_multiple(result, '\t', level);
+    ds_put_format(result, "%s: ", title);
+    flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
+    match_init(&match, trace->key, &trace->wc);
+    match_format(&match, result, OFP_DEFAULT_PRIORITY);
     ds_put_char(result, '\n');
 }
 
@@ -3541,6 +3977,7 @@ trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
     trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
     trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
     trace_format_odp(result,  recurse + 1, "Resubmitted  odp", trace);
+    trace_format_megaflow(result, recurse + 1, "Resubmitted megaflow", trace);
     trace_format_rule(result, recurse + 1, rule);
 }
 
@@ -3636,8 +4073,9 @@ parse_flow_and_packet(int argc, const char *argv[],
             goto exit;
         }
 
-        if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, flow,
-                          NULL, ofprotop, NULL, NULL, NULL, NULL)) {
+        if (xlate_receive(backer, NULL, ofpbuf_data(&odp_key),
+                          ofpbuf_size(&odp_key), flow,
+                          ofprotop, NULL, NULL, NULL, NULL)) {
             error = "Invalid datapath flow";
             goto exit;
         }
@@ -3664,15 +4102,14 @@ parse_flow_and_packet(int argc, const char *argv[],
 
     /* Generate a packet, if requested. */
     if (packet) {
-        if (!packet->size) {
+        if (!ofpbuf_size(packet)) {
             flow_compose(packet, flow);
         } else {
-            union flow_in_port in_port = flow->in_port;
+            struct pkt_metadata md = pkt_metadata_from_flow(flow);
 
             /* Use the metadata from the flow and the packet argument
              * to reconstruct the flow. */
-            flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL,
-                         &in_port, flow);
+            flow_extract(packet, &md, flow);
         }
     }
 
@@ -3778,11 +4215,11 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
         goto exit;
     }
     if (enforce_consistency) {
-        retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow,
-                                           u16_to_ofp(ofproto->up.max_ports),
+        retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
+                                           &flow, u16_to_ofp(ofproto->up.max_ports),
                                            0, 0, usable_protocols);
     } else {
-        retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow,
+        retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
                                u16_to_ofp(ofproto->up.max_ports), 0, 0,
                                &usable_protocols);
     }
@@ -3794,7 +4231,8 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
         goto exit;
     }
 
-    ofproto_trace(ofproto, &flow, packet, ofpacts.data, ofpacts.size, &result);
+    ofproto_trace(ofproto, &flow, packet,
+                  ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
     unixctl_command_reply(conn, ds_cstr(&result));
 
 exit:
@@ -3813,24 +4251,24 @@ exit:
  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
  * trace, otherwise the actions are determined by a flow table lookup. */
 static void
-ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
+ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
               const struct ofpbuf *packet,
               const struct ofpact ofpacts[], size_t ofpacts_len,
               struct ds *ds)
 {
     struct rule_dpif *rule;
-    struct flow_wildcards wc;
+    struct trace_ctx trace;
 
     ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
     ds_put_cstr(ds, "Flow: ");
     flow_format(ds, flow);
     ds_put_char(ds, '\n');
 
-    flow_wildcards_init_catchall(&wc);
+    flow_wildcards_init_catchall(&trace.wc);
     if (ofpacts) {
         rule = NULL;
     } else {
-        rule_dpif_lookup(ofproto, flow, &wc, &rule);
+        rule_dpif_lookup(ofproto, flow, &trace.wc, &rule, false, NULL);
 
         trace_format_rule(ds, 0, rule);
         if (rule == ofproto->miss_rule) {
@@ -3845,18 +4283,11 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
     }
 
     if (rule || ofpacts) {
-        uint64_t odp_actions_stub[1024 / 8];
-        struct ofpbuf odp_actions;
-        struct trace_ctx trace;
-        struct match match;
-        uint16_t tcp_flags;
-
-        tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0;
         trace.result = ds;
-        trace.flow = *flow;
-        ofpbuf_use_stub(&odp_actions,
-                        odp_actions_stub, sizeof odp_actions_stub);
-        xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet);
+        trace.key = flow; /* Original flow key, used for megaflow. */
+        trace.flow = *flow; /* May be modified by actions. */
+        xlate_in_init(&trace.xin, ofproto, flow, rule, ntohs(flow->tcp_flags),
+                      packet);
         if (ofpacts) {
             trace.xin.ofpacts = ofpacts;
             trace.xin.ofpacts_len = ofpacts_len;
@@ -3865,19 +4296,14 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
         trace.xin.report_hook = trace_report;
 
         xlate_actions(&trace.xin, &trace.xout);
-        flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc);
 
         ds_put_char(ds, '\n');
         trace_format_flow(ds, 0, "Final flow", &trace);
-
-        match_init(&match, flow, &trace.xout.wc);
-        ds_put_cstr(ds, "Relevant fields: ");
-        match_format(&match, ds, OFP_DEFAULT_PRIORITY);
-        ds_put_char(ds, '\n');
+        trace_format_megaflow(ds, 0, "Megaflow", &trace);
 
         ds_put_cstr(ds, "Datapath actions: ");
-        format_odp_actions(ds, trace.xout.odp_actions.data,
-                           trace.xout.odp_actions.size);
+        format_odp_actions(ds, ofpbuf_data(&trace.xout.odp_actions),
+                           ofpbuf_size(&trace.xout.odp_actions));
 
         if (trace.xout.slow) {
             enum slow_path_reason slow;
@@ -3898,8 +4324,6 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
 
         xlate_out_uninit(&trace.xout);
     }
-
-    rule_dpif_unref(rule);
 }
 
 /* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
@@ -4033,11 +4457,10 @@ static bool
 ofproto_dpif_contains_flow(const struct ofproto_dpif *ofproto,
                            const struct nlattr *key, size_t key_len)
 {
-    enum odp_key_fitness fitness;
     struct ofproto_dpif *ofp;
     struct flow flow;
 
-    xlate_receive(ofproto->backer, NULL, key, key_len, &flow, &fitness, &ofp,
+    xlate_receive(ofproto->backer, NULL, key, key_len, &flow, &ofp,
                   NULL, NULL, NULL, NULL);
     return ofp == ofproto;
 }
@@ -4047,21 +4470,20 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
                                 int argc OVS_UNUSED, const char *argv[],
                                 void *aux OVS_UNUSED)
 {
-    struct ds ds = DS_EMPTY_INITIALIZER;
-    const struct dpif_flow_stats *stats;
     const struct ofproto_dpif *ofproto;
-    struct dpif_flow_dump flow_dump;
-    const struct nlattr *actions;
-    const struct nlattr *mask;
-    const struct nlattr *key;
-    size_t actions_len;
-    size_t mask_len;
-    size_t key_len;
+
+    struct ds ds = DS_EMPTY_INITIALIZER;
     bool verbosity = false;
+
     struct dpif_port dpif_port;
     struct dpif_port_dump port_dump;
     struct hmap portno_names;
 
+    struct dpif_flow_dump *flow_dump;
+    struct dpif_flow_dump_thread *flow_dump_thread;
+    struct dpif_flow f;
+    int error;
+
     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
     if (!ofproto) {
         unixctl_command_reply_error(conn, "no such bridge");
@@ -4078,23 +4500,25 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
     }
 
     ds_init(&ds);
-    dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif);
-    while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len,
-                               &actions, &actions_len, &stats)) {
-        if (!ofproto_dpif_contains_flow(ofproto, key, key_len)) {
+    flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
+    flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
+    while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
+        if (!ofproto_dpif_contains_flow(ofproto, f.key, f.key_len)) {
             continue;
         }
 
-        odp_flow_format(key, key_len, mask, mask_len, &portno_names, &ds,
-                        verbosity);
+        odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
+                        &portno_names, &ds, verbosity);
         ds_put_cstr(&ds, ", ");
-        dpif_flow_stats_format(stats, &ds);
+        dpif_flow_stats_format(&f.stats, &ds);
         ds_put_cstr(&ds, ", actions:");
-        format_odp_actions(&ds, actions, actions_len);
+        format_odp_actions(&ds, f.actions, f.actions_len);
         ds_put_char(&ds, '\n');
     }
+    dpif_flow_dump_thread_destroy(flow_dump_thread);
+    error = dpif_flow_dump_destroy(flow_dump);
 
-    if (dpif_flow_dump_done(&flow_dump)) {
+    if (error) {
         ds_clear(&ds);
         ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno));
         unixctl_command_reply_error(conn, ds_cstr(&ds));
@@ -4134,6 +4558,14 @@ ofproto_dpif_unixctl_init(void)
     unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
                              ofproto_unixctl_dpif_dump_flows, NULL);
 }
+
+/* Returns true if 'table' is the table used for internal rules,
+ * false otherwise. */
+bool
+table_is_internal(uint8_t table_id)
+{
+    return table_id == TBL_INTERNAL;
+}
 \f
 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
  *
@@ -4161,7 +4593,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
     if (realdev_ofp_port && ofport->bundle) {
         /* vlandevs are enslaved to their realdevs, so they are not allowed to
          * themselves be part of a bundle. */
-        bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
+        bundle_set(ofport_->ofproto, ofport->bundle, NULL);
     }
 
     ofport->realdev_ofp_port = realdev_ofp_port;
@@ -4184,14 +4616,11 @@ bool
 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
     OVS_EXCLUDED(ofproto->vsp_mutex)
 {
-    bool ret;
-
-    ovs_mutex_lock(&ofproto->vsp_mutex);
-    ret = !hmap_is_empty(&ofproto->realdev_vid_map);
-    ovs_mutex_unlock(&ofproto->vsp_mutex);
-    return ret;
+    /* hmap_is_empty is thread safe. */
+    return !hmap_is_empty(&ofproto->realdev_vid_map);
 }
 
+
 static ofp_port_t
 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
@@ -4227,6 +4656,10 @@ vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
 {
     ofp_port_t ret;
 
+    /* hmap_is_empty is thread safe, see if we can return immediately. */
+    if (hmap_is_empty(&ofproto->realdev_vid_map)) {
+        return realdev_ofp_port;
+    }
     ovs_mutex_lock(&ofproto->vsp_mutex);
     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
     ovs_mutex_unlock(&ofproto->vsp_mutex);
@@ -4290,6 +4723,11 @@ vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
     ofp_port_t realdev;
     int vid;
 
+    /* hmap_is_empty is thread safe. */
+    if (hmap_is_empty(&ofproto->vlandev_map)) {
+        return false;
+    }
+
     ovs_mutex_lock(&ofproto->vsp_mutex);
     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
     ovs_mutex_unlock(&ofproto->vsp_mutex);
@@ -4390,6 +4828,94 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
     }
 }
 
+uint32_t
+ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
+{
+    struct dpif_backer *backer = ofproto->backer;
+
+    return  recirc_id_alloc(backer->rid_pool);
+}
+
+void
+ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
+{
+    struct dpif_backer *backer = ofproto->backer;
+
+    recirc_id_free(backer->rid_pool, recirc_id);
+}
+
+int
+ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
+                               const struct match *match, int priority,
+                               uint16_t idle_timeout,
+                               const struct ofpbuf *ofpacts,
+                               struct rule **rulep)
+{
+    struct ofputil_flow_mod fm;
+    struct rule_dpif *rule;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.command = OFPFC_ADD;
+    fm.idle_timeout = idle_timeout;
+    fm.hard_timeout = 0;
+    fm.buffer_id = 0;
+    fm.out_port = 0;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.ofpacts = ofpbuf_data(ofpacts);
+    fm.ofpacts_len = ofpbuf_size(ofpacts);
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
+                    ofperr_to_string(error));
+        *rulep = NULL;
+        return error;
+    }
+
+    rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
+                                     &fm.match.wc, false);
+    if (rule) {
+        *rulep = &rule->up;
+    } else {
+        OVS_NOT_REACHED();
+    }
+    return 0;
+}
+
+int
+ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
+                                  struct match *match, int priority)
+{
+    struct ofputil_flow_mod fm;
+    int error;
+
+    fm.match = *match;
+    fm.priority = priority;
+    fm.new_cookie = htonll(0);
+    fm.cookie = htonll(0);
+    fm.cookie_mask = htonll(0);
+    fm.modify_cookie = false;
+    fm.table_id = TBL_INTERNAL;
+    fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+    fm.command = OFPFC_DELETE_STRICT;
+
+    error = ofproto_flow_mod(&ofproto->up, &fm);
+    if (error) {
+        VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
+                    ofperr_to_string(error));
+        return error;
+    }
+
+    return 0;
+}
+
 const struct ofproto_class ofproto_dpif_class = {
     init,
     enumerate_types,
@@ -4434,6 +4960,7 @@ const struct ofproto_class ofproto_dpif_class = {
     rule_dealloc,
     rule_get_stats,
     rule_execute,
+    NULL,                       /* rule_premodify_actions */
     rule_modify_actions,
     set_frag_handling,
     packet_out,
@@ -4442,8 +4969,10 @@ const struct ofproto_class ofproto_dpif_class = {
     set_sflow,
     set_ipfix,
     set_cfm,
+    cfm_status_changed,
     get_cfm_status,
     set_bfd,
+    bfd_status_changed,
     get_bfd_status,
     set_stp,
     get_stp_status,