guarded-list: New data structure for thread-safe queue.
[cascardo/ovs.git] / ofproto / ofproto-dpif.c
index 03d5c1f..96cbd7b 100644 (file)
@@ -31,6 +31,7 @@
 #include "dpif.h"
 #include "dynamic-string.h"
 #include "fail-open.h"
+#include "guarded-list.h"
 #include "hmapx.h"
 #include "lacp.h"
 #include "learn.h"
@@ -485,9 +486,6 @@ struct ofproto_dpif {
     struct classifier facets;     /* Contains 'struct facet's. */
     long long int consistency_rl;
 
-    /* Support for debugging async flow mods. */
-    struct list completions;
-
     struct netdev_stats stats; /* To account packets generated and consumed in
                                 * userspace. */
 
@@ -511,19 +509,10 @@ struct ofproto_dpif {
     uint64_t n_missed;
 
     /* Work queues. */
-    struct ovs_mutex flow_mod_mutex;
-    struct list flow_mods OVS_GUARDED;
-    size_t n_flow_mods OVS_GUARDED;
-
-    struct ovs_mutex pin_mutex;
-    struct list pins OVS_GUARDED;
-    size_t n_pins OVS_GUARDED;
+    struct guarded_list flow_mods; /* Contains "struct flow_mod"s. */
+    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
 };
 
-/* Defer flow mod completion until "ovs-appctl ofproto/unclog"?  (Useful only
- * for debugging the asynchronous flow_mod implementation.) */
-static bool clogged;
-
 /* By default, flows in the datapath are wildcarded (megaflows).  They
  * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
 static bool enable_megaflows = true;
@@ -568,18 +557,11 @@ void
 ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                       struct ofputil_flow_mod *fm)
 {
-    ovs_mutex_lock(&ofproto->flow_mod_mutex);
-    if (ofproto->n_flow_mods > 1024) {
-        ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+    if (!guarded_list_push_back(&ofproto->flow_mods, &fm->list_node, 1024)) {
         COVERAGE_INC(flow_mod_overflow);
         free(fm->ofpacts);
         free(fm);
-        return;
     }
-
-    list_push_back(&ofproto->flow_mods, &fm->list_node);
-    ofproto->n_flow_mods++;
-    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
 }
 
 /* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
@@ -588,18 +570,11 @@ void
 ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                             struct ofputil_packet_in *pin)
 {
-    ovs_mutex_lock(&ofproto->pin_mutex);
-    if (ofproto->n_pins > 1024) {
-        ovs_mutex_unlock(&ofproto->pin_mutex);
+    if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
         COVERAGE_INC(packet_in_overflow);
         free(CONST_CAST(void *, pin->packet));
         free(pin);
-        return;
     }
-
-    list_push_back(&ofproto->pins, &pin->list_node);
-    ofproto->n_pins++;
-    ovs_mutex_unlock(&ofproto->pin_mutex);
 }
 \f
 /* Factory functions. */
@@ -1032,7 +1007,6 @@ process_dpif_port_error(struct dpif_backer *backer, int error)
 static int
 dpif_backer_run_fast(struct dpif_backer *backer)
 {
-    udpif_run(backer->udpif);
     handle_upcalls(backer);
 
     return 0;
@@ -1294,19 +1268,8 @@ construct(struct ofproto *ofproto_)
     classifier_init(&ofproto->facets);
     ofproto->consistency_rl = LLONG_MIN;
 
-    list_init(&ofproto->completions);
-
-    ovs_mutex_init(&ofproto->flow_mod_mutex);
-    ovs_mutex_lock(&ofproto->flow_mod_mutex);
-    list_init(&ofproto->flow_mods);
-    ofproto->n_flow_mods = 0;
-    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
-
-    ovs_mutex_init(&ofproto->pin_mutex);
-    ovs_mutex_lock(&ofproto->pin_mutex);
-    list_init(&ofproto->pins);
-    ofproto->n_pins = 0;
-    ovs_mutex_unlock(&ofproto->pin_mutex);
+    guarded_list_init(&ofproto->flow_mods);
+    guarded_list_init(&ofproto->pins);
 
     ofproto_dpif_unixctl_init();
 
@@ -1381,7 +1344,7 @@ add_internal_flow(struct ofproto_dpif *ofproto, int id,
 
     if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
                                   rulep)) {
-        rule_dpif_release(*rulep);
+        rule_dpif_unref(*rulep);
     } else {
         NOT_REACHED();
     }
@@ -1424,18 +1387,6 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     return error;
 }
 
-static void
-complete_operations(struct ofproto_dpif *ofproto)
-{
-    struct dpif_completion *c, *next;
-
-    LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) {
-        ofoperation_complete(c->op, 0);
-        list_remove(&c->list_node);
-        free(c);
-    }
-}
-
 static void
 destruct(struct ofproto *ofproto_)
 {
@@ -1444,6 +1395,7 @@ destruct(struct ofproto *ofproto_)
     struct ofputil_packet_in *pin, *next_pin;
     struct ofputil_flow_mod *fm, *next_fm;
     struct facet *facet, *next_facet;
+    struct list flow_mods, pins;
     struct cls_cursor cursor;
     struct oftable *table;
 
@@ -1459,10 +1411,11 @@ destruct(struct ofproto *ofproto_)
     xlate_remove_ofproto(ofproto);
     ovs_rwlock_unlock(&xlate_rwlock);
 
-    flow_miss_batch_ofproto_destroyed(ofproto->backer->udpif, ofproto);
+    /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
+     * use-after-free error. */
+    udpif_revalidate(ofproto->backer->udpif);
 
     hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
-    complete_operations(ofproto);
 
     OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
         struct cls_cursor cursor;
@@ -1474,27 +1427,22 @@ destruct(struct ofproto *ofproto_)
         }
         ovs_rwlock_unlock(&table->cls.rwlock);
     }
-    complete_operations(ofproto);
 
-    ovs_mutex_lock(&ofproto->flow_mod_mutex);
-    LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &ofproto->flow_mods) {
+    guarded_list_pop_all(&ofproto->flow_mods, &flow_mods);
+    LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &flow_mods) {
         list_remove(&fm->list_node);
-        ofproto->n_flow_mods--;
         free(fm->ofpacts);
         free(fm);
     }
-    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
-    ovs_mutex_destroy(&ofproto->flow_mod_mutex);
+    guarded_list_destroy(&ofproto->flow_mods);
 
-    ovs_mutex_lock(&ofproto->pin_mutex);
-    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &ofproto->pins) {
+    guarded_list_pop_all(&ofproto->pins, &pins);
+    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
         list_remove(&pin->list_node);
-        ofproto->n_pins--;
         free(CONST_CAST(void *, pin->packet));
         free(pin);
     }
-    ovs_mutex_unlock(&ofproto->pin_mutex);
-    ovs_mutex_destroy(&ofproto->pin_mutex);
+    guarded_list_destroy(&ofproto->pins);
 
     mbridge_unref(ofproto->mbridge);
 
@@ -1532,17 +1480,7 @@ run_fast(struct ofproto *ofproto_)
         return 0;
     }
 
-    ovs_mutex_lock(&ofproto->flow_mod_mutex);
-    if (ofproto->n_flow_mods) {
-        flow_mods = ofproto->flow_mods;
-        list_moved(&flow_mods);
-        list_init(&ofproto->flow_mods);
-        ofproto->n_flow_mods = 0;
-    } else {
-        list_init(&flow_mods);
-    }
-    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
-
+    guarded_list_pop_all(&ofproto->flow_mods, &flow_mods);
     LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &flow_mods) {
         int error = ofproto_flow_mod(&ofproto->up, fm);
         if (error && !VLOG_DROP_WARN(&rl)) {
@@ -1555,17 +1493,7 @@ run_fast(struct ofproto *ofproto_)
         free(fm);
     }
 
-    ovs_mutex_lock(&ofproto->pin_mutex);
-    if (ofproto->n_pins) {
-        pins = ofproto->pins;
-        list_moved(&pins);
-        list_init(&ofproto->pins);
-        ofproto->n_pins = 0;
-    } else {
-        list_init(&pins);
-    }
-    ovs_mutex_unlock(&ofproto->pin_mutex);
-
+    guarded_list_pop_all(&ofproto->pins, &pins);
     LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
         connmgr_send_packet_in(ofproto->up.connmgr, pin);
         list_remove(&pin->list_node);
@@ -1588,10 +1516,6 @@ run(struct ofproto *ofproto_)
     struct ofbundle *bundle;
     int error;
 
-    if (!clogged) {
-        complete_operations(ofproto);
-    }
-
     if (mbridge_need_revalidate(ofproto->mbridge)) {
         ofproto->backer->need_revalidate = REV_RECONFIGURE;
         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
@@ -1669,10 +1593,6 @@ wait(struct ofproto *ofproto_)
     struct ofport_dpif *ofport;
     struct ofbundle *bundle;
 
-    if (!clogged && !list_is_empty(&ofproto->completions)) {
-        poll_immediate_wake();
-    }
-
     if (ofproto_get_flow_restore_wait()) {
         return;
     }
@@ -3993,10 +3913,7 @@ rule_expire(struct rule_dpif *rule)
     long long int now;
     uint8_t reason;
 
-    if (rule->up.pending) {
-        /* We'll have to expire it later. */
-        return;
-    }
+    ovs_assert(!rule->up.pending);
 
     ovs_mutex_lock(&rule->up.timeout_mutex);
     hard_timeout = rule->up.hard_timeout;
@@ -4211,12 +4128,13 @@ facet_is_controller_flow(struct facet *facet)
         bool is_controller;
 
         rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
-        ofpacts_len = rule->up.ofpacts_len;
-        ofpacts = rule->up.ofpacts;
+        ofpacts_len = rule->up.actions->ofpacts_len;
+        ofpacts = rule->up.actions->ofpacts;
         is_controller = ofpacts_len > 0
             && ofpacts->type == OFPACT_CONTROLLER
             && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
-        rule_dpif_release(rule);
+        rule_dpif_unref(rule);
+
         return is_controller;
     }
     return false;
@@ -4310,7 +4228,7 @@ facet_check_consistency(struct facet *facet)
     rule_dpif_lookup(facet->ofproto, &facet->flow, NULL, &rule);
     xlate_in_init(&xin, facet->ofproto, &facet->flow, rule, 0, NULL);
     xlate_actions(&xin, &xout);
-    rule_dpif_release(rule);
+    rule_dpif_unref(rule);
 
     ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
         && facet->xout.slow == xout.slow;
@@ -4408,7 +4326,7 @@ facet_revalidate(struct facet *facet)
         || memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) {
         facet_remove(facet);
         xlate_out_uninit(&xout);
-        rule_dpif_release(new_rule);
+        rule_dpif_unref(new_rule);
         return false;
     }
 
@@ -4440,7 +4358,7 @@ facet_revalidate(struct facet *facet)
     facet->used = MAX(facet->used, new_rule->up.created);
 
     xlate_out_uninit(&xout);
-    rule_dpif_release(new_rule);
+    rule_dpif_unref(new_rule);
     return true;
 }
 
@@ -4473,7 +4391,7 @@ flow_push_stats(struct ofproto_dpif *ofproto, struct flow *flow,
     xin.resubmit_stats = stats;
     xin.may_learn = may_learn;
     xlate_actions_for_side_effects(&xin);
-    rule_dpif_release(rule);
+    rule_dpif_unref(rule);
 }
 
 static void
@@ -4568,12 +4486,20 @@ rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
 }
 
-void
-rule_dpif_get_actions(const struct rule_dpif *rule,
-                      const struct ofpact **ofpacts, size_t *ofpacts_len)
+/* Returns 'rule''s actions.  The caller owns a reference on the returned
+ * actions and must eventually release it (with rule_actions_unref()) to avoid
+ * a memory leak. */
+struct rule_actions *
+rule_dpif_get_actions(const struct rule_dpif *rule)
 {
-    *ofpacts = rule->up.ofpacts;
-    *ofpacts_len = rule->up.ofpacts_len;
+    struct rule_actions *actions;
+
+    ovs_rwlock_rdlock(&rule->up.rwlock);
+    actions = rule->up.actions;
+    rule_actions_ref(actions);
+    ovs_rwlock_unlock(&rule->up.rwlock);
+
+    return actions;
 }
 \f
 /* Subfacets. */
@@ -4851,7 +4777,6 @@ bool
 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
                           const struct flow *flow, struct flow_wildcards *wc,
                           uint8_t table_id, struct rule_dpif **rule)
-    OVS_TRY_RDLOCK(true, (*rule)->up.rwlock)
 {
     struct cls_rule *cls_rule;
     struct classifier *cls;
@@ -4886,11 +4811,7 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
     }
 
     *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
-    if (*rule && ovs_rwlock_tryrdlock(&(*rule)->up.rwlock)) {
-        /* The rule is in the process of being removed.  Best we can do is
-         * pretend it isn't there. */
-        *rule = NULL;
-    }
+    rule_dpif_ref(*rule);
     ovs_rwlock_unlock(&cls->rwlock);
 
     return *rule != NULL;
@@ -4902,18 +4823,24 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
 void
 choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
                  struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
-    ovs_rwlock_rdlock(&(*rule)->up.rwlock);
+    rule_dpif_ref(*rule);
+}
+
+void
+rule_dpif_ref(struct rule_dpif *rule)
+{
+    if (rule) {
+        ofproto_rule_ref(&rule->up);
+    }
 }
 
 void
-rule_dpif_release(struct rule_dpif *rule)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+rule_dpif_unref(struct rule_dpif *rule)
 {
     if (rule) {
-        ovs_rwlock_unlock(&rule->up.rwlock);
+        ofproto_rule_unref(&rule->up);
     }
 }
 
@@ -4923,13 +4850,7 @@ complete_operation(struct rule_dpif *rule)
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
 
     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
-    if (clogged) {
-        struct dpif_completion *c = xmalloc(sizeof *c);
-        c->op = rule->up.pending;
-        list_push_back(&ofproto->completions, &c->list_node);
-    } else {
-        ofoperation_complete(rule->up.pending, 0);
-    }
+    ofoperation_complete(rule->up.pending, 0);
 }
 
 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
@@ -5363,7 +5284,8 @@ trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
 
     ds_put_char_multiple(result, '\t', level);
     ds_put_cstr(result, "OpenFlow ");
-    ofpacts_format(rule->up.ofpacts, rule->up.ofpacts_len, result);
+    ofpacts_format(rule->up.actions->ofpacts, rule->up.actions->ofpacts_len,
+                   result);
     ds_put_char(result, '\n');
 }
 
@@ -5634,23 +5556,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
         xlate_out_uninit(&trace.xout);
     }
 
-    rule_dpif_release(rule);
-}
-
-static void
-ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
-                  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
-{
-    clogged = true;
-    unixctl_command_reply(conn, NULL);
-}
-
-static void
-ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
-                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
-{
-    clogged = false;
-    unixctl_command_reply(conn, NULL);
+    rule_dpif_unref(rule);
 }
 
 /* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
@@ -6069,10 +5975,6 @@ ofproto_dpif_unixctl_init(void)
                              ofproto_unixctl_fdb_flush, NULL);
     unixctl_command_register("fdb/show", "bridge", 1, 1,
                              ofproto_unixctl_fdb_show, NULL);
-    unixctl_command_register("ofproto/clog", "", 0, 0,
-                             ofproto_dpif_clog, NULL);
-    unixctl_command_register("ofproto/unclog", "", 0, 0,
-                             ofproto_dpif_unclog, NULL);
     unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
                              ofproto_dpif_self_check, NULL);
     unixctl_command_register("dpif/dump-dps", "", 0, 0,