struct seq *dump_seq; /* Increments each dump iteration. */
atomic_bool enable_ufid; /* If true, skip dumping flow attrs. */
+ /* These variables provide a mechanism for the main thread to pause
+ * all revalidation without having to completely shut the threads down.
+ * 'pause_latch' is shared between the main thread and the lead
+ * revalidator thread, so when it is desirable to halt revalidation, the
+ * main thread will set the latch. 'pause' and 'pause_barrier' are shared
+ * by revalidator threads. The lead revalidator will set 'pause' when it
+ * observes the latch has been set, and this will cause all revalidator
+ * threads to wait on 'pause_barrier' at the beginning of the next
+ * revalidation round. */
+ bool pause; /* Set by leader on 'pause_latch'. */
+ struct latch pause_latch; /* Set to force revalidators to pause. */
+ struct ovs_barrier pause_barrier; /* Barrier used by the main thread */
+ /* to pause all revalidators. */
+
/* There are 'N_UMAPS' maps containing 'struct udpif_key' elements.
*
* During the flow dump phase, revalidators insert into these with a random
IPFIX_UPCALL /* Per-bridge sampling. */
};
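+/* Possible outcomes of revalidating a single datapath flow; see
+ * revalidate_ukey() for the full contract. */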
+enum reval_result {
+ UKEY_KEEP,
+ UKEY_DELETE,
+ UKEY_MODIFY
+};
+
struct upcall {
struct ofproto_dpif *ofproto; /* Parent ofproto. */
const struct recirc_id_node *recirc; /* Recirculation context. */
unsigned pmd_id; /* Datapath poll mode driver id. */
const struct dp_packet *packet; /* Packet associated with this upcall. */
ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */
+ uint16_t mru; /* If !0, maximum receive unit of
+ fragmented IP packet. */
enum dpif_upcall_type type; /* Datapath type of the upcall. */
const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
static void udpif_stop_threads(struct udpif *);
static void udpif_start_threads(struct udpif *, size_t n_handlers,
size_t n_revalidators);
+static void udpif_pause_revalidators(struct udpif *);
+static void udpif_resume_revalidators(struct udpif *);
static void *udpif_upcall_handler(void *);
static void *udpif_revalidator(void *);
static unsigned long udpif_get_n_flows(struct udpif *);
static void revalidate(struct revalidator *);
+static void revalidator_pause(struct revalidator *);
static void revalidator_sweep(struct revalidator *);
static void revalidator_purge(struct revalidator *);
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
static int upcall_receive(struct upcall *, const struct dpif_backer *,
const struct dp_packet *packet, enum dpif_upcall_type,
const struct nlattr *userdata, const struct flow *,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id);
static void upcall_uninit(struct upcall *);
static upcall_callback upcall_cb;
+static dp_purge_callback dp_purge_cb;
static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true);
static atomic_bool enable_ufid = ATOMIC_VAR_INIT(true);
udpif->reval_seq = seq_create();
udpif->dump_seq = seq_create();
latch_init(&udpif->exit_latch);
+ latch_init(&udpif->pause_latch);
list_push_back(&all_udpifs, &udpif->list_node);
atomic_init(&udpif->enable_ufid, false);
atomic_init(&udpif->n_flows, 0);
}
dpif_register_upcall_cb(dpif, upcall_cb, udpif);
+ dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);
return udpif;
}
list_remove(&udpif->list_node);
latch_destroy(&udpif->exit_latch);
+ latch_destroy(&udpif->pause_latch);
seq_destroy(udpif->reval_seq);
seq_destroy(udpif->dump_seq);
ovs_mutex_destroy(&udpif->n_flows_mutex);
latch_poll(&udpif->exit_latch);
ovs_barrier_destroy(&udpif->reval_barrier);
+ ovs_barrier_destroy(&udpif->pause_barrier);
free(udpif->revalidators);
udpif->revalidators = NULL;
dpif_enable_upcall(udpif->dpif);
ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators);
+ ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1);
udpif->reval_exit = false;
+ udpif->pause = false;
udpif->revalidators = xzalloc(udpif->n_revalidators
* sizeof *udpif->revalidators);
for (i = 0; i < udpif->n_revalidators; i++) {
}
}
+/* Pauses all revalidators. Should only be called by the main thread.
+ * When this function returns, all revalidators are paused and will proceed
+ * only after udpif_resume_revalidators() is called. */
+static void
+udpif_pause_revalidators(struct udpif *udpif)
+{
+ if (ofproto_dpif_backer_enabled(udpif->backer)) {
+ latch_set(&udpif->pause_latch);
+ ovs_barrier_block(&udpif->pause_barrier);
+ }
+}
+
+/* Resumes the revalidators paused by udpif_pause_revalidators(). Should
+ * only be called by the main thread. */
+static void
+udpif_resume_revalidators(struct udpif *udpif)
+{
+ if (ofproto_dpif_backer_enabled(udpif->backer)) {
+ latch_poll(&udpif->pause_latch);
+ ovs_barrier_block(&udpif->pause_barrier);
+ }
+}
+
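+/* Illustrative usage sketch (not itself part of the datapath code): a
+ * main-thread caller that needs the revalidators quiescent, such as
+ * dp_purge_cb() below, brackets its critical section with the pair above:
+ *
+ *     udpif_pause_revalidators(udpif);
+ *     ...walk or mutate state shared with the revalidators...
+ *     udpif_resume_revalidators(udpif);
+ *
+ * Both functions are no-ops while the backer is disabled, so the
+ * bracketing is safe to use unconditionally from the main thread. */
+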
/* Tells 'udpif' how many threads it should use to handle upcalls.
* 'n_handlers' and 'n_revalidators' can never be zero. 'udpif''s
* datapath handle must have packet reception enabled before starting
struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
struct upcall *upcall = &upcalls[n_upcalls];
struct flow *flow = &flows[n_upcalls];
+ unsigned int mru;
int error;
ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
goto free_dupcall;
}
+ if (dupcall->mru) {
+ mru = nl_attr_get_u16(dupcall->mru);
+ } else {
+ mru = 0;
+ }
+
error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
- dupcall->type, dupcall->userdata, flow,
+ dupcall->type, dupcall->userdata, flow, mru,
&dupcall->ufid, PMD_ID_NULL);
if (error) {
if (error == ENODEV) {
udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;
+ /* Only the leader checks the pause latch to prevent a race where
+ * some threads think it's false and proceed to block on the
+ * reval_barrier and others think it's true and block indefinitely
+ * on the pause_barrier. */
+ udpif->pause = latch_is_set(&udpif->pause_latch);
+
/* Only the leader checks the exit latch to prevent a race where
* some threads think it's true and exit and others think it's
* false and block indefinitely on the reval_barrier */
/* Wait for the leader to start the flow dump. */
ovs_barrier_block(&udpif->reval_barrier);
+ if (udpif->pause) {
+ revalidator_pause(revalidator);
+ }
+
if (udpif->reval_exit) {
break;
}
poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
seq_wait(udpif->reval_seq, last_reval_seq);
latch_wait(&udpif->exit_latch);
+ latch_wait(&udpif->pause_latch);
poll_block();
}
}
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
const struct dp_packet *packet, enum dpif_upcall_type type,
const struct nlattr *userdata, const struct flow *flow,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id)
{
int error;
upcall->ukey = NULL;
upcall->key = NULL;
upcall->key_len = 0;
+ upcall->mru = mru;
upcall->out_tun_key = NULL;
upcall->actions = NULL;
ofpbuf_use_const(&upcall->put_actions,
odp_actions->data, odp_actions->size);
} else {
- ofpbuf_init(&upcall->put_actions, 0);
+ /* upcall->put_actions already initialized by upcall_receive(). */
compose_slow_path(udpif, &upcall->xout, upcall->flow,
upcall->flow->in_port.odp_port,
&upcall->put_actions);
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
- flow, ufid, pmd_id);
+ flow, 0, ufid, pmd_id);
if (error) {
return error;
}
unsigned int flow_limit;
size_t n_ops, n_opsp, i;
bool may_put;
- bool megaflow;
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
- atomic_read_relaxed(&enable_megaflows, &megaflow);
may_put = udpif_get_n_flows(udpif) < flow_limit;
op->dop.u.execute.actions_len = upcall->odp_actions.size;
op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
op->dop.u.execute.probe = false;
+ op->dop.u.execute.mtu = upcall->mru;
}
}
/* Check the flow actions for recirculation action. As recirculation
* relies on OVS userspace internal state, we need to delete all old
- * datapath flows with recirculation upon OVS restart. */
+ * datapath flows that have either a non-zero recirc_id in the key or
+ * any recirculation actions upon OVS restart. */
+ NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->key, flow->key_len) {
+ if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
+ && nl_attr_get_u32(a) != 0) {
+ return EINVAL;
+ }
+ }
NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->actions, flow->actions_len) {
if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
return EINVAL;
return false;
}
-static bool
+/* Verifies that the datapath actions of 'ukey' are still correct, and pushes
+ * 'stats' for it.
+ *
+ * Returns a recommended action for 'ukey'; the options are:
+ * UKEY_DELETE The ukey should be deleted.
+ * UKEY_KEEP The ukey is fine as is.
+ * UKEY_MODIFY The ukey's actions should be changed, but it is
+ * otherwise fine. Callers should change the actions
+ * to those found in the caller-supplied 'odp_actions'
+ * buffer. */
+static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
- const struct dpif_flow_stats *stats, uint64_t reval_seq)
+ const struct dpif_flow_stats *stats,
+ struct ofpbuf *odp_actions, uint64_t reval_seq)
OVS_REQUIRES(ukey->mutex)
{
- uint64_t odp_actions_stub[1024 / 8];
- struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
-
struct xlate_out xout, *xoutp;
struct netflow *netflow;
struct ofproto_dpif *ofproto;
struct dpif_flow_stats push;
- struct flow flow, dp_mask;
- struct flow_wildcards wc;
- uint64_t *dp64, *xout64;
+ struct flow flow;
+ struct flow_wildcards dp_mask, wc;
+ enum reval_result result;
ofp_port_t ofp_in_port;
struct xlate_in xin;
long long int last_used;
int error;
- size_t i;
- bool ok;
bool need_revalidate;
- ok = false;
+ result = UKEY_DELETE;
xoutp = NULL;
netflow = NULL;
+ ofpbuf_clear(odp_actions);
need_revalidate = (ukey->reval_seq != reval_seq);
last_used = ukey->stats.used;
push.used = stats->used;
if (need_revalidate && last_used
&& !should_revalidate(udpif, push.n_packets, last_used)) {
- ok = false;
goto exit;
}
/* We will push the stats, so update the ukey stats cache. */
ukey->stats = *stats;
if (!push.n_packets && !need_revalidate) {
- ok = true;
+ result = UKEY_KEEP;
goto exit;
}
if (ukey->xcache && !need_revalidate) {
xlate_push_stats(ukey->xcache, &push);
- ok = true;
+ result = UKEY_KEEP;
goto exit;
}
}
xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags,
- NULL, need_revalidate ? &wc : NULL, &odp_actions);
+ NULL, need_revalidate ? &wc : NULL, odp_actions);
if (push.n_packets) {
xin.resubmit_stats = &push;
xin.may_learn = true;
xoutp = &xout;
if (!need_revalidate) {
- ok = true;
+ result = UKEY_KEEP;
goto exit;
}
if (xout.slow) {
- ofpbuf_clear(&odp_actions);
+ ofpbuf_clear(odp_actions);
compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port,
- &odp_actions);
+ odp_actions);
}
- if (!ofpbuf_equal(&odp_actions,
- ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
+ if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
+ ukey->key_len, &dp_mask.masks, &flow)
+ == ODP_FIT_ERROR) {
goto exit;
}
- if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
- ukey->key_len, &dp_mask, &flow) == ODP_FIT_ERROR) {
+ /* Do not modify if any bit is wildcarded by the installed datapath flow,
+ * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
+ * indicates that the datapath flow is now too generic and must be narrowed
+ * down. Note that we do not know if the datapath has ignored any of the
+ * wildcarded bits, so we may be overly conservative here. */
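+ /* For example, if the installed datapath flow wildcards nw_src (it is
+ * wildcarded in 'dp_mask') while the newly revalidated 'wc' requires an
+ * exact match on nw_src, then the installed flow is too generic:
+ * flow_wildcards_has_extra(&dp_mask, &wc) returns true and the flow is
+ * deleted rather than modified in place. */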
+ if (flow_wildcards_has_extra(&dp_mask, &wc)) {
goto exit;
}
- /* Since the kernel is free to ignore wildcarded bits in the mask, we can't
- * directly check that the masks are the same. Instead we check that the
- * mask in the kernel is more specific i.e. less wildcarded, than what
- * we've calculated here. This guarantees we don't catch any packets we
- * shouldn't with the megaflow. */
- dp64 = (uint64_t *) &dp_mask;
- xout64 = (uint64_t *) &wc.masks;
- for (i = 0; i < FLOW_U64S; i++) {
- if ((dp64[i] | xout64[i]) != dp64[i]) {
- goto exit;
- }
+ if (!ofpbuf_equal(odp_actions,
+ ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
+ /* The datapath mask was OK, but the actions seem to have changed.
+ * Let's modify the flow in place. */
+ result = UKEY_MODIFY;
+ goto exit;
}
- ok = true;
+ result = UKEY_KEEP;
exit:
- if (ok) {
+ if (result != UKEY_DELETE) {
ukey->reval_seq = reval_seq;
}
- if (netflow && !ok) {
+ if (netflow && result == UKEY_DELETE) {
netflow_flow_clear(netflow, &flow);
}
xlate_out_uninit(xoutp);
- ofpbuf_uninit(&odp_actions);
- return ok;
+ return result;
}
static void
op->dop.u.flow_del.terse = udpif_use_ufid(udpif);
}
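+/* Initializes 'op' to modify the datapath flow identified by 'ukey' in
+ * place.  The new actions are taken from 'ukey' itself, so callers that
+ * want different actions installed must first store them with
+ * ukey_set_actions(), as revalidate() and revalidator_sweep__() do. */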
+static void
+modify_op_init(struct ukey_op *op, struct udpif_key *ukey)
+{
+ op->ukey = ukey;
+ op->dop.type = DPIF_OP_FLOW_PUT;
+ op->dop.u.flow_put.flags = DPIF_FP_MODIFY;
+ op->dop.u.flow_put.key = ukey->key;
+ op->dop.u.flow_put.key_len = ukey->key_len;
+ op->dop.u.flow_put.mask = ukey->mask;
+ op->dop.u.flow_put.mask_len = ukey->mask_len;
+ op->dop.u.flow_put.ufid = &ukey->ufid;
+ op->dop.u.flow_put.pmd_id = ukey->pmd_id;
+ op->dop.u.flow_put.stats = NULL;
+ ukey_get_actions(ukey, &op->dop.u.flow_put.actions,
+ &op->dop.u.flow_put.actions_len);
+}
+
static void
push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
{
stats = op->dop.u.flow_del.stats;
push = &push_buf;
+ if (op->dop.type != DPIF_OP_FLOW_DEL) {
+ /* Only deleted flows need their stats pushed. */
+ continue;
+ }
+
+ if (op->dop.error) {
+ /* flow_del error, 'stats' is unusable. */
+ continue;
+ }
+
if (op->ukey) {
ovs_mutex_lock(&op->ukey->mutex);
push->used = MAX(stats->used, op->ukey->stats.used);
static void
revalidate(struct revalidator *revalidator)
{
+ uint64_t odp_actions_stub[1024 / 8];
+ struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
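+ /* revalidate_ukey() clears 'odp_actions' on entry, so this single
+ * stub-backed buffer is reused for every flow in the dump, avoiding a
+ * per-flow heap allocation. */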
+
struct udpif *udpif = revalidator->udpif;
struct dpif_flow_dump_thread *dump_thread;
uint64_t dump_seq, reval_seq;
for (f = flows; f < &flows[n_dumped]; f++) {
long long int used = f->stats.used;
+ enum reval_result result;
struct udpif_key *ukey;
- bool already_dumped, keep;
+ bool already_dumped;
int error;
if (ukey_acquire(udpif, f, &ukey, &error)) {
used = ukey->created;
}
if (kill_them_all || (used && used < now - max_idle)) {
- keep = false;
+ result = UKEY_DELETE;
} else {
- keep = revalidate_ukey(udpif, ukey, &f->stats, reval_seq);
+ result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
+ reval_seq);
}
ukey->dump_seq = dump_seq;
- ukey->flow_exists = keep;
+ ukey->flow_exists = result != UKEY_DELETE;
- if (!keep) {
+ if (result == UKEY_DELETE) {
delete_op_init(udpif, &ops[n_ops++], ukey);
+ } else if (result == UKEY_MODIFY) {
+ ukey_set_actions(ukey, &odp_actions);
+ modify_op_init(&ops[n_ops++], ukey);
}
ovs_mutex_unlock(&ukey->mutex);
}
ovsrcu_quiesce();
}
dpif_flow_dump_thread_destroy(dump_thread);
+ ofpbuf_uninit(&odp_actions);
}
-static bool
-handle_missed_revalidation(struct udpif *udpif, uint64_t reval_seq,
- struct udpif_key *ukey)
+/* Pauses the 'revalidator'; it can proceed only after the main thread
+ * calls udpif_resume_revalidators(). */
+static void
+revalidator_pause(struct revalidator *revalidator)
{
- struct dpif_flow_stats stats;
- bool keep;
-
- COVERAGE_INC(revalidate_missed_dp_flow);
-
- memset(&stats, 0, sizeof stats);
- ovs_mutex_lock(&ukey->mutex);
- keep = revalidate_ukey(udpif, ukey, &stats, reval_seq);
- ovs_mutex_unlock(&ukey->mutex);
-
- return keep;
+ /* The first block synchronizes the pause with the main thread. */
+ ovs_barrier_block(&revalidator->udpif->pause_barrier);
+ /* The second block pauses this thread until the main thread resumes. */
+ ovs_barrier_block(&revalidator->udpif->pause_barrier);
}
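+
+/* Illustrative timeline of the pause handshake implemented by
+ * revalidator_pause() above, assuming a main thread M and revalidators R0
+ * (the leader) and R1; 'pause_barrier' is initialized for
+ * n_revalidators + 1 participants in udpif_start_threads():
+ *
+ *   M:       latch_set(&pause_latch); ovs_barrier_block(&pause_barrier);
+ *   R0:      udpif->pause = latch_is_set(&pause_latch);    (observes true)
+ *   R0, R1:  ovs_barrier_block(&reval_barrier), then see 'pause' is true
+ *   R0, R1:  first ovs_barrier_block(&pause_barrier)       (releases M)
+ *   M:       ...work that requires quiescent revalidators...
+ *   M:       latch_poll(&pause_latch); ovs_barrier_block(&pause_barrier);
+ *   R0, R1:  second ovs_barrier_block() returns; revalidation resumes. */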
static void
ovs_assert(slice < udpif->n_revalidators);
for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
+ uint64_t odp_actions_stub[1024 / 8];
+ struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
+
struct ukey_op ops[REVALIDATE_MAX_BATCH];
struct udpif_key *ukey;
struct umap *umap = &udpif->ukeys[i];
CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
bool flow_exists, seq_mismatch;
+ enum reval_result result;
/* Handler threads could be holding a ukey lock while it installs a
* new flow, so don't hang around waiting for access to it. */
flow_exists = ukey->flow_exists;
seq_mismatch = (ukey->dump_seq != dump_seq
&& ukey->reval_seq != reval_seq);
+
+ if (purge) {
+ result = UKEY_DELETE;
+ } else if (!seq_mismatch) {
+ result = UKEY_KEEP;
+ } else {
+ struct dpif_flow_stats stats;
+ COVERAGE_INC(revalidate_missed_dp_flow);
+ memset(&stats, 0, sizeof stats);
+ result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
+ reval_seq);
+ }
ovs_mutex_unlock(&ukey->mutex);
- if (flow_exists
- && (purge
- || (seq_mismatch
- && !handle_missed_revalidation(udpif, reval_seq,
- ukey)))) {
- struct ukey_op *op = &ops[n_ops++];
+ if (result == UKEY_DELETE) {
+ delete_op_init(udpif, &ops[n_ops++], ukey);
+ } else if (result == UKEY_MODIFY) {
+ ukey_set_actions(ukey, &odp_actions);
+ modify_op_init(&ops[n_ops++], ukey);
+ }
- delete_op_init(udpif, op, ukey);
- if (n_ops == REVALIDATE_MAX_BATCH) {
- push_ukey_ops(udpif, umap, ops, n_ops);
- n_ops = 0;
- }
- } else if (!flow_exists) {
+ if (n_ops == REVALIDATE_MAX_BATCH) {
+ push_ukey_ops(udpif, umap, ops, n_ops);
+ n_ops = 0;
+ }
+
+ if (!flow_exists) {
ovs_mutex_lock(&umap->mutex);
ukey_delete(umap, ukey);
ovs_mutex_unlock(&umap->mutex);
if (n_ops) {
push_ukey_ops(udpif, umap, ops, n_ops);
}
+
+ ofpbuf_uninit(&odp_actions);
ovsrcu_quiesce();
}
}
{
revalidator_sweep__(revalidator, true);
}
+
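+/* Illustrative note: 'dp_purge_cb' is registered with the datapath via
+ * dpif_register_dp_purge_cb() in udpif_create() above; the datapath
+ * invokes it when tearing down per-PMD state (for example, when a
+ * userspace datapath PMD thread is removed), so that stale ukeys for that
+ * 'pmd_id' do not linger. */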
+/* In reaction to a dpif purge, purges all 'ukey's with the same 'pmd_id'. */
+static void
+dp_purge_cb(void *aux, unsigned pmd_id)
+{
+ struct udpif *udpif = aux;
+ size_t i;
+
+ udpif_pause_revalidators(udpif);
+ for (i = 0; i < N_UMAPS; i++) {
+ struct ukey_op ops[REVALIDATE_MAX_BATCH];
+ struct udpif_key *ukey;
+ struct umap *umap = &udpif->ukeys[i];
+ size_t n_ops = 0;
+
+ CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
+ if (ukey->pmd_id == pmd_id) {
+ delete_op_init(udpif, &ops[n_ops++], ukey);
+ if (n_ops == REVALIDATE_MAX_BATCH) {
+ push_ukey_ops(udpif, umap, ops, n_ops);
+ n_ops = 0;
+ }
+ }
+ }
+
+ if (n_ops) {
+ push_ukey_ops(udpif, umap, ops, n_ops);
+ }
+
+ ovsrcu_quiesce();
+ }
+ udpif_resume_revalidators(udpif);
+}
\f
static void
upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,