-/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
};
/* In the absence of a multiple-writer multiple-reader datastructure for
- * storing ukeys, we use a large number of cmaps, each with its own lock for
- * writing. */
+ * storing udpif_keys ("ukeys"), we use a large number of cmaps, each with its
+ * own lock for writing. */
#define N_UMAPS 512 /* per udpif. */
struct umap {
struct ovs_mutex mutex; /* Take for writing to the following. */
};
/* A thread that processes datapath flows, updates OpenFlow statistics, and
- * updates or removes them if necessary. */
+ * updates or removes them if necessary.
+ *
+ * Revalidator threads operate in two phases: "dump" and "sweep". Between
+ * the phases, all revalidators sync up so that at any given time every
+ * revalidator thread is in the same phase, never a mix of the two.
+ *
+ * During the dump phase, revalidators fetch flows from the datapath and
+ * attribute the statistics to OpenFlow rules. Each datapath flow has a
+ * corresponding ukey which caches the most recently seen statistics. If
+ * a flow needs to be deleted (for example, because it is unused over a
+ * period of time), revalidator threads may delete the flow during the
+ * dump phase. The datapath is not guaranteed to reliably dump all of its
+ * flows, and there is no mapping from datapath flows to revalidators, so a
+ * particular flow may be handled by zero or more revalidators during a
+ * single dump phase. To avoid duplicate attribution of statistics, ukeys
+ * are never deleted during this phase.
+ *
+ * During the sweep phase, each revalidator takes ownership of a different
+ * slice of umaps and sweeps through all ukeys in those umaps to figure out
+ * whether they need to be deleted. During this phase, revalidators may
+ * fetch individual flows which were not dumped during the dump phase to
+ * validate them and attribute statistics.
+ */
struct revalidator {
struct udpif *udpif; /* Parent udpif. */
pthread_t thread; /* Thread ID. */
unsigned pmd_id; /* Datapath poll mode driver id. */
const struct dp_packet *packet; /* Packet associated with this upcall. */
ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */
+ uint16_t mru; /* If !0, Maximum receive unit of
+ fragmented IP packet */
enum dpif_upcall_type type; /* Datapath type of the upcall. */
const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
struct nlattr nla;
} keybuf, maskbuf;
- /* Recirculation IDs with references held by the ukey. */
- unsigned n_recircs;
- uint32_t recircs[]; /* 'n_recircs' id's for which references are held. */
+ uint32_t key_recirc_id; /* Non-zero if reference is held by the ukey. */
+ struct recirc_refs recircs; /* Action recirc IDs with references held. */
};
/* Datapath operation with optional ukey attached. */
static int upcall_receive(struct upcall *, const struct dpif_backer *,
const struct dp_packet *packet, enum dpif_upcall_type,
const struct nlattr *userdata, const struct flow *,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id);
static void upcall_uninit(struct upcall *);
struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
struct upcall *upcall = &upcalls[n_upcalls];
struct flow *flow = &flows[n_upcalls];
+ unsigned int mru;
int error;
ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
goto free_dupcall;
}
+ if (dupcall->mru) {
+ mru = nl_attr_get_u16(dupcall->mru);
+ } else {
+ mru = 0;
+ }
+
error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
- dupcall->type, dupcall->userdata, flow,
+ dupcall->type, dupcall->userdata, flow, mru,
&dupcall->ufid, PMD_ID_NULL);
if (error) {
if (error == ENODEV) {
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
const struct dp_packet *packet, enum dpif_upcall_type type,
const struct nlattr *userdata, const struct flow *flow,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id)
{
int error;
upcall->ukey = NULL;
upcall->key = NULL;
upcall->key_len = 0;
+ upcall->mru = mru;
upcall->out_tun_key = NULL;
upcall->actions = NULL;
ofpbuf_use_const(&upcall->put_actions,
odp_actions->data, odp_actions->size);
} else {
- ofpbuf_init(&upcall->put_actions, 0);
+ /* upcall->put_actions already initialized by upcall_receive(). */
compose_slow_path(udpif, &upcall->xout, upcall->flow,
upcall->flow->in_port.odp_port,
&upcall->put_actions);
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
- flow, ufid, pmd_id);
+ flow, 0, ufid, pmd_id);
if (error) {
return error;
}
unsigned int flow_limit;
size_t n_ops, n_opsp, i;
bool may_put;
- bool megaflow;
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
- atomic_read_relaxed(&enable_megaflows, &megaflow);
may_put = udpif_get_n_flows(udpif) < flow_limit;
op->dop.u.execute.actions_len = upcall->odp_actions.size;
op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
op->dop.u.execute.probe = false;
+ op->dop.u.execute.mtu = upcall->mru;
}
}
bool ufid_present, const ovs_u128 *ufid,
const unsigned pmd_id, const struct ofpbuf *actions,
uint64_t dump_seq, uint64_t reval_seq, long long int used,
- const struct recirc_id_node *key_recirc, struct xlate_out *xout)
+ uint32_t key_recirc_id, struct xlate_out *xout)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
- unsigned n_recircs = (key_recirc ? 1 : 0) + (xout ? xout->n_recircs : 0);
- struct udpif_key *ukey = xmalloc(sizeof *ukey +
- n_recircs * sizeof *ukey->recircs);
+ struct udpif_key *ukey = xmalloc(sizeof *ukey);
memcpy(&ukey->keybuf, key, key_len);
ukey->key = &ukey->keybuf.nla;
ukey->stats.used = used;
ukey->xcache = NULL;
- ukey->n_recircs = n_recircs;
- if (key_recirc) {
- ukey->recircs[0] = key_recirc->id;
+ ukey->key_recirc_id = key_recirc_id;
+ recirc_refs_init(&ukey->recircs);
+ if (xout) {
+ /* Take ownership of the action recirc id references. */
+ recirc_refs_swap(&ukey->recircs, &xout->recircs);
}
- if (xout && xout->n_recircs) {
- const uint32_t *act_recircs = xlate_out_get_recircs(xout);
- memcpy(ukey->recircs + (key_recirc ? 1 : 0), act_recircs,
- xout->n_recircs * sizeof *ukey->recircs);
- xlate_out_take_recircs(xout);
- }
return ukey;
}
true, upcall->ufid, upcall->pmd_id,
&upcall->put_actions, upcall->dump_seq,
upcall->reval_seq, 0,
- upcall->have_recirc_ref ? upcall->recirc : NULL,
+ upcall->have_recirc_ref ? upcall->recirc->id : 0,
&upcall->xout);
}
/* Check the flow actions for recirculation action. As recirculation
* relies on OVS userspace internal state, we need to delete all old
- * datapath flows with recirculation upon OVS restart. */
+ * datapath flows with either a non-zero recirc_id in the key, or any
+ * recirculation actions upon OVS restart. */
+ NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->key, flow->key_len) {
+ if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
+ && nl_attr_get_u32(a) != 0) {
+ return EINVAL;
+ }
+ }
NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->actions, flow->actions_len) {
if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
return EINVAL;
*ukey = ukey_create__(flow->key, flow->key_len,
flow->mask, flow->mask_len, flow->ufid_present,
&flow->ufid, flow->pmd_id, &actions, dump_seq,
- reval_seq, flow->stats.used, NULL, NULL);
+ reval_seq, flow->stats.used, 0, NULL);
return 0;
}
OVS_NO_THREAD_SAFETY_ANALYSIS
{
if (ukey) {
- for (int i = 0; i < ukey->n_recircs; i++) {
- recirc_free_id(ukey->recircs[i]);
+ if (ukey->key_recirc_id) {
+ recirc_free_id(ukey->key_recirc_id);
}
+ recirc_refs_unref(&ukey->recircs);
xlate_cache_delete(ukey->xcache);
ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions));
ovs_mutex_destroy(&ukey->mutex);
* UKEY_KEEP The ukey is fine as is.
* UKEY_MODIFY The ukey's actions should be changed but is otherwise
* fine. Callers should change the actions to those found
- * in the caller supplied 'odp_actions' buffer. */
+ * in the caller supplied 'odp_actions' buffer. The
+ * recirculation references can be found in 'recircs' and
+ * must be handled by the caller.
+ *
+ * If the result is UKEY_MODIFY, then references to all recirc_ids used by the
+ * new flow will be held within 'recircs' (which may be none).
+ *
+ * The caller is responsible for both initializing 'recircs' prior to this
+ * call, and ensuring any references are eventually freed.
+ */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
const struct dpif_flow_stats *stats,
- struct ofpbuf *odp_actions, uint64_t reval_seq)
+ struct ofpbuf *odp_actions, uint64_t reval_seq,
+ struct recirc_refs *recircs)
OVS_REQUIRES(ukey->mutex)
{
struct xlate_out xout, *xoutp;
struct netflow *netflow;
struct ofproto_dpif *ofproto;
struct dpif_flow_stats push;
- struct flow flow, dp_mask;
- struct flow_wildcards wc;
+ struct flow flow;
+ struct flow_wildcards dp_mask, wc;
enum reval_result result;
- uint64_t *dp64, *xout64;
ofp_port_t ofp_in_port;
struct xlate_in xin;
long long int last_used;
int error;
- size_t i;
bool need_revalidate;
result = UKEY_DELETE;
}
if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
- ukey->key_len, &dp_mask, &flow) == ODP_FIT_ERROR) {
+ ukey->key_len, &dp_mask, &flow)
+ == ODP_FIT_ERROR) {
goto exit;
}
- /* Since the kernel is free to ignore wildcarded bits in the mask, we can't
- * directly check that the masks are the same. Instead we check that the
- * mask in the kernel is more specific i.e. less wildcarded, than what
- * we've calculated here. This guarantees we don't catch any packets we
- * shouldn't with the megaflow. */
- dp64 = (uint64_t *) &dp_mask;
- xout64 = (uint64_t *) &wc.masks;
- for (i = 0; i < FLOW_U64S; i++) {
- if ((dp64[i] | xout64[i]) != dp64[i]) {
- goto exit;
- }
+ /* Do not modify if any bit is wildcarded by the installed datapath flow,
+ * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
+ * tells that the datapath flow is now too generic and must be narrowed
+ * down. Note that we do not know if the datapath has ignored any of the
+ * wildcarded bits, so we may be overly conservative here. */
+ if (flow_wildcards_has_extra(&dp_mask, &wc)) {
+ goto exit;
}
if (!ofpbuf_equal(odp_actions,
/* The datapath mask was OK, but the actions seem to have changed.
* Let's modify it in place. */
result = UKEY_MODIFY;
+ /* Transfer recirc action ID references to the caller. */
+ recirc_refs_swap(recircs, &xoutp->recircs);
goto exit;
}
&op->dop.u.flow_put.actions_len);
}
+/* Executes datapath operations 'ops' and attributes stats retrieved from the
+ * datapath as part of those operations. */
static void
-push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
+push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
{
struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
size_t i;
}
}
+/* Executes datapath operations 'ops', attributes stats retrieved from the
+ * datapath, and deletes the ukeys of those ops that are flow deletions. */
static void
push_ukey_ops(struct udpif *udpif, struct umap *umap,
struct ukey_op *ops, size_t n_ops)
{
int i;
- push_ukey_ops__(udpif, ops, n_ops);
+ push_dp_ops(udpif, ops, n_ops);
ovs_mutex_lock(&umap->mutex);
for (i = 0; i < n_ops; i++) {
- ukey_delete(umap, ops[i].ukey);
+ if (ops[i].dop.type == DPIF_OP_FLOW_DEL) {
+ ukey_delete(umap, ops[i].ukey);
+ }
}
ovs_mutex_unlock(&umap->mutex);
}
VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
}
+static void
+reval_op_init(struct ukey_op *op, enum reval_result result,
+ struct udpif *udpif, struct udpif_key *ukey,
+ struct recirc_refs *recircs, struct ofpbuf *odp_actions)
+{
+ if (result == UKEY_DELETE) {
+ delete_op_init(udpif, op, ukey);
+ } else if (result == UKEY_MODIFY) {
+ /* Store the new recircs: the swap hands the references passed in via
+ * 'recircs' to 'ukey' and leaves the ukey's previous ones in 'recircs'. */
+ recirc_refs_swap(&ukey->recircs, recircs);
+ /* Release old recircs, which 'recircs' holds after the swap. */
+ recirc_refs_unref(recircs);
+ /* ukey->key_recirc_id remains, as the key is the same as before. */
+
+ ukey_set_actions(ukey, odp_actions);
+ modify_op_init(op, ukey);
+ }
+}
+}
+
static void
revalidate(struct revalidator *revalidator)
{
for (f = flows; f < &flows[n_dumped]; f++) {
long long int used = f->stats.used;
+ struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
enum reval_result result;
struct udpif_key *ukey;
bool already_dumped;
result = UKEY_DELETE;
} else {
result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
- reval_seq);
+ reval_seq, &recircs);
}
ukey->dump_seq = dump_seq;
ukey->flow_exists = result != UKEY_DELETE;
- if (result == UKEY_DELETE) {
- delete_op_init(udpif, &ops[n_ops++], ukey);
- } else if (result == UKEY_MODIFY) {
- ukey_set_actions(ukey, &odp_actions);
- modify_op_init(&ops[n_ops++], ukey);
+ if (result != UKEY_KEEP) {
+ /* Takes ownership of 'recircs'. */
+ reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
+ &odp_actions);
}
ovs_mutex_unlock(&ukey->mutex);
}
if (n_ops) {
- push_ukey_ops__(udpif, ops, n_ops);
+ /* Push datapath ops but defer ukey deletion to 'sweep' phase. */
+ push_dp_ops(udpif, ops, n_ops);
}
ovsrcu_quiesce();
}
size_t n_ops = 0;
CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
- bool flow_exists, seq_mismatch;
- enum reval_result result;
+ bool flow_exists;
/* Handler threads could be holding a ukey lock while it installs a
* new flow, so don't hang around waiting for access to it. */
continue;
}
flow_exists = ukey->flow_exists;
- seq_mismatch = (ukey->dump_seq != dump_seq
- && ukey->reval_seq != reval_seq);
-
- if (purge) {
- result = UKEY_DELETE;
- } else if (!seq_mismatch) {
- result = UKEY_KEEP;
- } else {
- struct dpif_flow_stats stats;
- COVERAGE_INC(revalidate_missed_dp_flow);
- memset(&stats, 0, sizeof stats);
- result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
- reval_seq);
+ if (flow_exists) {
+ struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
+ bool seq_mismatch = (ukey->dump_seq != dump_seq
+ && ukey->reval_seq != reval_seq);
+ enum reval_result result;
+
+ if (purge) {
+ result = UKEY_DELETE;
+ } else if (!seq_mismatch) {
+ result = UKEY_KEEP;
+ } else {
+ struct dpif_flow_stats stats;
+ COVERAGE_INC(revalidate_missed_dp_flow);
+ memset(&stats, 0, sizeof stats);
+ result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
+ reval_seq, &recircs);
+ }
+ if (result != UKEY_KEEP) {
+ /* Clears 'recircs' if filled by revalidate_ukey(). */
+ reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
+ &odp_actions);
+ }
}
ovs_mutex_unlock(&ukey->mutex);
- if (result == UKEY_DELETE) {
- delete_op_init(udpif, &ops[n_ops++], ukey);
- } else if (result == UKEY_MODIFY) {
- ukey_set_actions(ukey, &odp_actions);
- modify_op_init(&ops[n_ops++], ukey);
+ if (!flow_exists) {
+ /* The common flow deletion case involves deletion of the flow
+ * during the dump phase and ukey deletion here. */
+ ovs_mutex_lock(&umap->mutex);
+ ukey_delete(umap, ukey);
+ ovs_mutex_unlock(&umap->mutex);
}
if (n_ops == REVALIDATE_MAX_BATCH) {
+ /* Update/delete missed flows and clean up corresponding ukeys
+ * if necessary. */
push_ukey_ops(udpif, umap, ops, n_ops);
n_ops = 0;
}
-
- if (!flow_exists) {
- ovs_mutex_lock(&umap->mutex);
- ukey_delete(umap, ukey);
- ovs_mutex_unlock(&umap->mutex);
- }
}
if (n_ops) {