#include "tnl-arp-cache.h"
#include "unixctl.h"
#include "util.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(dpif_netdev);
struct emc_cache { /* Exact match cache: a fixed-size array of entries. */
struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
+ int sweep_idx; /* Index into 'entries' of the slot that
+ * emc_cache_slow_sweep() checks next. */
};
/* Iterate in the exact match cache through every entry that might contain a
*
* dp_netdev_mutex (global)
* port_mutex
- * flow_mutex
*/
struct dp_netdev {
const struct dpif_class *const class;
struct ovs_refcount ref_cnt;
atomic_flag destroyed;
- /* Flows.
- *
- * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding
- * changes to 'cls' must be made while still holding the 'flow_mutex'.
- */
- struct ovs_mutex flow_mutex;
- struct dpcls cls;
- struct cmap flow_table OVS_GUARDED; /* Flow table. */
-
- /* Statistics.
- *
- * ovsthread_stats is internally synchronized. */
- struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
-
/* Ports.
*
* Protected by RCU. Take the mutex to add or remove ports. */
DP_N_STATS
};
-/* Contained by struct dp_netdev's 'stats' member. */
-struct dp_netdev_stats {
- struct ovs_mutex mutex; /* Protects 'n'. */
-
- /* Indexed by DP_STAT_*, protected by 'mutex'. */
- unsigned long long int n[DP_N_STATS] OVS_GUARDED;
-};
-
-
/* A port in a netdev-based datapath. */
struct dp_netdev_port {
struct cmap_node node; /* Node in dp_netdev's 'ports'. */
char *type; /* Port type as requested by user. */
};
-\f
-/* A flow in dp_netdev's 'flow_table'.
+/* Contained by struct dp_netdev_flow's 'stats' member.
+ *
+ * Plain counters with no lock of their own: dp_netdev_flow_used() updates
+ * them and get_dpif_flow_stats() copies them out. */
+struct dp_netdev_flow_stats {
+ long long int used; /* Last used time, in monotonic msecs. */
+ long long int packet_count; /* Number of packets matched. */
+ long long int byte_count; /* Number of bytes matched. */
+ uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
+};
+
+/* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
*
*
* Thread-safety
* =============
*
* Except near the beginning or ending of its lifespan, rule 'rule' belongs to
- * its dp_netdev's classifier. The text below calls this classifier 'cls'.
+ * its pmd thread's classifier. The text below calls this classifier 'cls'.
*
* Motivation
* ----------
bool dead;
/* Hash table index by unmasked flow. */
- const struct cmap_node node; /* In owning dp_netdev's 'flow_table'. */
+ const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
+ /* 'flow_table'. */
+ const ovs_u128 ufid; /* Unique flow identifier. */
const struct flow flow; /* Unmasked flow that created this entry. */
+ const int pmd_id; /* The 'core_id' of pmd thread owning this */
+ /* flow. */
/* Number of references.
* The classifier owns one reference.
* reference. */
struct ovs_refcount ref_cnt;
- /* Statistics.
- *
- * Reading or writing these members requires 'mutex'. */
- struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */
+ /* Statistics. */
+ struct dp_netdev_flow_stats stats;
/* Actions. */
OVSRCU_TYPE(struct dp_netdev_actions *) actions;
static void dp_netdev_flow_unref(struct dp_netdev_flow *);
static bool dp_netdev_flow_ref(struct dp_netdev_flow *);
-
-/* Contained by struct dp_netdev_flow's 'stats' member. */
-struct dp_netdev_flow_stats {
- struct ovs_mutex mutex; /* Guards all the other members. */
-
- long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
- long long int packet_count OVS_GUARDED; /* Number of packets matched. */
- long long int byte_count OVS_GUARDED; /* Number of bytes matched. */
- uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
-};
+static int dpif_netdev_flow_from_nlattrs(const struct nlattr *, uint32_t,
+ struct flow *);
/* A set of datapath actions within a "struct dp_netdev_flow".
*
const struct dp_netdev_flow *);
static void dp_netdev_actions_free(struct dp_netdev_actions *);
+/* Contained by struct dp_netdev_pmd_thread's 'stats' member.
+ *
+ * dp_netdev_count_packet() adds to these counters, and
+ * dpif_netdev_get_stats() sums the hit, miss and lost counters over every
+ * entry of 'dp->poll_threads'. */
+struct dp_netdev_pmd_stats {
+ /* Indexed by DP_STAT_*. */
+ unsigned long long int n[DP_N_STATS];
+};
+
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
* the performance overhead of interrupt processing. Therefore netdev can
* not implement rx-wait for these devices. dpif-netdev needs to poll
* these device to check for recv buffer. pmd-thread does polling for
- * devices assigned to itself thread.
+ * devices assigned to itself.
*
* DPDK used PMD for accessing NIC.
*
* Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for
* I/O of all non-pmd threads. There will be no actual thread created
* for the instance.
- **/
+ *
+ * Each struct has its own flow table and classifier. Packets received
+ * from managed ports are looked up in the corresponding pmd thread's
+ * flow table, and are executed with the found actions.
+ * */
struct dp_netdev_pmd_thread {
struct dp_netdev *dp;
+ struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
struct cmap_node node; /* In 'dp->poll_threads'. */
pthread_cond_t cond; /* For synchronizing pmd thread reload. */
* need to be protected (e.g. by 'dp_netdev_mutex'). All other
* instances will only be accessed by its own pmd thread. */
struct emc_cache flow_cache;
+
+ /* Classifier and Flow-Table.
+ *
+ * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding
+ * changes to 'cls' must be made while still holding the 'flow_mutex'.
+ */
+ struct ovs_mutex flow_mutex;
+ struct dpcls cls;
+ struct cmap flow_table OVS_GUARDED; /* Flow table. */
+
+ /* Statistics. */
+ struct dp_netdev_pmd_stats stats;
+
struct latch exit_latch; /* For terminating the pmd thread. */
atomic_uint change_seq; /* For reloading pmd ports. */
pthread_t thread;
struct dp_netdev_port **portp);
static void dp_netdev_free(struct dp_netdev *)
OVS_REQUIRES(dp_netdev_mutex);
-static void dp_netdev_flow_flush(struct dp_netdev *);
static int do_add_port(struct dp_netdev *dp, const char *devname,
const char *type, odp_port_t port_no)
OVS_REQUIRES(dp->port_mutex);
static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev *dp, int index,
int core_id, int numa_id);
+static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
-static struct dp_netdev_pmd_thread *dp_netdev_get_nonpmd(struct dp_netdev *dp);
+static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
+ int core_id);
+static struct dp_netdev_pmd_thread *
+dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
+static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
+static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
+static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
+static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
static void
BUILD_ASSERT(offsetof(struct miniflow, inline_values) == sizeof(uint64_t));
+ flow_cache->sweep_idx = 0;
for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
flow_cache->entries[i].flow = NULL;
flow_cache->entries[i].key.hash = 0;
}
}
+/* Lazily prunes the exact match cache.  Every call inspects exactly one
+ * entry, clears it when emc_entry_alive() reports that it is no longer
+ * alive, and then moves the sweep position on by one, wrapping around with
+ * EM_FLOW_HASH_MASK. */
+static void
+emc_cache_slow_sweep(struct emc_cache *flow_cache)
+{
+    const int cur = flow_cache->sweep_idx;
+    struct emc_entry *slot = flow_cache->entries + cur;
+    const bool stale = !emc_entry_alive(slot);
+
+    if (stale) {
+        emc_clear_entry(slot);
+    }
+
+    /* Next call looks at the following slot. */
+    flow_cache->sweep_idx = (cur + 1) & EM_FLOW_HASH_MASK;
+}
+
static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
ovs_refcount_init(&dp->ref_cnt);
atomic_flag_clear(&dp->destroyed);
- ovs_mutex_init(&dp->flow_mutex);
- dpcls_init(&dp->cls);
- cmap_init(&dp->flow_table);
-
- ovsthread_stats_init(&dp->stats);
-
ovs_mutex_init(&dp->port_mutex);
cmap_init(&dp->ports);
dp->port_seq = seq_create();
OVS_REQUIRES(dp_netdev_mutex)
{
struct dp_netdev_port *port;
- struct dp_netdev_stats *bucket;
- int i;
shash_find_and_delete(&dp_netdevs, dp->name);
ovs_mutex_destroy(&dp->non_pmd_mutex);
ovsthread_key_delete(dp->per_pmd_key);
- dp_netdev_flow_flush(dp);
ovs_mutex_lock(&dp->port_mutex);
CMAP_FOR_EACH (port, node, &dp->ports) {
do_del_port(dp, port);
}
ovs_mutex_unlock(&dp->port_mutex);
- OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
- ovs_mutex_destroy(&bucket->mutex);
- free_cacheline(bucket);
- }
- ovsthread_stats_destroy(&dp->stats);
-
- dpcls_destroy(&dp->cls);
- cmap_destroy(&dp->flow_table);
- ovs_mutex_destroy(&dp->flow_mutex);
seq_destroy(dp->port_seq);
cmap_destroy(&dp->ports);
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
- struct dp_netdev_stats *bucket;
- size_t i;
-
- stats->n_flows = cmap_count(&dp->flow_table);
+ struct dp_netdev_pmd_thread *pmd;
- stats->n_hit = stats->n_missed = stats->n_lost = 0;
- OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
- ovs_mutex_lock(&bucket->mutex);
- stats->n_hit += bucket->n[DP_STAT_HIT];
- stats->n_missed += bucket->n[DP_STAT_MISS];
- stats->n_lost += bucket->n[DP_STAT_LOST];
- ovs_mutex_unlock(&bucket->mutex);
+ stats->n_flows = stats->n_hit = stats->n_missed = stats->n_lost = 0;
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ stats->n_flows += cmap_count(&pmd->flow_table);
+ stats->n_hit += pmd->stats.n[DP_STAT_HIT];
+ stats->n_missed += pmd->stats.n[DP_STAT_MISS];
+ stats->n_lost += pmd->stats.n[DP_STAT_LOST];
}
stats->n_masks = UINT32_MAX;
stats->n_mask_hit = UINT64_MAX;
static void
dp_netdev_flow_free(struct dp_netdev_flow *flow)
{ /* Frees 'flow' after releasing the action set it currently points to. */
- struct dp_netdev_flow_stats *bucket;
- size_t i;
-
- OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
- ovs_mutex_destroy(&bucket->mutex);
- free_cacheline(bucket);
- }
- ovsthread_stats_destroy(&flow->stats);
-
dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
free(flow);
}
}
}
+/* Returns the hash under which a flow with unique flow identifier 'ufid' is
+ * kept in a pmd thread's 'flow_table': the first 'u32' element of the UFID,
+ * used as-is. */
+static uint32_t
+dp_netdev_flow_hash(const ovs_u128 *ufid)
+{
+    const uint32_t hash = ufid->u32[0];
+
+    return hash;
+}
+
static void
-dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
- OVS_REQUIRES(dp->flow_mutex)
+dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_flow *flow)
+ OVS_REQUIRES(pmd->flow_mutex)
{ /* Detaches 'flow' from 'pmd': removes it from the classifier and from the
   * flow table, marks it dead, and drops one reference.  The caller must
   * hold the flow mutex named in the OVS_REQUIRES annotation. */
struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node); /* 'node' is declared const in the flow. */
- dpcls_remove(&dp->cls, &flow->cr);
- cmap_remove(&dp->flow_table, node, flow_hash(&flow->flow, 0));
+ dpcls_remove(&pmd->cls, &flow->cr);
+ cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid)); /* Same
+ * UFID-based hash that dp_netdev_flow_add() passes to cmap_insert(). */
flow->dead = true;
dp_netdev_flow_unref(flow);
}
static void
-dp_netdev_flow_flush(struct dp_netdev *dp)
+dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd)
{ /* Removes every flow found in the flow table while holding the flow
   * mutex for the whole iteration. */
struct dp_netdev_flow *netdev_flow;
- ovs_mutex_lock(&dp->flow_mutex);
- CMAP_FOR_EACH (netdev_flow, node, &dp->flow_table) {
- dp_netdev_remove_flow(dp, netdev_flow);
+ ovs_mutex_lock(&pmd->flow_mutex);
+ CMAP_FOR_EACH (netdev_flow, node, &pmd->flow_table) {
+ dp_netdev_pmd_remove_flow(pmd, netdev_flow);
}
- ovs_mutex_unlock(&dp->flow_mutex);
+ ovs_mutex_unlock(&pmd->flow_mutex);
}
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{ /* Flushes all flows of the datapath behind 'dpif'.  Always returns 0. */
struct dp_netdev *dp = get_dp_netdev(dpif);
+ struct dp_netdev_pmd_thread *pmd;
+
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { /* One flow table per
+ * entry of 'dp->poll_threads'; flush each in turn. */
+ dp_netdev_pmd_flow_flush(pmd);
+ }
- dp_netdev_flow_flush(dp);
return 0;
}
}
static struct dp_netdev_flow *
-dp_netdev_lookup_flow(const struct dp_netdev *dp,
- const struct netdev_flow_key *key)
+dp_netdev_pmd_lookup_flow(const struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *key)
{ /* Looks up the single key 'key' in the classifier and returns the rule
   * that dpcls_lookup() stores, converted with dp_netdev_flow_cast().
   * Callers in this file test the result against NULL to detect a miss.
   * NOTE(review): that relies on dpcls_lookup() storing a null rule on a
   * miss -- confirm against dpcls_lookup(). */
struct dp_netdev_flow *netdev_flow;
struct dpcls_rule *rule;
- dpcls_lookup(&dp->cls, key, &rule, 1);
+ dpcls_lookup(&pmd->cls, key, &rule, 1);
netdev_flow = dp_netdev_flow_cast(rule);
return netdev_flow;
}
static struct dp_netdev_flow *
-dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
+dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
+ const ovs_u128 *ufidp, const struct nlattr *key,
+ size_t key_len)
{
struct dp_netdev_flow *netdev_flow;
+ struct flow flow;
+ ovs_u128 ufid;
+
+ /* If a UFID is not provided, determine one based on the key. */
+ if (!ufidp && key && key_len
+ && !dpif_netdev_flow_from_nlattrs(key, key_len, &flow)) {
+ dpif_flow_hash(pmd->dp->dpif, &flow, sizeof flow, &ufid);
+ ufidp = &ufid;
+ }
- CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
- &dp->flow_table) {
- if (flow_equal(&netdev_flow->flow, flow)) {
- return netdev_flow;
+ if (ufidp) {
+ CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp),
+ &pmd->flow_table) {
+ if (ovs_u128_equal(&netdev_flow->ufid, ufidp)) {
+ return netdev_flow;
+ }
}
}
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
struct dpif_flow_stats *stats)
{
- struct dp_netdev_flow_stats *bucket;
- size_t i;
-
- memset(stats, 0, sizeof *stats);
- OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
- ovs_mutex_lock(&bucket->mutex);
- stats->n_packets += bucket->packet_count;
- stats->n_bytes += bucket->byte_count;
- stats->used = MAX(stats->used, bucket->used);
- stats->tcp_flags |= bucket->tcp_flags;
- ovs_mutex_unlock(&bucket->mutex);
- }
+ stats->n_packets = netdev_flow->stats.packet_count;
+ stats->n_bytes = netdev_flow->stats.byte_count;
+ stats->used = netdev_flow->stats.used;
+ stats->tcp_flags = netdev_flow->stats.tcp_flags;
}
+/* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for
+ * storing the netlink-formatted key/mask. 'key_buf' may be the same as
+ * 'mask_buf'. Actions will be returned without copying, by relying on RCU to
+ * protect them.
+ *
+ * If 'terse' is true, the key, mask and actions are skipped: 'flow' is
+ * zeroed and only its UFID, 'ufid_present', 'pmd_id' and stats are filled
+ * in. */
static void
dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
- struct ofpbuf *buffer, struct dpif_flow *flow)
+ struct ofpbuf *key_buf, struct ofpbuf *mask_buf,
+ struct dpif_flow *flow, bool terse)
{
- struct flow_wildcards wc;
- struct dp_netdev_actions *actions;
+ if (terse) {
+ memset(flow, 0, sizeof *flow);
+ } else {
+ struct flow_wildcards wc;
+ struct dp_netdev_actions *actions;
+ size_t offset;
+
+ miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
- miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
- odp_flow_key_from_mask(buffer, &wc.masks, &netdev_flow->flow,
- odp_to_u32(wc.masks.in_port.odp_port),
- SIZE_MAX, true);
- flow->mask = ofpbuf_data(buffer);
- flow->mask_len = ofpbuf_size(buffer);
+ /* Key: written at the current tail of 'key_buf'.  The buffer size is
+ * sampled first so that 'key_len' counts only what this call adds. */
+ offset = ofpbuf_size(key_buf);
+ flow->key = ofpbuf_tail(key_buf);
+ odp_flow_key_from_flow(key_buf, &netdev_flow->flow, &wc.masks,
+ netdev_flow->flow.in_port.odp_port, true);
+ flow->key_len = ofpbuf_size(key_buf) - offset;
+
+ /* Mask: same scheme as the key, using 'mask_buf'.  Sampling the size
+ * again is what lets 'mask_buf' be the same buffer as 'key_buf'. */
+ offset = ofpbuf_size(mask_buf);
+ flow->mask = ofpbuf_tail(mask_buf);
+ odp_flow_key_from_mask(mask_buf, &wc.masks, &netdev_flow->flow,
+ odp_to_u32(wc.masks.in_port.odp_port),
+ SIZE_MAX, true);
+ flow->mask_len = ofpbuf_size(mask_buf) - offset;
- actions = dp_netdev_flow_get_actions(netdev_flow);
- flow->actions = actions->actions;
- flow->actions_len = actions->size;
+ /* Actions: 'flow' points into the flow's current action set; nothing
+ * is copied. */
+ actions = dp_netdev_flow_get_actions(netdev_flow);
+ flow->actions = actions->actions;
+ flow->actions_len = actions->size;
+ }
+ flow->ufid = netdev_flow->ufid;
+ flow->ufid_present = true;
+ flow->pmd_id = netdev_flow->pmd_id;
get_dpif_flow_stats(netdev_flow, &flow->stats);
}
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_flow *netdev_flow;
- struct flow key;
- int error;
+ struct dp_netdev_pmd_thread *pmd;
+ int pmd_id = get->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : get->pmd_id;
+ int error = 0;
- error = dpif_netdev_flow_from_nlattrs(get->key, get->key_len, &key);
- if (error) {
- return error;
+ pmd = dp_netdev_get_pmd(dp, pmd_id);
+ if (!pmd) {
+ return EINVAL;
}
- netdev_flow = dp_netdev_find_flow(dp, &key);
-
+ netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
+ get->key_len);
if (netdev_flow) {
- dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->flow);
- } else {
+ dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
+ get->flow, false);
+ } else {
error = ENOENT;
}
+ dp_netdev_pmd_unref(pmd);
+
return error;
}
static struct dp_netdev_flow *
-dp_netdev_flow_add(struct dp_netdev *dp, struct match *match,
+dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
+ struct match *match, const ovs_u128 *ufid,
const struct nlattr *actions, size_t actions_len)
- OVS_REQUIRES(dp->flow_mutex)
+ OVS_REQUIRES(pmd->flow_mutex)
{
struct dp_netdev_flow *flow;
struct netdev_flow_key mask;
/* Do not allocate extra space. */
flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
+ memset(&flow->stats, 0, sizeof flow->stats);
flow->dead = false;
+ *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id;
*CONST_CAST(struct flow *, &flow->flow) = match->flow;
+ *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
ovs_refcount_init(&flow->ref_cnt);
- ovsthread_stats_init(&flow->stats);
ovsrcu_set(&flow->actions, dp_netdev_actions_create(actions, actions_len));
- cmap_insert(&dp->flow_table,
+ cmap_insert(&pmd->flow_table,
CONST_CAST(struct cmap_node *, &flow->node),
- flow_hash(&flow->flow, 0));
+ dp_netdev_flow_hash(&flow->ufid));
netdev_flow_key_init_masked(&flow->cr.flow, &match->flow, &mask);
- dpcls_insert(&dp->cls, &flow->cr, &mask);
+ dpcls_insert(&pmd->cls, &flow->cr, &mask);
if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
struct match match;
miniflow_expand(&flow->cr.mask->mf, &match.wc.masks);
ds_put_cstr(&ds, "flow_add: ");
+ odp_format_ufid(ufid, &ds);
+ ds_put_cstr(&ds, " ");
match_format(&match, &ds, OFP_DEFAULT_PRIORITY);
ds_put_cstr(&ds, ", actions:");
format_odp_actions(&ds, actions, actions_len);
return flow;
}
-static void
-clear_stats(struct dp_netdev_flow *netdev_flow)
-{
- struct dp_netdev_flow_stats *bucket;
- size_t i;
-
- OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
- ovs_mutex_lock(&bucket->mutex);
- bucket->used = 0;
- bucket->packet_count = 0;
- bucket->byte_count = 0;
- bucket->tcp_flags = 0;
- ovs_mutex_unlock(&bucket->mutex);
- }
-}
-
static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_flow *netdev_flow;
struct netdev_flow_key key;
+ struct dp_netdev_pmd_thread *pmd;
struct match match;
+ ovs_u128 ufid;
+ int pmd_id = put->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : put->pmd_id;
int error;
error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
return error;
}
+ pmd = dp_netdev_get_pmd(dp, pmd_id);
+ if (!pmd) {
+ return EINVAL;
+ }
+
/* Must produce a netdev_flow_key for lookup.
* This interface is no longer performance critical, since it is not used
* for upcall processing any more. */
netdev_flow_key_from_flow(&key, &match.flow);
- ovs_mutex_lock(&dp->flow_mutex);
- netdev_flow = dp_netdev_lookup_flow(dp, &key);
+ if (put->ufid) {
+ ufid = *put->ufid;
+ } else {
+ dpif_flow_hash(dpif, &match.flow, sizeof match.flow, &ufid);
+ }
+
+ ovs_mutex_lock(&pmd->flow_mutex);
+ netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &key);
if (!netdev_flow) {
if (put->flags & DPIF_FP_CREATE) {
- if (cmap_count(&dp->flow_table) < MAX_FLOWS) {
+ if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
if (put->stats) {
memset(put->stats, 0, sizeof *put->stats);
}
- dp_netdev_flow_add(dp, &match, put->actions, put->actions_len);
+ dp_netdev_flow_add(pmd, &match, &ufid, put->actions,
+ put->actions_len);
error = 0;
} else {
error = EFBIG;
get_dpif_flow_stats(netdev_flow, put->stats);
}
if (put->flags & DPIF_FP_ZERO_STATS) {
- clear_stats(netdev_flow);
+ memset(&netdev_flow->stats, 0, sizeof netdev_flow->stats);
}
ovsrcu_postpone(dp_netdev_actions_free, old_actions);
error = EINVAL;
}
}
- ovs_mutex_unlock(&dp->flow_mutex);
+ ovs_mutex_unlock(&pmd->flow_mutex);
+ dp_netdev_pmd_unref(pmd);
return error;
}
{
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_flow *netdev_flow;
- struct flow key;
- int error;
+ struct dp_netdev_pmd_thread *pmd;
+ int pmd_id = del->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : del->pmd_id;
+ int error = 0;
- error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
- if (error) {
- return error;
+ pmd = dp_netdev_get_pmd(dp, pmd_id);
+ if (!pmd) {
+ return EINVAL;
}
- ovs_mutex_lock(&dp->flow_mutex);
- netdev_flow = dp_netdev_find_flow(dp, &key);
+ ovs_mutex_lock(&pmd->flow_mutex);
+ netdev_flow = dp_netdev_pmd_find_flow(pmd, del->ufid, del->key,
+ del->key_len);
if (netdev_flow) {
if (del->stats) {
get_dpif_flow_stats(netdev_flow, del->stats);
}
- dp_netdev_remove_flow(dp, netdev_flow);
+ dp_netdev_pmd_remove_flow(pmd, netdev_flow);
} else {
error = ENOENT;
}
- ovs_mutex_unlock(&dp->flow_mutex);
+ ovs_mutex_unlock(&pmd->flow_mutex);
+ dp_netdev_pmd_unref(pmd);
return error;
}
struct dpif_netdev_flow_dump { /* State shared by the threads of one flow dump. */
struct dpif_flow_dump up;
- struct cmap_position pos;
+ struct cmap_position poll_thread_pos; /* Position in 'dp->poll_threads'. */
+ struct cmap_position flow_pos; /* Position in the 'flow_table' of
+ * 'cur_pmd'; zeroed when moving on to the next pmd thread. */
+ struct dp_netdev_pmd_thread *cur_pmd; /* Pmd thread being dumped, kept
+ * referenced between calls; NULL before the first call. */
int status; /* 0 while dumping; set to EOF once no pmd thread is left. */
struct ovs_mutex mutex; /* Taken while the positions above are advanced. */
};
}
static struct dpif_flow_dump *
-dpif_netdev_flow_dump_create(const struct dpif *dpif_)
+dpif_netdev_flow_dump_create(const struct dpif *dpif_, bool terse)
{
struct dpif_netdev_flow_dump *dump;
- dump = xmalloc(sizeof *dump);
+ dump = xzalloc(sizeof *dump);
dpif_flow_dump_init(&dump->up, dpif_);
- memset(&dump->pos, 0, sizeof dump->pos);
- dump->status = 0;
+ dump->up.terse = terse;
ovs_mutex_init(&dump->mutex);
return &dump->up;
struct dpif_netdev_flow_dump_thread *thread
= dpif_netdev_flow_dump_thread_cast(thread_);
struct dpif_netdev_flow_dump *dump = thread->dump;
- struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
- struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
int n_flows = 0;
int i;
ovs_mutex_lock(&dump->mutex);
if (!dump->status) {
- for (n_flows = 0; n_flows < MIN(max_flows, FLOW_DUMP_MAX_BATCH);
- n_flows++) {
- struct cmap_node *node;
+ struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
+ struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
+ struct dp_netdev_pmd_thread *pmd = dump->cur_pmd;
+ int flow_limit = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
+
+ /* First call to dump_next(), extracts the first pmd thread.
+ * If there is no pmd thread, returns immediately. */
+ if (!pmd) {
+ pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
+ if (!pmd) {
+ ovs_mutex_unlock(&dump->mutex);
+ return n_flows;
- node = cmap_next_position(&dp->flow_table, &dump->pos);
- if (!node) {
- dump->status = EOF;
- break;
}
- netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow,
- node);
}
+
+ do {
+ for (n_flows = 0; n_flows < flow_limit; n_flows++) {
+ struct cmap_node *node;
+
+ node = cmap_next_position(&pmd->flow_table, &dump->flow_pos);
+ if (!node) {
+ break;
+ }
+ netdev_flows[n_flows] = CONTAINER_OF(node,
+ struct dp_netdev_flow,
+ node);
+ }
+ /* When finishing dumping the current pmd thread, moves to
+ * the next. */
+ if (n_flows < flow_limit) {
+ memset(&dump->flow_pos, 0, sizeof dump->flow_pos);
+ dp_netdev_pmd_unref(pmd);
+ pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
+ if (!pmd) {
+ dump->status = EOF;
+ break;
+ }
+ }
+ /* Keeps the reference for the next caller. */
+ dump->cur_pmd = pmd;
+
+ /* If the current dump is empty, do not exit the loop, since the
+ * remaining pmds could have flows to be dumped. Just dumps again
+ * on the new 'pmd'. */
+ } while (!n_flows);
}
ovs_mutex_unlock(&dump->mutex);
struct odputil_keybuf *keybuf = &thread->keybuf[i];
struct dp_netdev_flow *netdev_flow = netdev_flows[i];
struct dpif_flow *f = &flows[i];
- struct dp_netdev_actions *dp_actions;
- struct flow_wildcards wc;
- struct ofpbuf buf;
-
- miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
-
- /* Key. */
- ofpbuf_use_stack(&buf, keybuf, sizeof *keybuf);
- odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
- netdev_flow->flow.in_port.odp_port, true);
- f->key = ofpbuf_data(&buf);
- f->key_len = ofpbuf_size(&buf);
-
- /* Mask. */
- ofpbuf_use_stack(&buf, maskbuf, sizeof *maskbuf);
- odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
- odp_to_u32(wc.masks.in_port.odp_port),
- SIZE_MAX, true);
- f->mask = ofpbuf_data(&buf);
- f->mask_len = ofpbuf_size(&buf);
-
- /* Actions. */
- dp_actions = dp_netdev_flow_get_actions(netdev_flow);
- f->actions = dp_actions->actions;
- f->actions_len = dp_actions->size;
+ struct ofpbuf key, mask;
- /* Stats. */
- get_dpif_flow_stats(netdev_flow, &f->stats);
+ ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
+ ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
+ dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f,
+ dump->up.terse);
}
return n_flows;
/* Tries finding the 'pmd'. If NULL is returned, that means
* the current thread is a non-pmd thread and should use
- * dp_netdev_get_nonpmd(). */
+ * dp_netdev_get_pmd(dp, NON_PMD_CORE_ID). */
pmd = ovsthread_getspecific(dp->per_pmd_key);
if (!pmd) {
- pmd = dp_netdev_get_nonpmd(dp);
+ pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
}
/* If the current thread is non-pmd thread, acquires
* the 'non_pmd_mutex'. */
if (pmd->core_id == NON_PMD_CORE_ID) {
ovs_mutex_lock(&dp->non_pmd_mutex);
+ ovs_mutex_lock(&dp->port_mutex);
}
+
dp_netdev_execute_actions(pmd, &pp, 1, false, execute->actions,
execute->actions_len);
if (pmd->core_id == NON_PMD_CORE_ID) {
+ dp_netdev_pmd_unref(pmd);
+ ovs_mutex_unlock(&dp->port_mutex);
ovs_mutex_unlock(&dp->non_pmd_mutex);
}
{
struct dp_netdev_port *port;
struct dp_netdev *dp = get_dp_netdev(dpif);
- struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_nonpmd(dp);
+ struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_pmd(dp,
+ NON_PMD_CORE_ID);
uint64_t new_tnl_seq;
ovs_mutex_lock(&dp->non_pmd_mutex);
}
}
ovs_mutex_unlock(&dp->non_pmd_mutex);
+ dp_netdev_pmd_unref(non_pmd);
+
tnl_arp_cache_run();
new_tnl_seq = seq_read(tnl_conf_seq);
lc = 0;
+ emc_cache_slow_sweep(&pmd->flow_cache);
ovsrcu_quiesce();
atomic_read_relaxed(&pmd->change_seq, &seq);
ovs_mutex_unlock(&pmd->cond_mutex);
}
-/* Returns the pointer to the dp_netdev_pmd_thread for non-pmd threads. */
+/* Finds and refs the dp_netdev_pmd_thread on core 'core_id'.  Returns
+ * the pointer on success.  Returns NULL if 'dp->poll_threads' has no
+ * entry for 'core_id' or if dp_netdev_pmd_try_ref() fails on the entry
+ * that was found.
+ *
+ * The caller must unref the returned pointer with dp_netdev_pmd_unref(). */
static struct dp_netdev_pmd_thread *
-dp_netdev_get_nonpmd(struct dp_netdev *dp)
+dp_netdev_get_pmd(struct dp_netdev *dp, int core_id)
{
struct dp_netdev_pmd_thread *pmd;
const struct cmap_node *pnode;
- pnode = cmap_find(&dp->poll_threads, hash_int(NON_PMD_CORE_ID, 0));
- ovs_assert(pnode);
+ pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0));
+ if (!pnode) {
+ return NULL;
+ }
pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);
- return pmd;
+ return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL;
}
/* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */
OVS_NUMA_UNSPEC);
}
+/* Caller must have valid pointer to 'pmd'.
+ *
+ * Tries to take a reference on 'pmd' with ovs_refcount_try_ref_rcu() and
+ * returns that call's result.  Callers in this file treat false as "this
+ * pmd must not be used": dp_netdev_get_pmd() returns NULL and
+ * dp_netdev_pmd_get_next() moves on to the next node. */
+static bool
+dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd)
+{
+ return ovs_refcount_try_ref_rcu(&pmd->ref_cnt);
+}
+
+/* Drops one reference on 'pmd'.  A NULL 'pmd' is accepted and ignored.
+ *
+ * When ovs_refcount_unref() returns 1, dp_netdev_destroy_pmd() is handed to
+ * ovsrcu_postpone() instead of being called directly.
+ * NOTE(review): a return value of 1 presumably means the reference just
+ * dropped was the last one -- confirm against ovs_refcount_unref(). */
+static void
+dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd)
+{
+    if (!pmd) {
+        return;
+    }
+    if (ovs_refcount_unref(&pmd->ref_cnt) != 1) {
+        return;
+    }
+    ovsrcu_postpone(dp_netdev_destroy_pmd, pmd);
+}
+
+/* Advances 'pos' through 'dp->poll_threads' and returns the first pmd
+ * thread on which dp_netdev_pmd_try_ref() succeeds.  Nodes whose try_ref()
+ * fails are skipped.  Returns NULL once the end of the cmap is reached.
+ *
+ * The caller must unref the returned pointer. */
+static struct dp_netdev_pmd_thread *
+dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
+{
+    for (;;) {
+        struct cmap_node *cur = cmap_next_position(&dp->poll_threads, pos);
+        struct dp_netdev_pmd_thread *candidate;
+
+        if (!cur) {
+            /* Ran off the end of the map. */
+            return NULL;
+        }
+
+        candidate = CONTAINER_OF(cur, struct dp_netdev_pmd_thread, node);
+        if (dp_netdev_pmd_try_ref(candidate)) {
+            return candidate;
+        }
+    }
+}
+
/* Configures the 'pmd' based on the input argument. */
static void
dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
pmd->index = index;
pmd->core_id = core_id;
pmd->numa_id = numa_id;
+
+ ovs_refcount_init(&pmd->ref_cnt);
latch_init(&pmd->exit_latch);
atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);
xpthread_cond_init(&pmd->cond, NULL);
ovs_mutex_init(&pmd->cond_mutex);
+ ovs_mutex_init(&pmd->flow_mutex);
+ dpcls_init(&pmd->cls);
+ cmap_init(&pmd->flow_table);
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
hash_int(core_id, 0));
}
-/* Stops the pmd thread, removes it from the 'dp->poll_threads'
- * and destroys the struct. */
+static void
+dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
+{ /* Final teardown of 'pmd'.  dp_netdev_pmd_unref() schedules this through
+ * ovsrcu_postpone() rather than calling it directly. */
+ dp_netdev_pmd_flow_flush(pmd); /* Remove remaining flows before the
+ * classifier and flow table are destroyed. */
+ dpcls_destroy(&pmd->cls);
+ cmap_destroy(&pmd->flow_table);
+ ovs_mutex_destroy(&pmd->flow_mutex); /* Last use was in the flush above. */
+ latch_destroy(&pmd->exit_latch);
+ xpthread_cond_destroy(&pmd->cond);
+ ovs_mutex_destroy(&pmd->cond_mutex);
+ free(pmd);
+}
+
+/* Stops the pmd thread, removes it from the 'dp->poll_threads',
+ * and unrefs the struct. */
static void
dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
{
/* Uninit the 'flow_cache' since there is
- * no actual thread uninit it. */
+ * no actual thread uninit it for NON_PMD_CORE_ID. */
if (pmd->core_id == NON_PMD_CORE_ID) {
emc_cache_uninit(&pmd->flow_cache);
} else {
xpthread_join(pmd->thread, NULL);
}
cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
- latch_destroy(&pmd->exit_latch);
- xpthread_cond_destroy(&pmd->cond);
- ovs_mutex_destroy(&pmd->cond_mutex);
- free(pmd);
+ dp_netdev_pmd_unref(pmd);
}
/* Destroys all pmd threads. */
}
\f
-static void *
-dp_netdev_flow_stats_new_cb(void)
-{
- struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
- ovs_mutex_init(&bucket->mutex);
- return bucket;
-}
-
/* Called after pmd threads config change. Restarts pmd threads with
* new configuration. */
static void
}
static void
-dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
- int cnt, int size,
+dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, int cnt, int size,
uint16_t tcp_flags)
{
long long int now = time_msec();
- struct dp_netdev_flow_stats *bucket;
-
- bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
- dp_netdev_flow_stats_new_cb);
-
- ovs_mutex_lock(&bucket->mutex);
- bucket->used = MAX(now, bucket->used);
- bucket->packet_count += cnt;
- bucket->byte_count += size;
- bucket->tcp_flags |= tcp_flags;
- ovs_mutex_unlock(&bucket->mutex);
-}
-static void *
-dp_netdev_stats_new_cb(void)
-{
- struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
- ovs_mutex_init(&bucket->mutex);
- return bucket;
+ netdev_flow->stats.used = MAX(now, netdev_flow->stats.used);
+ netdev_flow->stats.packet_count += cnt;
+ netdev_flow->stats.byte_count += size;
+ netdev_flow->stats.tcp_flags |= tcp_flags;
}
static void
-dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
+dp_netdev_count_packet(struct dp_netdev_pmd_thread *pmd,
+ enum dp_stat_type type, int cnt)
{ /* Adds 'cnt' to the counter selected by 'type'. */
- struct dp_netdev_stats *bucket;
-
- bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
- ovs_mutex_lock(&bucket->mutex);
- bucket->n[type] += cnt;
- ovs_mutex_unlock(&bucket->mutex);
+ pmd->stats.n[type] += cnt; /* Plain add on the pmd's own counters; no
+ * lock is taken here. */
}
static int
-dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
- struct flow *flow, struct flow_wildcards *wc,
+dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dpif_packet *packet_,
+ struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
enum dpif_upcall_type type, const struct nlattr *userdata,
struct ofpbuf *actions, struct ofpbuf *put_actions)
{
+ struct dp_netdev *dp = pmd->dp;
struct ofpbuf *packet = &packet_->ofpbuf;
if (type == DPIF_UC_MISS) {
- dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
+ dp_netdev_count_packet(pmd, DP_STAT_MISS, 1);
}
if (OVS_UNLIKELY(!dp->upcall_cb)) {
ds_destroy(&ds);
}
- return dp->upcall_cb(packet, flow, type, userdata, actions, wc,
- put_actions, dp->upcall_aux);
+ return dp->upcall_cb(packet, flow, ufid, pmd->core_id, type, userdata,
+ actions, wc, put_actions, dp->upcall_aux);
}
static inline uint32_t
dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true,
actions->actions, actions->size);
- dp_netdev_count_packet(pmd->dp, DP_STAT_HIT, batch->packet_count);
+ dp_netdev_count_packet(pmd, DP_STAT_HIT, batch->packet_count);
}
static inline bool
/* Key length is needed in all the cases, hash computed on demand. */
keys[i].len = netdev_flow_key_size(count_1bits(keys[i].mf.map));
}
- any_miss = !dpcls_lookup(&dp->cls, keys, rules, cnt);
+ any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
struct ofpbuf actions, put_actions;
+ ovs_u128 ufid;
ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);
/* It's possible that an earlier slow path execution installed
* a rule covering this flow. In this case, it's a lot cheaper
* to catch it here than execute a miss. */
- netdev_flow = dp_netdev_lookup_flow(dp, &keys[i]);
+ netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
if (netdev_flow) {
rules[i] = &netdev_flow->cr;
continue;
ofpbuf_clear(&actions);
ofpbuf_clear(&put_actions);
- error = dp_netdev_upcall(dp, packets[i], &match.flow, &match.wc,
- DPIF_UC_MISS, NULL, &actions,
+ dpif_flow_hash(dp->dpif, &match.flow, sizeof match.flow, &ufid);
+ error = dp_netdev_upcall(pmd, packets[i], &match.flow, &match.wc,
+ &ufid, DPIF_UC_MISS, NULL, &actions,
&put_actions);
if (OVS_UNLIKELY(error && error != ENOSPC)) {
continue;
* mutex lock outside the loop, but that's an awful long time
* to be locking everyone out of making flow installs. If we
* move to a per-core classifier, it would be reasonable. */
- ovs_mutex_lock(&dp->flow_mutex);
- netdev_flow = dp_netdev_lookup_flow(dp, &keys[i]);
+ ovs_mutex_lock(&pmd->flow_mutex);
+ netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
if (OVS_LIKELY(!netdev_flow)) {
- netdev_flow = dp_netdev_flow_add(dp, &match,
+ netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
ofpbuf_data(add_actions),
ofpbuf_size(add_actions));
}
- ovs_mutex_unlock(&dp->flow_mutex);
+ ovs_mutex_unlock(&pmd->flow_mutex);
emc_insert(flow_cache, &keys[i], netdev_flow);
}
}
}
- dp_netdev_count_packet(dp, DP_STAT_LOST, dropped_cnt);
+ dp_netdev_count_packet(pmd, DP_STAT_LOST, dropped_cnt);
}
n_batches = 0;
const struct nlattr *userdata;
struct ofpbuf actions;
struct flow flow;
+ ovs_u128 ufid;
userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
ofpbuf_init(&actions, 0);
ofpbuf_clear(&actions);
flow_extract(&packets[i]->ofpbuf, &packets[i]->md, &flow);
- error = dp_netdev_upcall(dp, packets[i], &flow, NULL,
- DPIF_UC_ACTION, userdata, &actions,
+ dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
+ error = dp_netdev_upcall(pmd, packets[i], &flow, NULL, &ufid,
+ DPIF_UC_ACTION, userdata,&actions,
NULL);
if (!error || error == ENOSPC) {
dp_netdev_execute_actions(pmd, &packets[i], 1, may_steal,
}
break;
- case OVS_ACTION_ATTR_HASH: {
- const struct ovs_action_hash *hash_act;
- uint32_t hash;
-
- hash_act = nl_attr_get(a);
-
- for (i = 0; i < cnt; i++) {
-
- if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
- /* Hash need not be symmetric, nor does it need to include
- * L2 fields. */
- hash = hash_2words(dpif_packet_get_dp_hash(packets[i]),
- hash_act->hash_basis);
- } else {
- VLOG_WARN("Unknown hash algorithm specified "
- "for the hash action.");
- hash = 2;
- }
-
- if (!hash) {
- hash = 1; /* 0 is not valid */
- }
-
- dpif_packet_set_dp_hash(packets[i], hash);
- }
- return;
- }
-
case OVS_ACTION_ATTR_RECIRC:
if (*depth < MAX_RECIRC_DEPTH) {
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_SAMPLE:
+ case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
netdev_flow_key_clone(&subtable->mask, mask);
cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
pvector_insert(&cls->subtables, subtable, 0);
+ pvector_publish(&cls->subtables);
return subtable;
}
if (cmap_remove(&subtable->rules, &rule->cmap_node, rule->flow.hash)
== 0) {
dpcls_destroy_subtable(cls, subtable);
+ pvector_publish(&cls->subtables);
}
}