static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
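+/* Features the userspace datapath supports when translating flows to and
+ * from ODP format: an unlimited MPLS label stack depth and recirculation. */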
+static struct odp_support dp_netdev_support = {
+ .max_mpls_depth = SIZE_MAX,
+ .recirc = true,
+};
+
/* Stores a miniflow with inline values */
struct netdev_flow_key {
* If dp_netdev_input is not called from a pmd thread, a mutex is used.
*/
-#define EM_FLOW_HASH_SHIFT 10
+#define EM_FLOW_HASH_SHIFT 13
#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)
#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
#define EM_FLOW_HASH_SEGS 2
/* A port in a netdev-based datapath. */
struct dp_netdev_port {
- struct pkt_metadata md;
+ odp_port_t port_no;
struct netdev *netdev;
struct cmap_node node; /* Node in dp_netdev's 'ports'. */
struct netdev_saved_flags *sf;
const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
/* 'flow_table'. */
const ovs_u128 ufid; /* Unique flow identifier. */
- const int pmd_id; /* The 'core_id' of pmd thread owning this */
+ const unsigned pmd_id; /* The 'core_id' of pmd thread owning this */
/* flow. */
/* Number of references.
pthread_t thread;
int index; /* Idx of this pmd thread among pmd */
/* threads on same numa node. */
- int core_id; /* CPU core id of this pmd thread. */
+ unsigned core_id; /* CPU core id of this pmd thread. */
int numa_id; /* numa node id of this pmd thread. */
+ int tx_qid; /* Queue id used by this pmd thread to
+ * send packets on all netdevs. */
/* Only a pmd thread can write on its own 'cycles' and 'stats'.
* The main thread keeps 'stats_zero' and 'cycles_zero' as base
void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev *dp, int index,
- int core_id, int numa_id);
+ unsigned core_id, int numa_id);
static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
- int core_id);
+ unsigned core_id);
static struct dp_netdev_pmd_thread *
dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
}
+/* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
+bool
+dpif_is_netdev(const struct dpif *dpif)
+{
+ return dpif->dpif_class->open == dpif_netdev_open;
+}
+
static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
- ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
+ ovs_assert(dpif_is_netdev(dpif));
return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
}
if (pmd->numa_id != OVS_NUMA_UNSPEC) {
ds_put_format(reply, " numa_id %d", pmd->numa_id);
}
- if (pmd->core_id != OVS_CORE_UNSPEC) {
- ds_put_format(reply, " core_id %d", pmd->core_id);
+ if (pmd->core_id != OVS_CORE_UNSPEC && pmd->core_id != NON_PMD_CORE_ID) {
+ ds_put_format(reply, " core_id %u", pmd->core_id);
}
ds_put_cstr(reply, ":\n");
ovs_mutex_init_recursive(&dp->non_pmd_mutex);
ovsthread_key_create(&dp->per_pmd_key, NULL);
- /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */
- ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID);
dp_netdev_set_nonpmd(dp);
dp->n_dpdk_rxqs = NR_QUEUE;
return ENOENT;
}
/* There can only be ovs_numa_get_n_cores() pmd threads,
- * so creates a txq for each. */
- error = netdev_set_multiq(netdev, n_cores, dp->n_dpdk_rxqs);
+ * so we create a txq for each, plus one extra for the
+ * non-pmd threads. */
+ error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs);
if (error && (error != EOPNOTSUPP)) {
VLOG_ERR("%s, cannot set multiq", devname);
return error;
}
}
port = xzalloc(sizeof *port);
- port->md = PKT_METADATA_INITIALIZER(port_no);
+ port->port_no = port_no;
port->netdev = netdev;
port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
port->type = xstrdup(type);
struct dp_netdev_port *port;
CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
- if (port->md.in_port.odp_port == port_no) {
+ if (port->port_no == port_no) {
return port;
}
}
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
OVS_REQUIRES(dp->port_mutex)
{
- cmap_remove(&dp->ports, &port->node,
- hash_odp_port(port->md.in_port.odp_port));
+ cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
seq_change(dp->port_seq);
if (netdev_is_pmd(port->netdev)) {
int numa_id = netdev_get_numa_id(port->netdev);
{
dpif_port->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->type = xstrdup(port->type);
- dpif_port->port_no = port->md.in_port.odp_port;
+ dpif_port->port_no = port->port_no;
}
static int
state->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->name = state->name;
dpif_port->type = port->type;
- dpif_port->port_no = port->md.in_port.odp_port;
+ dpif_port->port_no = port->port_no;
retval = 0;
} else {
if (ufidp) {
CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp),
&pmd->flow_table) {
- if (ovs_u128_equal(&netdev_flow->ufid, ufidp)) {
+ if (ovs_u128_equals(&netdev_flow->ufid, ufidp)) {
return netdev_flow;
}
}
struct flow_wildcards wc;
struct dp_netdev_actions *actions;
size_t offset;
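+ /* Common parameters for serializing this flow's key and mask below. */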
+ struct odp_flow_key_parms odp_parms = {
+ .flow = &netdev_flow->flow,
+ .mask = &wc.masks,
+ .support = dp_netdev_support,
+ };
miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
/* Key */
offset = key_buf->size;
flow->key = ofpbuf_tail(key_buf);
- odp_flow_key_from_flow(key_buf, &netdev_flow->flow, &wc.masks,
- netdev_flow->flow.in_port.odp_port, true);
+ odp_parms.odp_in_port = netdev_flow->flow.in_port.odp_port;
+ odp_flow_key_from_flow(&odp_parms, key_buf);
flow->key_len = key_buf->size - offset;
/* Mask */
offset = mask_buf->size;
flow->mask = ofpbuf_tail(mask_buf);
- odp_flow_key_from_mask(mask_buf, &wc.masks, &netdev_flow->flow,
- odp_to_u32(wc.masks.in_port.odp_port),
- SIZE_MAX, true);
+ odp_parms.odp_in_port = wc.masks.in_port.odp_port;
+ odp_parms.key_buf = key_buf;
+ odp_flow_key_from_mask(&odp_parms, mask_buf);
flow->mask_len = mask_buf->size - offset;
/* Actions */
if (mask_key_len) {
enum odp_key_fitness fitness;
- fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
+ fitness = odp_flow_key_to_mask(mask_key, mask_key_len, key, key_len,
+ mask, flow);
if (fitness) {
/* This should not happen: it indicates that
* odp_flow_key_from_mask() and odp_flow_key_to_mask()
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_flow *netdev_flow;
struct dp_netdev_pmd_thread *pmd;
- int pmd_id = get->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : get->pmd_id;
+ unsigned pmd_id = get->pmd_id == PMD_ID_NULL
+ ? NON_PMD_CORE_ID : get->pmd_id;
int error = 0;
pmd = dp_netdev_get_pmd(dp, pmd_id);
memset(&flow->stats, 0, sizeof flow->stats);
flow->dead = false;
flow->batch = NULL;
- *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id;
+ *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
*CONST_CAST(struct flow *, &flow->flow) = match->flow;
*CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
ovs_refcount_init(&flow->ref_cnt);
struct dp_netdev_pmd_thread *pmd;
struct match match;
ovs_u128 ufid;
- int pmd_id = put->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : put->pmd_id;
+ unsigned pmd_id = put->pmd_id == PMD_ID_NULL
+ ? NON_PMD_CORE_ID : put->pmd_id;
int error;
error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_flow *netdev_flow;
struct dp_netdev_pmd_thread *pmd;
- int pmd_id = del->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : del->pmd_id;
+ unsigned pmd_id = del->pmd_id == PMD_ID_NULL
+ ? NON_PMD_CORE_ID : del->pmd_id;
int error = 0;
pmd = dp_netdev_get_pmd(dp, pmd_id);
}
/* Sets the new rx queue config. */
- err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores(),
+ err = netdev_set_multiq(port->netdev,
+ ovs_numa_get_n_cores() + 1,
n_rxqs);
if (err && (err != EOPNOTSUPP)) {
VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
/* XXX: initialize md in netdev implementation. */
for (i = 0; i < cnt; i++) {
- packets[i]->md = port->md;
+ pkt_metadata_init(&packets[i]->md, port->port_no);
}
cycles_count_start(pmd);
dp_netdev_input(pmd, packets, cnt);
emc_cache_init(&pmd->flow_cache);
poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
+ /* Log the port/core affinity of this pmd thread. */
+ for (i = 0; i < poll_cnt; i++) {
+ VLOG_INFO("Core %u processing port '%s'", pmd->core_id,
+ netdev_get_name(poll_list[i].port->netdev));
+ }
+
/* Signal here to make sure the pmd finishes
* reloading the updated configuration. */
dp_netdev_pmd_reload_done(pmd);
*
* Caller must unref the returned reference. */
static struct dp_netdev_pmd_thread *
-dp_netdev_get_pmd(struct dp_netdev *dp, int core_id)
+dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id)
{
struct dp_netdev_pmd_thread *pmd;
const struct cmap_node *pnode;
return next;
}
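+/* Returns the tx queue id that the thread running on 'core_id' should use.
+ * Pmd threads use their core id directly; non-pmd threads share the extra
+ * queue (index 'ovs_numa_get_n_cores()') reserved for them. */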
+static int
+core_id_to_qid(unsigned core_id)
+{
+ if (core_id != NON_PMD_CORE_ID) {
+ return core_id;
+ } else {
+ return ovs_numa_get_n_cores();
+ }
+}
+
/* Configures the 'pmd' based on the input argument. */
static void
dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
- int index, int core_id, int numa_id)
+ int index, unsigned core_id, int numa_id)
{
pmd->dp = dp;
pmd->index = index;
pmd->core_id = core_id;
+ pmd->tx_qid = core_id_to_qid(core_id);
pmd->numa_id = numa_id;
ovs_refcount_init(&pmd->ref_cnt);
can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
for (i = 0; i < can_have; i++) {
struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
- int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
+ unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
/* Each thread will distribute all devices' rx-queues among
struct ds ds = DS_EMPTY_INITIALIZER;
char *packet_str;
struct ofpbuf key;
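+ /* Parameters for serializing 'flow' to an ODP flow key. */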
+ struct odp_flow_key_parms odp_parms = {
+ .flow = flow,
+ .mask = &wc->masks,
+ .odp_in_port = flow->in_port.odp_port,
+ .support = dp_netdev_support,
+ };
ofpbuf_init(&key, 0);
- odp_flow_key_from_flow(&key, flow, &wc->masks, flow->in_port.odp_port,
- true);
+ odp_flow_key_from_flow(&odp_parms, &key);
packet_str = ofp_packet_to_string(dp_packet_data(packet_),
dp_packet_size(packet_));
}
static inline uint32_t
-dpif_netdev_packet_get_dp_hash(struct dp_packet *packet,
- const struct miniflow *mf)
+dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
+ const struct miniflow *mf)
{
- uint32_t hash;
+ uint32_t hash, recirc_depth;
hash = dp_packet_get_rss_hash(packet);
if (OVS_UNLIKELY(!hash)) {
hash = miniflow_hash_5tuple(mf, 0);
dp_packet_set_rss_hash(packet, hash);
}
+
+ /* The RSS hash must account for the recirculation depth to avoid
+ * collisions in the exact match cache. */
+ recirc_depth = *recirc_depth_get_unsafe();
+ if (OVS_UNLIKELY(recirc_depth)) {
+ hash = hash_finish(hash, recirc_depth);
+ dp_packet_set_rss_hash(packet, hash);
+ }
return hash;
}
struct dp_netdev_actions *actions;
struct dp_netdev_flow *flow = batch->flow;
- flow->batch = NULL;
dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count,
batch->tcp_flags, now);
continue;
}
+ if (i != cnt - 1) {
+ /* Prefetch the next packet's data. */
+ OVS_PREFETCH(dp_packet_data(packets[i+1]));
+ }
+
miniflow_extract(packets[i], &key.mf);
key.len = 0; /* Not computed yet. */
- key.hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.mf);
+ key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf);
flow = emc_lookup(flow_cache, &key);
if (OVS_LIKELY(flow)) {
fast_path_processing(pmd, packets, newcnt, keys, batches, &n_batches);
}
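+ /* All the flow batches need to be reset before any call to
+ * packet_batch_execute(), since execution may recirculate packets back
+ * into dp_netdev_input(), which would otherwise find stale 'flow->batch'
+ * pointers. */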
+ for (i = 0; i < n_batches; i++) {
+ batches[i].flow->batch = NULL;
+ }
+
for (i = 0; i < n_batches; i++) {
packet_batch_execute(&batches[i], pmd, now);
}
}
static void
-dp_netdev_drop_packets(struct dp_packet ** packets, int cnt, bool may_steal)
+dp_netdev_drop_packets(struct dp_packet **packets, int cnt, bool may_steal)
{
if (may_steal) {
int i;
case OVS_ACTION_ATTR_OUTPUT:
p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
if (OVS_LIKELY(p)) {
- netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal);
+ netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
return;
}
break;
}
/* Remove old port. */
- cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port));
+ cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
ovsrcu_postpone(free, old_port);
/* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
new_port = xmemdup(old_port, sizeof *old_port);
- new_port->md.in_port.odp_port = port_no;
+ new_port->port_no = port_no;
cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));
seq_change(dp->port_seq);
ovs_mutex_lock(&dp->port_mutex);
if (get_port_by_name(dp, argv[2], &port)) {
unixctl_command_reply_error(conn, "unknown port");
- } else if (port->md.in_port.odp_port == ODPP_LOCAL) {
+ } else if (port->port_no == ODPP_LOCAL) {
unixctl_command_reply_error(conn, "can't delete local port");
} else {
do_del_port(dp, port);
dp_register_provider(class);
}
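+/* Replaces the dpif provider for 'type' with the dummy implementation, if
+ * the existing provider can be unregistered. */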
+static void
+dpif_dummy_override(const char *type)
+{
+ if (!dp_unregister_provider(type)) {
+ dpif_dummy_register__(type);
+ }
+}
+
void
-dpif_dummy_register(bool override)
+dpif_dummy_register(enum dummy_level level)
{
- if (override) {
+ if (level == DUMMY_OVERRIDE_ALL) {
struct sset types;
const char *type;
sset_init(&types);
dp_enumerate_types(&types);
SSET_FOR_EACH (type, &types) {
- if (!dp_unregister_provider(type)) {
- dpif_dummy_register__(type);
- }
+ dpif_dummy_override(type);
}
sset_destroy(&types);
+ } else if (level == DUMMY_OVERRIDE_SYSTEM) {
+ dpif_dummy_override("system");
}
dpif_dummy_register__("dummy");
}
/* Compute hashes for the remaining keys. */
- ULONG_FOR_EACH_1(i, map) {
+ ULLONG_FOR_EACH_1(i, map) {
hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i],
&subtable->mask);
}
/* Lookup. */
map = cmap_find_batch(&subtable->rules, map, hashes, nodes);
/* Check results. */
- ULONG_FOR_EACH_1(i, map) {
+ ULLONG_FOR_EACH_1(i, map) {
struct dpcls_rule *rule;
CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
goto next;
}
}
- ULONG_SET0(map, i); /* Did not match. */
+ ULLONG_SET0(map, i); /* Did not match. */
next:
; /* Keep Sparse happy. */
}