*/
#include <config.h>
-#include "dpif.h"
+#include "dpif-netdev.h"
#include <ctype.h>
#include <errno.h>
#define NETDEV_RULE_PRIORITY 0x8000
#define FLOW_DUMP_MAX_BATCH 50
-#define NR_THREADS 1
/* Use per thread recirc_depth to prevent recirculation loop. */
#define MAX_RECIRC_DEPTH 5
DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
OVS_REQ_WRLOCK(dp->queue_rwlock);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
bool create, struct dpif **);
-static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **,
- int cnt, int queue_no, int type,
+static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
+ int queue_no, int type,
const struct miniflow *,
const struct nlattr *userdata);
static void dp_netdev_execute_actions(struct dp_netdev *dp,
if (netdev_is_pmd(netdev)) {
dp->pmd_count++;
- dp_netdev_set_pmd_threads(dp, NR_THREADS);
+ dp_netdev_set_pmd_threads(dp, NR_PMD_THREADS);
dp_netdev_reload_pmd_threads(dp);
}
ovs_refcount_init(&port->ref_cnt);
}
if (OVS_UNLIKELY(!rules[i])) {
+
dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
+
if (OVS_LIKELY(dp->handler_queues)) {
uint32_t hash = miniflow_hash_5tuple(mfs[i], 0);
struct ofpbuf *buf = &packets[i]->ofpbuf;
- dp_netdev_output_userspace(dp, &buf, 1, hash % dp->n_handlers,
+ dp_netdev_output_userspace(dp, buf, hash % dp->n_handlers,
DPIF_UC_MISS, mfs[i], NULL);
- } else {
- /* No upcall queue. Freeing the packet */
- dpif_packet_delete(packets[i]);
}
+
+ dpif_packet_delete(packets[i]);
continue;
}
if (userdata) {
buf_size += NLA_ALIGN(userdata->nla_len);
}
+ buf_size += ofpbuf_size(packet);
ofpbuf_init(buf, buf_size);
/* Put ODP flow. */
NLA_ALIGN(userdata->nla_len));
}
- upcall->packet = *packet;
+    /* We have to perform a copy of the packet, because we cannot send DPDK
+     * mbufs to a non pmd thread. Once upcall processing is performed in the
+     * pmd thread, this copy can be avoided. */
+ ofpbuf_set_data(&upcall->packet, ofpbuf_put(buf, ofpbuf_data(packet),
+ ofpbuf_size(packet)));
+ ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
seq_change(q->seq);
return 0;
} else {
- ofpbuf_delete(packet);
return ENOBUFS;
}
-
}
static int
-dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **packets,
- int cnt, int queue_no, int type,
+dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
+ int queue_no, int type,
const struct miniflow *key,
const struct nlattr *userdata)
{
struct dp_netdev_queue *q;
int error;
- int i;
fat_rwlock_rdlock(&dp->queue_rwlock);
q = &dp->handler_queues[queue_no];
ovs_mutex_lock(&q->mutex);
- for (i = 0; i < cnt; i++) {
- struct ofpbuf *packet = packets[i];
-
- error = dp_netdev_queue_userspace_packet(q, packet, type, key,
- userdata);
- if (error == ENOBUFS) {
- dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
- }
+ error = dp_netdev_queue_userspace_packet(q, packet, type, key,
+ userdata);
+ if (error == ENOBUFS) {
+ dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
}
ovs_mutex_unlock(&q->mutex);
fat_rwlock_unlock(&dp->queue_rwlock);
miniflow_initialize(&key.flow, key.buf);
for (i = 0; i < cnt; i++) {
- struct ofpbuf *packet, *userspace_packet;
+ struct ofpbuf *packet;
packet = &packets[i]->ofpbuf;
miniflow_extract(packet, md, &key.flow);
- userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
-
- dp_netdev_output_userspace(aux->dp, &userspace_packet, 1,
+ dp_netdev_output_userspace(aux->dp, packet,
miniflow_hash_5tuple(&key.flow, 0)
% aux->dp->n_handlers,
DPIF_UC_ACTION, &key.flow,
userdata);
+ if (may_steal) {
+ dpif_packet_delete(packets[i]);
+ }
}
break;
}
static struct list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
= LIST_INITIALIZER(&dpdk_mp_list);
+/* This mutex must be used by non pmd threads when allocating or freeing
+ * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
+ * use mempools, a non pmd thread should hold this mutex while calling them */
+struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
+
struct dpdk_mp {
struct rte_mempool *mp;
int mtu;
int port_id;
};
+static bool thread_is_pmd(void);
+
static int netdev_dpdk_construct(struct netdev *);
static bool
return ptr;
}
+/* XXX this function should be called only by pmd threads (or by non pmd
+ * threads holding the nonpmd_mempool_mutex) */
void
free_dpdk_buf(struct dpif_packet *p)
{
-    struct ofpbuf *buf = &p->ofpbuf;
-    struct rte_mbuf *pkt = (struct rte_mbuf *) buf->dpdk_buf;
+    /* The dpif_packet starts at the same address as its underlying
+     * rte_mbuf, so the pointer can be cast directly instead of going
+     * through the ofpbuf's dpdk_buf field. */
+    struct rte_mbuf *pkt = (struct rte_mbuf *) p;
-    rte_mempool_put(pkt->pool, pkt);
+    /* rte_pktmbuf_free_seg() returns the segment to the mempool it was
+     * allocated from, so packets from different mempools are handled
+     * correctly (unlike a raw rte_mempool_put() to one fixed pool). */
+    rte_pktmbuf_free_seg(pkt);
}
static void
nb_tx = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts, txq->count);
if (OVS_UNLIKELY(nb_tx != txq->count)) {
- /* free buffers if we couldn't transmit packets */
- rte_mempool_put_bulk(dev->dpdk_mp->mp,
- (void **) &txq->burst_pkts[nb_tx],
- (txq->count - nb_tx));
+ /* free buffers, which we couldn't transmit, one at a time (each
+ * packet could come from a different mempool) */
+ int i;
+
+ for (i = nb_tx; i < txq->count; i++) {
+ rte_pktmbuf_free_seg(txq->burst_pkts[i]);
+ }
}
txq->count = 0;
txq->tsc = rte_get_timer_cycles();
/* Tx function. Transmit packets indefinitely */
static void
dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct rte_mbuf *mbufs[cnt];
int newcnt = 0;
int i;
+ /* If we are on a non pmd thread we have to use the mempool mutex, because
+ * every non pmd thread shares the same mempool cache */
+
+ if (!thread_is_pmd()) {
+ ovs_mutex_lock(&nonpmd_mempool_mutex);
+ }
+
for (i = 0; i < cnt; i++) {
int size = ofpbuf_size(&pkts[i]->ofpbuf);
dpdk_queue_pkts(dev, NON_PMD_THREAD_TX_QUEUE, mbufs, newcnt);
dpdk_queue_flush(dev, NON_PMD_THREAD_TX_QUEUE);
+
+ if (!thread_is_pmd()) {
+ ovs_mutex_unlock(&nonpmd_mempool_mutex);
+ }
}
static int
argv[result] = argv[0];
}
+ /* We are called from the main thread here */
+ thread_set_nonpmd();
+
return result + 1;
}
VLOG_ERR("Thread affinity error %d",err);
return err;
}
- RTE_PER_LCORE(_lcore_id) = cpu;
+    /* lcore_id 0 is reserved for use by non pmd threads. */
+ RTE_PER_LCORE(_lcore_id) = cpu + 1;
return 0;
}
+
+/* Marks the calling thread as a non pmd thread by assigning it lcore_id 0,
+ * the id reserved for non pmd threads (pmd threads get cpu + 1; see
+ * thread_is_pmd()). */
+void
+thread_set_nonpmd(void)
+{
+    /* We cannot have RTE_MAX_LCORE pmd threads, because lcore_id 0 is reserved
+     * for non pmd threads */
+    BUILD_ASSERT(NR_PMD_THREADS < RTE_MAX_LCORE);
+    /* We have to use 0 to allow non pmd threads to perform certain DPDK
+     * operations, like rte_eth_dev_configure(). */
+    RTE_PER_LCORE(_lcore_id) = 0;
+}
+
+/* Returns true if the calling thread is a pmd thread.  Pmd threads are
+ * assigned lcore ids starting from 1 (cpu + 1), while lcore_id 0 is
+ * reserved for non pmd threads (see thread_set_nonpmd()). */
+static bool
+thread_is_pmd(void)
+{
+    return rte_lcore_id() != 0;
+}