#include "dirs.h"
#include "dp-packet.h"
#include "dpif-netdev.h"
+#include "fatal-signal.h"
#include "list.h"
#include "netdev-dpdk.h"
#include "netdev-provider.h"
#define NIC_PORT_RX_Q_SIZE 2048 /* Size of Physical NIC RX Queue, Max (n+32<=4096)*/
#define NIC_PORT_TX_Q_SIZE 2048 /* Size of Physical NIC TX Queue, Max (n+32<=4096)*/
+#define OVS_VHOST_MAX_QUEUE_NUM 1024 /* Maximum number of vHost TX queues. */
+
static char *cuse_dev_name = NULL; /* Character device cuse_dev_name. */
static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */
* from concurrent access. It is used only
* if the queue is shared among different
* pmd threads (see 'txq_needs_locking'). */
+ int map; /* Mapping of configured vhost-user queues
+ * to the queues enabled by the guest. */
uint64_t tsc;
struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
};
/* For the client rings */
struct rte_ring *cring_tx;
struct rte_ring *cring_rx;
- int user_port_id; /* User given port no, parsed from port name */
+ unsigned int user_port_id; /* User given port no, parsed from port name */
int eth_port_id; /* ethernet device port id */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
};
/* Queues are shared among CPUs. Always flush */
netdev->tx_q[i].flush_tx = true;
}
+
+ /* Initialize map for vhost devices. */
+ netdev->tx_q[i].map = -1;
rte_spinlock_init(&netdev->tx_q[i].tx_lock);
}
}
netdev_->n_txq = NR_QUEUE;
netdev_->n_rxq = NR_QUEUE;
+ netdev_->requested_n_rxq = NR_QUEUE;
netdev->real_n_txq = NR_QUEUE;
if (type == DPDK_DEV_ETH) {
if (err) {
goto unlock;
}
+ } else {
+ netdev_dpdk_alloc_txq(netdev, OVS_VHOST_MAX_QUEUE_NUM);
}
list_push_back(&dpdk_list, &netdev->list_node);
return err;
}
+/* dev_name must be the prefix followed by a positive decimal number.
+ * (no leading + or - signs are allowed) */
static int
dpdk_dev_parse_name(const char dev_name[], const char prefix[],
unsigned int *port_no)
}
cport = dev_name + strlen(prefix);
- *port_no = strtol(cport, NULL, 0); /* string must be null terminated */
- return 0;
+
+ if (str_to_uint(cport, 10, port_no)) {
+ return 0;
+ } else {
+ return ENODEV;
+ }
}
static int
netdev_dpdk_vhost_user_construct(struct netdev *netdev_)
{
struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+ const char *name = netdev_->name;
int err;
+ /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
+ * the file system. '/' or '\' would traverse directories, so they're not
+ * acceptable in 'name'. */
+ if (strchr(name, '/') || strchr(name, '\\')) {
+ VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
+ "A valid name must not include '/' or '\\'",
+ name);
+ return EINVAL;
+ }
+
ovs_mutex_lock(&dpdk_mutex);
/* Take the name of the vhost-user port and append it to the location where
* the socket is to be created, then register the socket.
*/
snprintf(netdev->vhost_id, sizeof(netdev->vhost_id), "%s/%s",
- vhost_sock_dir, netdev_->name);
+ vhost_sock_dir, name);
+
err = rte_vhost_driver_register(netdev->vhost_id);
if (err) {
VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
netdev->vhost_id);
+ } else {
+ fatal_signal_add_file_to_unlink(netdev->vhost_id);
+ VLOG_INFO("Socket %s created for vhost-user port %s\n",
+ netdev->vhost_id, name);
+ err = vhost_construct_helper(netdev_);
}
- VLOG_INFO("Socket %s created for vhost-user port %s\n", netdev->vhost_id, netdev_->name);
- err = vhost_construct_helper(netdev_);
+
ovs_mutex_unlock(&dpdk_mutex);
return err;
}
if (rte_vhost_driver_unregister(dev->vhost_id)) {
VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
+ } else {
+ fatal_signal_remove_file_to_unlink(dev->vhost_id);
}
ovs_mutex_lock(&dpdk_mutex);
}
static int
-netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args)
+netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
{
- struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- smap_add_format(args, "configured_rx_queues", "%d", netdev_->n_rxq);
- smap_add_format(args, "requested_tx_queues", "%d", netdev_->n_txq);
+ smap_add_format(args, "requested_rx_queues", "%d", netdev->requested_n_rxq);
+ smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
+ smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
ovs_mutex_unlock(&dev->mutex);
return 0;
}
+/* Applies the configuration in 'args' to 'netdev'.
+ *
+ * Only the "n_rxq" key is consulted.  When it is absent the current
+ * 'requested_n_rxq' is kept as the default; the resulting value is clamped
+ * so that it is never lower than 1.  The change sequence of 'netdev' is
+ * bumped on every call.  Always returns 0. */
+static int
+netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int wanted_rxq;
+
+    ovs_mutex_lock(&dev->mutex);
+
+    /* Look the value up once, then enforce the lower bound of one queue. */
+    wanted_rxq = smap_get_int(args, "n_rxq", netdev->requested_n_rxq);
+    if (wanted_rxq > 1) {
+        netdev->requested_n_rxq = wanted_rxq;
+    } else {
+        netdev->requested_n_rxq = 1;
+    }
+
+    netdev_change_seq_changed(netdev);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
static int
netdev_dpdk_get_numa_id(const struct netdev *netdev_)
{
ovs_mutex_lock(&dpdk_mutex);
ovs_mutex_lock(&netdev->mutex);
- rte_free(netdev->tx_q);
netdev->up.n_txq = n_txq;
netdev->up.n_rxq = n_rxq;
- netdev_dpdk_alloc_txq(netdev, netdev->up.n_txq);
ovs_mutex_unlock(&netdev->mutex);
ovs_mutex_unlock(&dpdk_mutex);
unsigned int total_pkts = cnt;
uint64_t start = 0;
- if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
+ qid = vhost_dev->tx_q[qid % vhost_dev->real_n_txq].map;
+
+ if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid == -1)) {
rte_spinlock_lock(&vhost_dev->stats_lock);
vhost_dev->stats.tx_dropped+= cnt;
rte_spinlock_unlock(&vhost_dev->stats_lock);
goto out;
}
- if (vhost_dev->txq_needs_locking) {
- qid = qid % vhost_dev->real_n_txq;
- rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
- }
+ rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
do {
int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
}
} while (cnt);
- if (vhost_dev->txq_needs_locking) {
- rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
- }
+ rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
rte_spinlock_lock(&vhost_dev->stats_lock);
netdev_dpdk_vhost_update_tx_counters(&vhost_dev->stats, pkts, total_pkts,
}
}
+/*
+ * Recomputes the tx queue mapping of a vhost-user device.  Must be called
+ * after each enabling/disabling of queues and after every modification of
+ * 'real_n_txq'.
+ *
+ * Only the first 'real_n_txq' entries of 'tx_q' are considered.  A queue
+ * whose 'map' equals its own index counts as enabled and is left untouched.
+ * Every other queue is pointed, in round-robin order, at one of the enabled
+ * queues, or at -1 when no queue is enabled at all.
+ */
+static void
+netdev_dpdk_remap_txqs(struct netdev_dpdk *netdev)
+    OVS_REQUIRES(netdev->mutex)
+{
+    int *enabled_queues, n_enabled = 0;
+    int i, k, total_txqs = netdev->real_n_txq;
+
+    /* Nothing to remap; this also avoids asking the allocator for a
+     * zero-sized (or, for a negative count, absurdly large) buffer. */
+    if (total_txqs <= 0) {
+        return;
+    }
+
+    enabled_queues = dpdk_rte_mzalloc(total_txqs * sizeof *enabled_queues);
+
+    for (i = 0; i < total_txqs; i++) {
+        /* Enabled queues always mapped to themselves. */
+        if (netdev->tx_q[i].map == i) {
+            enabled_queues[n_enabled++] = i;
+        }
+    }
+
+    /* With no enabled queue, use a single -1 entry so that every queue ends
+     * up mapped to -1 and the modulo below never divides by zero. */
+    if (n_enabled == 0) {
+        enabled_queues[0] = -1;
+        n_enabled = 1;
+    }
+
+    /* Spread the remaining queues over the enabled ones, round-robin. */
+    k = 0;
+    for (i = 0; i < total_txqs; i++) {
+        if (netdev->tx_q[i].map != i) {
+            netdev->tx_q[i].map = enabled_queues[k];
+            k = (k + 1) % n_enabled;
+        }
+    }
+
+    VLOG_DBG("TX queue mapping for %s", netdev->vhost_id);
+    for (i = 0; i < total_txqs; i++) {
+        VLOG_DBG("%2d --> %2d", i, netdev->tx_q[i].map);
+    }
+
+    rte_free(enabled_queues);
+}
static int
netdev_dpdk_vhost_set_queues(struct netdev_dpdk *netdev, struct virtio_net *dev)
+ OVS_REQUIRES(netdev->mutex)
{
uint32_t qp_num;
netdev->real_n_rxq = qp_num;
netdev->real_n_txq = qp_num;
- if (netdev->up.n_txq > netdev->real_n_txq) {
- netdev->txq_needs_locking = true;
- } else {
- netdev->txq_needs_locking = false;
- }
+ netdev->txq_needs_locking = true;
+
+ netdev_dpdk_remap_txqs(netdev);
return 0;
}
destroy_device(volatile struct virtio_net *dev)
{
struct netdev_dpdk *vhost_dev;
+ bool exists = false;
ovs_mutex_lock(&dpdk_mutex);
LIST_FOR_EACH (vhost_dev, list_node, &dpdk_list) {
ovs_mutex_lock(&vhost_dev->mutex);
dev->flags &= ~VIRTIO_DEV_RUNNING;
ovsrcu_set(&vhost_dev->virtio_dev, NULL);
+ exists = true;
ovs_mutex_unlock(&vhost_dev->mutex);
+ break;
+ }
+ }
- /*
- * Wait for other threads to quiesce before
- * setting the virtio_dev to NULL.
- */
- ovsrcu_synchronize();
- /*
- * As call to ovsrcu_synchronize() will end the quiescent state,
- * put thread back into quiescent state before returning.
- */
- ovsrcu_quiesce_start();
+ ovs_mutex_unlock(&dpdk_mutex);
+
+ if (exists == true) {
+ /*
+ * Wait for other threads to quiesce after setting the 'virtio_dev'
+ * to NULL, before returning.
+ */
+ ovsrcu_synchronize();
+ /*
+ * As call to ovsrcu_synchronize() will end the quiescent state,
+ * put thread back into quiescent state before returning.
+ */
+ ovsrcu_quiesce_start();
+ VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed", dev->ifname,
+ dev->device_fh);
+ } else {
+ VLOG_INFO("vHost Device '%s' %"PRIu64" not found", dev->ifname,
+ dev->device_fh);
+ }
+
+}
+
+/*
+ * Handles an enable/disable notification for vring 'queue_id' of 'dev'.
+ *
+ * Notifications for a vring whose index maps to VIRTIO_TXQ are ignored and
+ * reported as success.  Otherwise the vhost device whose 'vhost_id' matches
+ * 'dev->ifname' is looked up in 'dpdk_list', the state of the matching tx
+ * queue is recorded in its 'map' field ('qid' when enabled, -1 when
+ * disabled) and the tx queue mapping is recomputed.
+ *
+ * Returns 0 on success or for ignored vrings, -1 if the queue index is out
+ * of range or no matching device is found.
+ */
+static int
+vring_state_changed(struct virtio_net *dev, uint16_t queue_id, int enable)
+{
+    struct netdev_dpdk *vhost_dev;
+    bool exists = false;
+    int qid = queue_id / VIRTIO_QNUM;
+
+    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
+        return 0;
+    }
+
+    /* 'queue_id' comes from the vhost side; never index 'tx_q' beyond the
+     * OVS_VHOST_MAX_QUEUE_NUM queues allocated for vhost ports. */
+    if (qid >= OVS_VHOST_MAX_QUEUE_NUM) {
+        VLOG_ERR("Queue %d ( tx_qid %d ) of vhost device '%s' is out of "
+                 "range", queue_id, qid, dev->ifname);
+        return -1;
+    }
+
+    ovs_mutex_lock(&dpdk_mutex);
+    LIST_FOR_EACH (vhost_dev, list_node, &dpdk_list) {
+        if (strncmp(dev->ifname, vhost_dev->vhost_id, IF_NAME_SZ) == 0) {
+            ovs_mutex_lock(&vhost_dev->mutex);
+            if (enable) {
+                vhost_dev->tx_q[qid].map = qid;
+            } else {
+                vhost_dev->tx_q[qid].map = -1;
+            }
+            netdev_dpdk_remap_txqs(vhost_dev);
+            exists = true;
+            ovs_mutex_unlock(&vhost_dev->mutex);
+            break;
}
}
ovs_mutex_unlock(&dpdk_mutex);
- VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed", dev->ifname,
- dev->device_fh);
+    if (exists) {
+        /* Use the same truth test as above so the log matches the state
+         * that was actually applied. */
+        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
+                  PRIu64" changed to \'%s\'", queue_id, qid, dev->ifname,
+                  dev->device_fh, enable ? "enabled" : "disabled");
+    } else {
+        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", dev->ifname,
+                  dev->device_fh);
+        return -1;
+    }
+
+    return 0;
}
struct virtio_net *
{
.new_device = new_device,
.destroy_device = destroy_device,
+ .vring_state_changed = vring_state_changed
};
static void *
unsigned int *eth_port_id)
{
struct dpdk_ring *ivshmem;
- char ring_name[10];
+ char ring_name[RTE_RING_NAMESIZE];
int err;
ivshmem = dpdk_rte_mzalloc(sizeof *ivshmem);
}
/* XXX: Add support for multiquque ring. */
- err = snprintf(ring_name, 10, "%s_tx", dev_name);
+ err = snprintf(ring_name, sizeof(ring_name), "%s_tx", dev_name);
if (err < 0) {
return -err;
}
return ENOMEM;
}
- err = snprintf(ring_name, 10, "%s_rx", dev_name);
+ err = snprintf(ring_name, sizeof(ring_name), "%s_rx", dev_name);
if (err < 0) {
return -err;
}
DESTRUCT, \
netdev_dpdk_dealloc, \
netdev_dpdk_get_config, \
- NULL, /* netdev_dpdk_set_config */ \
+ netdev_dpdk_set_config, \
NULL, /* get_tunnel_config */ \
NULL, /* build header */ \
NULL, /* push header */ \