From: Ilya Maximets <i.maximets@samsung.com>
Date: Thu, 21 Jan 2016 14:15:18 +0000 (+0300)
Subject: dpif-netdev: Allow different numbers of rx queues for different ports.
X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=a14b8947fd13d4c587addbffd24eedc7bb48ee2b

dpif-netdev: Allow different numbers of rx queues for different ports.

Currently, all of the PMD netdevs can only have the same number of
rx queues, which is specified in other_config:n-dpdk-rxqs.

Fix that by introducing of new option for PMD interfaces: 'n_rxq', which
specifies the maximum number of rx queues to be created for this
interface.

Example:
	ovs-vsctl set Interface dpdk0 options:n_rxq=8

Old 'other_config:n-dpdk-rxqs' deleted.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ben Pfaff <blp@ovn.org>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
---

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index e8ef4b562..d8927880c 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -294,9 +294,9 @@ Performance Tuning:
 
   3. DPDK port Rx Queues
 
-	`ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=<integer>`
+	`ovs-vsctl set Interface <DPDK interface> options:n_rxq=<integer>`
 
-	The command above sets the number of rx queues for each DPDK interface.
+	The command above sets the number of rx queues for DPDK interface.
 	The rx queues are assigned to pmd threads on the same NUMA node in a
 	round-robin fashion.  For more information, please refer to the
 	Open_vSwitch TABLE section in
@@ -568,9 +568,16 @@ Follow the steps below to attach vhost-user port(s) to a VM.
    ```
 
 3. Optional: Enable multiqueue support
-   QEMU needs to be configured with multiple queues and the number queues
-   must be less or equal to Open vSwitch other_config:n-dpdk-rxqs.
-   The $q below is the number of queues.
+   The vhost-user interface must be configured in Open vSwitch with the
+   desired amount of queues with:
+
+   ```
+   ovs-vsctl set Interface vhost-user-2 options:n_rxq=<requested queues>
+   ```
+
+   QEMU needs to be configured as well.
+   The $q below should match the queues requested in OVS (if $q is more,
+   packets will not be received).
    The $v is the number of vectors, which is '$q x 2 + 2'.
 
    ```
diff --git a/NEWS b/NEWS
index e683dae5f..c133838fb 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,10 @@ Post-v2.5.0
      * OpenFlow 1.4+ OFPMP_QUEUE_DESC is now supported.
    - ovs-ofctl:
      * queue-get-config command now allows a queue ID to be specified.
+   - DPDK:
+     * New option "n_rxq" for PMD interfaces.
+       Old 'other_config:n-dpdk-rxqs' is no longer supported.
+
 
 v2.5.0 - xx xxx xxxx
 ---------------------
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f233b6103..1b9793bbf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -221,9 +221,7 @@ struct dp_netdev {
      * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */
     ovsthread_key_t per_pmd_key;
 
-    /* Number of rx queues for each dpdk interface and the cpu mask
-     * for pin of pmd threads. */
-    size_t n_dpdk_rxqs;
+    /* Cpu mask for pin of pmd threads. */
     char *pmd_cmask;
     uint64_t last_tnl_conf_seq;
 };
@@ -254,6 +252,8 @@ struct dp_netdev_port {
     struct netdev_rxq **rxq;
     struct ovs_refcount ref_cnt;
     char *type;                 /* Port type as requested by user. */
+    int latest_requested_n_rxq; /* Latest requested from netdev number
+                                   of rx queues. */
 };
 
 /* Contained by struct dp_netdev_flow's 'stats' member.  */
@@ -866,7 +866,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
     ovsthread_key_create(&dp->per_pmd_key, NULL);
 
     dp_netdev_set_nonpmd(dp);
-    dp->n_dpdk_rxqs = NR_QUEUE;
 
     ovs_mutex_lock(&dp->port_mutex);
     error = do_add_port(dp, name, "internal", ODPP_LOCAL);
@@ -1094,7 +1093,8 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
         /* There can only be ovs_numa_get_n_cores() pmd threads,
          * so creates a txq for each, and one extra for the non
          * pmd threads. */
-        error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs);
+        error = netdev_set_multiq(netdev, n_cores + 1,
+                                  netdev_requested_n_rxq(netdev));
         if (error && (error != EOPNOTSUPP)) {
             VLOG_ERR("%s, cannot set multiq", devname);
             return errno;
@@ -1105,6 +1105,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
     port->netdev = netdev;
     port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
     port->type = xstrdup(type);
+    port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev);
     for (i = 0; i < netdev_n_rxq(netdev); i++) {
         error = netdev_rxq_open(netdev, &port->rxq[i], i);
         if (error
@@ -2408,32 +2409,42 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
 /* Returns true if the configuration for rx queues or cpu mask
  * is changed. */
 static bool
-pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask)
+pmd_config_changed(const struct dp_netdev *dp, const char *cmask)
 {
-    if (dp->n_dpdk_rxqs != rxqs) {
-        return true;
-    } else {
-        if (dp->pmd_cmask != NULL && cmask != NULL) {
-            return strcmp(dp->pmd_cmask, cmask);
-        } else {
-            return (dp->pmd_cmask != NULL || cmask != NULL);
+    struct dp_netdev_port *port;
+
+    CMAP_FOR_EACH (port, node, &dp->ports) {
+        struct netdev *netdev = port->netdev;
+        int requested_n_rxq = netdev_requested_n_rxq(netdev);
+        if (netdev_is_pmd(netdev)
+            && port->latest_requested_n_rxq != requested_n_rxq) {
+            return true;
         }
     }
+
+    if (dp->pmd_cmask != NULL && cmask != NULL) {
+        return strcmp(dp->pmd_cmask, cmask);
+    } else {
+        return (dp->pmd_cmask != NULL || cmask != NULL);
+    }
 }
 
 /* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
 static int
-dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
+dpif_netdev_pmd_set(struct dpif *dpif, const char *cmask)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
 
-    if (pmd_config_changed(dp, n_rxqs, cmask)) {
+    if (pmd_config_changed(dp, cmask)) {
         struct dp_netdev_port *port;
 
         dp_netdev_destroy_all_pmds(dp);
 
         CMAP_FOR_EACH (port, node, &dp->ports) {
-            if (netdev_is_pmd(port->netdev)) {
+            struct netdev *netdev = port->netdev;
+            int requested_n_rxq = netdev_requested_n_rxq(netdev);
+            if (netdev_is_pmd(port->netdev)
+                && port->latest_requested_n_rxq != requested_n_rxq) {
                 int i, err;
 
                 /* Closes the existing 'rxq's. */
@@ -2445,14 +2456,14 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
                 /* Sets the new rx queue config.  */
                 err = netdev_set_multiq(port->netdev,
                                         ovs_numa_get_n_cores() + 1,
-                                        n_rxqs);
+                                        requested_n_rxq);
                 if (err && (err != EOPNOTSUPP)) {
                     VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
                              " %u", netdev_get_name(port->netdev),
-                             n_rxqs);
+                             requested_n_rxq);
                     return err;
                 }
-
+                port->latest_requested_n_rxq = requested_n_rxq;
                 /* If the set_multiq() above succeeds, reopens the 'rxq's. */
                 port->rxq = xrealloc(port->rxq, sizeof *port->rxq
                                      * netdev_n_rxq(port->netdev));
@@ -2461,8 +2472,6 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask)
                 }
             }
         }
-        dp->n_dpdk_rxqs = n_rxqs;
-
         /* Reconfigures the cpu mask. */
         ovs_numa_set_cpu_mask(cmask);
         free(dp->pmd_cmask);
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index a9844be9e..fbd370fcc 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -318,11 +318,9 @@ struct dpif_class {
     int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers);
 
     /* If 'dpif' creates its own I/O polling threads, refreshes poll threads
-     * configuration.  'n_rxqs' configures the number of rx_queues, which
-     * are distributed among threads.  'cmask' configures the cpu mask
-     * for setting the polling threads' cpu affinity. */
-    int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs,
-                            const char *cmask);
+     * configuration.  'cmask' configures the cpu mask for setting the polling
+     * threads' cpu affinity. */
+    int (*poll_threads_set)(struct dpif *dpif, const char *cmask);
 
     /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
      * priority value used for setting packet priority. */
diff --git a/lib/dpif.c b/lib/dpif.c
index 38e40bac9..81cbe1181 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1406,13 +1406,12 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
 /* If 'dpif' creates its own I/O polling threads, refreshes poll threads
  * configuration. */
 int
-dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs,
-                      const char *cmask)
+dpif_poll_threads_set(struct dpif *dpif, const char *cmask)
 {
     int error = 0;
 
     if (dpif->dpif_class->poll_threads_set) {
-        error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask);
+        error = dpif->dpif_class->poll_threads_set(dpif, cmask);
         if (error) {
             log_operation(dpif, "poll_threads_set", error);
         }
diff --git a/lib/dpif.h b/lib/dpif.h
index 50174ee95..97d5d064c 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -836,8 +836,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
 
 int dpif_recv_set(struct dpif *, bool enable);
 int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
-int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs,
-                          const char *cmask);
+int dpif_poll_threads_set(struct dpif *, const char *cmask);
 int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
               struct ofpbuf *);
 void dpif_recv_purge(struct dpif *);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d115de2c7..2a479e2c7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -614,6 +614,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
 
     netdev_->n_txq = NR_QUEUE;
     netdev_->n_rxq = NR_QUEUE;
+    netdev_->requested_n_rxq = NR_QUEUE;
     netdev->real_n_txq = NR_QUEUE;
 
     if (type == DPDK_DEV_ETH) {
@@ -769,20 +770,35 @@ netdev_dpdk_dealloc(struct netdev *netdev_)
 }
 
 static int
-netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args)
+netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
 {
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
     ovs_mutex_lock(&dev->mutex);
 
-    smap_add_format(args, "configured_rx_queues", "%d", netdev_->n_rxq);
-    smap_add_format(args, "requested_tx_queues", "%d", netdev_->n_txq);
+    smap_add_format(args, "requested_rx_queues", "%d", netdev->requested_n_rxq);
+    smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
+    smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
     smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
     ovs_mutex_unlock(&dev->mutex);
 
     return 0;
 }
 
+static int
+netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
+                                               netdev->requested_n_rxq), 1);
+    netdev_change_seq_changed(netdev);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
 static int
 netdev_dpdk_get_numa_id(const struct netdev *netdev_)
 {
@@ -2127,7 +2143,7 @@ unlock_dpdk:
     DESTRUCT,                                                 \
     netdev_dpdk_dealloc,                                      \
     netdev_dpdk_get_config,                                   \
-    NULL,                       /* netdev_dpdk_set_config */  \
+    netdev_dpdk_set_config,                                   \
     NULL,                       /* get_tunnel_config */       \
     NULL,                       /* build header */            \
     NULL,                       /* push header */             \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index a33bb3b3a..d324ffc92 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -52,9 +52,13 @@ struct netdev {
      * 'netdev''s flags, features, ethernet address, or carrier changes. */
     uint64_t change_seq;
 
-    /* The following are protected by 'netdev_mutex' (internal to netdev.c). */
+    /* The core netdev code initializes these at netdev construction and only
+     * provide read-only access to its client.  Netdev implementations may
+     * modify them. */
     int n_txq;
     int n_rxq;
+    /* Number of rx queues requested by user. */
+    int requested_n_rxq;
     int ref_cnt;                        /* Times this devices was opened. */
     struct shash_node *node;            /* Pointer to element in global map. */
     struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */
diff --git a/lib/netdev.c b/lib/netdev.c
index e3b70b175..c250c93ae 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -106,6 +106,12 @@ netdev_n_rxq(const struct netdev *netdev)
     return netdev->n_rxq;
 }
 
+int
+netdev_requested_n_rxq(const struct netdev *netdev)
+{
+    return netdev->requested_n_rxq;
+}
+
 bool
 netdev_is_pmd(const struct netdev *netdev)
 {
@@ -376,6 +382,7 @@ netdev_open(const char *name, const char *type, struct netdev **netdevp)
                 /* By default enable one tx and rx queue per netdev. */
                 netdev->n_txq = netdev->netdev_class->send ? 1 : 0;
                 netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0;
+                netdev->requested_n_rxq = netdev->n_rxq;
 
                 list_init(&netdev->saved_flags_list);
 
diff --git a/lib/netdev.h b/lib/netdev.h
index 622e2ae2c..8a7f68004 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -142,6 +142,7 @@ bool netdev_is_reserved_name(const char *name);
 
 int netdev_n_txq(const struct netdev *netdev);
 int netdev_n_rxq(const struct netdev *netdev);
+int netdev_requested_n_rxq(const struct netdev *netdev);
 bool netdev_is_pmd(const struct netdev *netdev);
 
 /* Open and close. */
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 89e06aa9c..904cc2a5c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -564,7 +564,7 @@ type_run(const char *type)
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
     }
 
-    dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);
+    dpif_poll_threads_set(backer->dpif, pmd_cpu_mask);
 
     if (backer->need_revalidate) {
         struct ofproto_dpif *ofproto;
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index b6aac0a4f..3ba97d042 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -482,9 +482,6 @@ extern unsigned ofproto_max_idle;
  * ofproto-dpif implementation. */
 extern size_t n_handlers, n_revalidators;
 
-/* Number of rx queues to be created for each dpdk interface. */
-extern size_t n_dpdk_rxqs;
-
 /* Cpu mask for pmd threads. */
 extern char *pmd_cpu_mask;
 
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index bba30ae16..939cb3714 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -337,7 +337,6 @@ unsigned ofproto_flow_limit = OFPROTO_FLOW_LIMIT_DEFAULT;
 unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT;
 
 size_t n_handlers, n_revalidators;
-size_t n_dpdk_rxqs;
 char *pmd_cpu_mask;
 
 /* Map from datapath name to struct ofproto, for use by unixctl commands. */
@@ -780,12 +779,6 @@ ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux,
             : EOPNOTSUPP);
 }
 
-void
-ofproto_set_n_dpdk_rxqs(int n_rxqs)
-{
-    n_dpdk_rxqs = MAX(n_rxqs, 0);
-}
-
 void
 ofproto_set_cpu_mask(const char *cmask)
 {
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 7504027c2..b99f0cd9a 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -316,7 +316,6 @@ int ofproto_set_mcast_snooping(struct ofproto *ofproto,
 int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux,
                           const struct ofproto_mcast_snooping_port_settings *s);
 void ofproto_set_threads(int n_handlers, int n_revalidators);
-void ofproto_set_n_dpdk_rxqs(int n_rxqs);
 void ofproto_set_cpu_mask(const char *cmask);
 void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
 int ofproto_set_snoops(struct ofproto *, const struct sset *snoops);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 2ff23223f..c6fa44526 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -581,8 +581,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
                                         OFPROTO_FLOW_LIMIT_DEFAULT));
     ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle",
                                       OFPROTO_MAX_IDLE_DEFAULT));
-    ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config,
-                                         "n-dpdk-rxqs", 0));
     ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask"));
 
     ofproto_set_threads(
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index e7c7e3c78..c2ec914df 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -167,15 +167,6 @@
         </p>
       </column>
 
-      <column name="other_config" key="n-dpdk-rxqs"
-              type='{"type": "integer", "minInteger": 1}'>
-        <p>
-          Specifies the maximum number of rx queues to be created for each dpdk
-          interface.  If not specified or specified to 0, one rx queue will
-          be created for each dpdk interface by default.
-        </p>
-      </column>
-
       <column name="other_config" key="pmd-cpu-mask">
         <p>
           Specifies CPU mask for setting the cpu affinity of PMD (Poll
@@ -2197,6 +2188,21 @@
       </column>
     </group>
 
+    <group title="PMD (Poll Mode Driver) Options">
+      <p>
+        Only PMD netdevs support these options.
+      </p>
+
+      <column name="options" key="n_rxqs"
+              type='{"type": "integer", "minInteger": 1}'>
+        <p>
+          Specifies the maximum number of rx queues to be created for PMD
+          netdev.  If not specified or specified to 0, one rx queue will
+          be created by default.
+        </p>
+      </column>
+    </group>
+
     <group title="Interface Status">
       <p>
         Status information about interfaces attached to bridges, updated every