From: Ilya Maximets Date: Thu, 21 Jan 2016 14:15:18 +0000 (+0300) Subject: dpif-netdev: Allow different numbers of rx queues for different ports. X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=a14b8947fd13d4c587addbffd24eedc7bb48ee2b dpif-netdev: Allow different numbers of rx queues for different ports. Currently, all of the PMD netdevs can only have the same number of rx queues, which is specified in other_config:n-dpdk-rxqs. Fix that by introducing a new option for PMD interfaces: 'n_rxq', which specifies the maximum number of rx queues to be created for this interface. Example: ovs-vsctl set Interface dpdk0 options:n_rxq=8 Old 'other_config:n-dpdk-rxqs' deleted. Signed-off-by: Ilya Maximets Acked-by: Ben Pfaff Acked-by: Flavio Leitner Signed-off-by: Daniele Di Proietto --- diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index e8ef4b562..d8927880c 100644 --- a/INSTALL.DPDK.md +++ b/INSTALL.DPDK.md @@ -294,9 +294,9 @@ Performance Tuning: 3. DPDK port Rx Queues - `ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=<integer>` + `ovs-vsctl set Interface <DPDK interface> options:n_rxq=<integer>` - The command above sets the number of rx queues for each DPDK interface. + The command above sets the number of rx queues for the given DPDK interface. The rx queues are assigned to pmd threads on the same NUMA node in a round-robin fashion. For more information, please refer to the Open_vSwitch TABLE section in @@ -568,9 +568,16 @@ Follow the steps below to attach vhost-user port(s) to a VM. ``` 3. Optional: Enable multiqueue support - QEMU needs to be configured with multiple queues and the number queues - must be less or equal to Open vSwitch other_config:n-dpdk-rxqs. - The $q below is the number of queues. + The vhost-user interface must be configured in Open vSwitch with the + desired amount of queues with: + + ``` + ovs-vsctl set Interface vhost-user-2 options:n_rxq=<requested queues> + ``` + + QEMU needs to be configured as well. 
+ The $q below should match the queues requested in OVS (if $q is more, + packets will not be received). The $v is the number of vectors, which is '$q x 2 + 2'. ``` diff --git a/NEWS b/NEWS index e683dae5f..c133838fb 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,10 @@ Post-v2.5.0 * OpenFlow 1.4+ OFPMP_QUEUE_DESC is now supported. - ovs-ofctl: * queue-get-config command now allows a queue ID to be specified. + - DPDK: + * New option "n_rxq" for PMD interfaces. + Old 'other_config:n-dpdk-rxqs' is no longer supported. + v2.5.0 - xx xxx xxxx --------------------- diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index f233b6103..1b9793bbf 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -221,9 +221,7 @@ struct dp_netdev { * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ ovsthread_key_t per_pmd_key; - /* Number of rx queues for each dpdk interface and the cpu mask - * for pin of pmd threads. */ - size_t n_dpdk_rxqs; + /* Cpu mask for pin of pmd threads. */ char *pmd_cmask; uint64_t last_tnl_conf_seq; }; @@ -254,6 +252,8 @@ struct dp_netdev_port { struct netdev_rxq **rxq; struct ovs_refcount ref_cnt; char *type; /* Port type as requested by user. */ + int latest_requested_n_rxq; /* Latest requested from netdev number + of rx queues. */ }; /* Contained by struct dp_netdev_flow's 'stats' member. */ @@ -866,7 +866,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovsthread_key_create(&dp->per_pmd_key, NULL); dp_netdev_set_nonpmd(dp); - dp->n_dpdk_rxqs = NR_QUEUE; ovs_mutex_lock(&dp->port_mutex); error = do_add_port(dp, name, "internal", ODPP_LOCAL); @@ -1094,7 +1093,8 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, /* There can only be ovs_numa_get_n_cores() pmd threads, * so creates a txq for each, and one extra for the non * pmd threads. 
*/ - error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs); + error = netdev_set_multiq(netdev, n_cores + 1, + netdev_requested_n_rxq(netdev)); if (error && (error != EOPNOTSUPP)) { VLOG_ERR("%s, cannot set multiq", devname); return errno; @@ -1105,6 +1105,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, port->netdev = netdev; port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); port->type = xstrdup(type); + port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev); for (i = 0; i < netdev_n_rxq(netdev); i++) { error = netdev_rxq_open(netdev, &port->rxq[i], i); if (error @@ -2408,32 +2409,42 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) /* Returns true if the configuration for rx queues or cpu mask * is changed. */ static bool -pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask) +pmd_config_changed(const struct dp_netdev *dp, const char *cmask) { - if (dp->n_dpdk_rxqs != rxqs) { - return true; - } else { - if (dp->pmd_cmask != NULL && cmask != NULL) { - return strcmp(dp->pmd_cmask, cmask); - } else { - return (dp->pmd_cmask != NULL || cmask != NULL); + struct dp_netdev_port *port; + + CMAP_FOR_EACH (port, node, &dp->ports) { + struct netdev *netdev = port->netdev; + int requested_n_rxq = netdev_requested_n_rxq(netdev); + if (netdev_is_pmd(netdev) + && port->latest_requested_n_rxq != requested_n_rxq) { + return true; } } + + if (dp->pmd_cmask != NULL && cmask != NULL) { + return strcmp(dp->pmd_cmask, cmask); + } else { + return (dp->pmd_cmask != NULL || cmask != NULL); + } } /* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. 
*/ static int -dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) +dpif_netdev_pmd_set(struct dpif *dpif, const char *cmask) { struct dp_netdev *dp = get_dp_netdev(dpif); - if (pmd_config_changed(dp, n_rxqs, cmask)) { + if (pmd_config_changed(dp, cmask)) { struct dp_netdev_port *port; dp_netdev_destroy_all_pmds(dp); CMAP_FOR_EACH (port, node, &dp->ports) { - if (netdev_is_pmd(port->netdev)) { + struct netdev *netdev = port->netdev; + int requested_n_rxq = netdev_requested_n_rxq(netdev); + if (netdev_is_pmd(port->netdev) + && port->latest_requested_n_rxq != requested_n_rxq) { int i, err; /* Closes the existing 'rxq's. */ @@ -2445,14 +2456,14 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) /* Sets the new rx queue config. */ err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1, - n_rxqs); + requested_n_rxq); if (err && (err != EOPNOTSUPP)) { VLOG_ERR("Failed to set dpdk interface %s rx_queue to:" " %u", netdev_get_name(port->netdev), - n_rxqs); + requested_n_rxq); return err; } - + port->latest_requested_n_rxq = requested_n_rxq; /* If the set_multiq() above succeeds, reopens the 'rxq's. */ port->rxq = xrealloc(port->rxq, sizeof *port->rxq * netdev_n_rxq(port->netdev)); @@ -2461,8 +2472,6 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) } } } - dp->n_dpdk_rxqs = n_rxqs; - /* Reconfigures the cpu mask. */ ovs_numa_set_cpu_mask(cmask); free(dp->pmd_cmask); diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index a9844be9e..fbd370fcc 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -318,11 +318,9 @@ struct dpif_class { int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers); /* If 'dpif' creates its own I/O polling threads, refreshes poll threads - * configuration. 'n_rxqs' configures the number of rx_queues, which - * are distributed among threads. 'cmask' configures the cpu mask - * for setting the polling threads' cpu affinity. 
*/ - int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask); + * configuration. 'cmask' configures the cpu mask for setting the polling + * threads' cpu affinity. */ + int (*poll_threads_set)(struct dpif *dpif, const char *cmask); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ diff --git a/lib/dpif.c b/lib/dpif.c index 38e40bac9..81cbe1181 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1406,13 +1406,12 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall) /* If 'dpif' creates its own I/O polling threads, refreshes poll threads * configuration. */ int -dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask) +dpif_poll_threads_set(struct dpif *dpif, const char *cmask) { int error = 0; if (dpif->dpif_class->poll_threads_set) { - error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask); + error = dpif->dpif_class->poll_threads_set(dpif, cmask); if (error) { log_operation(dpif, "poll_threads_set", error); } diff --git a/lib/dpif.h b/lib/dpif.h index 50174ee95..97d5d064c 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -836,8 +836,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux); int dpif_recv_set(struct dpif *, bool enable); int dpif_handlers_set(struct dpif *, uint32_t n_handlers); -int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, - const char *cmask); +int dpif_poll_threads_set(struct dpif *, const char *cmask); int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, struct ofpbuf *); void dpif_recv_purge(struct dpif *); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index d115de2c7..2a479e2c7 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -614,6 +614,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, netdev_->n_txq = NR_QUEUE; netdev_->n_rxq = NR_QUEUE; + netdev_->requested_n_rxq = NR_QUEUE; netdev->real_n_txq = NR_QUEUE; if (type 
== DPDK_DEV_ETH) { @@ -769,20 +770,35 @@ netdev_dpdk_dealloc(struct netdev *netdev_) } static int -netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) +netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) { - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_); + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); ovs_mutex_lock(&dev->mutex); - smap_add_format(args, "configured_rx_queues", "%d", netdev_->n_rxq); - smap_add_format(args, "requested_tx_queues", "%d", netdev_->n_txq); + smap_add_format(args, "requested_rx_queues", "%d", netdev->requested_n_rxq); + smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); + smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); ovs_mutex_unlock(&dev->mutex); return 0; } +static int +netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", + netdev->requested_n_rxq), 1); + netdev_change_seq_changed(netdev); + ovs_mutex_unlock(&dev->mutex); + + return 0; +} + static int netdev_dpdk_get_numa_id(const struct netdev *netdev_) { @@ -2127,7 +2143,7 @@ unlock_dpdk: DESTRUCT, \ netdev_dpdk_dealloc, \ netdev_dpdk_get_config, \ - NULL, /* netdev_dpdk_set_config */ \ + netdev_dpdk_set_config, \ NULL, /* get_tunnel_config */ \ NULL, /* build header */ \ NULL, /* push header */ \ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index a33bb3b3a..d324ffc92 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -52,9 +52,13 @@ struct netdev { * 'netdev''s flags, features, ethernet address, or carrier changes. */ uint64_t change_seq; - /* The following are protected by 'netdev_mutex' (internal to netdev.c). */ + /* The core netdev code initializes these at netdev construction and only + * provide read-only access to its client. 
Netdev implementations may + * modify them. */ int n_txq; int n_rxq; + /* Number of rx queues requested by user. */ + int requested_n_rxq; int ref_cnt; /* Times this devices was opened. */ struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ diff --git a/lib/netdev.c b/lib/netdev.c index e3b70b175..c250c93ae 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -106,6 +106,12 @@ netdev_n_rxq(const struct netdev *netdev) return netdev->n_rxq; } +int +netdev_requested_n_rxq(const struct netdev *netdev) +{ + return netdev->requested_n_rxq; +} + bool netdev_is_pmd(const struct netdev *netdev) { @@ -376,6 +382,7 @@ netdev_open(const char *name, const char *type, struct netdev **netdevp) /* By default enable one tx and rx queue per netdev. */ netdev->n_txq = netdev->netdev_class->send ? 1 : 0; netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0; + netdev->requested_n_rxq = netdev->n_rxq; list_init(&netdev->saved_flags_list); diff --git a/lib/netdev.h b/lib/netdev.h index 622e2ae2c..8a7f68004 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -142,6 +142,7 @@ bool netdev_is_reserved_name(const char *name); int netdev_n_txq(const struct netdev *netdev); int netdev_n_rxq(const struct netdev *netdev); +int netdev_requested_n_rxq(const struct netdev *netdev); bool netdev_is_pmd(const struct netdev *netdev); /* Open and close. 
*/ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 89e06aa9c..904cc2a5c 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -564,7 +564,7 @@ type_run(const char *type) udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } - dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask); + dpif_poll_threads_set(backer->dpif, pmd_cpu_mask); if (backer->need_revalidate) { struct ofproto_dpif *ofproto; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index b6aac0a4f..3ba97d042 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -482,9 +482,6 @@ extern unsigned ofproto_max_idle; * ofproto-dpif implementation. */ extern size_t n_handlers, n_revalidators; -/* Number of rx queues to be created for each dpdk interface. */ -extern size_t n_dpdk_rxqs; - /* Cpu mask for pmd threads. */ extern char *pmd_cpu_mask; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index bba30ae16..939cb3714 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -337,7 +337,6 @@ unsigned ofproto_flow_limit = OFPROTO_FLOW_LIMIT_DEFAULT; unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; size_t n_handlers, n_revalidators; -size_t n_dpdk_rxqs; char *pmd_cpu_mask; /* Map from datapath name to struct ofproto, for use by unixctl commands. 
*/ @@ -780,12 +779,6 @@ ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, : EOPNOTSUPP); } -void -ofproto_set_n_dpdk_rxqs(int n_rxqs) -{ - n_dpdk_rxqs = MAX(n_rxqs, 0); -} - void ofproto_set_cpu_mask(const char *cmask) { diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 7504027c2..b99f0cd9a 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -316,7 +316,6 @@ int ofproto_set_mcast_snooping(struct ofproto *ofproto, int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, const struct ofproto_mcast_snooping_port_settings *s); void ofproto_set_threads(int n_handlers, int n_revalidators); -void ofproto_set_n_dpdk_rxqs(int n_rxqs); void ofproto_set_cpu_mask(const char *cmask); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 2ff23223f..c6fa44526 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -581,8 +581,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) OFPROTO_FLOW_LIMIT_DEFAULT)); ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle", OFPROTO_MAX_IDLE_DEFAULT)); - ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config, - "n-dpdk-rxqs", 0)); ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask")); ofproto_set_threads( diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index e7c7e3c78..c2ec914df 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -167,15 +167,6 @@

- -

- Specifies the maximum number of rx queues to be created for each dpdk - interface. If not specified or specified to 0, one rx queue will - be created for each dpdk interface by default. -

-
-

Specifies CPU mask for setting the cpu affinity of PMD (Poll @@ -2197,6 +2188,21 @@ + +

+ Only PMD netdevs support these options. +

+ + +

+ Specifies the maximum number of rx queues to be created for a PMD + netdev. If not specified or set to 0, one rx queue will + be created by default. +

+
+ +

Status information about interfaces attached to bridges, updated every