X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=datapath%2Fvport.c;h=7fd98583c78ddcdbebfd940a29d68b8c0b2f4747;hb=c26d70a2452ad0d7a13b72c94641d08001283119;hp=272324430b36254d6e83fca66230e8f88ff0e55f;hpb=c3827f619a38d3d202020838e1f92860046a3dbe;p=cascardo%2Fovs.git diff --git a/datapath/vport.c b/datapath/vport.c index 272324430..7fd98583c 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -1,504 +1,166 @@ /* - * Copyright (c) 2010 Nicira Networks. - * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007-2015 Nicira, Inc. * - * Significant portions of this file may be copied from parts of the Linux - * kernel, by Linus Torvalds and others. + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include #include #include #include +#include #include #include #include #include +#include #include #include -#include - +#include +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "gso.h" #include "vport.h" #include "vport-internal_dev.h" -/* List of statically compiled vport implementations. Don't forget to also - * add yours to the list at the bottom of vport.h. */ -static const struct vport_ops *base_vport_ops_list[] = { - &netdev_vport_ops, - &internal_vport_ops, - &patch_vport_ops, - &gre_vport_ops, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) - &capwap_vport_ops, -#endif -}; - -static const struct vport_ops **vport_ops_list; -static int n_vport_types; +static LIST_HEAD(vport_ops_list); +/* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 -/* Both RTNL lock and vport_mutex need to be held when updating dev_table. - * - * If you use vport_locate and then perform some operations, you need to hold - * one of these locks if you don't want the vport to be deleted out from under - * you. - * - * If you get a reference to a vport through a dp_port, it is protected - * by RCU and you need to hold rcu_read_lock instead when reading. - * - * If multiple locks are taken, the hierarchy is: - * 1. RTNL - * 2. DP - * 3. vport - */ -static DEFINE_MUTEX(vport_mutex); - /** - * vport_lock - acquire vport lock + * ovs_vport_init - initialize vport subsystem * - * Acquire global vport lock. See above comment about locking requirements - * and specific function definitions. May sleep. + * Called at module load time to initialize the vport subsystem. */ -void vport_lock(void) -{ - mutex_lock(&vport_mutex); -} - -/** - * vport_unlock - release vport lock - * - * Release lock acquired with vport_lock. 
- */ -void vport_unlock(void) -{ - mutex_unlock(&vport_mutex); -} - -#define ASSERT_VPORT() \ -do { \ - if (unlikely(!mutex_is_locked(&vport_mutex))) { \ - pr_err("vport lock not held at %s (%d)\n", \ - __FILE__, __LINE__); \ - dump_stack(); \ - } \ -} while (0) - -/** - * vport_init - initialize vport subsystem - * - * Called at module load time to initialize the vport subsystem and any - * compiled in vport types. - */ -int vport_init(void) +int ovs_vport_init(void) { int err; - int i; dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); - if (!dev_table) { - err = -ENOMEM; - goto error; - } - - vport_ops_list = kmalloc(ARRAY_SIZE(base_vport_ops_list) * - sizeof(struct vport_ops *), GFP_KERNEL); - if (!vport_ops_list) { - err = -ENOMEM; - goto error_dev_table; - } - - for (i = 0; i < ARRAY_SIZE(base_vport_ops_list); i++) { - const struct vport_ops *new_ops = base_vport_ops_list[i]; - - if (new_ops->init) - err = new_ops->init(); - else - err = 0; - - if (!err) - vport_ops_list[n_vport_types++] = new_ops; - else if (new_ops->flags & VPORT_F_REQUIRED) { - vport_exit(); - goto error; - } - } - + if (!dev_table) + return -ENOMEM; + + err = lisp_init_module(); + if (err) + goto err_lisp; + err = ipgre_init(); + if (err) + goto err_gre; + err = geneve_init_module(); + if (err) + goto err_geneve; + + err = vxlan_init_module(); + if (err) + goto err_vxlan; + err = ovs_stt_init_module(); + if (err) + goto err_stt; return 0; -error_dev_table: +err_stt: + vxlan_cleanup_module(); +err_vxlan: + geneve_cleanup_module(); +err_geneve: + ipgre_fini(); +err_gre: + lisp_cleanup_module(); +err_lisp: kfree(dev_table); -error: return err; } -static void vport_del_all(void) -{ - int i; - - rtnl_lock(); - vport_lock(); - - for (i = 0; i < VPORT_HASH_BUCKETS; i++) { - struct hlist_head *bucket = &dev_table[i]; - struct vport *vport; - struct hlist_node *node, *next; - - hlist_for_each_entry_safe(vport, node, next, bucket, hash_node) - vport_del(vport); - } - - vport_unlock(); - rtnl_unlock(); -} - /** - * vport_exit - shutdown vport subsystem + * ovs_vport_exit - shutdown vport subsystem * - * Called at module exit time to shutdown the vport subsystem and any - * initialized vport types. + * Called at module exit time to shutdown the vport subsystem. */ -void vport_exit(void) +void ovs_vport_exit(void) { - int i; - - vport_del_all(); - - for (i = 0; i < n_vport_types; i++) { - if (vport_ops_list[i]->exit) - vport_ops_list[i]->exit(); - } - - kfree(vport_ops_list); + ovs_stt_cleanup_module(); + vxlan_cleanup_module(); + geneve_cleanup_module(); + ipgre_fini(); + lisp_cleanup_module(); kfree(dev_table); } -/** - * vport_user_mod - modify existing vport device (for userspace callers) - * - * @uport: New configuration for vport - * - * Modifies an existing device with the specified configuration (which is - * dependent on device type). This function is for userspace callers and - * assumes no locks are held. - */ -int vport_user_mod(const struct odp_port __user *uport) -{ - struct odp_port port; - struct vport *vport; - int err; - - if (copy_from_user(&port, uport, sizeof(port))) - return -EFAULT; - - port.devname[IFNAMSIZ - 1] = '\0'; - - rtnl_lock(); - - vport = vport_locate(port.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - vport_lock(); - err = vport_mod(vport, &port); - vport_unlock(); - -out: - rtnl_unlock(); - return err; -} - -/** - * vport_user_stats_get - retrieve device stats (for userspace callers) - * - * @ustats_req: Stats request parameters. 
- * - * Retrieves transmit, receive, and error stats for the given device. This - * function is for userspace callers and assumes no locks are held. - */ -int vport_user_stats_get(struct odp_vport_stats_req __user *ustats_req) -{ - struct odp_vport_stats_req stats_req; - struct vport *vport; - int err; - - if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req))) - return -EFAULT; - - stats_req.devname[IFNAMSIZ - 1] = '\0'; - - vport_lock(); - - vport = vport_locate(stats_req.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - err = vport_get_stats(vport, &stats_req.stats); - -out: - vport_unlock(); - - if (!err) - if (copy_to_user(ustats_req, &stats_req, sizeof(struct odp_vport_stats_req))) - err = -EFAULT; - - return err; -} - -/** - * vport_user_stats_set - sets offset device stats (for userspace callers) - * - * @ustats_req: Stats set parameters. - * - * Provides a set of transmit, receive, and error stats to be added as an - * offset to the collect data when stats are retreived. Some devices may not - * support setting the stats, in which case the result will always be - * -EOPNOTSUPP. This function is for userspace callers and assumes no locks - * are held. - */ -int vport_user_stats_set(struct odp_vport_stats_req __user *ustats_req) -{ - struct odp_vport_stats_req stats_req; - struct vport *vport; - int err; - - if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req))) - return -EFAULT; - - stats_req.devname[IFNAMSIZ - 1] = '\0'; - - rtnl_lock(); - vport_lock(); - - vport = vport_locate(stats_req.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - err = vport_set_stats(vport, &stats_req.stats); - -out: - vport_unlock(); - rtnl_unlock(); - return err; -} - - -/** - * vport_user_ether_get - retrieve device Ethernet address (for userspace callers) - * - * @uvport_ether: Ethernet address request parameters. - * - * Retrieves the Ethernet address of the given device. This function is for - * userspace callers and assumes no locks are held. - */ -int vport_user_ether_get(struct odp_vport_ether __user *uvport_ether) +static struct hlist_head *hash_bucket(const struct net *net, const char *name) { - struct odp_vport_ether vport_ether; - struct vport *vport; - int err = 0; - - if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) - return -EFAULT; - - vport_ether.devname[IFNAMSIZ - 1] = '\0'; - - vport_lock(); - - vport = vport_locate(vport_ether.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - rcu_read_lock(); - memcpy(vport_ether.ether_addr, vport_get_addr(vport), ETH_ALEN); - rcu_read_unlock(); - -out: - vport_unlock(); - - if (!err) - if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct odp_vport_ether))) - err = -EFAULT; - - return err; -} - -/** - * vport_user_ether_set - set device Ethernet address (for userspace callers) - * - * @uvport_ether: Ethernet address request parameters. - * - * Sets the Ethernet address of the given device. Some devices may not support - * setting the Ethernet address, in which case the result will always be - * -EOPNOTSUPP. This function is for userspace callers and assumes no locks - * are held. 
- */ -int vport_user_ether_set(struct odp_vport_ether __user *uvport_ether) -{ - struct odp_vport_ether vport_ether; - struct vport *vport; - int err; - - if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) - return -EFAULT; - - vport_ether.devname[IFNAMSIZ - 1] = '\0'; - - rtnl_lock(); - vport_lock(); - - vport = vport_locate(vport_ether.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - err = vport_set_addr(vport, vport_ether.ether_addr); - -out: - vport_unlock(); - rtnl_unlock(); - return err; -} - -/** - * vport_user_mtu_get - retrieve device MTU (for userspace callers) - * - * @uvport_mtu: MTU request parameters. - * - * Retrieves the MTU of the given device. This function is for userspace - * callers and assumes no locks are held. - */ -int vport_user_mtu_get(struct odp_vport_mtu __user *uvport_mtu) -{ - struct odp_vport_mtu vport_mtu; - struct vport *vport; - int err = 0; - - if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) - return -EFAULT; - - vport_mtu.devname[IFNAMSIZ - 1] = '\0'; - - vport_lock(); - - vport = vport_locate(vport_mtu.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - vport_mtu.mtu = vport_get_mtu(vport); - -out: - vport_unlock(); - - if (!err) - if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct odp_vport_mtu))) - err = -EFAULT; - - return err; + unsigned int hash = jhash(name, strlen(name), (unsigned long) net); + return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } -/** - * vport_user_mtu_set - set device MTU (for userspace callers) - * - * @uvport_mtu: MTU request parameters. - * - * Sets the MTU of the given device. Some devices may not support setting the - * MTU, in which case the result will always be -EOPNOTSUPP. This function is - * for userspace callers and assumes no locks are held. - */ -int vport_user_mtu_set(struct odp_vport_mtu __user *uvport_mtu) +int __ovs_vport_ops_register(struct vport_ops *ops) { - struct odp_vport_mtu vport_mtu; - struct vport *vport; - int err; - - if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) - return -EFAULT; + int err = -EEXIST; + struct vport_ops *o; - vport_mtu.devname[IFNAMSIZ - 1] = '\0'; + ovs_lock(); + list_for_each_entry(o, &vport_ops_list, list) + if (ops->type == o->type) + goto errout; - rtnl_lock(); - vport_lock(); - - vport = vport_locate(vport_mtu.devname); - if (!vport) { - err = -ENODEV; - goto out; - } - - err = vport_set_mtu(vport, vport_mtu.mtu); - -out: - vport_unlock(); - rtnl_unlock(); + list_add_tail(&ops->list, &vport_ops_list); + err = 0; +errout: + ovs_unlock(); return err; } +EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); -static struct hlist_head *hash_bucket(const char *name) +void ovs_vport_ops_unregister(struct vport_ops *ops) { - unsigned int hash = full_name_hash(name, strlen(name)); - return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; + ovs_lock(); + list_del(&ops->list); + ovs_unlock(); } +EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** - * vport_locate - find a port that has already been created + * ovs_vport_locate - find a port that has already been created * * @name: name of port to find * - * Either RTNL or vport lock must be acquired before calling this function - * and held while using the found port. See the locking comments at the - * top of the file. + * Must be called with ovs or RCU read lock. 
*/ -struct vport *vport_locate(const char *name) +struct vport *ovs_vport_locate(const struct net *net, const char *name) { - struct hlist_head *bucket = hash_bucket(name); + struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - struct hlist_node *node; - - if (unlikely(!mutex_is_locked(&vport_mutex) && !rtnl_is_locked())) { - pr_err("neither RTNL nor vport lock held in vport_locate\n"); - dump_stack(); - } - - rcu_read_lock(); - - hlist_for_each_entry(vport, node, bucket, hash_node) - if (!strcmp(name, vport_get_name(vport))) - goto out; - - vport = NULL; - -out: - rcu_read_unlock(); - return vport; -} -static void register_vport(struct vport *vport) -{ - hlist_add_head(&vport->hash_node, hash_bucket(vport_get_name(vport))); -} + hlist_for_each_entry_rcu(vport, bucket, hash_node) + if (!strcmp(name, ovs_vport_name(vport)) && + net_eq(ovs_dp_get_net(vport->dp), net)) + return vport; -static void unregister_vport(struct vport *vport) -{ - hlist_del(&vport->hash_node); + return NULL; } /** - * vport_alloc - allocate and initialize new vport + * ovs_vport_alloc - allocate and initialize new vport * * @priv_size: Size of private data area to allocate. * @ops: vport device ops @@ -508,7 +170,8 @@ static void unregister_vport(struct vport *vport) * vport_priv(). vports that are no longer needed should be released with * vport_free(). */ -struct vport *vport_alloc(int priv_size, const struct vport_ops *ops) +struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, + const struct vport_parms *parms) { struct vport *vport; size_t alloc_size; @@ -523,568 +186,413 @@ struct vport *vport_alloc(int priv_size, const struct vport_ops *ops) if (!vport) return ERR_PTR(-ENOMEM); + vport->dp = parms->dp; + vport->port_no = parms->port_no; vport->ops = ops; + INIT_HLIST_NODE(&vport->dp_hash_node); - if (vport->ops->flags & VPORT_F_GEN_STATS) { - vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); - if (!vport->percpu_stats) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&vport->stats_lock); + if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { + kfree(vport); + return ERR_PTR(-EINVAL); } return vport; } +EXPORT_SYMBOL_GPL(ovs_vport_alloc); /** - * vport_free - uninitialize and free vport + * ovs_vport_free - uninitialize and free vport * * @vport: vport to free * * Frees a vport allocated with vport_alloc() when it is no longer needed. + * + * The caller must ensure that an RCU grace period has passed since the last + * time @vport was in a datapath. */ -void vport_free(struct vport *vport) +void ovs_vport_free(struct vport *vport) { - if (vport->ops->flags & VPORT_F_GEN_STATS) - free_percpu(vport->percpu_stats); - + /* vport is freed from RCU callback or error path, Therefore + * it is safe to use raw dereference. + */ + kfree(rcu_dereference_raw(vport->upcall_portids)); kfree(vport); } +EXPORT_SYMBOL_GPL(ovs_vport_free); + +static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms) +{ + struct vport_ops *ops; + + list_for_each_entry(ops, &vport_ops_list, list) + if (ops->type == parms->type) + return ops; + + return NULL; +} /** - * vport_add - add vport device (for kernel callers) + * ovs_vport_add - add vport device (for kernel callers) * * @parms: Information about new vport. * - * Creates a new vport with the specified configuration (which is dependent - * on device type). Both RTNL and vport locks must be held. + * Creates a new vport with the specified configuration (which is dependent on + * device type). 
ovs_mutex must be held. */ -struct vport *vport_add(const struct vport_parms *parms) +struct vport *ovs_vport_add(const struct vport_parms *parms) { + struct vport_ops *ops; struct vport *vport; - int err = 0; - int i; - ASSERT_RTNL(); - ASSERT_VPORT(); + ops = ovs_vport_lookup(parms); + if (ops) { + struct hlist_head *bucket; - for (i = 0; i < n_vport_types; i++) { - if (!strcmp(vport_ops_list[i]->type, parms->type)) { - vport = vport_ops_list[i]->create(parms); - if (IS_ERR(vport)) { - err = PTR_ERR(vport); - goto out; - } + if (!try_module_get(ops->owner)) + return ERR_PTR(-EAFNOSUPPORT); - register_vport(vport); + vport = ops->create(parms); + if (IS_ERR(vport)) { + module_put(ops->owner); return vport; } + + bucket = hash_bucket(ovs_dp_get_net(vport->dp), + ovs_vport_name(vport)); + hlist_add_head_rcu(&vport->hash_node, bucket); + return vport; } - err = -EAFNOSUPPORT; + /* Unlock to attempt module load and return -EAGAIN if load + * was successful as we need to restart the port addition + * workflow. + */ + ovs_unlock(); + request_module("vport-type-%d", parms->type); + ovs_lock(); -out: - return ERR_PTR(err); + if (!ovs_vport_lookup(parms)) + return ERR_PTR(-EAFNOSUPPORT); + else + return ERR_PTR(-EAGAIN); } /** - * vport_mod - modify existing vport device (for kernel callers) + * ovs_vport_set_options - modify existing vport device (for kernel callers) * * @vport: vport to modify. - * @port: New configuration. + * @options: New configuration. * * Modifies an existing device with the specified configuration (which is - * dependent on device type). Both RTNL and vport locks must be held. + * dependent on device type). ovs_mutex must be held. */ -int vport_mod(struct vport *vport, struct odp_port *port) +int ovs_vport_set_options(struct vport *vport, struct nlattr *options) { - ASSERT_RTNL(); - ASSERT_VPORT(); - - if (vport->ops->modify) - return vport->ops->modify(vport, port); - else + if (!vport->ops->set_options) return -EOPNOTSUPP; + return vport->ops->set_options(vport, options); } /** - * vport_del - delete existing vport device (for kernel callers) + * ovs_vport_del - delete existing vport device * * @vport: vport to delete. * - * Deletes the specified device. The device must not be currently attached to - * a datapath. It is possible to fail for reasons such as lack of memory. - * Both RTNL and vport locks must be held. + * Detaches @vport from its datapath and destroys it. ovs_mutex must be + * held. */ -int vport_del(struct vport *vport) +void ovs_vport_del(struct vport *vport) { - ASSERT_RTNL(); - ASSERT_VPORT(); - BUG_ON(vport_get_dp_port(vport)); - - unregister_vport(vport); + ASSERT_OVSL(); - return vport->ops->destroy(vport); + hlist_del_rcu(&vport->hash_node); + module_put(vport->ops->owner); + vport->ops->destroy(vport); } /** - * vport_attach - attach a vport to a datapath + * ovs_vport_get_stats - retrieve device stats + * + * @vport: vport from which to retrieve the stats + * @stats: location to store stats * - * @vport: vport to attach. - * @dp_port: Datapath port to attach the vport to. + * Retrieves transmit, receive, and error stats for the given device. * - * Attaches a vport to a specific datapath so that packets may be exchanged. - * Both ports must be currently unattached. @dp_port must be successfully - * attached to a vport before it is connected to a datapath and must not be - * modified while connected. RTNL lock and the appropriate DP mutex must be held. + * Must be called with ovs_mutex or rcu_read_lock. 
*/ -int vport_attach(struct vport *vport, struct dp_port *dp_port) +void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { - ASSERT_RTNL(); + const struct rtnl_link_stats64 *dev_stats; + struct rtnl_link_stats64 temp; - if (vport_get_dp_port(vport)) - return -EBUSY; + dev_stats = dev_get_stats(vport->dev, &temp); + stats->rx_errors = dev_stats->rx_errors; + stats->tx_errors = dev_stats->tx_errors; + stats->tx_dropped = dev_stats->tx_dropped; + stats->rx_dropped = dev_stats->rx_dropped; - if (vport->ops->attach) { - int err; - - err = vport->ops->attach(vport); - if (err) - return err; - } - - rcu_assign_pointer(vport->dp_port, dp_port); - - return 0; + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_packets; + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = dev_stats->tx_packets; } /** - * vport_detach - detach a vport from a datapath + * ovs_vport_get_options - retrieve device options + * + * @vport: vport from which to retrieve the options. + * @skb: sk_buff where options should be appended. * - * @vport: vport to detach. + * Retrieves the configuration of the given device, appending an + * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested + * vport-specific attributes to @skb. * - * Detaches a vport from a datapath. May fail for a variety of reasons, - * including lack of memory. RTNL lock and the appropriate DP mutex must be held. + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another + * negative error code if a real error occurred. If an error occurs, @skb is + * left unmodified. + * + * Must be called with ovs_mutex or rcu_read_lock. */ -int vport_detach(struct vport *vport) +int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { - struct dp_port *dp_port; + struct nlattr *nla; + int err; - ASSERT_RTNL(); + if (!vport->ops->get_options) + return 0; - dp_port = vport_get_dp_port(vport); - if (!dp_port) - return -EINVAL; + nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); + if (!nla) + return -EMSGSIZE; - rcu_assign_pointer(vport->dp_port, NULL); + err = vport->ops->get_options(vport, skb); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } - if (vport->ops->detach) - return vport->ops->detach(vport); - else - return 0; + nla_nest_end(skb, nla); + return 0; +} + +static void vport_portids_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct vport_portids *ids = container_of(rcu, struct vport_portids, + rcu); + + kfree(ids); } /** - * vport_set_mtu - set device MTU (for kernel callers) + * ovs_vport_set_upcall_portids - set upcall portids of @vport. + * + * @vport: vport to modify. + * @ids: new configuration, an array of port ids. * - * @vport: vport on which to set MTU. - * @mtu: New MTU. + * Sets the vport's upcall_portids to @ids. * - * Sets the MTU of the given device. Some devices may not support setting the - * MTU, in which case the result will always be -EOPNOTSUPP. RTNL lock must - * be held. + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed + * as an array of U32. + * + * Must be called with ovs_mutex. 
*/ -int vport_set_mtu(struct vport *vport, int mtu) +int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) { - ASSERT_RTNL(); + struct vport_portids *old, *vport_portids; - if (mtu < 68) + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) return -EINVAL; - if (vport->ops->set_mtu) { - int ret; + old = ovsl_dereference(vport->upcall_portids); - ret = vport->ops->set_mtu(vport, mtu); + vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), + GFP_KERNEL); + if (!vport_portids) + return -ENOMEM; - if (!ret && !is_internal_vport(vport)) { - struct dp_port *dp_port = vport_get_dp_port(vport); + vport_portids->n_ids = nla_len(ids) / sizeof(u32); + vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); + nla_memcpy(vport_portids->ids, ids, nla_len(ids)); - if (dp_port) - set_internal_devs_mtu(dp_port->dp); - } + rcu_assign_pointer(vport->upcall_portids, vport_portids); - return ret; - } else - return -EOPNOTSUPP; + if (old) + call_rcu(&old->rcu, vport_portids_destroy_rcu_cb); + return 0; } /** - * vport_set_addr - set device Ethernet address (for kernel callers) + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. * - * @vport: vport on which to set Ethernet address. - * @addr: New address. + * @vport: vport from which to retrieve the portids. + * @skb: sk_buff where portids should be appended. * - * Sets the Ethernet address of the given device. Some devices may not support - * setting the Ethernet address, in which case the result will always be - * -EOPNOTSUPP. RTNL lock must be held. - */ -int vport_set_addr(struct vport *vport, const unsigned char *addr) -{ - ASSERT_RTNL(); - - if (!is_valid_ether_addr(addr)) - return -EADDRNOTAVAIL; - - if (vport->ops->set_addr) - return vport->ops->set_addr(vport, addr); - else - return -EOPNOTSUPP; -} - -/** - * vport_set_stats - sets offset device stats (for kernel callers) + * Retrieves the configuration of the given vport, appending the + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall + * portids to @skb. * - * @vport: vport on which to set stats - * @stats: stats to set - * - * Provides a set of transmit, receive, and error stats to be added as an - * offset to the collect data when stats are retreived. Some devices may not - * support setting the stats, in which case the result will always be - * -EOPNOTSUPP. RTNL lock must be held. + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. + * If an error occurs, @skb is left unmodified. Must be called with + * ovs_mutex or rcu_read_lock. */ -int vport_set_stats(struct vport *vport, struct rtnl_link_stats64 *stats) +int ovs_vport_get_upcall_portids(const struct vport *vport, + struct sk_buff *skb) { - ASSERT_RTNL(); + struct vport_portids *ids; - if (vport->ops->flags & VPORT_F_GEN_STATS) { - spin_lock_bh(&vport->stats_lock); - vport->offset_stats = *stats; - spin_unlock_bh(&vport->stats_lock); + ids = rcu_dereference_ovsl(vport->upcall_portids); - return 0; - } else if (vport->ops->set_stats) - return vport->ops->set_stats(vport, stats); + if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) + return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, + ids->n_ids * sizeof(u32), (void *)ids->ids); else - return -EOPNOTSUPP; + return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); } /** - * vport_get_name - retrieve device name + * ovs_vport_find_upcall_portid - find the upcall portid to send upcall. * - * @vport: vport from which to retrieve the name. + * @vport: vport from which the missed packet is received. 
+ * @skb: skb that the missed packet was received. * - * Retrieves the name of the given device. Either RTNL lock or rcu_read_lock - * must be held for the entire duration that the name is in use. - */ -const char *vport_get_name(const struct vport *vport) -{ - return vport->ops->get_name(vport); -} - -/** - * vport_get_type - retrieve device type - * - * @vport: vport from which to retrieve the type. + * Uses the skb_get_hash() to select the upcall portid to send the + * upcall. * - * Retrieves the type of the given device. Either RTNL lock or rcu_read_lock - * must be held for the entire duration that the type is in use. + * Returns the portid of the target socket. Must be called with rcu_read_lock. */ -const char *vport_get_type(const struct vport *vport) +u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { - return vport->ops->type; -} + struct vport_portids *ids; + u32 ids_index; + u32 hash; -/** - * vport_get_addr - retrieve device Ethernet address (for kernel callers) - * - * @vport: vport from which to retrieve the Ethernet address. - * - * Retrieves the Ethernet address of the given device. Either RTNL lock or - * rcu_read_lock must be held for the entire duration that the Ethernet address - * is in use. - */ -const unsigned char *vport_get_addr(const struct vport *vport) -{ - return vport->ops->get_addr(vport); -} + ids = rcu_dereference(vport->upcall_portids); -/** - * vport_get_dp_port - retrieve attached datapath port - * - * @vport: vport from which to retrieve the datapath port. - * - * Retrieves the attached datapath port or null if not attached. Either RTNL - * lock or rcu_read_lock must be held for the entire duration that the datapath - * port is being accessed. - */ -struct dp_port *vport_get_dp_port(const struct vport *vport) -{ - return rcu_dereference(vport->dp_port); -} + if (ids->n_ids == 1 && ids->ids[0] == 0) + return 0; -/** - * vport_get_kobj - retrieve associated kobj - * - * @vport: vport from which to retrieve the associated kobj - * - * Retrieves the associated kobj or null if no kobj. The returned kobj is - * valid for as long as the vport exists. - */ -struct kobject *vport_get_kobj(const struct vport *vport) -{ - if (vport->ops->get_kobj) - return vport->ops->get_kobj(vport); - else - return NULL; + hash = skb_get_hash(skb); + ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); + return ids->ids[ids_index]; } /** - * vport_get_stats - retrieve device stats (for kernel callers) + * ovs_vport_receive - pass up received packet to the datapath for processing * - * @vport: vport from which to retrieve the stats - * @stats: location to store stats + * @vport: vport that received the packet + * @skb: skb that was received + * @tun_key: tunnel (if any) that carried packet * - * Retrieves transmit, receive, and error stats for the given device. + * Must be called with rcu_read_lock. The packet cannot be shared and + * skb->data should point to the Ethernet header. 
*/ -int vport_get_stats(struct vport *vport, struct rtnl_link_stats64 *stats) +int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + const struct ip_tunnel_info *tun_info) { - struct rtnl_link_stats64 dev_stats; - struct rtnl_link_stats64 *dev_statsp = NULL; - int err; + struct sw_flow_key key; + int error; - if (vport->ops->get_stats) { - if (vport->ops->flags & VPORT_F_GEN_STATS) - dev_statsp = &dev_stats; - else - dev_statsp = stats; + OVS_CB(skb)->input_vport = vport; + OVS_CB(skb)->mru = 0; + if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { + u32 mark; - rcu_read_lock(); - err = vport->ops->get_stats(vport, dev_statsp); - rcu_read_unlock(); - - if (err) - goto out; + mark = skb->mark; + skb_scrub_packet(skb, true); + skb->mark = mark; + tun_info = NULL; } - if (vport->ops->flags & VPORT_F_GEN_STATS) { - int i; - - /* We potentially have 3 sources of stats that need to be - * combined: those we have collected (split into err_stats and - * percpu_stats), offset_stats from set_stats(), and device - * error stats from get_stats() (for errors that happen - * downstream and therefore aren't reported through our - * vport_record_error() function). */ - - spin_lock_bh(&vport->stats_lock); - - *stats = vport->offset_stats; - - stats->rx_errors += vport->err_stats.rx_errors; - stats->tx_errors += vport->err_stats.tx_errors; - stats->tx_dropped += vport->err_stats.tx_dropped; - stats->rx_dropped += vport->err_stats.rx_dropped; - - spin_unlock_bh(&vport->stats_lock); - - if (dev_statsp) { - stats->rx_packets += dev_statsp->rx_packets; - stats->tx_packets += dev_statsp->tx_packets; - stats->rx_bytes += dev_statsp->rx_bytes; - stats->tx_bytes += dev_statsp->tx_bytes; - stats->rx_errors += dev_statsp->rx_errors; - stats->tx_errors += dev_statsp->tx_errors; - stats->rx_dropped += dev_statsp->rx_dropped; - stats->tx_dropped += dev_statsp->tx_dropped; - stats->multicast += dev_statsp->multicast; - stats->collisions += dev_statsp->collisions; - stats->rx_length_errors += dev_statsp->rx_length_errors; - stats->rx_over_errors += dev_statsp->rx_over_errors; - stats->rx_crc_errors += dev_statsp->rx_crc_errors; - stats->rx_frame_errors += dev_statsp->rx_frame_errors; - stats->rx_fifo_errors += dev_statsp->rx_fifo_errors; - stats->rx_missed_errors += dev_statsp->rx_missed_errors; - stats->tx_aborted_errors += dev_statsp->tx_aborted_errors; - stats->tx_carrier_errors += dev_statsp->tx_carrier_errors; - stats->tx_fifo_errors += dev_statsp->tx_fifo_errors; - stats->tx_heartbeat_errors += dev_statsp->tx_heartbeat_errors; - stats->tx_window_errors += dev_statsp->tx_window_errors; - stats->rx_compressed += dev_statsp->rx_compressed; - stats->tx_compressed += dev_statsp->tx_compressed; - } - - for_each_possible_cpu(i) { - const struct vport_percpu_stats *percpu_stats; - struct vport_percpu_stats local_stats; - unsigned seqcount; - - percpu_stats = per_cpu_ptr(vport->percpu_stats, i); - - do { - seqcount = read_seqcount_begin(&percpu_stats->seqlock); - local_stats = *percpu_stats; - } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount)); - - stats->rx_bytes += local_stats.rx_bytes; - stats->rx_packets += local_stats.rx_packets; - stats->tx_bytes += local_stats.tx_bytes; - stats->tx_packets += local_stats.tx_packets; - } - - err = 0; - } else - err = -EOPNOTSUPP; - -out: - return err; -} - -/** - * vport_get_flags - retrieve device flags - * - * @vport: vport from which to retrieve the flags - * - * Retrieves the flags of the given device. 
Either RTNL lock or rcu_read_lock - * must be held. - */ -unsigned vport_get_flags(const struct vport *vport) -{ - return vport->ops->get_dev_flags(vport); + ovs_skb_init_inner_protocol(skb); + skb_clear_ovs_gso_cb(skb); + /* Extract flow from 'skb' into 'key'. */ + error = ovs_flow_key_extract(tun_info, skb, &key); + if (unlikely(error)) { + kfree_skb(skb); + return error; + } + ovs_dp_process_packet(skb, &key); + return 0; } +EXPORT_SYMBOL_GPL(ovs_vport_receive); -/** - * vport_get_flags - check whether device is running - * - * @vport: vport on which to check status. - * - * Checks whether the given device is running. Either RTNL lock or - * rcu_read_lock must be held. - */ -int vport_is_running(const struct vport *vport) +static void free_vport_rcu(struct rcu_head *rcu) { - return vport->ops->is_running(vport); -} + struct vport *vport = container_of(rcu, struct vport, rcu); -/** - * vport_get_flags - retrieve device operating state - * - * @vport: vport from which to check status - * - * Retrieves the RFC2863 operstate of the given device. Either RTNL lock or - * rcu_read_lock must be held. - */ -unsigned char vport_get_operstate(const struct vport *vport) -{ - return vport->ops->get_operstate(vport); + ovs_vport_free(vport); } -/** - * vport_get_ifindex - retrieve device system interface index - * - * @vport: vport from which to retrieve index - * - * Retrieves the system interface index of the given device. Not all devices - * will have system indexes, in which case the index of the datapath local - * port is returned. Returns a negative index on error. Either RTNL lock or - * rcu_read_lock must be held. - */ -int vport_get_ifindex(const struct vport *vport) +void ovs_vport_deferred_free(struct vport *vport) { - const struct dp_port *dp_port; - - if (vport->ops->get_ifindex) - return vport->ops->get_ifindex(vport); - - /* If we don't actually have an ifindex, use the local port's. - * Userspace doesn't check it anyways. */ - dp_port = vport_get_dp_port(vport); - if (!dp_port) - return -EAGAIN; + if (!vport) + return; - return vport_get_ifindex(dp_port->dp->ports[ODPP_LOCAL]->vport); + call_rcu(&vport->rcu, free_vport_rcu); } +EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -/** - * vport_get_iflink - retrieve device system link index - * - * @vport: vport from which to retrieve index - * - * Retrieves the system link index of the given device. The link is the index - * of the interface on which the packet will actually be sent. In most cases - * this is the same as the ifindex but may be different for tunnel devices. - * Returns a negative index on error. Either RTNL lock or rcu_read_lock must - * be held. - */ -int vport_get_iflink(const struct vport *vport) +int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, + struct net *net, + struct sk_buff *skb, + u8 ipproto, + __be16 tp_src, + __be16 tp_dst) { - if (vport->ops->get_iflink) - return vport->ops->get_iflink(vport); + struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; + struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); + const struct ip_tunnel_key *tun_key; + u32 skb_mark = skb->mark; + struct rtable *rt; + struct flowi4 fl; - /* If we don't have an iflink, use the ifindex. In most cases they - * are the same. 
*/ - return vport_get_ifindex(vport); -} + if (unlikely(!tun_info)) + return -EINVAL; + if (ip_tunnel_info_af(tun_info) != AF_INET) + return -EINVAL; -/** - * vport_get_mtu - retrieve device MTU (for kernel callers) - * - * @vport: vport from which to retrieve MTU - * - * Retrieves the MTU of the given device. Either RTNL lock or rcu_read_lock - * must be held. - */ -int vport_get_mtu(const struct vport *vport) -{ - return vport->ops->get_mtu(vport); + tun_key = &tun_info->key; + + /* Route lookup to get srouce IP address. + * The process may need to be changed if the corresponding process + * in vports ops changed. + */ + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + + /* Generate egress_tun_info based on tun_info, + * saddr, tp_src and tp_dst + */ + ip_tunnel_key_init(&egress_tun_info->key, + fl.saddr, tun_key->u.ipv4.dst, + tun_key->tos, + tun_key->ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags); + egress_tun_info->options_len = tun_info->options_len; + egress_tun_info->mode = tun_info->mode; + upcall->egress_tun_opts = ip_tunnel_info_opts(tun_info); + return 0; } +EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); -/** - * vport_receive - pass up received packet to the datapath for processing - * - * @vport: vport that received the packet - * @skb: skb that was received - * - * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. - */ -void vport_receive(struct vport *vport, struct sk_buff *skb) +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct dp_upcall_info *upcall) { - struct dp_port *dp_port = vport_get_dp_port(vport); - - if (!dp_port) { - vport_record_error(vport, VPORT_E_RX_DROPPED); - kfree_skb(skb); - - return; - } - - if (vport->ops->flags & VPORT_F_GEN_STATS) { - struct vport_percpu_stats *stats; - - local_bh_disable(); - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); - - write_seqcount_begin(&stats->seqlock); - stats->rx_packets++; - stats->rx_bytes += skb->len; - write_seqcount_end(&stats->seqlock); - - local_bh_enable(); - } - - if (!(vport->ops->flags & VPORT_F_FLOW)) - OVS_CB(skb)->flow = NULL; - - if (!(vport->ops->flags & VPORT_F_TUN_ID)) - OVS_CB(skb)->tun_id = 0; + /* get_egress_tun_info() is only implemented on tunnel ports. */ + if (unlikely(!vport->ops->get_egress_tun_info)) + return -EINVAL; - dp_process_received_packet(dp_port, skb); + return vport->ops->get_egress_tun_info(vport, skb, upcall); } -static inline unsigned packet_length(const struct sk_buff *skb) +static unsigned int packet_length(const struct sk_buff *skb) { - unsigned length = skb->len - ETH_HLEN; + unsigned int length = skb->len - ETH_HLEN; if (skb->protocol == htons(ETH_P_8021Q)) length -= VLAN_HLEN; @@ -1092,86 +600,22 @@ static inline unsigned packet_length(const struct sk_buff *skb) return length; } -/** - * vport_send - send a packet on a device - * - * @vport: vport on which to send the packet - * @skb: skb to send - * - * Sends the given packet and returns the length of data sent. Either RTNL - * lock or rcu_read_lock must be held. 
- */ -int vport_send(struct vport *vport, struct sk_buff *skb) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb) { - int mtu; - int sent; + int mtu = vport->dev->mtu; - mtu = vport_get_mtu(vport); if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { - if (net_ratelimit()) - pr_warn("%s: dropped over-mtu packet: %d > %d\n", - dp_name(vport_get_dp_port(vport)->dp), - packet_length(skb), mtu); - goto error; + net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", + vport->dev->name, + packet_length(skb), mtu); + vport->dev->stats.tx_errors++; + goto drop; } - sent = vport->ops->send(vport, skb); - - if (vport->ops->flags & VPORT_F_GEN_STATS && sent > 0) { - struct vport_percpu_stats *stats; - - local_bh_disable(); - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); - - write_seqcount_begin(&stats->seqlock); - stats->tx_packets++; - stats->tx_bytes += sent; - write_seqcount_end(&stats->seqlock); - - local_bh_enable(); - } + skb->dev = vport->dev; + vport->ops->send(skb); + return; - return sent; - -error: +drop: kfree_skb(skb); - vport_record_error(vport, VPORT_E_TX_DROPPED); - return 0; -} - -/** - * vport_record_error - indicate device error to generic stats layer - * - * @vport: vport that encountered the error - * @err_type: one of enum vport_err_type types to indicate the error type - * - * If using the vport generic stats layer indicate that an error of the given - * type has occured. - */ -void vport_record_error(struct vport *vport, enum vport_err_type err_type) -{ - if (vport->ops->flags & VPORT_F_GEN_STATS) { - - spin_lock_bh(&vport->stats_lock); - - switch (err_type) { - case VPORT_E_RX_DROPPED: - vport->err_stats.rx_dropped++; - break; - - case VPORT_E_RX_ERROR: - vport->err_stats.rx_errors++; - break; - - case VPORT_E_TX_DROPPED: - vport->err_stats.tx_dropped++; - break; - - case VPORT_E_TX_ERROR: - vport->err_stats.tx_errors++; - break; - }; - - spin_unlock_bh(&vport->stats_lock); - } }
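
Note: the statically compiled base_vport_ops_list[] is gone; tunnel vports now register their struct vport_ops at module load time via __ovs_vport_ops_register() and drop it with ovs_vport_ops_unregister(). Below is a minimal sketch of such a module, not code from this tree: OVS_VPORT_TYPE_FOO, the foo_* callbacks and the alias number are placeholders, and only fields actually referenced in vport.c above (.type, .create, .destroy, .owner) are filled in.

    #include <linux/module.h>
    #include "vport.h"

    static struct vport *foo_create(const struct vport_parms *parms);
    static void foo_destroy(struct vport *vport);

    /* Illustrative only: type value and callbacks are hypothetical; the
     * remaining callbacks (.send etc.) are omitted for brevity.
     */
    static struct vport_ops ovs_foo_vport_ops = {
            .type    = OVS_VPORT_TYPE_FOO,
            .create  = foo_create,
            .destroy = foo_destroy,
            .owner   = THIS_MODULE,
    };

    static int __init ovs_foo_init(void)
    {
            return __ovs_vport_ops_register(&ovs_foo_vport_ops);
    }
    module_init(ovs_foo_init);

    static void __exit ovs_foo_exit(void)
    {
            ovs_vport_ops_unregister(&ovs_foo_vport_ops);
    }
    module_exit(ovs_foo_exit);

    /* Lets request_module("vport-type-%d", ...) in ovs_vport_add() auto-load
     * this module; the number must match the (placeholder) type above.
     */
    MODULE_ALIAS("vport-type-100");

ovs_vport_add() takes a module reference (try_module_get(ops->owner)) for every port it creates and releases it in ovs_vport_del(), so a vport module cannot be unloaded while ports of its type still exist.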
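
Note: when no registered ops matches, ovs_vport_add() drops ovs_mutex, request_module()s the missing vport type and returns -EAGAIN if the type appeared, because locks were released and the whole port-creation step must be restarted. A hypothetical caller (the real retry lives in the datapath code that calls ovs_vport_add(), not in this file) would retry on that specific error:

    /* Hypothetical wrapper; caller holds ovs_mutex, as ovs_vport_add()
     * requires.
     */
    static struct vport *add_vport_retry(const struct vport_parms *parms)
    {
            struct vport *vport = ovs_vport_add(parms);

            if (IS_ERR(vport) && PTR_ERR(vport) == -EAGAIN)
                    vport = ovs_vport_add(parms);   /* ops list now populated */
            return vport;
    }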
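
Note: ovs_vport_set_upcall_portids() stores an array of Netlink portids (OVS_VPORT_ATTR_UPCALL_PID) and ovs_vport_find_upcall_portid() picks one per packet from skb_get_hash(), so upcalls for different flows can be spread across several userspace sockets. The reciprocal_value()/reciprocal_divide() pair merely replaces the division by n_ids with a multiply-and-shift; the index computation is equivalent to a plain modulo, as in this untuned restatement:

    /* Same result as 'hash - n_ids * reciprocal_divide(hash, rn_ids)' in
     * ovs_vport_find_upcall_portid(): a stable index into ids[] per flow hash.
     */
    static u32 upcall_portid_index(u32 hash, u32 n_ids)
    {
            return hash % n_ids;
    }

A single id of 0 (the n_ids == 1 && ids[0] == 0 check) short-circuits the hashing and returns portid 0, which the caller treats as "no upcall socket" for that port.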
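
Note: ovs_vport_receive() is now the single entry point from a port's receive path into the datapath; it records the input vport, scrubs packets that crossed network namespaces, extracts the flow key and hands the skb to ovs_dp_process_packet(). A rough sketch of a caller, assuming the frame arrives with the Ethernet header already pulled (the foo_port_rx name is made up and any checksum fixup after skb_push() is omitted):

    static void foo_port_rx(struct vport *vport, struct sk_buff *skb)
    {
            /* ovs_vport_receive() expects skb->data at the Ethernet header. */
            skb_push(skb, ETH_HLEN);

            /* On error the skb has already been freed inside the call. */
            ovs_vport_receive(vport, skb, NULL);
    }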