X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=datapath%2Fvport.c;h=7fd98583c78ddcdbebfd940a29d68b8c0b2f4747;hb=f76def2592cc5cb449a3360430ee9cc0f208765d;hp=5a7067b2fb0b38d3900925efb1064d20110b67fd;hpb=efd8a18e8d57a6129bb40fcd25b19cb18c4a3447;p=cascardo%2Fovs.git diff --git a/datapath/vport.c b/datapath/vport.c index 5a7067b2f..7fd98583c 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2015 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -28,31 +27,21 @@ #include #include #include -#include +#include +#include #include +#include +#include +#include +#include +#include #include "datapath.h" #include "gso.h" #include "vport.h" #include "vport-internal_dev.h" -static void ovs_vport_record_error(struct vport *, - enum vport_err_type err_type); - -/* List of statically compiled vport implementations. Don't forget to also - * add yours to the list at the bottom of vport.h. - */ -static const struct vport_ops *vport_ops_list[] = { - &ovs_netdev_vport_ops, - &ovs_internal_vport_ops, - &ovs_geneve_vport_ops, -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) - &ovs_gre_vport_ops, - &ovs_gre64_vport_ops, -#endif - &ovs_vxlan_vport_ops, - &ovs_lisp_vport_ops, -}; +static LIST_HEAD(vport_ops_list); /* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; @@ -65,12 +54,42 @@ static struct hlist_head *dev_table; */ int ovs_vport_init(void) { + int err; + dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dev_table) return -ENOMEM; + err = lisp_init_module(); + if (err) + goto err_lisp; + err = ipgre_init(); + if (err) + goto err_gre; + err = geneve_init_module(); + if (err) + goto err_geneve; + + err = vxlan_init_module(); + if (err) + goto err_vxlan; + err = ovs_stt_init_module(); + if (err) + goto err_stt; return 0; + +err_stt: + vxlan_cleanup_module(); +err_vxlan: + geneve_cleanup_module(); +err_geneve: + ipgre_fini(); +err_gre: + lisp_cleanup_module(); +err_lisp: + kfree(dev_table); + return err; } /** @@ -80,6 +99,11 @@ int ovs_vport_init(void) */ void ovs_vport_exit(void) { + ovs_stt_cleanup_module(); + vxlan_cleanup_module(); + geneve_cleanup_module(); + ipgre_fini(); + lisp_cleanup_module(); kfree(dev_table); } @@ -89,6 +113,32 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name) return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } +int __ovs_vport_ops_register(struct vport_ops *ops) +{ + int err = -EEXIST; + struct vport_ops *o; + + ovs_lock(); + list_for_each_entry(o, &vport_ops_list, list) + if (ops->type == o->type) + goto errout; + + list_add_tail(&ops->list, &vport_ops_list); + err = 0; +errout: + ovs_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); + +void ovs_vport_ops_unregister(struct vport_ops *ops) +{ + ovs_lock(); + list_del(&ops->list); + ovs_unlock(); +} +EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); + /** * ovs_vport_locate - find a port that has already been created * @@ -102,7 +152,7 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name) struct vport *vport; hlist_for_each_entry_rcu(vport, bucket, hash_node) - if (!strcmp(name, vport->ops->get_name(vport)) && + if (!strcmp(name, ovs_vport_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; @@ -118,10 +168,10 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name) * Allocate and initialize a new vport defined by @ops. The vport will contain * a private data area of size @priv_size that can be accessed using * vport_priv(). vports that are no longer needed should be released with - * ovs_vport_free(). + * vport_free(). */ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, - const struct vport_parms *parms) + const struct vport_parms *parms) { struct vport *vport; size_t alloc_size; @@ -146,31 +196,40 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, return ERR_PTR(-EINVAL); } - vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!vport->percpu_stats) { - kfree(vport); - return ERR_PTR(-ENOMEM); - } - return vport; } +EXPORT_SYMBOL_GPL(ovs_vport_alloc); /** * ovs_vport_free - uninitialize and free vport * * @vport: vport to free * - * Frees a vport allocated with ovs_vport_alloc() when it is no longer needed. + * Frees a vport allocated with vport_alloc() when it is no longer needed. * * The caller must ensure that an RCU grace period has passed since the last * time @vport was in a datapath. */ void ovs_vport_free(struct vport *vport) { + /* vport is freed from RCU callback or error path, Therefore + * it is safe to use raw dereference. + */ kfree(rcu_dereference_raw(vport->upcall_portids)); - free_percpu(vport->percpu_stats); kfree(vport); } +EXPORT_SYMBOL_GPL(ovs_vport_free); + +static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms) +{ + struct vport_ops *ops; + + list_for_each_entry(ops, &vport_ops_list, list) + if (ops->type == parms->type) + return ops; + + return NULL; +} /** * ovs_vport_add - add vport device (for kernel callers) @@ -182,31 +241,40 @@ void ovs_vport_free(struct vport *vport) */ struct vport *ovs_vport_add(const struct vport_parms *parms) { + struct vport_ops *ops; struct vport *vport; - int err = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { - if (vport_ops_list[i]->type == parms->type) { - struct hlist_head *bucket; - - vport = vport_ops_list[i]->create(parms); - if (IS_ERR(vport)) { - err = PTR_ERR(vport); - goto out; - } - - bucket = hash_bucket(ovs_dp_get_net(vport->dp), - vport->ops->get_name(vport)); - hlist_add_head_rcu(&vport->hash_node, bucket); + + ops = ovs_vport_lookup(parms); + if (ops) { + struct hlist_head *bucket; + + if (!try_module_get(ops->owner)) + return ERR_PTR(-EAFNOSUPPORT); + + vport = ops->create(parms); + if (IS_ERR(vport)) { + module_put(ops->owner); return vport; } + + bucket = hash_bucket(ovs_dp_get_net(vport->dp), + ovs_vport_name(vport)); + hlist_add_head_rcu(&vport->hash_node, bucket); + return vport; } - err = -EAFNOSUPPORT; + /* Unlock to attempt module load and return -EAGAIN if load + * was successful as we need to restart the port addition + * workflow. + */ + ovs_unlock(); + request_module("vport-type-%d", parms->type); + ovs_lock(); -out: - return ERR_PTR(err); + if (!ovs_vport_lookup(parms)) + return ERR_PTR(-EAFNOSUPPORT); + else + return ERR_PTR(-EAGAIN); } /** @@ -230,14 +298,15 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) * * @vport: vport to delete. * - * Detaches @vport from its datapath and destroys it. It is possible to fail - * for reasons such as lack of memory. ovs_mutex must be held. + * Detaches @vport from its datapath and destroys it. ovs_mutex must be + * held. */ void ovs_vport_del(struct vport *vport) { ASSERT_OVSL(); hlist_del_rcu(&vport->hash_node); + module_put(vport->ops->owner); vport->ops->destroy(vport); } @@ -253,45 +322,19 @@ void ovs_vport_del(struct vport *vport) */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { - int i; - - /* We potentially have two surces of stats that need to be - * combined: those we have collected (split into err_stats and - * percpu_stats), and device error stats from netdev->get_stats() - * (for errors that happen downstream and therefore aren't - * reported through our vport_record_error() function). - * Stats from first source are reported by ovs over - * OVS_VPORT_ATTR_STATS. - * netdev-stats can be directly read over netlink-ioctl. - */ - - stats->rx_errors = atomic_long_read(&vport->err_stats.rx_errors); - stats->tx_errors = atomic_long_read(&vport->err_stats.tx_errors); - stats->tx_dropped = atomic_long_read(&vport->err_stats.tx_dropped); - stats->rx_dropped = atomic_long_read(&vport->err_stats.rx_dropped); - - stats->rx_bytes = 0; - stats->rx_packets = 0; - stats->tx_bytes = 0; - stats->tx_packets = 0; - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *percpu_stats; - struct pcpu_sw_netstats local_stats; - unsigned int start; - - percpu_stats = per_cpu_ptr(vport->percpu_stats, i); - - do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); - local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); - - stats->rx_bytes += local_stats.rx_bytes; - stats->rx_packets += local_stats.rx_packets; - stats->tx_bytes += local_stats.tx_bytes; - stats->tx_packets += local_stats.tx_packets; - } + const struct rtnl_link_stats64 *dev_stats; + struct rtnl_link_stats64 temp; + + dev_stats = dev_get_stats(vport->dev, &temp); + stats->rx_errors = dev_stats->rx_errors; + stats->tx_errors = dev_stats->tx_errors; + stats->tx_dropped = dev_stats->tx_dropped; + stats->rx_dropped = dev_stats->rx_dropped; + + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_packets; + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = dev_stats->tx_packets; } /** @@ -362,7 +405,7 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) old = ovsl_dereference(vport->upcall_portids); - vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids), + vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), GFP_KERNEL); if (!vport_portids) return -ENOMEM; @@ -375,7 +418,6 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) if (old) call_rcu(&old->rcu, vport_portids_destroy_rcu_cb); - return 0; } @@ -402,7 +444,7 @@ int ovs_vport_get_upcall_portids(const struct vport *vport, if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, - ids->n_ids * sizeof(u32), (void *) ids->ids); + ids->n_ids * sizeof(u32), (void *)ids->ids); else return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); } @@ -421,6 +463,7 @@ int ovs_vport_get_upcall_portids(const struct vport *vport, u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; + u32 ids_index; u32 hash; ids = rcu_dereference(vport->upcall_portids); @@ -429,7 +472,8 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) return 0; hash = skb_get_hash(skb); - return ids->ids[hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids)]; + ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); + return ids->ids[ids_index]; } /** @@ -437,98 +481,40 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) * * @vport: vport that received the packet * @skb: skb that was received - * @tun_info: tunnel (if any) that carried packet + * @tun_key: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ -void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, - const struct ovs_tunnel_info *tun_info) +int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + const struct ip_tunnel_info *tun_info) { - struct pcpu_sw_netstats *stats; struct sw_flow_key key; int error; - stats = this_cpu_ptr(vport->percpu_stats); - u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - u64_stats_update_end(&stats->syncp); + OVS_CB(skb)->input_vport = vport; + OVS_CB(skb)->mru = 0; + if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { + u32 mark; + + mark = skb->mark; + skb_scrub_packet(skb, true); + skb->mark = mark; + tun_info = NULL; + } ovs_skb_init_inner_protocol(skb); - OVS_CB(skb)->input_vport = vport; - OVS_CB(skb)->egress_tun_info = NULL; + skb_clear_ovs_gso_cb(skb); + /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { kfree_skb(skb); - return; + return error; } - ovs_dp_process_packet(skb, &key); + return 0; } - -/** - * ovs_vport_send - send a packet on a device - * - * @vport: vport on which to send the packet - * @skb: skb to send - * - * Sends the given packet and returns the length of data sent. Either ovs - * lock or rcu_read_lock must be held. - */ -int ovs_vport_send(struct vport *vport, struct sk_buff *skb) -{ - int sent = vport->ops->send(vport, skb); - - if (likely(sent > 0)) { - struct pcpu_sw_netstats *stats; - - stats = this_cpu_ptr(vport->percpu_stats); - - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += sent; - u64_stats_update_end(&stats->syncp); - } else if (sent < 0) { - ovs_vport_record_error(vport, VPORT_E_TX_ERROR); - } else { - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); - } - - return sent; -} - -/** - * ovs_vport_record_error - indicate device error to generic stats layer - * - * @vport: vport that encountered the error - * @err_type: one of enum vport_err_type types to indicate the error type - * - * If using the vport generic stats layer indicate that an error of the given - * type has occurred. - */ -static void ovs_vport_record_error(struct vport *vport, - enum vport_err_type err_type) -{ - switch (err_type) { - case VPORT_E_RX_DROPPED: - atomic_long_inc(&vport->err_stats.rx_dropped); - break; - - case VPORT_E_RX_ERROR: - atomic_long_inc(&vport->err_stats.rx_errors); - break; - - case VPORT_E_TX_DROPPED: - atomic_long_inc(&vport->err_stats.tx_dropped); - break; - - case VPORT_E_TX_ERROR: - atomic_long_inc(&vport->err_stats.tx_errors); - break; - } -} +EXPORT_SYMBOL_GPL(ovs_vport_receive); static void free_vport_rcu(struct rcu_head *rcu) { @@ -544,34 +530,34 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } +EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, +int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, struct net *net, - const struct ovs_tunnel_info *tun_info, + struct sk_buff *skb, u8 ipproto, - u32 skb_mark, __be16 tp_src, __be16 tp_dst) { - const struct ovs_key_ipv4_tunnel *tun_key; + struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; + struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); + const struct ip_tunnel_key *tun_key; + u32 skb_mark = skb->mark; struct rtable *rt; - __be32 saddr; + struct flowi4 fl; if (unlikely(!tun_info)) return -EINVAL; + if (ip_tunnel_info_af(tun_info) != AF_INET) + return -EINVAL; + + tun_key = &tun_info->key; - tun_key = &tun_info->tunnel; - saddr = tun_key->ipv4_src; - /* Route lookup to get srouce IP address: saddr. + /* Route lookup to get srouce IP address. * The process may need to be changed if the corresponding process * in vports ops changed. */ - rt = find_route(net, - &saddr, - tun_key->ipv4_dst, - ipproto, - tun_key->ipv4_tos, - skb_mark); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -580,25 +566,56 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, /* Generate egress_tun_info based on tun_info, * saddr, tp_src and tp_dst */ - __ovs_flow_tun_info_init(egress_tun_info, - saddr, tun_key->ipv4_dst, - tun_key->ipv4_tos, - tun_key->ipv4_ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags, - tun_info->options, - tun_info->options_len); - + ip_tunnel_key_init(&egress_tun_info->key, + fl.saddr, tun_key->u.ipv4.dst, + tun_key->tos, + tun_key->ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags); + egress_tun_info->options_len = tun_info->options_len; + egress_tun_info->mode = tun_info->mode; + upcall->egress_tun_opts = ip_tunnel_info_opts(tun_info); return 0; } +EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ovs_tunnel_info *info) + struct dp_upcall_info *upcall) { /* get_egress_tun_info() is only implemented on tunnel ports. */ if (unlikely(!vport->ops->get_egress_tun_info)) return -EINVAL; - return vport->ops->get_egress_tun_info(vport, skb, info); + return vport->ops->get_egress_tun_info(vport, skb, upcall); +} + +static unsigned int packet_length(const struct sk_buff *skb) +{ + unsigned int length = skb->len - ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + + return length; +} + +void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +{ + int mtu = vport->dev->mtu; + + if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", + vport->dev->name, + packet_length(skb), mtu); + vport->dev->stats.tx_errors++; + goto drop; + } + + skb->dev = vport->dev; + vport->ops->send(skb); + return; + +drop: + kfree_skb(skb); }