X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=datapath%2Fvport.c;h=7fd98583c78ddcdbebfd940a29d68b8c0b2f4747;hb=f76def2592cc5cb449a3360430ee9cc0f208765d;hp=5a7067b2fb0b38d3900925efb1064d20110b67fd;hpb=efd8a18e8d57a6129bb40fcd25b19cb18c4a3447;p=cascardo%2Fovs.git

diff --git a/datapath/vport.c b/datapath/vport.c
index 5a7067b2f..7fd98583c 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2015 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -20,7 +20,6 @@
 #include <linux/if.h>
 #include <linux/if_vlan.h>
 #include <linux/jhash.h>
-#include <linux/kconfig.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -28,31 +27,21 @@
 #include <linux/rcupdate.h>
 #include <linux/rtnetlink.h>
 #include <linux/compat.h>
-#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/if_link.h>
 #include <net/net_namespace.h>
+#include <net/lisp.h>
+#include <net/gre.h>
+#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/stt.h>
 
 #include "datapath.h"
 #include "gso.h"
 #include "vport.h"
 #include "vport-internal_dev.h"
 
-static void ovs_vport_record_error(struct vport *,
-				   enum vport_err_type err_type);
-
-/* List of statically compiled vport implementations.  Don't forget to also
- * add yours to the list at the bottom of vport.h.
- */
-static const struct vport_ops *vport_ops_list[] = {
-	&ovs_netdev_vport_ops,
-	&ovs_internal_vport_ops,
-	&ovs_geneve_vport_ops,
-#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
-	&ovs_gre_vport_ops,
-	&ovs_gre64_vport_ops,
-#endif
-	&ovs_vxlan_vport_ops,
-	&ovs_lisp_vport_ops,
-};
+static LIST_HEAD(vport_ops_list);
 
 /* Protected by RCU read lock for reading, ovs_mutex for writing. */
 static struct hlist_head *dev_table;
@@ -65,12 +54,42 @@ static struct hlist_head *dev_table;
  */
 int ovs_vport_init(void)
 {
+	int err;
+
 	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
 			    GFP_KERNEL);
 	if (!dev_table)
 		return -ENOMEM;
 
+	err = lisp_init_module();
+	if (err)
+		goto err_lisp;
+	err = ipgre_init();
+	if (err)
+		goto err_gre;
+	err = geneve_init_module();
+	if (err)
+		goto err_geneve;
+
+	err = vxlan_init_module();
+	if (err)
+		goto err_vxlan;
+	err = ovs_stt_init_module();
+	if (err)
+		goto err_stt;
 	return 0;
+
+err_stt:
+	vxlan_cleanup_module();
+err_vxlan:
+	geneve_cleanup_module();
+err_geneve:
+	ipgre_fini();
+err_gre:
+	lisp_cleanup_module();
+err_lisp:
+	kfree(dev_table);
+	return err;
 }
 
 /**
@@ -80,6 +99,11 @@ int ovs_vport_init(void)
  */
 void ovs_vport_exit(void)
 {
+	ovs_stt_cleanup_module();
+	vxlan_cleanup_module();
+	geneve_cleanup_module();
+	ipgre_fini();
+	lisp_cleanup_module();
 	kfree(dev_table);
 }
 
@@ -89,6 +113,32 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
 	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
 }
 
+int __ovs_vport_ops_register(struct vport_ops *ops)
+{
+	int err = -EEXIST;
+	struct vport_ops *o;
+
+	ovs_lock();
+	list_for_each_entry(o, &vport_ops_list, list)
+		if (ops->type == o->type)
+			goto errout;
+
+	list_add_tail(&ops->list, &vport_ops_list);
+	err = 0;
+errout:
+	ovs_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
+
+void ovs_vport_ops_unregister(struct vport_ops *ops)
+{
+	ovs_lock();
+	list_del(&ops->list);
+	ovs_unlock();
+}
+EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
+
 /**
  *	ovs_vport_locate - find a port that has already been created
  *
@@ -102,7 +152,7 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
 	struct vport *vport;
 
 	hlist_for_each_entry_rcu(vport, bucket, hash_node)
-		if (!strcmp(name, vport->ops->get_name(vport)) &&
+		if (!strcmp(name, ovs_vport_name(vport)) &&
 		    net_eq(ovs_dp_get_net(vport->dp), net))
 			return vport;
 
@@ -118,10 +168,10 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
  * Allocate and initialize a new vport defined by @ops.  The vport will contain
  * a private data area of size @priv_size that can be accessed using
  * vport_priv().  vports that are no longer needed should be released with
- * ovs_vport_free().
+ * vport_free().
  */
 struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
-			      const struct vport_parms *parms)
+			  const struct vport_parms *parms)
 {
 	struct vport *vport;
 	size_t alloc_size;
@@ -146,31 +196,40 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 		return ERR_PTR(-EINVAL);
 	}
 
-	vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
-	if (!vport->percpu_stats) {
-		kfree(vport);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	return vport;
 }
+EXPORT_SYMBOL_GPL(ovs_vport_alloc);
 
 /**
  *	ovs_vport_free - uninitialize and free vport
  *
  * @vport: vport to free
  *
- * Frees a vport allocated with ovs_vport_alloc() when it is no longer needed.
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
  *
  * The caller must ensure that an RCU grace period has passed since the last
  * time @vport was in a datapath.
  */
 void ovs_vport_free(struct vport *vport)
 {
+	/* vport is freed from RCU callback or error path, Therefore
+	 * it is safe to use raw dereference.
+	 */
 	kfree(rcu_dereference_raw(vport->upcall_portids));
-	free_percpu(vport->percpu_stats);
 	kfree(vport);
 }
+EXPORT_SYMBOL_GPL(ovs_vport_free);
+
+static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
+{
+	struct vport_ops *ops;
+
+	list_for_each_entry(ops, &vport_ops_list, list)
+		if (ops->type == parms->type)
+			return ops;
+
+	return NULL;
+}
 
 /**
  *	ovs_vport_add - add vport device (for kernel callers)
@@ -182,31 +241,40 @@ void ovs_vport_free(struct vport *vport)
  */
 struct vport *ovs_vport_add(const struct vport_parms *parms)
 {
+	struct vport_ops *ops;
 	struct vport *vport;
-	int err = 0;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
-		if (vport_ops_list[i]->type == parms->type) {
-			struct hlist_head *bucket;
-
-			vport = vport_ops_list[i]->create(parms);
-			if (IS_ERR(vport)) {
-				err = PTR_ERR(vport);
-				goto out;
-			}
-
-			bucket = hash_bucket(ovs_dp_get_net(vport->dp),
-					     vport->ops->get_name(vport));
-			hlist_add_head_rcu(&vport->hash_node, bucket);
+
+	ops = ovs_vport_lookup(parms);
+	if (ops) {
+		struct hlist_head *bucket;
+
+		if (!try_module_get(ops->owner))
+			return ERR_PTR(-EAFNOSUPPORT);
+
+		vport = ops->create(parms);
+		if (IS_ERR(vport)) {
+			module_put(ops->owner);
 			return vport;
 		}
+
+		bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+				     ovs_vport_name(vport));
+		hlist_add_head_rcu(&vport->hash_node, bucket);
+		return vport;
 	}
 
-	err = -EAFNOSUPPORT;
+	/* Unlock to attempt module load and return -EAGAIN if load
+	 * was successful as we need to restart the port addition
+	 * workflow.
+	 */
+	ovs_unlock();
+	request_module("vport-type-%d", parms->type);
+	ovs_lock();
 
-out:
-	return ERR_PTR(err);
+	if (!ovs_vport_lookup(parms))
+		return ERR_PTR(-EAFNOSUPPORT);
+	else
+		return ERR_PTR(-EAGAIN);
 }
 
 /**
@@ -230,14 +298,15 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
  *
  * @vport: vport to delete.
  *
- * Detaches @vport from its datapath and destroys it.  It is possible to fail
- * for reasons such as lack of memory.  ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it.  ovs_mutex must be
+ * held.
  */
 void ovs_vport_del(struct vport *vport)
 {
 	ASSERT_OVSL();
 
 	hlist_del_rcu(&vport->hash_node);
+	module_put(vport->ops->owner);
 	vport->ops->destroy(vport);
 }
 
@@ -253,45 +322,19 @@ void ovs_vport_del(struct vport *vport)
  */
 void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 {
-	int i;
-
-	/* We potentially have two surces of stats that need to be
-	 * combined: those we have collected (split into err_stats and
-	 * percpu_stats), and device error stats from netdev->get_stats()
-	 * (for errors that happen downstream and therefore aren't
-	 * reported through our vport_record_error() function).
-	 * Stats from first source are reported by ovs over
-	 * OVS_VPORT_ATTR_STATS.
-	 * netdev-stats can be directly read over netlink-ioctl.
-	 */
-
-	stats->rx_errors  = atomic_long_read(&vport->err_stats.rx_errors);
-	stats->tx_errors  = atomic_long_read(&vport->err_stats.tx_errors);
-	stats->tx_dropped = atomic_long_read(&vport->err_stats.tx_dropped);
-	stats->rx_dropped = atomic_long_read(&vport->err_stats.rx_dropped);
-
-	stats->rx_bytes		= 0;
-	stats->rx_packets	= 0;
-	stats->tx_bytes		= 0;
-	stats->tx_packets	= 0;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_sw_netstats *percpu_stats;
-		struct pcpu_sw_netstats local_stats;
-		unsigned int start;
-
-		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
-
-		do {
-			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
-			local_stats = *percpu_stats;
-		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
-
-		stats->rx_bytes		+= local_stats.rx_bytes;
-		stats->rx_packets	+= local_stats.rx_packets;
-		stats->tx_bytes		+= local_stats.tx_bytes;
-		stats->tx_packets	+= local_stats.tx_packets;
-	}
+	const struct rtnl_link_stats64 *dev_stats;
+	struct rtnl_link_stats64 temp;
+
+	dev_stats = dev_get_stats(vport->dev, &temp);
+	stats->rx_errors  = dev_stats->rx_errors;
+	stats->tx_errors  = dev_stats->tx_errors;
+	stats->tx_dropped = dev_stats->tx_dropped;
+	stats->rx_dropped = dev_stats->rx_dropped;
+
+	stats->rx_bytes	  = dev_stats->rx_bytes;
+	stats->rx_packets = dev_stats->rx_packets;
+	stats->tx_bytes	  = dev_stats->tx_bytes;
+	stats->tx_packets = dev_stats->tx_packets;
 }
 
 /**
@@ -362,7 +405,7 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 
 	old = ovsl_dereference(vport->upcall_portids);
 
-	vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids),
+	vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
 				GFP_KERNEL);
 	if (!vport_portids)
 		return -ENOMEM;
@@ -375,7 +418,6 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 
 	if (old)
 		call_rcu(&old->rcu, vport_portids_destroy_rcu_cb);
-
 	return 0;
 }
 
@@ -402,7 +444,7 @@ int ovs_vport_get_upcall_portids(const struct vport *vport,
 
 	if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
 		return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
-			       ids->n_ids * sizeof(u32), (void *) ids->ids);
+			       ids->n_ids * sizeof(u32), (void *)ids->ids);
 	else
 		return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
 }
@@ -421,6 +463,7 @@ int ovs_vport_get_upcall_portids(const struct vport *vport,
 u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
 {
 	struct vport_portids *ids;
+	u32 ids_index;
 	u32 hash;
 
 	ids = rcu_dereference(vport->upcall_portids);
@@ -429,7 +472,8 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
 		return 0;
 
 	hash = skb_get_hash(skb);
-	return ids->ids[hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids)];
+	ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
+	return ids->ids[ids_index];
 }
 
 /**
@@ -437,98 +481,40 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
  *
  * @vport: vport that received the packet
  * @skb: skb that was received
- * @tun_info: tunnel (if any) that carried packet
+ * @tun_key: tunnel (if any) that carried packet
  *
  * Must be called with rcu_read_lock.  The packet cannot be shared and
- * skb->data should point to the Ethernet header.  The caller must have already
- * called compute_ip_summed() to initialize the checksumming fields.
+ * skb->data should point to the Ethernet header.
  */
-void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       const struct ovs_tunnel_info *tun_info)
+int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
+		      const struct ip_tunnel_info *tun_info)
 {
-	struct pcpu_sw_netstats *stats;
 	struct sw_flow_key key;
 	int error;
 
-	stats = this_cpu_ptr(vport->percpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	stats->rx_packets++;
-	stats->rx_bytes += skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-	u64_stats_update_end(&stats->syncp);
+	OVS_CB(skb)->input_vport = vport;
+	OVS_CB(skb)->mru = 0;
+	if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
+		u32 mark;
+
+		mark = skb->mark;
+		skb_scrub_packet(skb, true);
+		skb->mark = mark;
+		tun_info = NULL;
+	}
 
 	ovs_skb_init_inner_protocol(skb);
-	OVS_CB(skb)->input_vport = vport;
-	OVS_CB(skb)->egress_tun_info = NULL;
+	skb_clear_ovs_gso_cb(skb);
+	/* Extract flow from 'skb' into 'key'. */
 	error = ovs_flow_key_extract(tun_info, skb, &key);
 	if (unlikely(error)) {
 		kfree_skb(skb);
-		return;
+		return error;
 	}
-
 	ovs_dp_process_packet(skb, &key);
+	return 0;
 }
-
-/**
- *	ovs_vport_send - send a packet on a device
- *
- * @vport: vport on which to send the packet
- * @skb: skb to send
- *
- * Sends the given packet and returns the length of data sent.  Either ovs
- * lock or rcu_read_lock must be held.
- */
-int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
-{
-	int sent = vport->ops->send(vport, skb);
-
-	if (likely(sent > 0)) {
-		struct pcpu_sw_netstats *stats;
-
-		stats = this_cpu_ptr(vport->percpu_stats);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->tx_packets++;
-		stats->tx_bytes += sent;
-		u64_stats_update_end(&stats->syncp);
-	} else if (sent < 0) {
-		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
-	} else {
-		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-	}
-
-	return sent;
-}
-
-/**
- *	ovs_vport_record_error - indicate device error to generic stats layer
- *
- * @vport: vport that encountered the error
- * @err_type: one of enum vport_err_type types to indicate the error type
- *
- * If using the vport generic stats layer indicate that an error of the given
- * type has occurred.
- */
-static void ovs_vport_record_error(struct vport *vport,
-				   enum vport_err_type err_type)
-{
-	switch (err_type) {
-	case VPORT_E_RX_DROPPED:
-		atomic_long_inc(&vport->err_stats.rx_dropped);
-		break;
-
-	case VPORT_E_RX_ERROR:
-		atomic_long_inc(&vport->err_stats.rx_errors);
-		break;
-
-	case VPORT_E_TX_DROPPED:
-		atomic_long_inc(&vport->err_stats.tx_dropped);
-		break;
-
-	case VPORT_E_TX_ERROR:
-		atomic_long_inc(&vport->err_stats.tx_errors);
-		break;
-	}
-}
+EXPORT_SYMBOL_GPL(ovs_vport_receive);
 
 static void free_vport_rcu(struct rcu_head *rcu)
 {
@@ -544,34 +530,34 @@ void ovs_vport_deferred_free(struct vport *vport)
 
 	call_rcu(&vport->rcu, free_vport_rcu);
 }
+EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
 
-int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
 			       struct net *net,
-			       const struct ovs_tunnel_info *tun_info,
+			       struct sk_buff *skb,
 			       u8 ipproto,
-			       u32 skb_mark,
 			       __be16 tp_src,
 			       __be16 tp_dst)
 {
-	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
+	struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
+	const struct ip_tunnel_key *tun_key;
+	u32 skb_mark = skb->mark;
 	struct rtable *rt;
-	__be32 saddr;
+	struct flowi4 fl;
 
 	if (unlikely(!tun_info))
 		return -EINVAL;
+	if (ip_tunnel_info_af(tun_info) != AF_INET)
+		return -EINVAL;
+
+	tun_key = &tun_info->key;
 
-	tun_key = &tun_info->tunnel;
-	saddr = tun_key->ipv4_src;
-	/* Route lookup to get srouce IP address: saddr.
+	/* Route lookup to get srouce IP address.
 	 * The process may need to be changed if the corresponding process
 	 * in vports ops changed.
 	 */
-	rt = find_route(net,
-			&saddr,
-			tun_key->ipv4_dst,
-			ipproto,
-			tun_key->ipv4_tos,
-			skb_mark);
+	rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -580,25 +566,56 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
 	/* Generate egress_tun_info based on tun_info,
 	 * saddr, tp_src and tp_dst
 	 */
-	__ovs_flow_tun_info_init(egress_tun_info,
-				 saddr, tun_key->ipv4_dst,
-				 tun_key->ipv4_tos,
-				 tun_key->ipv4_ttl,
-				 tp_src, tp_dst,
-				 tun_key->tun_id,
-				 tun_key->tun_flags,
-				 tun_info->options,
-				 tun_info->options_len);
-
+	ip_tunnel_key_init(&egress_tun_info->key,
+			   fl.saddr, tun_key->u.ipv4.dst,
+			   tun_key->tos,
+			   tun_key->ttl,
+			   tp_src, tp_dst,
+			   tun_key->tun_id,
+			   tun_key->tun_flags);
+	egress_tun_info->options_len = tun_info->options_len;
+	egress_tun_info->mode = tun_info->mode;
+	upcall->egress_tun_opts = ip_tunnel_info_opts(tun_info);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
 
 int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				  struct ovs_tunnel_info *info)
+				  struct dp_upcall_info *upcall)
 {
 	/* get_egress_tun_info() is only implemented on tunnel ports. */
 	if (unlikely(!vport->ops->get_egress_tun_info))
 		return -EINVAL;
 
-	return vport->ops->get_egress_tun_info(vport, skb, info);
+	return vport->ops->get_egress_tun_info(vport, skb, upcall);
+}
+
+static unsigned int packet_length(const struct sk_buff *skb)
+{
+	unsigned int length = skb->len - ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		length -= VLAN_HLEN;
+
+	return length;
+}
+
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+	int mtu = vport->dev->mtu;
+
+	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
+				     vport->dev->name,
+				     packet_length(skb), mtu);
+		vport->dev->stats.tx_errors++;
+		goto drop;
+	}
+
+	skb->dev = vport->dev;
+	vport->ops->send(skb);
+	return;
+
+drop:
+	kfree_skb(skb);
 }