datapath: add skb mark matching and set action
[cascardo/ovs.git] / datapath / datapath.c
index 7f31394..c9485ca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira Networks.
+ * Copyright (c) 2007-2012 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -39,7 +39,6 @@
 #include <linux/version.h>
 #include <linux/ethtool.h>
 #include <linux/wait.h>
-#include <asm/system.h>
 #include <asm/div64.h>
 #include <linux/highmem.h>
 #include <linux/netfilter_bridge.h>
@@ -62,8 +61,8 @@
 #include "vport-internal_dev.h"
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
-    LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0)
-#error Kernels before 2.6.18 or after 3.3 are not supported by this version of Open vSwitch.
+    LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+#error Kernels before 2.6.18 or after 3.6 are not supported by this version of Open vSwitch.
 #endif
 
 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
@@ -405,14 +404,15 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
 {
+       unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;
 
        segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
+       if (IS_ERR(segs))
+               return PTR_ERR(segs);
 
        /* Queue all of the segments. */
        skb = segs;
@@ -421,7 +421,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
                if (err)
                        break;
 
-               if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+               if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for a first fragment, so we need to
                         * properly mark later fragments.
@@ -558,6 +558,19 @@ static int validate_sample(const struct nlattr *attr,
        return validate_actions(actions, key, depth + 1);
 }
 
+static int validate_tp_port(const struct sw_flow_key *flow_key)
+{
+       if (flow_key->eth.type == htons(ETH_P_IP)) {
+               if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
+                       return 0;
+       } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
+               if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
+                       return 0;
+       }
+
+       return -EINVAL;
+}
+
 static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key)
 {
@@ -574,17 +587,32 @@ static int validate_set(const struct nlattr *a,
 
        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
+       const struct ovs_key_ipv4_tunnel *tun_key;
+       const struct ovs_key_ipv6 *ipv6_key;
 
        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_TUN_ID:
        case OVS_KEY_ATTR_ETHERNET:
                break;
 
+       case OVS_KEY_ATTR_SKB_MARK:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
+               if (nla_get_u32(ovs_key) != 0)
+                       return -EINVAL;
+#endif
+               break;
+
+       case OVS_KEY_ATTR_IPV4_TUNNEL:
+               tun_key = nla_data(ovs_key);
+               if (!tun_key->ipv4_dst)
+                       return -EINVAL;
+               break;
+
        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;
 
-               if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+               if (!flow_key->ip.proto)
                        return -EINVAL;
 
                ipv4_key = nla_data(ovs_key);
@@ -596,22 +624,36 @@ static int validate_set(const struct nlattr *a,
 
                break;
 
-       case OVS_KEY_ATTR_TCP:
-               if (flow_key->ip.proto != IPPROTO_TCP)
+       case OVS_KEY_ATTR_IPV6:
+               if (flow_key->eth.type != htons(ETH_P_IPV6))
                        return -EINVAL;
 
-               if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+               if (!flow_key->ip.proto)
+                       return -EINVAL;
+
+               ipv6_key = nla_data(ovs_key);
+               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+                       return -EINVAL;
+
+               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+                       return -EINVAL;
+
+               if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;
 
                break;
 
+       case OVS_KEY_ATTR_TCP:
+               if (flow_key->ip.proto != IPPROTO_TCP)
+                       return -EINVAL;
+
+               return validate_tp_port(flow_key);
+
        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;
 
-               if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
-                       return -EINVAL;
-               break;
+               return validate_tp_port(flow_key);
 
        default:
                return -EINVAL;
@@ -773,29 +815,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
-               goto err_flow_put;
+               goto err_flow_free;
 
-       err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
-                                            &flow->key.phy.in_port,
-                                            &flow->key.phy.tun_id,
-                                            a[OVS_PACKET_ATTR_KEY]);
+       err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
        if (err)
-               goto err_flow_put;
+               goto err_flow_free;
 
        err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
        if (err)
-               goto err_flow_put;
-
-       flow->hash = ovs_flow_hash(&flow->key, key_len);
+               goto err_flow_free;
 
        acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
-               goto err_flow_put;
+               goto err_flow_free;
        rcu_assign_pointer(flow->sf_acts, acts);
 
        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
+       skb_set_mark(packet, flow->key.phy.skb_mark);
 
        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -808,13 +846,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        local_bh_enable();
        rcu_read_unlock();
 
-       ovs_flow_put(flow);
+       ovs_flow_free(flow);
        return err;
 
 err_unlock:
        rcu_read_unlock();
-err_flow_put:
-       ovs_flow_put(flow);
+err_flow_free:
+       ovs_flow_free(flow);
 err_kfree_skb:
        kfree_skb(packet);
 err:
@@ -1058,7 +1096,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                        error = PTR_ERR(flow);
                        goto error;
                }
-               flow->key = key;
                clear_stats(flow);
 
                /* Obtain actions. */
@@ -1069,8 +1106,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                rcu_assign_pointer(flow->sf_acts, acts);
 
                /* Put flow in bucket. */
-               flow->hash = ovs_flow_hash(&key, key_len);
-               ovs_flow_tbl_insert(table, flow);
+               ovs_flow_tbl_insert(table, flow, &key, key_len);
 
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq,
@@ -1131,7 +1167,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
        return 0;
 
 error_free_flow:
-       ovs_flow_put(flow);
+       ovs_flow_free(flow);
 error:
        return error;
 }
@@ -1399,6 +1435,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);
 
+       ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
+
        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
@@ -1410,7 +1448,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
                err = -ENOMEM;
                goto err_destroy_table;
        }
-       ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                            GFP_KERNEL);
@@ -1465,6 +1502,7 @@ err_destroy_percpu:
 err_destroy_table:
        ovs_flow_tbl_destroy(genl_dereference(dp->table));
 err_free_dp:
+       release_net(ovs_dp_get_net(dp));
        kfree(dp);
 err_unlock_rtnl:
        rtnl_unlock();
@@ -1883,16 +1921,17 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);
+       else
+               goto exit_unlock;
        if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
                vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
-               err = PTR_ERR(reply);
                netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
-                               ovs_dp_vport_multicast_group.id, err);
-               return 0;
+                               ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
+               goto exit_unlock;
        }
 
        genl_notify(reply, genl_info_net(info), info->snd_pid,
@@ -2246,3 +2285,4 @@ module_exit(dp_cleanup);
 
 MODULE_DESCRIPTION("Open vSwitch switching datapath");
 MODULE_LICENSE("GPL");
+MODULE_VERSION(VERSION);