datapath: add skb mark matching and set action
[cascardo/ovs.git] / datapath / datapath.c
index d64fc32..c9485ca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira Networks.
+ * Copyright (c) 2007-2012 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -39,7 +39,6 @@
 #include <linux/version.h>
 #include <linux/ethtool.h>
 #include <linux/wait.h>
-#include <asm/system.h>
 #include <asm/div64.h>
 #include <linux/highmem.h>
 #include <linux/netfilter_bridge.h>
@@ -62,8 +61,8 @@
 #include "vport-internal_dev.h"
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
-    LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
-#error Kernels before 2.6.18 or after 3.2 are not supported by this version of Open vSwitch.
+    LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+#error Kernels before 2.6.18 or after 3.6 are not supported by this version of Open vSwitch.
 #endif
 
 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
@@ -176,17 +175,17 @@ static int dp_fill_ifinfo(struct sk_buff *skb,
        hdr->ifi_flags = port->ops->get_dev_flags(port);
        hdr->ifi_change = 0;
 
-       NLA_PUT_STRING(skb, IFLA_IFNAME, port->ops->get_name(port));
-       NLA_PUT_U32(skb, IFLA_MASTER, get_dpifindex(dp));
-       NLA_PUT_U32(skb, IFLA_MTU, port->ops->get_mtu(port));
+       if (nla_put_string(skb, IFLA_IFNAME, port->ops->get_name(port)) ||
+           nla_put_u32(skb, IFLA_MASTER, get_dpifindex(dp)) ||
+           nla_put_u32(skb, IFLA_MTU, port->ops->get_mtu(port)) ||
 #ifdef IFLA_OPERSTATE
-       NLA_PUT_U8(skb, IFLA_OPERSTATE,
-                  port->ops->is_running(port)
-                       ? port->ops->get_operstate(port)
-                       : IF_OPER_DOWN);
+           nla_put_u8(skb, IFLA_OPERSTATE,
+                      port->ops->is_running(port) ?
+                               port->ops->get_operstate(port) :
+                               IF_OPER_DOWN) ||
 #endif
-
-       NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, port->ops->get_addr(port));
+           nla_put(skb, IFLA_ADDRESS, ETH_ALEN, port->ops->get_addr(port)))
+               goto nla_put_failure;
 
        return nlmsg_end(skb, nlh);
 
@@ -405,14 +404,15 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
 {
+       unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;
 
        segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
+       if (IS_ERR(segs))
+               return PTR_ERR(segs);
 
        /* Queue all of the segments. */
        skb = segs;
@@ -421,7 +421,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
                if (err)
                        break;
 
-               if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+               if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for a first fragment, so we need to
                         * properly mark later fragments.
@@ -558,6 +558,19 @@ static int validate_sample(const struct nlattr *attr,
        return validate_actions(actions, key, depth + 1);
 }
 
+static int validate_tp_port(const struct sw_flow_key *flow_key)
+{      /* Returns 0 if the IPv4/IPv6 flow key has a nonzero transport src or dst port, else -EINVAL. */
+       if (flow_key->eth.type == htons(ETH_P_IP)) {
+               if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
+                       return 0;
+       } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
+               if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
+                       return 0;
+       }
+       /* Reached when eth.type is neither IPv4 nor IPv6, or when both ports are zero. */
+       return -EINVAL;
+}
+
 static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key)
 {
@@ -574,17 +587,32 @@ static int validate_set(const struct nlattr *a,
 
        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
+       const struct ovs_key_ipv4_tunnel *tun_key;
+       const struct ovs_key_ipv6 *ipv6_key;
 
        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_TUN_ID:
        case OVS_KEY_ATTR_ETHERNET:
                break;
 
+       case OVS_KEY_ATTR_SKB_MARK:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
+               if (nla_get_u32(ovs_key) != 0)
+                       return -EINVAL;
+#endif
+               break;
+
+       case OVS_KEY_ATTR_IPV4_TUNNEL:
+               tun_key = nla_data(ovs_key);
+               if (!tun_key->ipv4_dst)
+                       return -EINVAL;
+               break;
+
        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;
 
-               if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+               if (!flow_key->ip.proto)
                        return -EINVAL;
 
                ipv4_key = nla_data(ovs_key);
@@ -596,22 +624,36 @@ static int validate_set(const struct nlattr *a,
 
                break;
 
-       case OVS_KEY_ATTR_TCP:
-               if (flow_key->ip.proto != IPPROTO_TCP)
+       case OVS_KEY_ATTR_IPV6:
+               if (flow_key->eth.type != htons(ETH_P_IPV6))
+                       return -EINVAL;
+
+               if (!flow_key->ip.proto)
                        return -EINVAL;
 
-               if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+               ipv6_key = nla_data(ovs_key);
+               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+                       return -EINVAL;
+
+               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+                       return -EINVAL;
+
+               if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;
 
                break;
 
+       case OVS_KEY_ATTR_TCP:
+               if (flow_key->ip.proto != IPPROTO_TCP)
+                       return -EINVAL;
+
+               return validate_tp_port(flow_key);
+
        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;
 
-               if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
-                       return -EINVAL;
-               break;
+               return validate_tp_port(flow_key);
 
        default:
                return -EINVAL;
@@ -773,29 +815,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
-               goto err_flow_put;
+               goto err_flow_free;
 
-       err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
-                                            &flow->key.phy.in_port,
-                                            &flow->key.phy.tun_id,
-                                            a[OVS_PACKET_ATTR_KEY]);
+       err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
        if (err)
-               goto err_flow_put;
+               goto err_flow_free;
 
        err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
        if (err)
-               goto err_flow_put;
-
-       flow->hash = ovs_flow_hash(&flow->key, key_len);
+               goto err_flow_free;
 
        acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
-               goto err_flow_put;
+               goto err_flow_free;
        rcu_assign_pointer(flow->sf_acts, acts);
 
        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
+       skb_set_mark(packet, flow->key.phy.skb_mark);
 
        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -808,13 +846,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        local_bh_enable();
        rcu_read_unlock();
 
-       ovs_flow_put(flow);
+       ovs_flow_free(flow);
        return err;
 
 err_unlock:
        rcu_read_unlock();
-err_flow_put:
-       ovs_flow_put(flow);
+err_flow_free:
+       ovs_flow_free(flow);
 err_kfree_skb:
        kfree_skb(packet);
 err:
@@ -918,15 +956,18 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);
 
-       if (used)
-               NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used));
+       if (used &&
+           nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
+               goto nla_put_failure;
 
-       if (stats.n_packets)
-               NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
-                       sizeof(struct ovs_flow_stats), &stats);
+       if (stats.n_packets &&
+           nla_put(skb, OVS_FLOW_ATTR_STATS,
+                   sizeof(struct ovs_flow_stats), &stats))
+               goto nla_put_failure;
 
-       if (tcp_flags)
-               NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
+       if (tcp_flags &&
+           nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
+               goto nla_put_failure;
 
        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
@@ -1055,7 +1096,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                        error = PTR_ERR(flow);
                        goto error;
                }
-               flow->key = key;
                clear_stats(flow);
 
                /* Obtain actions. */
@@ -1066,8 +1106,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                rcu_assign_pointer(flow->sf_acts, acts);
 
                /* Put flow in bucket. */
-               flow->hash = ovs_flow_hash(&key, key_len);
-               ovs_flow_tbl_insert(table, flow);
+               ovs_flow_tbl_insert(table, flow, &key, key_len);
 
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq,
@@ -1128,7 +1167,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
        return 0;
 
 error_free_flow:
-       ovs_flow_put(flow);
+       ovs_flow_free(flow);
 error:
        return error;
 }
@@ -1312,7 +1351,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                goto nla_put_failure;
 
        get_dp_stats(dp, &dp_stats);
-       NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats);
+       if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
+               goto nla_put_failure;
 
        return genlmsg_end(skb, ovs_header);
 
@@ -1395,6 +1435,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);
 
+       ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
+
        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
@@ -1406,7 +1448,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
                err = -ENOMEM;
                goto err_destroy_table;
        }
-       ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                            GFP_KERNEL);
@@ -1461,6 +1502,7 @@ err_destroy_percpu:
 err_destroy_table:
        ovs_flow_tbl_destroy(genl_dereference(dp->table));
 err_free_dp:
+       release_net(ovs_dp_get_net(dp));
        kfree(dp);
 err_unlock_rtnl:
        rtnl_unlock();
@@ -1668,17 +1710,20 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 
        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
 
-       NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
-       NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type);
-       NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport));
-       NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
+       if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
+           nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
+           nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
+           nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid))
+               goto nla_put_failure;
 
        ovs_vport_get_stats(vport, &vport_stats);
-       NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
-               &vport_stats);
+       if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
+                   &vport_stats))
+               goto nla_put_failure;
 
-       NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN,
-               vport->ops->get_addr(vport));
+       if (nla_put(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN,
+                   vport->ops->get_addr(vport)))
+               goto nla_put_failure;
 
        err = ovs_vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
@@ -1876,16 +1921,17 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);
+       else
+               goto exit_unlock;
        if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
                vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
-               err = PTR_ERR(reply);
                netlink_set_err(GENL_SOCK(sock_net(skb->sk)), 0,
-                               ovs_dp_vport_multicast_group.id, err);
-               return 0;
+                               ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
+               goto exit_unlock;
        }
 
        genl_notify(reply, genl_info_net(info), info->snd_pid,
@@ -2239,3 +2285,4 @@ module_exit(dp_cleanup);
 
 MODULE_DESCRIPTION("Open vSwitch switching datapath");
 MODULE_LICENSE("GPL");
+MODULE_VERSION(VERSION);