odp-util: Format and scan multiple MPLS labels.
[cascardo/ovs.git] / datapath / flow.c
index f1bb95d..057dde1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2015 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -16,8 +16,6 @@
  * 02110-1301, USA
  */
 
-#include "flow.h"
-#include "datapath.h"
 #include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -34,6 +32,7 @@
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/mpls.h>
 #include <linux/sctp.h>
 #include <linux/smp.h>
 #include <linux/tcp.h>
 #include <linux/rculist.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/mpls.h>
 #include <net/ndisc.h>
 
+#include "datapath.h"
+#include "conntrack.h"
+#include "flow.h"
+#include "flow_netlink.h"
+#include "vport.h"
 #include "vlan.h"
 
 u64 ovs_flow_used_time(unsigned long flow_jiffies)
@@ -63,10 +68,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
 #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
 
 void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
-                          struct sk_buff *skb)
+                          const struct sk_buff *skb)
 {
        struct flow_stats *stats;
        int node = numa_node_id();
+       int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
        stats = rcu_dereference(flow->stats[node]);
 
@@ -90,19 +96,21 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
                         * allocated stats as we have already locked them.
                         */
                        if (likely(flow->stats_last_writer != NUMA_NO_NODE)
-                           && likely(!rcu_dereference(flow->stats[node]))) {
+                           && likely(!rcu_access_pointer(flow->stats[node]))) {
                                /* Try to allocate node-specific stats. */
                                struct flow_stats *new_stats;
 
                                new_stats =
                                        kmem_cache_alloc_node(flow_stats_cache,
-                                                             GFP_THISNODE |
+                                                              GFP_NOWAIT |
+                                                              __GFP_THISNODE |
+                                                              __GFP_NOWARN |
                                                              __GFP_NOMEMALLOC,
                                                              node);
                                if (likely(new_stats)) {
                                        new_stats->used = jiffies;
                                        new_stats->packet_count = 1;
-                                       new_stats->byte_count = skb->len;
+                                       new_stats->byte_count = len;
                                        new_stats->tcp_flags = tcp_flags;
                                        spin_lock_init(&new_stats->lock);
 
@@ -117,7 +125,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
        stats->used = jiffies;
        stats->packet_count++;
-       stats->byte_count += skb->len;
+       stats->byte_count += len;
        stats->tcp_flags |= tcp_flags;
 unlock:
        spin_unlock(&stats->lock);
@@ -326,7 +334,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
        proto = *(__be16 *) skb->data;
        __skb_pull(skb, sizeof(__be16));
 
-       if (ntohs(proto) >= ETH_P_802_3_MIN)
+       if (eth_proto_is_802_3(proto))
                return proto;
 
        if (skb->len < sizeof(struct llc_snap_hdr))
@@ -343,7 +351,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 
        __skb_pull(skb, sizeof(struct llc_snap_hdr));
 
-       if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
+       if (eth_proto_is_802_3(llc->ethertype))
                return llc->ethertype;
 
        return htons(ETH_P_802_2);
@@ -399,13 +407,13 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
                                        goto invalid;
                                ether_addr_copy(key->ipv6.nd.sll,
-                                   &nd->opt[offset+sizeof(*nd_opt)]);
+                                               &nd->opt[offset+sizeof(*nd_opt)]);
                        } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
                                   && opt_len == 8) {
                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
                                        goto invalid;
                                ether_addr_copy(key->ipv6.nd.tll,
-                                   &nd->opt[offset+sizeof(*nd_opt)]);
+                                               &nd->opt[offset+sizeof(*nd_opt)]);
                        }
 
                        icmp_len -= opt_len;
@@ -424,10 +432,9 @@ invalid:
 }
 
 /**
- * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * key_extract - extracts a flow key from an Ethernet frame.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
  * Ethernet header
- * @in_port: port number on which @skb was received.
  * @key: output flow key
  *
  * The caller must ensure that skb->len >= ETH_HLEN.
@@ -446,26 +453,12 @@ invalid:
  *      of a correct length, otherwise the same as skb->network_header.
  *      For other key->eth.type values it is left untouched.
  */
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 {
        int error;
        struct ethhdr *eth;
 
-       if (OVS_CB(skb)->tun_info) {
-               struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
-               memcpy(&key->tun_key, &tun_info->tunnel,
-                       sizeof(key->tun_key));
-       } else {
-               memset(&key->tun_key, 0, sizeof(key->tun_key));
-       }
-
-       key->phy.priority = skb->priority;
-       key->phy.in_port = in_port;
-       key->phy.skb_mark = skb->mark;
-       key->ovs_flow_hash = 0;
-       key->recirc_id = 0;
-
-       /* Flags are always used as part of stats. */
+       /* Flags are always used as part of stats */
        key->tp.flags = 0;
 
        skb_reset_mac_header(skb);
@@ -479,10 +472,11 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
        __skb_pull(skb, 2 * ETH_ALEN);
        /* We are going to push all headers that we pull, so no need to
-        * update skb->csum here. */
+        * update skb->csum here.
+        */
 
        key->eth.tci = 0;
-       if (vlan_tx_tag_present(skb))
+       if (skb_vlan_tag_present(skb))
                key->eth.tci = htons(vlan_get_tci(skb));
        else if (eth->h_proto == htons(ETH_P_8021Q))
                if (unlikely(parse_vlan(skb, key)))
@@ -493,6 +487,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                return -ENOMEM;
 
        skb_reset_network_header(skb);
+       skb_reset_mac_len(skb);
        __skb_push(skb, skb->data - skb_mac_header(skb));
 
        /* Network layer. */
@@ -525,7 +520,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                        return 0;
                }
                if (nh->frag_off & htons(IP_MF) ||
-                        skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+                       skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
                        key->ip.frag = OVS_FRAG_TYPE_FIRST;
                else
                        key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -540,6 +535,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                        } else {
                                memset(&key->tp, 0, sizeof(key->tp));
                        }
+
                } else if (key->ip.proto == IPPROTO_UDP) {
                        if (udphdr_ok(skb)) {
                                struct udphdr *udp = udp_hdr(skb);
@@ -561,7 +557,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                                struct icmphdr *icmp = icmp_hdr(skb);
                                /* The ICMP type and code fields use the 16-bit
                                 * transport port fields, so we need to store
-                                * them in 16-bit network byte order. */
+                                * them in 16-bit network byte order.
+                                */
                                key->tp.src = htons(icmp->type);
                                key->tp.dst = htons(icmp->code);
                        } else {
@@ -572,14 +569,15 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
        } else if (key->eth.type == htons(ETH_P_ARP) ||
                   key->eth.type == htons(ETH_P_RARP)) {
                struct arp_eth_header *arp;
+               bool arp_available = arphdr_ok(skb);
 
                arp = (struct arp_eth_header *)skb_network_header(skb);
 
-               if (arphdr_ok(skb)
-                               && arp->ar_hrd == htons(ARPHRD_ETHER)
-                               && arp->ar_pro == htons(ETH_P_IP)
-                               && arp->ar_hln == ETH_ALEN
-                               && arp->ar_pln == 4) {
+               if (arp_available &&
+                   arp->ar_hrd == htons(ARPHRD_ETHER) &&
+                   arp->ar_pro == htons(ETH_P_IP) &&
+                   arp->ar_hln == ETH_ALEN &&
+                   arp->ar_pln == 4) {
 
                        /* We only match on the lower 8 bits of the opcode. */
                        if (ntohs(arp->ar_op) <= 0xff)
@@ -595,6 +593,33 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                        memset(&key->ip, 0, sizeof(key->ip));
                        memset(&key->ipv4, 0, sizeof(key->ipv4));
                }
+       } else if (eth_p_mpls(key->eth.type)) {
+               size_t stack_len = MPLS_HLEN;
+
+               /* In the presence of an MPLS label stack the end of the L2
+                * header and the beginning of the L3 header differ.
+                *
+                * Advance network_header to the beginning of the L3
+                * header. mac_len corresponds to the end of the L2 header.
+                */
+               while (1) {
+                       __be32 lse;
+
+                       error = check_header(skb, skb->mac_len + stack_len);
+                       if (unlikely(error))
+                               return 0;
+
+                       memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+
+                       if (stack_len == MPLS_HLEN)
+                               memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+
+                       skb_set_network_header(skb, skb->mac_len + stack_len);
+                       if (lse & htonl(MPLS_LS_S_MASK))
+                               break;
+
+                       stack_len += MPLS_HLEN;
+               }
        } else if (key->eth.type == htons(ETH_P_IPV6)) {
                int nh_len;             /* IPv6 Header + Extensions */
 
@@ -652,6 +677,58 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                        }
                }
        }
-
        return 0;
 }
+
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
+{
+       return key_extract(skb, key);  /* no metadata set here; see key_extract() */
+}
+
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+                        struct sk_buff *skb, struct sw_flow_key *key)
+{
+       /* Fill key metadata from tun_info and skb, then parse the headers. */
+       if (tun_info) {
+               if (ip_tunnel_info_af(tun_info) != AF_INET)
+                       return -EINVAL;  /* only AF_INET tunnel info accepted */
+
+               memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
+               BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
+                            sizeof(key->tun_opts));  /* any options_len fits */
+
+               if (tun_info->options_len) {
+                       ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
+                                               tun_info);
+                       key->tun_opts_len = tun_info->options_len;
+               } else {
+                       key->tun_opts_len = 0;
+               }
+       } else {  /* no tunnel info: clear tunnel key and options length */
+               key->tun_opts_len = 0;
+               memset(&key->tun_key, 0, sizeof(key->tun_key));
+       }
+
+       key->phy.priority = skb->priority;
+       key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
+       key->phy.skb_mark = skb->mark;
+       ovs_ct_fill_key(skb, key);
+       key->ovs_flow_hash = 0;
+       key->recirc_id = 0;
+
+       return key_extract(skb, key);  /* result of header parsing is returned */
+}
+
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
+                                  struct sk_buff *skb,
+                                  struct sw_flow_key *key, bool log)
+{
+       int err;
+
+       /* Fill key metadata from the netlink attributes in attr. */
+       err = ovs_nla_get_flow_metadata(net, attr, key, log);
+       if (err)
+               return err;  /* propagate the metadata error unchanged */
+
+       return key_extract(skb, key);  /* then parse skb's headers into key */
+}