datapath: Add basic MPLS support to kernel

author Simon Horman <horms@verge.net.au>

Tue, 24 Jun 2014 11:56:57 +0000 (20:56 +0900)

committer Jesse Gross <jesse@nicira.com>

Tue, 24 Jun 2014 23:02:02 +0000 (16:02 -0700)
author Simon Horman <horms@verge.net.au>
Tue, 24 Jun 2014 11:56:57 +0000 (20:56 +0900)
committer Jesse Gross <jesse@nicira.com>
Tue, 24 Jun 2014 23:02:02 +0000 (16:02 -0700)
diff --git a/OPENFLOW-1.1+ b/OPENFLOW-1.1+

index 97c2923..476f79a 100644 (file)
--- a/OPENFLOW-1.1+
+++ b/OPENFLOW-1.1+
@@ -54,10 +54,6 @@ OpenFlow 1.1
  The list of remaining work items for OpenFlow 1.1 is below.  It is
  probably incomplete.
  
-    * MPLS.  Simon Horman maintains a patch series that adds this
-      feature.  This is partially merged.
-      [optional for OF1.1+]
-
      * Match and set double-tagged VLANs (QinQ).  This requires kernel
        work for reasonable performance.
        [optional for OF1.1+]
diff --git a/datapath/Modules.mk b/datapath/Modules.mk

index 41ffbea..90e158c 100644 (file)
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -27,6 +27,7 @@ openvswitch_headers = \
         flow.h \
         flow_netlink.h \
         flow_table.h \
+       mpls.h \
         vlan.h \
         vport.h \
         vport-internal_dev.h \
diff --git a/datapath/actions.c b/datapath/actions.c

index 72fdcf9..cb26ad5 100644 (file)
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -35,6 +35,8 @@
  #include <net/sctp/checksum.h>
  
  #include "datapath.h"
+#include "gso.h"
+#include "mpls.h"
  #include "vlan.h"
  #include "vport.h"
  
@@ -49,6 +51,98 @@ static int make_writable(struct sk_buff *skb, int write_len)
         return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
  }
  
+/* Return a pointer to the first byte after the L2 (mac) header.
+ *
+ * Without MPLS this is the same place as the network header.  With MPLS
+ * the label stack sits between the L2 header and the network header, so
+ * the returned pointer is the top of the label stack and lies before
+ * the network header.
+ */
+static unsigned char *mac_header_end(const struct sk_buff *skb)
+{
+       unsigned char *l2_start = skb_mac_header(skb);
+
+       return l2_start + skb->mac_len;
+}
+
+/* Insert one MPLS label stack entry directly after the L2 header.
+ *
+ * @skb:  packet to modify.
+ * @mpls: supplies the entry to insert (mpls_lse) and the ethertype
+ *        (mpls_ethertype) written to the Ethernet header and to
+ *        skb->protocol.
+ *
+ * Returns 0 on success, or -ENOMEM if skb_cow_head() fails.
+ */
+static int push_mpls(struct sk_buff *skb,
+                    const struct ovs_action_push_mpls *mpls)
+{
+       __be32 *new_mpls_lse;
+       struct ethhdr *hdr;
+
+       if (skb_cow_head(skb, MPLS_HLEN) < 0)
+               return -ENOMEM;
+
+       /* Grow the packet by MPLS_HLEN at the front, then move the mac_len
+        * bytes of L2 header MPLS_HLEN bytes towards the head so that a gap
+        * opens up where the L2 header used to end.
+        * NOTE(review): the memmove destination equals the new skb->data
+        * only if skb->data was at the mac header on entry -- confirm.
+        */
+       skb_push(skb, MPLS_HLEN);
+       memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+               skb->mac_len);
+       skb_reset_mac_header(skb);
+
+       /* mac_header_end() now points at the gap; fill it with the entry. */
+       new_mpls_lse = (__be32 *)mac_header_end(skb);
+       *new_mpls_lse = mpls->mpls_lse;
+
+       /* Fold the MPLS_HLEN new bytes into a CHECKSUM_COMPLETE sum. */
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
+                                                            MPLS_HLEN, 0));
+
+       hdr = eth_hdr(skb);
+       hdr->h_proto = mpls->mpls_ethertype;
+       /* Record the pre-MPLS protocol only while no inner protocol is set,
+        * so a later push does not overwrite the value saved by the first.
+        */
+       if (!ovs_skb_get_inner_protocol(skb))
+               ovs_skb_set_inner_protocol(skb, skb->protocol);
+       skb->protocol = mpls->mpls_ethertype;
+       return 0;
+}
+
+/* Remove the MPLS label stack entry that directly follows the L2 header.
+ *
+ * @skb:       packet to modify.
+ * @ethertype: value written to the L2 ethertype field, and to
+ *             skb->protocol when that currently passes eth_p_mpls().
+ *
+ * Returns 0 on success or the error reported by make_writable().
+ */
+static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
+{
+       struct ethhdr *hdr;
+       int err;
+
+       err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+       if (unlikely(err))
+               return err;
+
+       /* Take the departing entry out of a CHECKSUM_COMPLETE sum while it
+        * is still in place at mac_header_end().
+        */
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_sub(skb->csum,
+                                    csum_partial(mac_header_end(skb),
+                                                 MPLS_HLEN, 0));
+
+       /* Move the mac_len bytes of L2 header MPLS_HLEN bytes towards the
+        * tail, overwriting the entry, then drop the vacated bytes from the
+        * front of the packet.
+        */
+       memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+               skb->mac_len);
+
+       __skb_pull(skb, MPLS_HLEN);
+       skb_reset_mac_header(skb);
+
+       /* mac_header_end() is used to locate the ethertype
+        * field correctly in the presence of VLAN tags.
+        */
+       hdr = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN);
+       hdr->h_proto = ethertype;
+       /* NOTE(review): eth_p_mpls() is declared outside this view;
+        * presumably it tests for the MPLS ethertypes -- confirm in mpls.h.
+        */
+       if (eth_p_mpls(skb->protocol))
+               skb->protocol = ethertype;
+       return 0;
+}
+
+/* Overwrite the top MPLS label stack entry of @skb with *@mpls_lse.
+ *
+ * The entry is located at mac_header_end(), i.e. directly after the
+ * skb->mac_len bytes of L2 header.  When skb->ip_summed is
+ * CHECKSUM_COMPLETE, skb->csum is adjusted for the replaced word.
+ *
+ * Returns 0 on success or the error reported by make_writable().
+ */
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+{
+       __be32 *stack;
+       int err;
+
+       err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+       if (unlikely(err))
+               return err;
+
+       /* Look the entry up only after make_writable(): it can fall back to
+        * pskb_expand_head(), and a pointer into the header taken before
+        * that call is not safe to dereference afterwards.
+        */
+       stack = (__be32 *)mac_header_end(skb);
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE) {
+               /* Remove the old word from the sum and add the new one. */
+               __be32 diff[] = { ~(*stack), *mpls_lse };
+               skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+                                         ~skb->csum);
+       }
+
+       *stack = *mpls_lse;
+
+       return 0;
+}
+
  /* remove VLAN header from packet and update csum accordingly. */
  static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
  {
@@ -71,7 +165,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
  
         vlan_set_encap_proto(skb, vhdr);
         skb->mac_header += VLAN_HLEN;
-       skb_reset_mac_len(skb);
+       /* Update mac_len for subsequent MPLS actions */
+       skb->mac_len -= VLAN_HLEN;
  
         return 0;
  }
@@ -116,6 +211,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
                 if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
                         return -ENOMEM;
  
+               /* Update mac_len for subsequent MPLS actions */
+               skb->mac_len += VLAN_HLEN;
+
                 if (skb->ip_summed == CHECKSUM_COMPLETE)
                         skb->csum = csum_add(skb->csum, csum_partial(skb->data
                                         + (2 * ETH_ALEN), VLAN_HLEN, 0));
@@ -545,6 +643,10 @@ static int execute_set_action(struct sk_buff *skb,
         case OVS_KEY_ATTR_SCTP:
                 err = set_sctp(skb, nla_data(nested_attr));
                 break;
+
+       case OVS_KEY_ATTR_MPLS:
+               err = set_mpls(skb, nla_data(nested_attr));
+               break;
         }
  
         return err;
@@ -606,6 +708,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                         execute_hash(skb, a);
                         break;
  
+               case OVS_ACTION_ATTR_PUSH_MPLS:
+                       err = push_mpls(skb, nla_data(a));
+                       break;
+
+               case OVS_ACTION_ATTR_POP_MPLS:
+                       err = pop_mpls(skb, nla_get_be16(a));
+                       break;
+
                 case OVS_ACTION_ATTR_PUSH_VLAN:
                         err = push_vlan(skb, nla_data(a));
                         if (unlikely(err)) /* skb already freed. */
@@ -701,6 +811,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, bool recirc)
                 goto out_loop;
         }
  
+       if (!recirc)
+               ovs_skb_init_inner_protocol(skb);
+
         OVS_CB(skb)->tun_info = NULL;
         error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);
  
diff --git a/datapath/datapath.c b/datapath/datapath.c

index 6f4236b..4ec908e 100644 (file)
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -382,7 +382,7 @@ static size_t key_attr_size(void)
  {
         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
          * updating this function.  */
-       BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 21);
+       BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
  
         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
@@ -586,7 +586,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
                 goto err_flow_free;
  
         err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
-                                  &flow->key, 0, &acts);
+                                  &flow->key, &acts);
         rcu_assign_pointer(flow->sf_acts, acts);
         if (err)
                 goto err_flow_free;
@@ -874,7 +874,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                 goto err_kfree_flow;
  
         error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
-                                    0, &acts);
+                                    &acts);
         if (error) {
                 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                 goto err_kfree_acts;
@@ -978,7 +978,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
                 return acts;
  
         ovs_flow_mask_key(&masked_key, key, mask);
-       error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
+       error = ovs_nla_copy_actions(a, &masked_key, &acts);
         if (error) {
                 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                 kfree(acts);
diff --git a/datapath/flow.c b/datapath/flow.c

index e90f99a..e234796 100644 (file)
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -45,6 +45,7 @@
  #include <net/ipv6.h>
  #include <net/ndisc.h>
  
+#include "mpls.h"
  #include "vlan.h"
  
  u64 ovs_flow_used_time(unsigned long flow_jiffies)
@@ -503,6 +504,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                 return -ENOMEM;
  
         skb_reset_network_header(skb);
+       skb_reset_mac_len(skb);
         __skb_push(skb, skb->data - skb_mac_header(skb));
  
         /* Network layer. */
@@ -605,6 +607,33 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                         memset(&key->ip, 0, sizeof(key->ip));
                         memset(&key->ipv4, 0, sizeof(key->ipv4));
                 }
+       } else if (eth_p_mpls(key->eth.type)) {
+               size_t stack_len = MPLS_HLEN;
+
+               /* In the presence of an MPLS label stack the end of the L2
+                * header and the beginning of the L3 header differ.
+                *
+                * Advance network_header to the beginning of the L3
+                * header. mac_len corresponds to the end of the L2 header.
+                */
+               while (1) {
+                       __be32 lse;
+
+                       error = check_header(skb, skb->mac_len + stack_len);
+                       if (unlikely(error))
+                               return 0;
+
+                       memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+
+                       if (stack_len == MPLS_HLEN)
+                               memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+
+                       skb_set_network_header(skb, skb->mac_len + stack_len);
+                       if (lse & htonl(MPLS_BOS_MASK))
+                               break;
+
+                       stack_len += MPLS_HLEN;
+               }
         } else if (key->eth.type == htons(ETH_P_IPV6)) {
                 int nh_len;             /* IPv6 Header + Extensions */
  
diff --git a/datapath/flow.h b/datapath/flow.h

index 9414869..f6afa48 100644 (file)
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -104,12 +104,17 @@ struct sw_flow_key {
                 __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
                 __be16 type;            /* Ethernet frame type. */
         } eth;
-       struct {
-               u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
-               u8     tos;             /* IP ToS. */
-               u8     ttl;             /* IP TTL/hop limit. */
-               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
-       } ip;
+       union {
+               struct {
+                       __be32 top_lse;         /* top label stack entry */
+               } mpls;
+               struct {
+                       u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
+                       u8     tos;                 /* IP ToS. */
+                       u8     ttl;                 /* IP TTL/hop limit. */
+                       u8     frag;            /* One of OVS_FRAG_TYPE_*. */
+               } ip;
+       };
         struct {
                 __be16 src;             /* TCP/UDP/SCTP source port. */
                 __be16 dst;             /* TCP/UDP/SCTP destination port. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c

index 22ad2d0..5a978f0 100644 (file)
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -20,6 +20,7 @@
  
  #include "flow.h"
  #include "datapath.h"
+#include "mpls.h"
  #include <linux/uaccess.h>
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
@@ -127,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match,
                         | (1ULL << OVS_KEY_ATTR_ICMP)
                         | (1ULL << OVS_KEY_ATTR_ICMPV6)
                         | (1ULL << OVS_KEY_ATTR_ARP)
-                       | (1ULL << OVS_KEY_ATTR_ND));
+                       | (1ULL << OVS_KEY_ATTR_ND)
+                       | (1ULL << OVS_KEY_ATTR_MPLS));
  
         /* Always allowed mask fields. */
         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
@@ -142,6 +144,13 @@ static bool match_validate(const struct sw_flow_match *match,
                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
         }
  
+
+       if (eth_p_mpls(match->key->eth.type)) {
+               key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
+               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+                       mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
+       }
+
         if (match->key->eth.type == htons(ETH_P_IP)) {
                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -259,6 +268,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
         [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
         [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
         [OVS_KEY_ATTR_TUNNEL] = -1,
+       [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
  };
  
  static bool is_all_zero(const u8 *fp, size_t size)
@@ -710,6 +720,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
                 attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
         }
  
+       if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
+               const struct ovs_key_mpls *mpls_key;
+
+               mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+               SW_FLOW_KEY_PUT(match, mpls.top_lse,
+                               mpls_key->mpls_lse, is_mask);
+
+               attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
+        }
+
         if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
                 const struct ovs_key_tcp *tcp_key;
  
@@ -1091,6 +1111,14 @@ int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
                 arp_key->arp_op = htons(output->ip.proto);
                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+       } else if (eth_p_mpls(swkey->eth.type)) {
+               struct ovs_key_mpls *mpls_key;
+
+               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+               if (!nla)
+                       goto nla_put_failure;
+               mpls_key = nla_data(nla);
+               mpls_key->mpls_lse = output->mpls.top_lse;
         }
  
         if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1295,9 +1323,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
         a->nla_len = sfa->actions_len - st_offset;
  }
  
+static int ovs_nla_copy_actions__(const struct nlattr *attr,
+                                 const struct sw_flow_key *key,
+                                 int depth, struct sw_flow_actions **sfa,
+                                 __be16 eth_type, __be16 vlan_tci);
+
  static int validate_and_copy_sample(const struct nlattr *attr,
                                     const struct sw_flow_key *key, int depth,
-                                   struct sw_flow_actions **sfa)
+                                   struct sw_flow_actions **sfa,
+                                   __be16 eth_type, __be16 vlan_tci)
  {
         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
         const struct nlattr *probability, *actions;
@@ -1334,7 +1368,8 @@ static int validate_and_copy_sample(const struct nlattr *attr,
         if (st_acts < 0)
                 return st_acts;
  
-       err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+       err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
+                                    eth_type, vlan_tci);
         if (err)
                 return err;
  
@@ -1344,10 +1379,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
         return 0;
  }
  
-static int validate_tp_port(const struct sw_flow_key *flow_key)
+static int validate_tp_port(const struct sw_flow_key *flow_key,
+                           __be16 eth_type)
  {
-       if ((flow_key->eth.type == htons(ETH_P_IP) ||
-            flow_key->eth.type == htons(ETH_P_IPV6)) &&
+       if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
             (flow_key->tp.src || flow_key->tp.dst))
                 return 0;
  
@@ -1442,7 +1477,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
  static int validate_set(const struct nlattr *a,
                         const struct sw_flow_key *flow_key,
                         struct sw_flow_actions **sfa,
-                       bool *set_tun)
+                       bool *set_tun, __be16 eth_type)
  {
         const struct nlattr *ovs_key = nla_data(a);
         int key_type = nla_type(ovs_key);
@@ -1474,7 +1509,7 @@ static int validate_set(const struct nlattr *a,
                 break;
  
         case OVS_KEY_ATTR_IPV4:
-               if (flow_key->eth.type != htons(ETH_P_IP))
+               if (eth_type != htons(ETH_P_IP))
                         return -EINVAL;
  
                 if (!flow_key->ip.proto)
@@ -1490,7 +1525,7 @@ static int validate_set(const struct nlattr *a,
                 break;
  
         case OVS_KEY_ATTR_IPV6:
-               if (flow_key->eth.type != htons(ETH_P_IPV6))
+               if (eth_type != htons(ETH_P_IPV6))
                         return -EINVAL;
  
                 if (!flow_key->ip.proto)
@@ -1512,19 +1547,24 @@ static int validate_set(const struct nlattr *a,
                 if (flow_key->ip.proto != IPPROTO_TCP)
                         return -EINVAL;
  
-               return validate_tp_port(flow_key);
+               return validate_tp_port(flow_key, eth_type);
  
         case OVS_KEY_ATTR_UDP:
                 if (flow_key->ip.proto != IPPROTO_UDP)
                         return -EINVAL;
  
-               return validate_tp_port(flow_key);
+               return validate_tp_port(flow_key, eth_type);
+
+       case OVS_KEY_ATTR_MPLS:
+               if (!eth_p_mpls(eth_type))
+                       return -EINVAL;
+               break;
  
         case OVS_KEY_ATTR_SCTP:
                 if (flow_key->ip.proto != IPPROTO_SCTP)
                         return -EINVAL;
  
-               return validate_tp_port(flow_key);
+               return validate_tp_port(flow_key, eth_type);
  
         default:
                 return -EINVAL;
@@ -1568,10 +1608,10 @@ static int copy_action(const struct nlattr *from,
         return 0;
  }
  
-int ovs_nla_copy_actions(const struct nlattr *attr,
-                        const struct sw_flow_key *key,
-                        int depth,
-                        struct sw_flow_actions **sfa)
+static int ovs_nla_copy_actions__(const struct nlattr *attr,
+                                 const struct sw_flow_key *key,
+                                 int depth, struct sw_flow_actions **sfa,
+                                 __be16 eth_type, __be16 vlan_tci)
  {
         const struct nlattr *a;
         int rem, err;
@@ -1585,6 +1625,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+                       [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+                       [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
                         [OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1638,19 +1680,63 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
                                 return -EINVAL;
                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                 return -EINVAL;
+                       vlan_tci = vlan->vlan_tci;
                         break;
  
                 case OVS_ACTION_ATTR_RECIRC:
                         break;
  
+               case OVS_ACTION_ATTR_PUSH_MPLS: {
+                       const struct ovs_action_push_mpls *mpls = nla_data(a);
+
+                       if (!eth_p_mpls(mpls->mpls_ethertype))
+                               return -EINVAL;
+                       /* Prohibit push MPLS other than to a white list
+                        * for packets that have a known tag order.
+                        *
+                        * vlan_tci indicates that the packet at one
+                        * point had a VLAN. It may have been subsequently
+                        * removed using pop VLAN so this rule is stricter
+                        * than necessary. This is because it is not
+                        * possible to know if a VLAN is still present
+                        * after a pop VLAN action. */
+                       if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+                           (eth_type != htons(ETH_P_IP) &&
+                            eth_type != htons(ETH_P_IPV6) &&
+                            eth_type != htons(ETH_P_ARP) &&
+                            eth_type != htons(ETH_P_RARP) &&
+                            !eth_p_mpls(eth_type)))
+                               return -EINVAL;
+                       eth_type = mpls->mpls_ethertype;
+                       break;
+               }
+
+               case OVS_ACTION_ATTR_POP_MPLS:
+                       if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+                           !eth_p_mpls(eth_type))
+                               return -EINVAL;
+
+                       /* Disallow subsequent L2.5+ set and mpls_pop actions
+                        * as there is no check here to ensure that the new
+                        * eth_type is valid and thus set actions could
+                        * write off the end of the packet or otherwise
+                        * corrupt it.
+                        *
+                        * Support for these actions is planned using packet
+                        * recirculation.
+                        */
+                       eth_type = htons(0);
+                       break;
+
                 case OVS_ACTION_ATTR_SET:
-                       err = validate_set(a, key, sfa, &skip_copy);
+                       err = validate_set(a, key, sfa, &skip_copy, eth_type);
                         if (err)
                                 return err;
                         break;
  
                 case OVS_ACTION_ATTR_SAMPLE:
-                       err = validate_and_copy_sample(a, key, depth, sfa);
+                       err = validate_and_copy_sample(a, key, depth, sfa,
+                                                      eth_type, vlan_tci);
                         if (err)
                                 return err;
                         skip_copy = true;
@@ -1672,6 +1758,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
         return 0;
  }
  
+/* Public entry point for validating and copying a netlink action list.
+ *
+ * Starts ovs_nla_copy_actions__() at nesting depth 0, seeding the tracked
+ * ethertype and VLAN TCI from the flow key.
+ */
+int ovs_nla_copy_actions(const struct nlattr *attr,
+                        const struct sw_flow_key *key,
+                        struct sw_flow_actions **sfa)
+{
+       const __be16 initial_eth_type = key->eth.type;
+       const __be16 initial_vlan_tci = key->eth.tci;
+
+       return ovs_nla_copy_actions__(attr, key, 0, sfa, initial_eth_type,
+                                     initial_vlan_tci);
+}
+
  static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
  {
         const struct nlattr *a;
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h

index 42de456..0c20e86 100644 (file)
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
                       const struct nlattr *);
  
  int ovs_nla_copy_actions(const struct nlattr *attr,
-                        const struct sw_flow_key *key, int depth,
+                        const struct sw_flow_key *key,
                          struct sw_flow_actions **sfa);
  int ovs_nla_put_actions(const struct nlattr *attr,
                         int len, struct sk_buff *skb);
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c

index 9ded17c..8344293 100644 (file)
--- a/datapath/linux/compat/gso.c
+++ b/datapath/linux/compat/gso.c
@@ -17,11 +17,12 @@
   */
  
  #include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
  
  #include <linux/module.h>
  #include <linux/if.h>
  #include <linux/if_tunnel.h>
+#include <linux/if_vlan.h>
  #include <linux/icmp.h>
  #include <linux/in.h>
  #include <linux/ip.h>
@@ -38,6 +39,8 @@
  #include <net/xfrm.h>
  
  #include "gso.h"
+#include "mpls.h"
+#include "vlan.h"
  
  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \
         !defined(HAVE_VLAN_BUG_WORKAROUND)
@@ -50,10 +53,11 @@ MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets");
  #define vlan_tso true
  #endif
  
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
  static bool dev_supports_vlan_tx(struct net_device *dev)
  {
-#if defined(HAVE_VLAN_BUG_WORKAROUND)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+       return true;
+#elif defined(HAVE_VLAN_BUG_WORKAROUND)
         return dev->features & NETIF_F_HW_VLAN_TX;
  #else
         /* Assume that the driver is buggy. */
@@ -61,24 +65,70 @@ static bool dev_supports_vlan_tx(struct net_device *dev)
  #endif
  }
  
+/* Strictly this is not needed and will be optimised out
+ * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0).
+ * It is here to make things explicit should the compatibility
+ * code be extended in some way prior to extending its life-span
+ * beyond v3.16.
+ */
+static bool supports_mpls_gso(void)
+{
+/* Although MPLS GSO first appeared in v3.11, mpls_features did not
+ * activate it correctly before v3.16. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+       return false;
+#else
+       return true;
+#endif
+}
+
  int rpl_dev_queue_xmit(struct sk_buff *skb)
  {
  #undef dev_queue_xmit
         int err = -ENOMEM;
+       bool vlan, mpls;
+
+       vlan = mpls = false;
+
+       /* Avoid traversing any VLAN tags that are present to determine if
+        * the ethtype is MPLS. Instead compare the mac_len (end of L2) and
+        * skb_network_offset() (beginning of L3) whose inequality will
+        * indicate the presence of an MPLS label stack. */
+       if (skb->mac_len != skb_network_offset(skb) && !supports_mpls_gso())
+               mpls = true;
+
+       if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev))
+               vlan = true;
  
-       if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
+       if (vlan || mpls) {
                 int features;
  
                 features = netif_skb_features(skb);
  
-               if (!vlan_tso)
-                       features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
-                                     NETIF_F_UFO | NETIF_F_FSO);
+               if (vlan) {
+                       if (!vlan_tso)
+                               features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
+                                             NETIF_F_UFO | NETIF_F_FSO);
  
-               skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
-               if (unlikely(!skb))
-                       return err;
-               vlan_set_tci(skb, 0);
+                       skb = __vlan_put_tag(skb, skb->vlan_proto,
+                                            vlan_tx_tag_get(skb));
+                       if (unlikely(!skb))
+                               return err;
+                       vlan_set_tci(skb, 0);
+               }
+
+               /* As of v3.11 the kernel provides an mpls_features field in
+                * struct net_device which allows devices to advertise which
+                * features it supports for MPLS. This value defaults to
+                * NETIF_F_SG and, as of v3.16, is used to enable MPLS GSO.
+                *
+                * This compatibility code is intended for kernels older
+                * than v3.16 that do not support MPLS GSO and do not
+                * use mpls_features. Thus this code uses NETIF_F_SG
+                * directly in place of mpls_features.
+                */
+               if (mpls)
+                       features &= NETIF_F_SG;
  
                 if (netif_needs_gso(skb, features)) {
                         struct sk_buff *nskb;
@@ -117,7 +167,6 @@ drop:
         kfree_skb(skb);
         return err;
  }
-#endif /* kernel version < 2.6.37 */
  
  static __be16 __skb_network_protocol(struct sk_buff *skb)
  {
@@ -135,9 +184,22 @@ static __be16 __skb_network_protocol(struct sk_buff *skb)
                 vlan_depth += VLAN_HLEN;
         }
  
+       if (eth_p_mpls(type))
+               type = ovs_skb_get_inner_protocol(skb);
+
         return type;
  }
  
+/* Run the per-skb fix_segment callback stored in the OVS GSO control
+ * block, if one is set.  Compiles to a no-op from v3.12 onwards.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,0)
+static void tnl_fix_segment(struct sk_buff *skb) { }
+#else
+static void tnl_fix_segment(struct sk_buff *skb)
+{
+       void (*fixup)(struct sk_buff *) = OVS_GSO_CB(skb)->fix_segment;
+
+       if (fixup)
+               fixup(skb);
+}
+#endif
+
  static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
                                            netdev_features_t features,
                                            bool tx_path)
@@ -178,8 +240,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
  
                 memcpy(ip_hdr(skb), iph, pkt_hlen);
                 memcpy(skb->cb, cb, sizeof(cb));
-               if (OVS_GSO_CB(skb)->fix_segment)
-                       OVS_GSO_CB(skb)->fix_segment(skb);
+               tnl_fix_segment(skb);
  
                 skb->protocol = proto;
                 skb = skb->next;
@@ -232,4 +293,4 @@ int rpl_ip_local_out(struct sk_buff *skb)
         }
         return ret;
  }
-#endif /* 3.12 */
+#endif /* 3.16 */
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h

index 3041e88..6281f29 100644 (file)
--- a/datapath/linux/compat/gso.h
+++ b/datapath/linux/compat/gso.h
@@ -4,6 +4,7 @@
  #include <linux/version.h>
  #if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
  
+#include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <net/protocol.h>
  
@@ -11,9 +12,11 @@
  
  struct ovs_gso_cb {
         struct ovs_skb_cb dp_cb;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
+       __be16          inner_protocol;
+#endif
         u16             inner_network_header;   /* Offset from
                                                  * inner_mac_header */
-       /* 16bit hole */
         sk_buff_data_t  inner_mac_header;       /* Offset from skb->head */
         void (*fix_segment)(struct sk_buff *);
  };
@@ -51,12 +54,6 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb)
         return skb_inner_network_header(skb) - skb->data;
  }
  
-#define skb_inner_mac_offset rpl_skb_inner_mac_offset
-static inline int skb_inner_mac_offset(const struct sk_buff *skb)
-{
-       return skb_inner_mac_header(skb) - skb->data;
-}
-
  #define skb_reset_inner_headers rpl_skb_reset_inner_headers
  static inline void skb_reset_inner_headers(struct sk_buff *skb)
  {
@@ -68,8 +65,52 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb)
         OVS_GSO_CB(skb)->fix_segment = NULL;
  }
  
+#endif /* 3.12 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
  #define ip_local_out rpl_ip_local_out
  int ip_local_out(struct sk_buff *skb);
  
-#endif /* 3.12 */
+#define skb_inner_mac_offset rpl_skb_inner_mac_offset
+static inline int skb_inner_mac_offset(const struct sk_buff *skb)
+{
+       return skb_inner_mac_header(skb) - skb->data;
+}
+#endif /* 3.16 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
+static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) {
+       OVS_GSO_CB(skb)->inner_protocol = htons(0);
+}
+
+static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
+                                             __be16 ethertype) {
+       OVS_GSO_CB(skb)->inner_protocol = ethertype;
+}
+
+static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
+{
+       return OVS_GSO_CB(skb)->inner_protocol;
+}
+
+#else
+
+static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) {
+       /* Nothing to do. The inner_protocol is either zero or
+        * has been set to a value by another user.
+        * Either way it may be considered initialised.
+        */
+}
+
+static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
+                                             __be16 ethertype)
+{
+       skb->inner_protocol = ethertype;
+}
+
+static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
+{
+       return skb->inner_protocol;
+}
+#endif /* 3.11 */
  #endif
diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h

index d726390..886c2f8 100644 (file)
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ b/datapath/linux/compat/include/linux/netdevice.h
@@ -64,11 +64,13 @@ static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifind
  typedef u32 netdev_features_t;
  #endif
  
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
  #define skb_gso_segment rpl_skb_gso_segment
  struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
                                      netdev_features_t features);
+#endif
  
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
  #define netif_skb_features rpl_netif_skb_features
  netdev_features_t rpl_netif_skb_features(struct sk_buff *skb);
  
@@ -113,7 +115,7 @@ static inline struct net_device *netdev_master_upper_dev_get(struct net_device *
  }
  #endif
  
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
  #define dev_queue_xmit rpl_dev_queue_xmit
  int dev_queue_xmit(struct sk_buff *skb);
  #endif
diff --git a/datapath/linux/compat/netdevice.c b/datapath/linux/compat/netdevice.c

index 1dc5abf..72bdec5 100644 (file)
--- a/datapath/linux/compat/netdevice.c
+++ b/datapath/linux/compat/netdevice.c
@@ -1,6 +1,9 @@
  #include <linux/netdevice.h>
  #include <linux/if_vlan.h>
  
+#include "mpls.h"
+#include "gso.h"
+
  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
  #ifndef HAVE_CAN_CHECKSUM_PROTOCOL
  static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
@@ -69,7 +72,9 @@ netdev_features_t rpl_netif_skb_features(struct sk_buff *skb)
                 return harmonize_features(skb, protocol, features);
         }
  }
+#endif /* kernel version < 2.6.38 */
  
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
  struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
                                     netdev_features_t features)
  {
@@ -89,6 +94,9 @@ struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
                 vlan_depth += VLAN_HLEN;
         }
  
+       if (eth_p_mpls(type))
+               type = ovs_skb_get_inner_protocol(skb);
+
         /* this hack needed to get regular skb_gso_segment() */
  #undef skb_gso_segment
         skb_proto = skb->protocol;
@@ -98,4 +106,4 @@ struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
         skb->protocol = skb_proto;
         return skb_gso;
  }
-#endif /* kernel version < 2.6.38 */
+#endif /* kernel version < 3.16.0 */
diff --git a/datapath/mpls.h b/datapath/mpls.h

new file mode 100644 (file)

index 0000000..7eab104
--- /dev/null
+++ b/datapath/mpls.h
@@ -0,0 +1,15 @@
+#ifndef MPLS_H
+#define MPLS_H 1
+
+#include <linux/if_ether.h>
+
+#define MPLS_BOS_MASK  0x00000100
+#define MPLS_HLEN 4
+
+static inline bool eth_p_mpls(__be16 eth_type)
+{
+       return eth_type == htons(ETH_P_MPLS_UC) ||
+               eth_type == htons(ETH_P_MPLS_MC);
+}
+
+#endif
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h

index 4f84045..bf27dcb 100644 (file)
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -319,15 +319,14 @@ enum ovs_key_attr {
         OVS_KEY_ATTR_DP_HASH,   /* u32 hash value. Value 0 indicates the hash
                                    is not computed by the datapath. */
         OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
+       OVS_KEY_ATTR_MPLS,      /* array of struct ovs_key_mpls.
+                                * The implementation may restrict
+                                * the accepted length of the array. */
+
  #ifdef __KERNEL__
         /* Only used within kernel data path. */
         OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ovs_tunnel_info */
  #endif
-       /* Experimental */
-
-       OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
-                                * The implementation may restrict
-                                * the accepted length of the array. */
         __OVS_KEY_ATTR_MAX
  };
author	Simon Horman <horms@verge.net.au>
	Tue, 24 Jun 2014 11:56:57 +0000 (20:56 +0900)
committer	Jesse Gross <jesse@nicira.com>
	Tue, 24 Jun 2014 23:02:02 +0000 (16:02 -0700)
OPENFLOW-1.1+		patch | blob | history
datapath/Modules.mk		patch | blob | history
datapath/actions.c		patch | blob | history
datapath/datapath.c		patch | blob | history
datapath/flow.c		patch | blob | history
datapath/flow.h		patch | blob | history
datapath/flow_netlink.c		patch | blob | history
datapath/flow_netlink.h		patch | blob | history
datapath/linux/compat/gso.c		patch | blob | history
datapath/linux/compat/gso.h		patch | blob | history
datapath/linux/compat/include/linux/netdevice.h		patch | blob | history
datapath/linux/compat/netdevice.c		patch | blob | history
datapath/mpls.h	[new file with mode: 0644]	patch | blob
include/linux/openvswitch.h		patch | blob | history