/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
#include "flow.h"
#include "datapath.h"
+#include "mpls.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <net/geneve.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
} \
} while (0)
+/* Copy 'len' bytes from 'value_p' to byte offset 'offset' inside the flow
+ * key, or inside the mask's key when 'is_mask' is true. update_range__()
+ * is always invoked first with the same offset and length; the mask copy
+ * itself is skipped when (match)->mask is NULL.
+ */
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offset, len, is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
+ } else { \
+ memcpy((u8 *)(match)->key + offset, value_p, len); \
+ } \
+ } while (0)
+
+/* Field-name front end for SW_FLOW_KEY_MEMCPY_OFFSET(): the destination
+ * offset is offsetof(struct sw_flow_key, field).
+ */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+ value_p, len, is_mask)
+
+/* Fill the whole of 'field' with the byte 'value', in the flow key or, when
+ * 'is_mask' is true, in the mask's key (skipped if (match)->mask is NULL).
+ * The touched range is reported to update_range__() in both cases, using
+ * the field's offset and sizeof as the extent.
+ */
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
do { \
update_range__(match, offsetof(struct sw_flow_key, field), \
- len, is_mask); \
+ sizeof((match)->key->field), is_mask); \
if (is_mask) { \
if ((match)->mask) \
- memcpy(&(match)->mask->key.field, value_p, len);\
+ memset((u8 *)&(match)->mask->key.field, value,\
+ sizeof((match)->mask->key.field)); \
} else { \
- memcpy(&(match)->key->field, value_p, len); \
+ memset((u8 *)&(match)->key->field, value, \
+ sizeof((match)->key->field)); \
} \
} while (0)
-static u16 range_n_bytes(const struct sw_flow_key_range *range)
-{
- return range->end - range->start;
-}
-
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs)
{
| (1ULL << OVS_KEY_ATTR_ICMP)
| (1ULL << OVS_KEY_ATTR_ICMPV6)
| (1ULL << OVS_KEY_ATTR_ARP)
- | (1ULL << OVS_KEY_ATTR_ND));
+ | (1ULL << OVS_KEY_ATTR_ND)
+ | (1ULL << OVS_KEY_ATTR_MPLS));
/* Always allowed mask fields. */
mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
}
+
+ if (eth_p_mpls(match->key->eth.type)) {
+ key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
+ }
+
if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
if (match->mask && (match->mask->key.ip.proto == 0xff))
mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
- if (match->key->ipv6.tp.src ==
+ if (match->key->tp.src ==
htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1ULL << OVS_KEY_ATTR_ND;
- if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ if (match->mask && (match->mask->key.tp.src == htons(0xff)))
mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
}
}
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+ [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
+ [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
+ [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
};
static bool is_all_zero(const u8 *fp, size_t size)
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ if (ovs_tunnel_key_lens[type] != nla_len(a) &&
+ ovs_tunnel_key_lens[type] != -1) {
OVS_NLERR("IPv4 tunnel attribute type has unexpected "
" length (type=%d, length=%d, expected=%d).\n",
type, nla_len(a), ovs_tunnel_key_lens[type]);
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_flags |= TUNNEL_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_OAM:
+ tun_flags |= TUNNEL_OAM;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR("Geneve option length exceeds "
+ "maximum size (len %d, max %zu).\n",
+ nla_len(a),
+ sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR("Geneve option length is not "
+ "a multiple of 4 (len %d).\n",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR("Geneve option key length (%d)"
+ " is different from mask length (%d).",
+ match->key->tun_opts_len, nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+ true);
+ }
+
+ SW_FLOW_KEY_MEMCPY_OFFSET(match,
+ (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+ nla_len(a)),
+ nla_data(a), nla_len(a), is_mask);
+ break;
default:
+ OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", type);
return -EINVAL;
}
}
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output)
+ const struct ovs_key_ipv4_tunnel *output,
+ const struct geneve_opt *tun_opts,
+ int swkey_tun_opts_len)
{
struct nlattr *nla;
if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
+ if ((output->tun_flags & TUNNEL_OAM) &&
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
+ return -EMSGSIZE;
+ if (tun_opts &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ swkey_tun_opts_len, tun_opts))
+ return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0;
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
const struct nlattr **a, bool is_mask)
{
+ if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
+ u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
+
+ SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
+ }
+
+ if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
+ u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
+
+ SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
+ }
+
if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
SW_FLOW_KEY_PUT(match, phy.priority,
nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- if (is_mask)
+ if (is_mask) {
in_port = 0xffffffff; /* Always exact match in_port. */
- else if (in_port >= DP_MAX_PORTS)
+ } else if (in_port >= DP_MAX_PORTS) {
+ OVS_NLERR("Input port (%d) exceeds maximum allowable (%d).\n",
+ in_port, DP_MAX_PORTS);
return -EINVAL;
+ }
SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
const struct nlattr **a, bool is_mask)
{
int err;
- u64 orig_attrs = attrs;
err = metadata_from_nlattrs(match, &attrs, a, is_mask);
if (err)
attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
+ const struct ovs_key_mpls *mpls_key;
+
+ mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+ SW_FLOW_KEY_PUT(match, mpls.top_lse,
+ mpls_key->mpls_lse, is_mask);
+
+ attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
+ }
+
if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
const struct ovs_key_tcp *tcp_key;
tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- tcp_key->tcp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- tcp_key->tcp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
}
if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
- if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.flags,
+ nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+ is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
}
const struct ovs_key_udp *udp_key;
udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- udp_key->udp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- udp_key->udp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
}
const struct ovs_key_sctp *sctp_key;
sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
- if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- sctp_key->sctp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- sctp_key->sctp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
}
const struct ovs_key_icmp *icmp_key;
icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmp_key->icmp_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmp_key->icmp_code), is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
}
const struct ovs_key_icmpv6 *icmpv6_key;
icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmpv6_key->icmpv6_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmpv6_key->icmpv6_code), is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
}
attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
}
- if (attrs != 0)
+ if (attrs != 0) {
+ OVS_NLERR("Unknown key attributes (%llx).\n",
+ (unsigned long long)attrs);
return -EINVAL;
+ }
return 0;
}
-static void sw_flow_mask_set(struct sw_flow_mask *mask,
- struct sw_flow_key_range *range, u8 val)
+/* Overwrite the payload of every attribute nested inside 'attr' with the
+ * byte 'val'. When 'is_attr_mask_key' is true, an attribute whose
+ * ovs_key_lens[] entry is -1 is treated as a nested container and is
+ * descended into exactly one level (the recursive call passes false)
+ * instead of being overwritten as a whole.
+ * NOTE(review): ovs_key_lens[] is indexed by nla_type() without a bounds
+ * check here; this relies on the stream having been validated beforehand.
+ */
+static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
{
- u8 *m = (u8 *)&mask->key + range->start;
+ struct nlattr *nla;
+ int rem;
+
+ /* The nlattr stream should already have been validated */
+ nla_for_each_nested(nla, attr, rem) {
+ /* We assume that ovs_key_lens[type] == -1 means that type is a
+ * nested attribute
+ */
+ if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
+ nlattr_set(nla, val, false);
+ else
+ memset(nla_data(nla), val, nla_len(nla));
+ }
+}
- mask->range = *range;
- memset(m, val, range_n_bytes(range));
+/* Top-level entry point: runs nlattr_set() on 'attr' with the
+ * nested-attribute handling enabled.
+ */
+static void mask_set_nlattr(struct nlattr *attr, u8 val)
+{
+ nlattr_set(attr, val, true);
}
/**
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
const struct nlattr *encap;
+ struct nlattr *newmask = NULL;
u64 key_attrs = 0;
u64 mask_attrs = 0;
bool encap_valid = false;
if (err)
return err;
+ if (match->mask && !mask) {
+ /* Create an exact match mask. We need to set to 0xff all the
+ * 'match->mask' fields that have been touched in 'match->key'.
+ * We cannot simply memset 'match->mask', because padding bytes
+ * and fields not specified in 'match->key' should be left to 0.
+ * Instead, we use a stream of netlink attributes, copied from
+ * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care
+ * of filling 'match->mask' appropriately.
+ */
+ newmask = kmemdup(key, nla_total_size(nla_len(key)),
+ GFP_KERNEL);
+ if (!newmask)
+ return -ENOMEM;
+
+ mask_set_nlattr(newmask, 0xff);
+
+ /* The userspace does not send tunnel attributes that are 0,
+ * but we should not wildcard them nonetheless. */
+ if (match->key->tun_key.ipv4_dst)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true);
+
+ mask = newmask;
+ }
+
if (mask) {
err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
if (err)
- return err;
+ goto free_newmask;
- if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
+ if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
__be16 eth_type = 0;
__be16 tci = 0;
if (!encap_valid) {
OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto free_newmask;
}
mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
encap = a[OVS_KEY_ATTR_ENCAP];
err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+ if (err)
+ goto free_newmask;
} else {
OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
ntohs(eth_type));
- return -EINVAL;
+ err = -EINVAL;
+ goto free_newmask;
}
if (a[OVS_KEY_ATTR_VLAN])
if (!(tci & htons(VLAN_TAG_PRESENT))) {
OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
- return -EINVAL;
+ err = -EINVAL;
+ goto free_newmask;
}
}
err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
if (err)
- return err;
- } else {
- /* Populate exact match flow's key mask. */
- if (match->mask)
- sw_flow_mask_set(match->mask, &match->range, 0xff);
+ goto free_newmask;
}
if (!match_validate(match, key_attrs, mask_attrs))
- return -EINVAL;
+ err = -EINVAL;
- return 0;
+free_newmask:
+ kfree(newmask);
+ return err;
}
/**
flow->key.phy.in_port = DP_MAX_PORTS;
flow->key.phy.priority = 0;
flow->key.phy.skb_mark = 0;
+ flow->key.ovs_flow_hash = 0;
+ flow->key.recirc_id = 0;
memset(tun_key, 0, sizeof(flow->key.tun_key));
err = parse_flow_nlattrs(attr, a, &attrs);
return 0;
}
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
bool is_mask = (swkey != output);
- if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask) &&
- ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure;
+ if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+ goto nla_put_failure;
+
+ if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ const struct geneve_opt *opts = NULL;
+
+ if (!is_mask) {
+ struct vport *in_port;
+
+ in_port = ovs_vport_ovsl_rcu(dp, swkey->phy.in_port);
+ if (in_port->ops->type == OVS_VPORT_TYPE_GENEVE)
+ opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+ } else {
+ if (output->tun_opts_len)
+ opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+ }
+
+ if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len))
+ goto nla_put_failure;
+ }
+
if (swkey->phy.in_port == DP_MAX_PORTS) {
if (is_mask && (output->phy.in_port == 0xffff))
if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
arp_key->arp_op = htons(output->ip.proto);
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+ } else if (eth_p_mpls(swkey->eth.type)) {
+ struct ovs_key_mpls *mpls_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ if (!nla)
+ goto nla_put_failure;
+ mpls_key = nla_data(nla);
+ mpls_key->mpls_lse = output->mpls.top_lse;
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
if (!nla)
goto nla_put_failure;
tcp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = output->ipv4.tp.src;
- tcp_key->tcp_dst = output->ipv4.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv4.tp.flags))
- goto nla_put_failure;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = output->ipv6.tp.src;
- tcp_key->tcp_dst = output->ipv6.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv6.tp.flags))
- goto nla_put_failure;
- }
+ tcp_key->tcp_src = output->tp.src;
+ tcp_key->tcp_dst = output->tp.dst;
+ if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+ output->tp.flags))
+ goto nla_put_failure;
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
if (!nla)
goto nla_put_failure;
udp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = output->ipv4.tp.src;
- udp_key->udp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = output->ipv6.tp.src;
- udp_key->udp_dst = output->ipv6.tp.dst;
- }
+ udp_key->udp_src = output->tp.src;
+ udp_key->udp_dst = output->tp.dst;
} else if (swkey->ip.proto == IPPROTO_SCTP) {
struct ovs_key_sctp *sctp_key;
if (!nla)
goto nla_put_failure;
sctp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- sctp_key->sctp_src = swkey->ipv4.tp.src;
- sctp_key->sctp_dst = swkey->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- sctp_key->sctp_src = swkey->ipv6.tp.src;
- sctp_key->sctp_dst = swkey->ipv6.tp.dst;
- }
+ sctp_key->sctp_src = output->tp.src;
+ sctp_key->sctp_dst = output->tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
struct ovs_key_icmp *icmp_key;
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+ icmp_key->icmp_type = ntohs(output->tp.src);
+ icmp_key->icmp_code = ntohs(output->tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+ icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE)
+ if (size > MAX_ACTIONS_BUFSIZE) {
+ OVS_NLERR("Flow action size (%u bytes) exceeds maximum "
+ "(%u bytes)\n", size, MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EINVAL);
+ }
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+/* Append one attribute of type 'attrtype' with a 'len'-byte payload to *sfa.
+ * The payload is copied from 'data'. 'data' may be NULL, in which case
+ * nothing is copied and the caller is expected to fill the payload in
+ * through the returned attribute. The padding after the payload is zeroed.
+ * Returns the new attribute, or the error pointer handed back by
+ * reserve_sfa_size().
+ */
+static struct nlattr *__add_action(struct sw_flow_actions **sfa, int attrtype,
+ void *data, int len)
{
struct nlattr *a;
a = reserve_sfa_size(sfa, nla_attr_size(len));
if (IS_ERR(a))
- return PTR_ERR(a);
+ return a;
a->nla_type = attrtype;
a->nla_len = nla_attr_size(len);
- memcpy(nla_data(a), data, len);
+ /* Callers that build the payload in place pass data == NULL. */
+ if (data)
+ memcpy(nla_data(a), data, len);
memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+ return a;
+}
+
+/* Same as __add_action(), but reports only 0 on success or the negative
+ * error code extracted from the error pointer.
+ */
+static int add_action(struct sw_flow_actions **sfa, int attrtype,
+ void *data, int len)
+{
+ struct nlattr *a;
+
+ a = __add_action(sfa, attrtype, data, len);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
return 0;
}
a->nla_len = sfa->actions_len - st_offset;
}
+static int ovs_nla_copy_actions__(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci);
+
static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
if (st_acts < 0)
return st_acts;
- err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+ err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
+ eth_type, vlan_tci);
if (err)
return err;
return 0;
}
-static int validate_tp_port(const struct sw_flow_key *flow_key)
+/* Return 0 when 'eth_type' is IPv4 or IPv6 and the flow key carries a
+ * non-zero transport source or destination port; return -EINVAL otherwise.
+ * The Ethernet type is taken from the 'eth_type' argument rather than from
+ * 'flow_key'.
+ */
+static int validate_tp_port(const struct sw_flow_key *flow_key,
+ __be16 eth_type)
{
- if (flow_key->eth.type == htons(ETH_P_IP)) {
- if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
- return 0;
- } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
- if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
- return 0;
- }
+ if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
+ (flow_key->tp.src || flow_key->tp.dst))
+ return 0;
return -EINVAL;
}
{
struct sw_flow_match match;
struct sw_flow_key key;
+ struct ovs_tunnel_info *tun_info;
+ struct nlattr *a;
int err, start;
ovs_match_init(&match, &key, NULL);
if (err)
return err;
+ if (key.tun_opts_len) {
+ struct geneve_opt *option = GENEVE_OPTS(&key,
+ key.tun_opts_len);
+ int opts_len = key.tun_opts_len;
+ bool crit_opt = false;
+
+ while (opts_len > 0) {
+ int len;
+
+ if (opts_len < sizeof(*option))
+ return -EINVAL;
+
+ len = sizeof(*option) + option->length * 4;
+ if (len > opts_len)
+ return -EINVAL;
+
+ crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+ option = (struct geneve_opt *)((u8 *)option + len);
+ opts_len -= len;
+ };
+
+ key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ };
+
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
if (start < 0)
return start;
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
- sizeof(match.key->tun_key));
+ a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
+ sizeof(*tun_info) + key.tun_opts_len);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ tun_info = nla_data(a);
+ tun_info->tunnel = key.tun_key;
+ tun_info->options_len = key.tun_opts_len;
+
+ if (tun_info->options_len) {
+ /* We need to store the options in the action itself since
+ * everything else will go away after flow setup. We can append
+ * it to tun_info and then point there.
+ */
+ tun_info->options = (struct geneve_opt *)(tun_info + 1);
+ memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len),
+ key.tun_opts_len);
+ } else {
+ tun_info->options = NULL;
+ }
+
add_nested_action_end(*sfa, start);
return err;
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun)
+ bool *set_tun, __be16 eth_type)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
break;
case OVS_KEY_ATTR_IPV4:
- if (flow_key->eth.type != htons(ETH_P_IP))
+ if (eth_type != htons(ETH_P_IP))
return -EINVAL;
if (!flow_key->ip.proto)
break;
case OVS_KEY_ATTR_IPV6:
- if (flow_key->eth.type != htons(ETH_P_IPV6))
+ if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
if (!flow_key->ip.proto)
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
+
+ case OVS_KEY_ATTR_MPLS:
+ if (!eth_p_mpls(eth_type))
+ return -EINVAL;
+ break;
case OVS_KEY_ATTR_SCTP:
if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
default:
return -EINVAL;
return 0;
}
-int ovs_nla_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key,
- int depth,
- struct sw_flow_actions **sfa)
+static int ovs_nla_copy_actions__(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *a;
int rem, err;
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+ [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+ [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
- [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+ [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_HASH: {
+ const struct ovs_action_hash *act_hash = nla_data(a);
+
+ switch (act_hash->hash_alg) {
+ case OVS_HASH_ALG_L4:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ break;
+ }
case OVS_ACTION_ATTR_POP_VLAN:
+ vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL;
+ vlan_tci = vlan->vlan_tci;
+ break;
+
+ case OVS_ACTION_ATTR_RECIRC:
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ const struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+ /* Prohibit push MPLS other than to a white list
+ * for packets that have a known tag order.
+ */
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_MPLS:
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ !eth_p_mpls(eth_type))
+ return -EINVAL;
+
+ /* Disallow subsequent L2.5+ set and mpls_pop actions
+ * as there is no check here to ensure that the new
+ * eth_type is valid and thus set actions could
+ * write off the end of the packet or otherwise
+ * corrupt it.
+ *
+ * Support for these actions is planned using packet
+ * recirculation.
+ */
+ eth_type = htons(0);
break;
case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa, &skip_copy);
+ err = validate_set(a, key, sfa, &skip_copy, eth_type);
if (err)
return err;
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa);
+ err = validate_and_copy_sample(a, key, depth, sfa,
+ eth_type, vlan_tci);
if (err)
return err;
skip_copy = true;
return 0;
}
+/* Non-static entry point: runs ovs_nla_copy_actions__() at depth 0, seeding
+ * the Ethernet type and VLAN TCI arguments from key->eth.type and
+ * key->eth.tci. Returns whatever ovs_nla_copy_actions__() returns.
+ */
+int ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa)
+{
+ return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type,
+ key->eth.tci);
+}
+
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
const struct nlattr *a;
int err;
switch (key_type) {
- case OVS_KEY_ATTR_IPV4_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL_INFO: {
+ struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
+
start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
- nla_data(ovs_key));
+ err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
+ tun_info->options_len ?
+ tun_info->options : NULL,
+ tun_info->options_len);
if (err)
return err;
nla_nest_end(skb, start);
break;
+ }
default:
if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
return -EMSGSIZE;