/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2015 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
+#include <net/vxlan.h>
#include "datapath.h"
+#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
-#include "vport-vxlan.h"
+#include "gso.h"
struct ovs_len_tbl {
int len;
};
#define OVS_ATTR_NESTED -1
+#define OVS_ATTR_VARIABLE -2
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */
+ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
+static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
+ [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) },
+};
+
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
[OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
[OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
[OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
[OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
- [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED },
- [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED },
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
+ [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_vxlan_ext_key_lens },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
+ [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
};
+
+static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
+{
+ return expected_len == attr_len ||
+ expected_len == OVS_ATTR_NESTED ||
+ expected_len == OVS_ATTR_VARIABLE;
+}
+
static bool is_all_zero(const u8 *fp, size_t size)
{
int i;
}
expected_len = ovs_key_lens[type].len;
- if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
+ if (!check_attr_len(nla_len(nla), expected_len)) {
OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
type, nla_len(nla), expected_len);
return -EINVAL;
return 0;
}
-static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
- [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 },
-};
-
-static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
{
- struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+ struct nlattr *a;
+ int rem;
unsigned long opt_key_offset;
- struct ovs_vxlan_opts opts;
- int err;
+ struct vxlan_metadata opts;
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
- err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
- if (err < 0)
- return err;
-
memset(&opts, 0, sizeof(opts));
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ if (type > OVS_VXLAN_EXT_MAX) {
+ OVS_NLERR(log, "VXLAN extension %d out of range max %d",
+ type, OVS_VXLAN_EXT_MAX);
+ return -EINVAL;
+ }
+
+ if (!check_attr_len(nla_len(a),
+ ovs_vxlan_ext_key_lens[type].len)) {
+ OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
+ type, nla_len(a),
+ ovs_vxlan_ext_key_lens[type].len);
+ return -EINVAL;
+ }
- if (tb[OVS_VXLAN_EXT_GBP])
- opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
+ switch (type) {
+ case OVS_VXLAN_EXT_GBP:
+ opts.gbp = nla_get_u32(a);
+ break;
+ default:
+ OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
+ type);
+ return -EINVAL;
+ }
+ }
+ if (rem) {
+ OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
+ rem);
+ return -EINVAL;
+ }
if (!is_mask)
SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
- ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
+ if (!check_attr_len(nla_len(a),
+ ovs_tunnel_key_lens[type].len)) {
OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
type, nla_len(a), ovs_tunnel_key_lens[type].len);
return -EINVAL;
tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
- nla_get_be32(a), is_mask);
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
+ nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
- nla_get_be32(a), is_mask);
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
+ nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ SW_FLOW_KEY_PUT(match, tun_key.tos,
nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ SW_FLOW_KEY_PUT(match, tun_key.ttl,
nla_get_u8(a), is_mask);
ttl = true;
break;
}
if (!is_mask) {
- if (!match->key->tun_key.ipv4_dst) {
+ if (!match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len)
{
- const struct ovs_vxlan_opts *opts = tun_opts;
+ const struct vxlan_metadata *opts = tun_opts;
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
}
static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *output,
+ const struct ip_tunnel_key *output,
const void *tun_opts, int swkey_tun_opts_len)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->ipv4_src &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+ if (output->u.ipv4.src &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+ output->u.ipv4.src))
return -EMSGSIZE;
- if (output->ipv4_dst &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+ if (output->u.ipv4.dst &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+ output->u.ipv4.dst))
return -EMSGSIZE;
- if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ if (output->tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
- vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+ else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
}
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *output,
+ const struct ip_tunnel_key *output,
const void *tun_opts, int swkey_tun_opts_len)
{
struct nlattr *nla;
}
int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ovs_tunnel_info *egress_tun_info)
+ const struct ip_tunnel_info *egress_tun_info,
+ const void *egress_tun_opts)
{
- return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
- egress_tun_info->options,
+ return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
+ egress_tun_opts,
egress_tun_info->options_len);
}
-static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 *attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
return -EINVAL;
*attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
}
+
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
+ u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
+
+ if (ct_state & ~CT_SUPPORTED_MASK) {
+ OVS_NLERR(log, "ct_state flags %08x unsupported",
+ ct_state);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
+ u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
+
+ SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
+ u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
+
+ SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
+ const struct ovs_key_ct_labels *cl;
+
+ cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
+ SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
+ sizeof(*cl), is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
+ }
return 0;
}
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
int err;
- err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
+ err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
if (err)
return err;
if (is_mask) {
/* Always exact match EtherType. */
eth_type = htons(0xffff);
- } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ } else if (!eth_proto_is_802_3(eth_type)) {
OVS_NLERR(log, "EtherType %x is less than min %x",
ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
}
- if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+ if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key;
ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
ipv4_key->ipv4_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
ipv4_key->ipv4_dst, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
}
if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
}
+
if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
OVS_NLERR(log,
"Invalid IPv6 flow label value (value=%x, max=%x).",
ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
return -EINVAL;
}
+
SW_FLOW_KEY_PUT(match, ipv6.label,
ipv6_key->ipv6_label, is_mask);
SW_FLOW_KEY_PUT(match, ip.proto,
/* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) {
- if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
- nlattr_set(nla, val, tbl[nla_type(nla)].next);
- else
+ if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
+ if (tbl[nla_type(nla)].next)
+ tbl = tbl[nla_type(nla)].next;
+ nlattr_set(nla, val, tbl);
+ } else {
memset(nla_data(nla), val, nla_len(nla));
+ }
+
+ if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+ *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
}
}
* mask. In case the 'mask' is NULL, the flow is treated as exact match
* flow. Otherwise, it is treated as a wildcarded flow, except the mask
* does not include any don't care bit.
+ * @net: Used to determine per-namespace field support.
* @match: receives the extracted flow match information.
* @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence. The fields should of the packet that triggered the creation
* probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging.
*/
-int ovs_nla_get_match(struct sw_flow_match *match,
+int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
const struct nlattr *nla_key,
const struct nlattr *nla_mask,
bool log)
}
}
- err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
+ err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
if (err)
return err;
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.ipv4_dst)
+ if (match->key->tun_key.u.ipv4.dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
}
}
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
+ err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
+ log);
if (err)
goto free_newmask;
}
return err;
}
+static size_t get_ufid_len(const struct nlattr *attr, bool log)
+{
+ size_t len;
+
+ if (!attr)
+ return 0;
+
+ len = nla_len(attr);
+ if (len < 1 || len > MAX_UFID_LENGTH) {
+ OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
+ nla_len(attr), MAX_UFID_LENGTH);
+ return 0;
+ }
+
+ return len;
+}
+
+/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
+ * or false otherwise.
+ */
+bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
+ bool log)
+{
+ sfid->ufid_len = get_ufid_len(attr, log);
+ if (sfid->ufid_len)
+ memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
+
+ return sfid->ufid_len;
+}
+
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+ const struct sw_flow_key *key, bool log)
+{
+ struct sw_flow_key *new_key;
+
+ if (ovs_nla_get_ufid(sfid, ufid, log))
+ return 0;
+
+ /* If UFID was not provided, use unmasked key. */
+ new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
+ if (!new_key)
+ return -ENOMEM;
+ memcpy(new_key, key, sizeof(*key));
+ sfid->unmasked_key = new_key;
+
+ return 0;
+}
+
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
+{
+ return attr ? nla_get_u32(attr) : 0;
+}
+
/**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
* extracted from the packet itself.
*/
-int ovs_nla_get_flow_metadata(const struct nlattr *attr,
+int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
struct sw_flow_key *key,
bool log)
{
match.key = key;
memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
+ memset(&key->ct, 0, sizeof(key->ct));
key->phy.in_port = DP_MAX_PORTS;
- return metadata_from_nlattrs(&match, &attrs, a, false, log);
+ return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
}
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, struct sk_buff *skb)
+static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, bool is_mask,
+ struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
- bool is_mask = (swkey != output);
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure;
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
+ if (ovs_ct_put_key(output, skb))
+ goto nla_put_failure;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
return -EMSGSIZE;
}
+int ovs_nla_put_key(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, int attr, bool is_mask,
+ struct sk_buff *skb)
+{
+ int err;
+ struct nlattr *nla;
+
+ nla = nla_nest_start(skb, attr);
+ if (!nla)
+ return -EMSGSIZE;
+ err = __ovs_nla_put_key(swkey, output, is_mask, skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, nla);
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ if (ovs_identifier_is_ufid(&flow->id))
+ return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
+ flow->id.ufid);
+
+ return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
+ OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ return ovs_nla_put_key(&flow->key, &flow->key,
+ OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ return ovs_nla_put_key(&flow->key, &flow->mask->key,
+ OVS_FLOW_ATTR_MASK, true, skb);
+}
+
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
return sfa;
}
-/* RCU callback used by ovs_nla_free_flow_actions. */
-static void rcu_free_acts_callback(struct rcu_head *rcu)
+static void ovs_nla_free_set_action(const struct nlattr *a)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ struct ovs_tunnel_info *ovs_tun;
+
+ switch (nla_type(ovs_key)) {
+ case OVS_KEY_ATTR_TUNNEL_INFO:
+ ovs_tun = nla_data(ovs_key);
+ ovs_dst_release((struct dst_entry *)ovs_tun->tun_dst);
+ break;
+ }
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
{
- struct sw_flow_actions *sf_acts = container_of(rcu,
- struct sw_flow_actions, rcu);
+ const struct nlattr *a;
+ int rem;
+
+ if (!sf_acts)
+ return;
+
+ nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ switch (nla_type(a)) {
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
+ case OVS_ACTION_ATTR_CT:
+ ovs_ct_free_action(a);
+ break;
+ }
+ }
+
kfree(sf_acts);
}
+static void __ovs_nla_free_flow_actions(struct rcu_head *head)
+{
+ ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
+}
+
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
* The caller must hold rcu_read_lock for this to be sensible.
*/
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
- call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+ call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
+ acts->orig_len = (*sfa)->orig_len;
kfree(*sfa);
*sfa = acts;
return a;
}
-static int add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len, bool log)
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
+ int len, bool log)
{
struct nlattr *a;
int used = (*sfa)->actions_len;
int err;
- err = add_action(sfa, attrtype, NULL, 0, log);
+ err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
a->nla_len = sfa->actions_len - st_offset;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
-static int validate_and_copy_sample(const struct nlattr *attr,
+static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+ nla_data(probability), sizeof(u32), log);
if (err)
return err;
st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
- err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
return 0;
}
-static int validate_tp_port(const struct sw_flow_key *flow_key,
- __be16 eth_type)
-{
- if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
- (flow_key->tp.src || flow_key->tp.dst))
- return 0;
-
- return -EINVAL;
-}
-
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key,
struct sw_flow_mask *mask)
{
struct sw_flow_match match;
struct sw_flow_key key;
- struct ovs_tunnel_info *tun_info;
+ struct metadata_dst *tun_dst;
+ struct ip_tunnel_info *tun_info;
+ struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
- int start, opts_type;
- int err = 0;
+ int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
if (start < 0)
return start;
+ tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL);
+ if (!tun_dst)
+ return -ENOMEM;
+
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info) + key.tun_opts_len, log);
- if (IS_ERR(a))
+ sizeof(*ovs_tun), log);
+ if (IS_ERR(a)) {
+ ovs_dst_release((struct dst_entry *)tun_dst);
return PTR_ERR(a);
+ }
- tun_info = nla_data(a);
- tun_info->tunnel = key.tun_key;
- tun_info->options_len = key.tun_opts_len;
-
- if (tun_info->options_len) {
- /* We need to store the options in the action itself since
- * everything else will go away after flow setup. We can append
- * it to tun_info and then point there.
- */
- memcpy((tun_info + 1),
- TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
- tun_info->options = (tun_info + 1);
+ ovs_tun = nla_data(a);
+ ovs_tun->tun_dst = tun_dst;
- } else {
- tun_info->options = NULL;
- }
+ tun_info = &tun_dst->u.tun_info;
+ tun_info->mode = IP_TUNNEL_INFO_TX;
+ tun_info->key = key.tun_key;
+ /* We need to store the options in the action itself since
+ * everything else will go away after flow setup. We can append
+ * it to tun_info and then point there.
+ */
+ ip_tunnel_info_opts_set(tun_info,
+ TUN_METADATA_OPTS(&key, key.tun_opts_len),
+ key.tun_opts_len);
add_nested_action_end(*sfa, start);
return err;
}
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding.
+ */
+static bool validate_masked(u8 *data, int len)
+{
+ u8 *mask = data + len;
+
+ while (len--)
+ if (*data++ & ~*mask++)
+ return false;
+
+ return true;
+}
+
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun, __be16 eth_type, bool log)
+ bool *skip_copy, __be16 eth_type, bool masked, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
+ size_t key_len;
/* There can be only one key in a action */
if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
+ key_len = nla_len(ovs_key);
+ if (masked)
+ key_len /= 2;
+
if (key_type > OVS_KEY_ATTR_MAX ||
- (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
- ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
+ !check_attr_len(key_len, ovs_key_lens[key_type].len))
+ return -EINVAL;
+
+ if (masked && !validate_masked(nla_data(ovs_key), key_len))
return -EINVAL;
switch (key_type) {
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_CT_MARK:
+ case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_ETHERNET:
break;
if (eth_p_mpls(eth_type))
return -EINVAL;
- *set_tun = true;
+ if (masked)
+ return -EINVAL; /* Masked tunnel set not supported. */
+
+ *skip_copy = true;
err = validate_and_copy_set_tun(a, sfa, log);
if (err)
return err;
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv4_key = nla_data(ovs_key);
- if (ipv4_key->ipv4_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv4_key->ipv4_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv4 *mask = ipv4_key + 1;
+
+ /* Non-writeable fields. */
+ if (mask->ipv4_proto || mask->ipv4_frag)
+ return -EINVAL;
+ } else {
+ if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+ return -EINVAL;
+ if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
break;
case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv6_key = nla_data(ovs_key);
- if (ipv6_key->ipv6_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv6_key->ipv6_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+ /* Non-writeable fields. */
+ if (mask->ipv6_proto || mask->ipv6_frag)
+ return -EINVAL;
+
+ /* Invalid bits in the flow label mask? */
+ if (ntohl(mask->ipv6_label) & 0xFFF00000)
+ return -EINVAL;
+ } else {
+ if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
return -EINVAL;
break;
case OVS_KEY_ATTR_TCP:
- if (flow_key->ip.proto != IPPROTO_TCP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
case OVS_KEY_ATTR_UDP:
- if (flow_key->ip.proto != IPPROTO_UDP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
break;
case OVS_KEY_ATTR_SCTP:
- if (flow_key->ip.proto != IPPROTO_SCTP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
default:
return -EINVAL;
}
+ /* Convert non-masked non-tunnel set actions to masked set actions. */
+ if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+ int start, len = key_len * 2;
+ struct nlattr *at;
+
+ *skip_copy = true;
+
+ start = add_nested_action_start(sfa,
+ OVS_ACTION_ATTR_SET_TO_MASKED,
+ log);
+ if (start < 0)
+ return start;
+
+ at = __add_action(sfa, key_type, NULL, len, log);
+ if (IS_ERR(at))
+ return PTR_ERR(at);
+
+ memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+ memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
+ /* Clear non-writeable bits from otherwise writeable fields. */
+ if (key_type == OVS_KEY_ATTR_IPV6) {
+ struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+ mask->ipv6_label &= htonl(0x000FFFFF);
+ }
+ add_nested_action_end(*sfa, start);
+ }
+
return 0;
}
return 0;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
+ [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
- [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
+ [OVS_ACTION_ATTR_CT] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL;
-
break;
case OVS_ACTION_ATTR_HASH: {
if (!eth_p_mpls(mpls->mpls_ethertype))
return -EINVAL;
-
/* Prohibit push MPLS other than to a white list
* for packets that have a known tag order.
*/
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
- &skip_copy, eth_type, log);
+ &skip_copy, eth_type, false, log);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SET_MASKED:
+ err = validate_set(a, key, sfa,
+ &skip_copy, eth_type, true, log);
if (err)
return err;
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa,
+ err = validate_and_copy_sample(net, a, key, depth, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_copy_action(net, a, key, sfa, log);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
return 0;
}
-int ovs_nla_copy_actions(const struct nlattr *attr,
+/* 'key' must be the masked key. */
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
- err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+ (*sfa)->orig_len = nla_len(attr);
+ err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
key->eth.tci, log);
if (err)
- kfree(*sfa);
+ ovs_nla_free_flow_actions(*sfa);
return err;
}
switch (key_type) {
case OVS_KEY_ATTR_TUNNEL_INFO: {
- struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
+ struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
+ struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
+ err = ipv4_tun_to_nlattr(skb, &tun_info->key,
tun_info->options_len ?
- tun_info->options : NULL,
+ ip_tunnel_info_opts(tun_info) : NULL,
tun_info->options_len);
if (err)
return err;
return 0;
}
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+ struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ struct nlattr *nla;
+ size_t key_len = nla_len(ovs_key) / 2;
+
+ /* Revert the conversion we did from a non-masked set action to
+ * masked set action.
+ */
+ nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, nla);
+ return 0;
+}
+
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
const struct nlattr *a;
return err;
break;
+ case OVS_ACTION_ATTR_SET_TO_MASKED:
+ err = masked_set_action_to_set_action_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample_action_to_attr(a, skb);
if (err)
return err;
break;
+
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_action_to_attr(nla_data(a), skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;