X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=lib%2Fodp-util.c;h=b5e782675d2e4ed0fed9d640bfb1158805fcf1d0;hb=06d4d4b68726d9cf153577a4a2287f944276f0d7;hp=d52c1727e38b41809840e7433b3af70d250c04b3;hpb=627298c5b3d141b843cde6adaa35c7b6636d595b;p=cascardo%2Fovs.git diff --git a/lib/odp-util.c b/lib/odp-util.c index d52c1727e..b5e782675 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -35,8 +36,10 @@ #include "packets.h" #include "simap.h" #include "timeval.h" +#include "tun-metadata.h" #include "unaligned.h" #include "util.h" +#include "uuid.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(odp_util); @@ -50,6 +53,16 @@ VLOG_DEFINE_THIS_MODULE(odp_util); /* The set of characters that may separate one action or one key attribute * from another. */ static const char *delimiters = ", \t\r\n"; +static const char *delimiters_end = ", \t\r\n)"; + +struct attr_len_tbl { + int len; + const struct attr_len_tbl *next; + int next_max; +}; +#define ATTR_LEN_INVALID -1 +#define ATTR_LEN_VARIABLE -2 +#define ATTR_LEN_NESTED -3 static int parse_odp_key_mask_attr(const char *, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); @@ -58,15 +71,33 @@ static void format_odp_key_attr(const struct nlattr *a, const struct hmap *portno_names, struct ds *ds, bool verbose); +struct geneve_scan { + struct geneve_opt d[63]; + int len; +}; + +static int scan_geneve(const char *s, struct geneve_scan *key, + struct geneve_scan *mask); +static void format_geneve_opts(const struct geneve_opt *opt, + const struct geneve_opt *mask, int opts_len, + struct ds *, bool verbose); + +static struct nlattr *generate_all_wildcard_mask(const struct attr_len_tbl tbl[], + int max, struct ofpbuf *, + const struct nlattr *key); +static void format_u128(struct ds *ds, const ovs_u128 *value, + const ovs_u128 *mask, bool verbose); +static int scan_u128(const char *s, ovs_u128 *value, ovs_u128 *mask); + /* Returns one the following for the action with the given OVS_ACTION_ATTR_* * 'type': * * - For an action whose argument has a fixed length, returned that * nonnegative length in bytes. * - * - For an action with a variable-length argument, returns -2. + * - For an action with a variable-length argument, returns ATTR_LEN_VARIABLE. * - * - For an invalid 'type', returns -1. */ + * - For an invalid 'type', returns ATTR_LEN_INVALID. */ static int odp_action_len(uint16_t type) { @@ -76,25 +107,26 @@ odp_action_len(uint16_t type) switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t); - case OVS_ACTION_ATTR_TUNNEL_PUSH: return -2; + case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t); - case OVS_ACTION_ATTR_USERSPACE: return -2; + case OVS_ACTION_ATTR_USERSPACE: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct ovs_action_push_vlan); case OVS_ACTION_ATTR_POP_VLAN: return 0; case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls); case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); case OVS_ACTION_ATTR_RECIRC: return sizeof(uint32_t); case OVS_ACTION_ATTR_HASH: return sizeof(struct ovs_action_hash); - case OVS_ACTION_ATTR_SET: return -2; - case OVS_ACTION_ATTR_SET_MASKED: return -2; - case OVS_ACTION_ATTR_SAMPLE: return -2; + case OVS_ACTION_ATTR_SET: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: - return -1; + return ATTR_LEN_INVALID; } - return -1; + return ATTR_LEN_INVALID; } /* Returns a string form of 'attr'. The return value is either a statically @@ -109,6 +141,10 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; + case OVS_KEY_ATTR_CT_STATE: return "ct_state"; + case OVS_KEY_ATTR_CT_ZONE: return "ct_zone"; + case OVS_KEY_ATTR_CT_MARK: return "ct_mark"; + case OVS_KEY_ATTR_CT_LABELS: return "ct_label"; case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; @@ -210,128 +246,11 @@ slow_path_reason_to_explanation(enum slow_path_reason reason) } static int -parse_flags(const char *s, const char *(*bit_to_string)(uint32_t), - uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask) +parse_odp_flags(const char *s, const char *(*bit_to_string)(uint32_t), + uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask) { - uint32_t result = 0; - int n; - - /* Parse masked flags in numeric format? */ - if (res_mask && ovs_scan(s, "%"SCNi32"/%"SCNi32"%n", - res_flags, res_mask, &n) && n > 0) { - if (*res_flags & ~allowed || *res_mask & ~allowed) { - return -EINVAL; - } - return n; - } - - n = 0; - - if (res_mask && (*s == '+' || *s == '-')) { - uint32_t flags = 0, mask = 0; - - /* Parse masked flags. */ - while (s[n] != ')') { - bool set; - uint32_t bit; - int name_len; - - if (s[n] == '+') { - set = true; - } else if (s[n] == '-') { - set = false; - } else { - return -EINVAL; - } - n++; - - name_len = strcspn(s + n, "+-)"); - - for (bit = 1; bit; bit <<= 1) { - const char *fname = bit_to_string(bit); - size_t len; - - if (!fname) { - continue; - } - - len = strlen(fname); - if (len != name_len) { - continue; - } - if (!strncmp(s + n, fname, len)) { - if (mask & bit) { - /* bit already set. */ - return -EINVAL; - } - if (!(bit & allowed)) { - return -EINVAL; - } - if (set) { - flags |= bit; - } - mask |= bit; - break; - } - } - - if (!bit) { - return -EINVAL; /* Unknown flag name */ - } - s += name_len; - } - - *res_flags = flags; - *res_mask = mask; - return n; - } - - /* Parse unmasked flags. If a flag is present, it is set, otherwise - * it is not set. */ - while (s[n] != ')') { - unsigned long long int flags; - uint32_t bit; - int n0; - - if (ovs_scan(&s[n], "%lli%n", &flags, &n0)) { - if (flags & ~allowed) { - return -EINVAL; - } - n += n0 + (s[n + n0] == ','); - result |= flags; - continue; - } - - for (bit = 1; bit; bit <<= 1) { - const char *name = bit_to_string(bit); - size_t len; - - if (!name) { - continue; - } - - len = strlen(name); - if (!strncmp(s + n, name, len) && - (s[n + len] == ',' || s[n + len] == ')')) { - if (!(bit & allowed)) { - return -EINVAL; - } - result |= bit; - n += len + (s[n + len] == ','); - break; - } - } - - if (!bit) { - return -EINVAL; - } - } - - *res_flags = result; - if (res_mask) { - *res_mask = UINT32_MAX; - } - return n; + return parse_flags(s, bit_to_string, ')', NULL, NULL, + res_flags, allowed, res_mask); } static void @@ -343,6 +262,8 @@ format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) .optional = true }, [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = { .type = NL_A_U32, .optional = true }, + [OVS_USERSPACE_ATTR_ACTIONS] = { .type = NL_A_UNSPEC, + .optional = true }, }; struct nlattr *a[ARRAY_SIZE(ovs_userspace_policy)]; const struct nlattr *userdata_attr; @@ -414,6 +335,10 @@ format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) } } + if (a[OVS_USERSPACE_ATTR_ACTIONS]) { + ds_put_cstr(ds, ",actions"); + } + tunnel_out_port_attr = a[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT]; if (tunnel_out_port_attr) { ds_put_format(ds, ",tunnel_out_port=%"PRIu32, @@ -461,13 +386,13 @@ static void format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key, const struct ovs_key_mpls *mpls_mask, int n) { - if (n == 1) { - ovs_be32 key = mpls_key->mpls_lse; + for (int i = 0; i < n; i++) { + ovs_be32 key = mpls_key[i].mpls_lse; if (mpls_mask == NULL) { format_mpls_lse(ds, key); } else { - ovs_be32 mask = mpls_mask->mpls_lse; + ovs_be32 mask = mpls_mask[i].mpls_lse; ds_put_format(ds, "label=%"PRIu32"/0x%x,tc=%d/%x,ttl=%d/0x%x,bos=%d/%x", mpls_lse_to_label(key), mpls_lse_to_label(mask), @@ -475,25 +400,15 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key, mpls_lse_to_ttl(key), mpls_lse_to_ttl(mask), mpls_lse_to_bos(key), mpls_lse_to_bos(mask)); } - } else { - int i; - - for (i = 0; i < n; i++) { - ds_put_format(ds, "lse%d=%#"PRIx32, - i, ntohl(mpls_key[i].mpls_lse)); - if (mpls_mask) { - ds_put_format(ds, "/%#"PRIx32, ntohl(mpls_mask[i].mpls_lse)); - } - ds_put_char(ds, ','); - } - ds_chomp(ds, ','); + ds_put_char(ds, ','); } + ds_chomp(ds, ','); } static void format_odp_recirc_action(struct ds *ds, uint32_t recirc_id) { - ds_put_format(ds, "recirc(%"PRIu32")", recirc_id); + ds_put_format(ds, "recirc(%#"PRIx32")", recirc_id); } static void @@ -510,17 +425,27 @@ format_odp_hash_action(struct ds *ds, const struct ovs_action_hash *hash_act) ds_put_format(ds, ")"); } +static const void * +format_udp_tnl_push_header(struct ds *ds, const struct udp_header *udp) +{ + ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16",csum=0x%"PRIx16"),", + ntohs(udp->udp_src), ntohs(udp->udp_dst), + ntohs(udp->udp_csum)); + + return udp + 1; +} + static void format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data) { const struct eth_header *eth; - const struct ip_header *ip; const void *l3; + const void *l4; + const struct udp_header *udp; eth = (const struct eth_header *)data->header; l3 = eth + 1; - ip = (const struct ip_header *)l3; /* Ethernet */ ds_put_format(ds, "header(size=%"PRIu8",type=%"PRIu8",eth(dst=", @@ -530,42 +455,71 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data) ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth->eth_src)); ds_put_format(ds, ",dl_type=0x%04"PRIx16"),", ntohs(eth->eth_type)); - /* IPv4 */ - ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT",proto=%"PRIu8 - ",tos=%#"PRIx8",ttl=%"PRIu8",frag=0x%"PRIx16"),", - IP_ARGS(get_16aligned_be32(&ip->ip_src)), - IP_ARGS(get_16aligned_be32(&ip->ip_dst)), - ip->ip_proto, ip->ip_tos, - ip->ip_ttl, - ip->ip_frag_off); + if (eth->eth_type == htons(ETH_TYPE_IP)) { + /* IPv4 */ + const struct ip_header *ip; + ip = (const struct ip_header *) l3; + ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT",proto=%"PRIu8 + ",tos=%#"PRIx8",ttl=%"PRIu8",frag=0x%"PRIx16"),", + IP_ARGS(get_16aligned_be32(&ip->ip_src)), + IP_ARGS(get_16aligned_be32(&ip->ip_dst)), + ip->ip_proto, ip->ip_tos, + ip->ip_ttl, + ntohs(ip->ip_frag_off)); + l4 = (ip + 1); + } else { + const struct ip6_hdr *ip6; + ip6 = (const struct ip6_hdr *) l3; + ds_put_format(ds, "ipv6(src="); + ipv6_format_addr(&ip6->ip6_src, ds); + ds_put_format(ds, ",dst="); + ipv6_format_addr(&ip6->ip6_dst, ds); + ds_put_format(ds, ",label=%i,proto=%"PRIu8",tclass=0x%"PRIx8 + ",hlimit=%"PRIu8"),", + ntohl(ip6->ip6_flow) & IPV6_LABEL_MASK, ip6->ip6_nxt, + (ntohl(ip6->ip6_flow) >> 20) & 0xff, ip6->ip6_hlim); + l4 = (ip6 + 1); + } + + udp = (const struct udp_header *) l4; if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { const struct vxlanhdr *vxh; - const struct udp_header *udp; - /* UDP */ - udp = (const struct udp_header *) (ip + 1); - ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16"),", - ntohs(udp->udp_src), ntohs(udp->udp_dst)); + vxh = format_udp_tnl_push_header(ds, udp); - /* VxLan */ - vxh = (const struct vxlanhdr *) (udp + 1); ds_put_format(ds, "vxlan(flags=0x%"PRIx32",vni=0x%"PRIx32")", ntohl(get_16aligned_be32(&vxh->vx_flags)), - ntohl(get_16aligned_be32(&vxh->vx_vni))); + ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8); + } else if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) { + const struct genevehdr *gnh; + + gnh = format_udp_tnl_push_header(ds, udp); + + ds_put_format(ds, "geneve(%s%svni=0x%"PRIx32, + gnh->oam ? "oam," : "", + gnh->critical ? "crit," : "", + ntohl(get_16aligned_be32(&gnh->vni)) >> 8); + + if (gnh->opt_len) { + ds_put_cstr(ds, ",options("); + format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4, + ds, false); + ds_put_char(ds, ')'); + } + + ds_put_char(ds, ')'); } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) { const struct gre_base_hdr *greh; ovs_16aligned_be32 *options; - void *l4; - l4 = ((uint8_t *)l3 + sizeof(struct ip_header)); greh = (const struct gre_base_hdr *) l4; ds_put_format(ds, "gre((flags=0x%"PRIx16",proto=0x%"PRIx16")", - greh->flags, ntohs(greh->protocol)); + ntohs(greh->flags), ntohs(greh->protocol)); options = (ovs_16aligned_be32 *)(greh + 1); if (greh->flags & htons(GRE_CSUM)) { - ds_put_format(ds, ",csum=0x%"PRIx32, ntohl(get_16aligned_be32(options))); + ds_put_format(ds, ",csum=0x%"PRIx16, ntohs(*((ovs_be16 *)options))); options++; } if (greh->flags & htons(GRE_KEY)) { @@ -593,16 +547,224 @@ format_odp_tnl_push_action(struct ds *ds, const struct nlattr *attr) ds_put_format(ds, ",out_port(%"PRIu32"))", data->out_port); } +static const struct nl_policy ovs_nat_policy[] = { + [OVS_NAT_ATTR_SRC] = { .type = NL_A_FLAG, .optional = true, }, + [OVS_NAT_ATTR_DST] = { .type = NL_A_FLAG, .optional = true, }, + [OVS_NAT_ATTR_IP_MIN] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(struct in_addr), + .max_len = sizeof(struct in6_addr)}, + [OVS_NAT_ATTR_IP_MAX] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(struct in_addr), + .max_len = sizeof(struct in6_addr)}, + [OVS_NAT_ATTR_PROTO_MIN] = { .type = NL_A_U16, .optional = true, }, + [OVS_NAT_ATTR_PROTO_MAX] = { .type = NL_A_U16, .optional = true, }, + [OVS_NAT_ATTR_PERSISTENT] = { .type = NL_A_FLAG, .optional = true, }, + [OVS_NAT_ATTR_PROTO_HASH] = { .type = NL_A_FLAG, .optional = true, }, + [OVS_NAT_ATTR_PROTO_RANDOM] = { .type = NL_A_FLAG, .optional = true, }, +}; + +static void +format_odp_ct_nat(struct ds *ds, const struct nlattr *attr) +{ + struct nlattr *a[ARRAY_SIZE(ovs_nat_policy)]; + size_t addr_len; + ovs_be32 ip_min, ip_max; + struct in6_addr ip6_min, ip6_max; + uint16_t proto_min, proto_max; + + if (!nl_parse_nested(attr, ovs_nat_policy, a, ARRAY_SIZE(a))) { + ds_put_cstr(ds, "nat(error: nl_parse_nested() failed.)"); + return; + } + /* If no type, then nothing else either. */ + if (!(a[OVS_NAT_ATTR_SRC] || a[OVS_NAT_ATTR_DST]) + && (a[OVS_NAT_ATTR_IP_MIN] || a[OVS_NAT_ATTR_IP_MAX] + || a[OVS_NAT_ATTR_PROTO_MIN] || a[OVS_NAT_ATTR_PROTO_MAX] + || a[OVS_NAT_ATTR_PERSISTENT] || a[OVS_NAT_ATTR_PROTO_HASH] + || a[OVS_NAT_ATTR_PROTO_RANDOM])) { + ds_put_cstr(ds, "nat(error: options allowed only with \"src\" or \"dst\")"); + return; + } + /* Both SNAT & DNAT may not be specified. */ + if (a[OVS_NAT_ATTR_SRC] && a[OVS_NAT_ATTR_DST]) { + ds_put_cstr(ds, "nat(error: Only one of \"src\" or \"dst\" may be present.)"); + return; + } + /* proto may not appear without ip. */ + if (!a[OVS_NAT_ATTR_IP_MIN] && a[OVS_NAT_ATTR_PROTO_MIN]) { + ds_put_cstr(ds, "nat(error: proto but no IP.)"); + return; + } + /* MAX may not appear without MIN. */ + if ((!a[OVS_NAT_ATTR_IP_MIN] && a[OVS_NAT_ATTR_IP_MAX]) + || (!a[OVS_NAT_ATTR_PROTO_MIN] && a[OVS_NAT_ATTR_PROTO_MAX])) { + ds_put_cstr(ds, "nat(error: range max without min.)"); + return; + } + /* Address sizes must match. */ + if ((a[OVS_NAT_ATTR_IP_MIN] + && (nl_attr_get_size(a[OVS_NAT_ATTR_IP_MIN]) != sizeof(ovs_be32) && + nl_attr_get_size(a[OVS_NAT_ATTR_IP_MIN]) != sizeof(struct in6_addr))) + || (a[OVS_NAT_ATTR_IP_MIN] && a[OVS_NAT_ATTR_IP_MAX] + && (nl_attr_get_size(a[OVS_NAT_ATTR_IP_MIN]) + != nl_attr_get_size(a[OVS_NAT_ATTR_IP_MAX])))) { + ds_put_cstr(ds, "nat(error: IP address sizes do not match)"); + return; + } + + addr_len = a[OVS_NAT_ATTR_IP_MIN] + ? nl_attr_get_size(a[OVS_NAT_ATTR_IP_MIN]) : 0; + ip_min = addr_len == sizeof(ovs_be32) && a[OVS_NAT_ATTR_IP_MIN] + ? nl_attr_get_be32(a[OVS_NAT_ATTR_IP_MIN]) : 0; + ip_max = addr_len == sizeof(ovs_be32) && a[OVS_NAT_ATTR_IP_MAX] + ? nl_attr_get_be32(a[OVS_NAT_ATTR_IP_MAX]) : 0; + if (addr_len == sizeof ip6_min) { + ip6_min = a[OVS_NAT_ATTR_IP_MIN] + ? *(struct in6_addr *)nl_attr_get(a[OVS_NAT_ATTR_IP_MIN]) + : in6addr_any; + ip6_max = a[OVS_NAT_ATTR_IP_MAX] + ? *(struct in6_addr *)nl_attr_get(a[OVS_NAT_ATTR_IP_MAX]) + : in6addr_any; + } + proto_min = a[OVS_NAT_ATTR_PROTO_MIN] + ? nl_attr_get_u16(a[OVS_NAT_ATTR_PROTO_MIN]) : 0; + proto_max = a[OVS_NAT_ATTR_PROTO_MAX] + ? nl_attr_get_u16(a[OVS_NAT_ATTR_PROTO_MAX]) : 0; + + if ((addr_len == sizeof(ovs_be32) + && ip_max && ntohl(ip_min) > ntohl(ip_max)) + || (addr_len == sizeof(struct in6_addr) + && !ipv6_mask_is_any(&ip6_max) + && memcmp(&ip6_min, &ip6_max, sizeof ip6_min) > 0) + || (proto_max && proto_min > proto_max)) { + ds_put_cstr(ds, "nat(range error)"); + return; + } + + ds_put_cstr(ds, "nat"); + if (a[OVS_NAT_ATTR_SRC] || a[OVS_NAT_ATTR_DST]) { + ds_put_char(ds, '('); + if (a[OVS_NAT_ATTR_SRC]) { + ds_put_cstr(ds, "src"); + } else if (a[OVS_NAT_ATTR_DST]) { + ds_put_cstr(ds, "dst"); + } + + if (addr_len > 0) { + ds_put_cstr(ds, "="); + + if (addr_len == sizeof ip_min) { + ds_put_format(ds, IP_FMT, IP_ARGS(ip_min)); + + if (ip_max && ip_max != ip_min) { + ds_put_format(ds, "-"IP_FMT, IP_ARGS(ip_max)); + } + } else if (addr_len == sizeof ip6_min) { + ipv6_format_addr_bracket(&ip6_min, ds, proto_min); + + if (!ipv6_mask_is_any(&ip6_max) && + memcmp(&ip6_max, &ip6_min, sizeof ip6_max) != 0) { + ds_put_char(ds, '-'); + ipv6_format_addr_bracket(&ip6_max, ds, proto_min); + } + } + if (proto_min) { + ds_put_format(ds, ":%"PRIu16, proto_min); + + if (proto_max && proto_max != proto_min) { + ds_put_format(ds, "-%"PRIu16, proto_max); + } + } + } + ds_put_char(ds, ','); + if (a[OVS_NAT_ATTR_PERSISTENT]) { + ds_put_cstr(ds, "persistent,"); + } + if (a[OVS_NAT_ATTR_PROTO_HASH]) { + ds_put_cstr(ds, "hash,"); + } + if (a[OVS_NAT_ATTR_PROTO_RANDOM]) { + ds_put_cstr(ds, "random,"); + } + ds_chomp(ds, ','); + ds_put_char(ds, ')'); + } +} + +static const struct nl_policy ovs_conntrack_policy[] = { + [OVS_CT_ATTR_COMMIT] = { .type = NL_A_FLAG, .optional = true, }, + [OVS_CT_ATTR_ZONE] = { .type = NL_A_U16, .optional = true, }, + [OVS_CT_ATTR_MARK] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(uint32_t) * 2 }, + [OVS_CT_ATTR_LABELS] = { .type = NL_A_UNSPEC, .optional = true, + .min_len = sizeof(struct ovs_key_ct_labels) * 2 }, + [OVS_CT_ATTR_HELPER] = { .type = NL_A_STRING, .optional = true, + .min_len = 1, .max_len = 16 }, + [OVS_CT_ATTR_NAT] = { .type = NL_A_UNSPEC, .optional = true }, +}; + +static void +format_odp_conntrack_action(struct ds *ds, const struct nlattr *attr) +{ + struct nlattr *a[ARRAY_SIZE(ovs_conntrack_policy)]; + const ovs_u128 *label; + const uint32_t *mark; + const char *helper; + uint16_t zone; + bool commit; + const struct nlattr *nat; + + if (!nl_parse_nested(attr, ovs_conntrack_policy, a, ARRAY_SIZE(a))) { + ds_put_cstr(ds, "ct(error)"); + return; + } + + commit = a[OVS_CT_ATTR_COMMIT] ? true : false; + zone = a[OVS_CT_ATTR_ZONE] ? nl_attr_get_u16(a[OVS_CT_ATTR_ZONE]) : 0; + mark = a[OVS_CT_ATTR_MARK] ? nl_attr_get(a[OVS_CT_ATTR_MARK]) : NULL; + label = a[OVS_CT_ATTR_LABELS] ? nl_attr_get(a[OVS_CT_ATTR_LABELS]): NULL; + helper = a[OVS_CT_ATTR_HELPER] ? nl_attr_get(a[OVS_CT_ATTR_HELPER]) : NULL; + nat = a[OVS_CT_ATTR_NAT]; + + ds_put_format(ds, "ct"); + if (commit || zone || mark || label || helper || nat) { + ds_put_cstr(ds, "("); + if (commit) { + ds_put_format(ds, "commit,"); + } + if (zone) { + ds_put_format(ds, "zone=%"PRIu16",", zone); + } + if (mark) { + ds_put_format(ds, "mark=%#"PRIx32"/%#"PRIx32",", *mark, + *(mark + 1)); + } + if (label) { + ds_put_format(ds, "label="); + format_u128(ds, label, label + 1, true); + ds_put_char(ds, ','); + } + if (helper) { + ds_put_format(ds, "helper=%s,", helper); + } + if (nat) { + format_odp_ct_nat(ds, nat); + } + ds_chomp(ds, ','); + ds_put_cstr(ds, ")"); + } +} + static void format_odp_action(struct ds *ds, const struct nlattr *a) { int expected_len; enum ovs_action_attr type = nl_attr_type(a); - const struct ovs_action_push_vlan *vlan; size_t size; expected_len = odp_action_len(nl_attr_type(a)); - if (expected_len != -2 && nl_attr_get_size(a) != expected_len) { + if (expected_len != ATTR_LEN_VARIABLE && + nl_attr_get_size(a) != expected_len) { ds_put_format(ds, "bad length %"PRIuSIZE", expected %d for: ", nl_attr_get_size(a), expected_len); format_generic_odp_action(ds, a); @@ -655,8 +817,8 @@ format_odp_action(struct ds *ds, const struct nlattr *a) format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true); ds_put_cstr(ds, ")"); break; - case OVS_ACTION_ATTR_PUSH_VLAN: - vlan = nl_attr_get(a); + case OVS_ACTION_ATTR_PUSH_VLAN: { + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); ds_put_cstr(ds, "push_vlan("); if (vlan->vlan_tpid != htons(ETH_TYPE_VLAN)) { ds_put_format(ds, "tpid=0x%04"PRIx16",", ntohs(vlan->vlan_tpid)); @@ -664,6 +826,7 @@ format_odp_action(struct ds *ds, const struct nlattr *a) format_vlan_tci(ds, vlan->vlan_tci, OVS_BE16_MAX, false); ds_put_char(ds, ')'); break; + } case OVS_ACTION_ATTR_POP_VLAN: ds_put_cstr(ds, "pop_vlan"); break; @@ -682,6 +845,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a) case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; + case OVS_ACTION_ATTR_CT: + format_odp_conntrack_action(ds, a); + break; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: default: @@ -732,11 +898,15 @@ parse_odp_userspace_action(const char *s, struct ofpbuf *actions) int n = -1; void *user_data = NULL; size_t user_data_size = 0; + bool include_actions = false; + int res; if (!ovs_scan(s, "userspace(pid=%"SCNi32"%n", &pid, &n)) { return -EINVAL; } + ofpbuf_init(&buf, 16); + { uint32_t output; uint32_t probability; @@ -763,18 +933,16 @@ parse_odp_userspace_action(const char *s, struct ofpbuf *actions) user_data_size = sizeof cookie.sflow; } else if (ovs_scan(&s[n], ",slow_path(%n", &n1)) { - int res; - n += n1; cookie.type = USER_ACTION_COOKIE_SLOW_PATH; cookie.slow_path.unused = 0; cookie.slow_path.reason = 0; - res = parse_flags(&s[n], slow_path_reason_to_string, - &cookie.slow_path.reason, - SLOW_PATH_REASON_MASK, NULL); + res = parse_odp_flags(&s[n], slow_path_reason_to_string, + &cookie.slow_path.reason, + SLOW_PATH_REASON_MASK, NULL); if (res < 0 || s[n + res] != ')') { - return res; + goto out; } n += res + 1; @@ -807,30 +975,43 @@ parse_odp_userspace_action(const char *s, struct ofpbuf *actions) char *end; n += n1; - ofpbuf_init(&buf, 16); end = ofpbuf_put_hex(&buf, &s[n], NULL); if (end[0] != ')') { - return -EINVAL; + res = -EINVAL; + goto out; } - user_data = ofpbuf_data(&buf); - user_data_size = ofpbuf_size(&buf); + user_data = buf.data; + user_data_size = buf.size; n = (end + 1) - s; } } + { + int n1 = -1; + if (ovs_scan(&s[n], ",actions%n", &n1)) { + n += n1; + include_actions = true; + } + } + { int n1 = -1; if (ovs_scan(&s[n], ",tunnel_out_port=%"SCNi32")%n", &tunnel_out_port, &n1)) { - odp_put_userspace_action(pid, user_data, user_data_size, tunnel_out_port, actions); - return n + n1; + odp_put_userspace_action(pid, user_data, user_data_size, + tunnel_out_port, include_actions, actions); + res = n + n1; } else if (s[n] == ')') { - odp_put_userspace_action(pid, user_data, user_data_size, ODPP_NONE, actions); - return n + 1; + odp_put_userspace_action(pid, user_data, user_data_size, + ODPP_NONE, include_actions, actions); + res = n + 1; + } else { + res = -EINVAL; } } - - return -EINVAL; +out: + ofpbuf_uninit(&buf); + return res; } static int @@ -838,11 +1019,12 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) { struct eth_header *eth; struct ip_header *ip; + struct ovs_16aligned_ip6_hdr *ip6; struct udp_header *udp; struct gre_base_hdr *greh; - uint16_t gre_proto, dl_type, udp_src, udp_dst; + uint16_t gre_proto, gre_flags, dl_type, udp_src, udp_dst, csum; ovs_be32 sip, dip; - uint32_t tnl_type = 0, header_len = 0; + uint32_t tnl_type = 0, header_len = 0, ip_len = 0; void *l3, *l4; int n = 0; @@ -851,18 +1033,18 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) } eth = (struct eth_header *) data->header; l3 = (data->header + sizeof *eth); - l4 = ((uint8_t *) l3 + sizeof (struct ip_header)); ip = (struct ip_header *) l3; + ip6 = (struct ovs_16aligned_ip6_hdr *) l3; if (!ovs_scan_len(s, &n, "header(size=%"SCNi32",type=%"SCNi32"," - "eth(dst="ETH_ADDR_SCAN_FMT",", - &data->header_len, - &data->tnl_type, - ETH_ADDR_SCAN_ARGS(eth->eth_dst))) { + "eth(dst="ETH_ADDR_SCAN_FMT",", + &data->header_len, + &data->tnl_type, + ETH_ADDR_SCAN_ARGS(eth->eth_dst))) { return -EINVAL; } if (!ovs_scan_len(s, &n, "src="ETH_ADDR_SCAN_FMT",", - ETH_ADDR_SCAN_ARGS(eth->eth_src))) { + ETH_ADDR_SCAN_ARGS(eth->eth_src))) { return -EINVAL; } if (!ovs_scan_len(s, &n, "dl_type=0x%"SCNx16"),", &dl_type)) { @@ -870,55 +1052,123 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) } eth->eth_type = htons(dl_type); - /* IPv4 */ - if (!ovs_scan_len(s, &n, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT",proto=%"SCNi8 - ",tos=%"SCNi8",ttl=%"SCNi8",frag=0x%"SCNx16"),", - IP_SCAN_ARGS(&sip), - IP_SCAN_ARGS(&dip), - &ip->ip_proto, &ip->ip_tos, - &ip->ip_ttl, &ip->ip_frag_off)) { - return -EINVAL; + if (eth->eth_type == htons(ETH_TYPE_IP)) { + /* IPv4 */ + uint16_t ip_frag_off; + if (!ovs_scan_len(s, &n, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT",proto=%"SCNi8 + ",tos=%"SCNi8",ttl=%"SCNi8",frag=0x%"SCNx16"),", + IP_SCAN_ARGS(&sip), + IP_SCAN_ARGS(&dip), + &ip->ip_proto, &ip->ip_tos, + &ip->ip_ttl, &ip_frag_off)) { + return -EINVAL; + } + put_16aligned_be32(&ip->ip_src, sip); + put_16aligned_be32(&ip->ip_dst, dip); + ip->ip_frag_off = htons(ip_frag_off); + ip_len = sizeof *ip; + } else { + char sip6_s[IPV6_SCAN_LEN + 1]; + char dip6_s[IPV6_SCAN_LEN + 1]; + struct in6_addr sip6, dip6; + uint8_t tclass; + uint32_t label; + if (!ovs_scan_len(s, &n, "ipv6(src="IPV6_SCAN_FMT",dst="IPV6_SCAN_FMT + ",label=%i,proto=%"SCNi8",tclass=0x%"SCNx8 + ",hlimit=%"SCNi8"),", + sip6_s, dip6_s, &label, &ip6->ip6_nxt, + &tclass, &ip6->ip6_hlim) + || (label & ~IPV6_LABEL_MASK) != 0 + || inet_pton(AF_INET6, sip6_s, &sip6) != 1 + || inet_pton(AF_INET6, dip6_s, &dip6) != 1) { + return -EINVAL; + } + put_16aligned_be32(&ip6->ip6_flow, htonl(6 << 28) | + htonl(tclass << 20) | htonl(label)); + memcpy(&ip6->ip6_src, &sip6, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, &dip6, sizeof(ip6->ip6_dst)); + ip_len = sizeof *ip6; } - put_16aligned_be32(&ip->ip_src, sip); - put_16aligned_be32(&ip->ip_dst, dip); /* Tunnel header */ + l4 = ((uint8_t *) l3 + ip_len); udp = (struct udp_header *) l4; greh = (struct gre_base_hdr *) l4; - if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16"),", - &udp_src, &udp_dst)) { - struct vxlanhdr *vxh; - uint32_t vx_flags, vx_vni; + if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16",csum=0x%"SCNx16"),", + &udp_src, &udp_dst, &csum)) { + uint32_t vx_flags, vni; udp->udp_src = htons(udp_src); udp->udp_dst = htons(udp_dst); udp->udp_len = 0; - udp->udp_csum = 0; + udp->udp_csum = htons(csum); + + if (ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))", + &vx_flags, &vni)) { + struct vxlanhdr *vxh = (struct vxlanhdr *) (udp + 1); + + put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags)); + put_16aligned_be32(&vxh->vx_vni, htonl(vni << 8)); + tnl_type = OVS_VPORT_TYPE_VXLAN; + header_len = sizeof *eth + ip_len + + sizeof *udp + sizeof *vxh; + } else if (ovs_scan_len(s, &n, "geneve(")) { + struct genevehdr *gnh = (struct genevehdr *) (udp + 1); + + memset(gnh, 0, sizeof *gnh); + header_len = sizeof *eth + ip_len + + sizeof *udp + sizeof *gnh; + + if (ovs_scan_len(s, &n, "oam,")) { + gnh->oam = 1; + } + if (ovs_scan_len(s, &n, "crit,")) { + gnh->critical = 1; + } + if (!ovs_scan_len(s, &n, "vni=%"SCNi32, &vni)) { + return -EINVAL; + } + if (ovs_scan_len(s, &n, ",options(")) { + struct geneve_scan options; + int len; - vxh = (struct vxlanhdr *) (udp + 1); - if (!ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))", - &vx_flags, &vx_vni)) { + memset(&options, 0, sizeof options); + len = scan_geneve(s + n, &options, NULL); + if (!len) { + return -EINVAL; + } + + memcpy(gnh->options, options.d, options.len); + gnh->opt_len = options.len / 4; + header_len += options.len; + + n += len; + } + if (!ovs_scan_len(s, &n, "))")) { + return -EINVAL; + } + + gnh->proto_type = htons(ETH_TYPE_TEB); + put_16aligned_be32(&gnh->vni, htonl(vni << 8)); + tnl_type = OVS_VPORT_TYPE_GENEVE; + } else { return -EINVAL; } - put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags)); - put_16aligned_be32(&vxh->vx_vni, htonl(vx_vni)); - tnl_type = OVS_VPORT_TYPE_VXLAN; - header_len = sizeof *eth + sizeof *ip + - sizeof *udp + sizeof *vxh; } else if (ovs_scan_len(s, &n, "gre((flags=0x%"SCNx16",proto=0x%"SCNx16")", - &greh->flags, &gre_proto)){ + &gre_flags, &gre_proto)){ tnl_type = OVS_VPORT_TYPE_GRE; + greh->flags = htons(gre_flags); greh->protocol = htons(gre_proto); ovs_16aligned_be32 *options = (ovs_16aligned_be32 *) (greh + 1); if (greh->flags & htons(GRE_CSUM)) { - uint32_t csum; - - if (!ovs_scan_len(s, &n, ",csum=0x%"SCNx32, &csum)) { + if (!ovs_scan_len(s, &n, ",csum=0x%"SCNx16, &csum)) { return -EINVAL; } - put_16aligned_be32(options, htonl(csum)); + + memset(options, 0, sizeof *options); + *((ovs_be16 *)options) = htons(csum); options++; } if (greh->flags & htons(GRE_KEY)) { @@ -945,7 +1195,7 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) return -EINVAL; } - header_len = sizeof *eth + sizeof *ip + + header_len = sizeof *eth + ip_len + ((uint8_t *) options - (uint8_t *) greh); } else { return -EINVAL; @@ -967,63 +1217,355 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) return n; } +struct ct_nat_params { + bool snat; + bool dnat; + size_t addr_len; + union { + ovs_be32 ip; + struct in6_addr ip6; + } addr_min; + union { + ovs_be32 ip; + struct in6_addr ip6; + } addr_max; + uint16_t proto_min; + uint16_t proto_max; + bool persistent; + bool proto_hash; + bool proto_random; +}; + static int -parse_odp_action(const char *s, const struct simap *port_names, - struct ofpbuf *actions) +scan_ct_nat_range(const char *s, int *n, struct ct_nat_params *p) { - { - uint32_t port; - int n; - - if (ovs_scan(s, "%"SCNi32"%n", &port, &n)) { - nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, port); - return n; + if (ovs_scan_len(s, n, "=")) { + char ipv6_s[IPV6_SCAN_LEN + 1]; + struct in6_addr ipv6; + + if (ovs_scan_len(s, n, IP_SCAN_FMT, IP_SCAN_ARGS(&p->addr_min.ip))) { + p->addr_len = sizeof p->addr_min.ip; + if (ovs_scan_len(s, n, "-")) { + if (!ovs_scan_len(s, n, IP_SCAN_FMT, + IP_SCAN_ARGS(&p->addr_max.ip))) { + return -EINVAL; + } + } + } else if ((ovs_scan_len(s, n, IPV6_SCAN_FMT, ipv6_s) + || ovs_scan_len(s, n, "["IPV6_SCAN_FMT"]", ipv6_s)) + && inet_pton(AF_INET6, ipv6_s, &ipv6) == 1) { + p->addr_len = sizeof p->addr_min.ip6; + p->addr_min.ip6 = ipv6; + if (ovs_scan_len(s, n, "-")) { + if ((ovs_scan_len(s, n, IPV6_SCAN_FMT, ipv6_s) + || ovs_scan_len(s, n, "["IPV6_SCAN_FMT"]", ipv6_s)) + && inet_pton(AF_INET6, ipv6_s, &ipv6) == 1) { + p->addr_max.ip6 = ipv6; + } else { + return -EINVAL; + } + } + } else { + return -EINVAL; } - } - - if (port_names) { - int len = strcspn(s, delimiters); - struct simap_node *node; - - node = simap_find_len(port_names, s, len); - if (node) { - nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, node->data); - return len; + if (ovs_scan_len(s, n, ":%"SCNu16, &p->proto_min)) { + if (ovs_scan_len(s, n, "-")) { + if (!ovs_scan_len(s, n, "%"SCNu16, &p->proto_max)) { + return -EINVAL; + } + } } } + return 0; +} - { - uint32_t recirc_id; - int n = -1; - - if (ovs_scan(s, "recirc(%"PRIu32")%n", &recirc_id, &n)) { - nl_msg_put_u32(actions, OVS_ACTION_ATTR_RECIRC, recirc_id); - return n; - } - } +static int +scan_ct_nat(const char *s, struct ct_nat_params *p) +{ + int n = 0; - if (!strncmp(s, "userspace(", 10)) { - return parse_odp_userspace_action(s, actions); - } + if (ovs_scan_len(s, &n, "nat")) { + memset(p, 0, sizeof *p); - if (!strncmp(s, "set(", 4)) { - size_t start_ofs; - int retval; - struct nlattr mask[128 / sizeof(struct nlattr)]; - struct ofpbuf maskbuf; - struct nlattr *nested, *key; - size_t size; + if (ovs_scan_len(s, &n, "(")) { + char *end; + int end_n; - /* 'mask' is big enough to hold any key. */ - ofpbuf_use_stack(&maskbuf, mask, sizeof mask); + end = strchr(s + n, ')'); + if (!end) { + return -EINVAL; + } + end_n = end - s; - start_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SET); - retval = parse_odp_key_mask_attr(s + 4, port_names, actions, &maskbuf); - if (retval < 0) { - return retval; - } - if (s[retval + 4] != ')') { - return -EINVAL; + while (n < end_n) { + n += strspn(s + n, delimiters); + if (ovs_scan_len(s, &n, "src")) { + int err = scan_ct_nat_range(s, &n, p); + if (err) { + return err; + } + p->snat = true; + continue; + } + if (ovs_scan_len(s, &n, "dst")) { + int err = scan_ct_nat_range(s, &n, p); + if (err) { + return err; + } + p->dnat = true; + continue; + } + if (ovs_scan_len(s, &n, "persistent")) { + p->persistent = true; + continue; + } + if (ovs_scan_len(s, &n, "hash")) { + p->proto_hash = true; + continue; + } + if (ovs_scan_len(s, &n, "random")) { + p->proto_random = true; + continue; + } + return -EINVAL; + } + + if (p->snat && p->dnat) { + return -EINVAL; + } + if ((p->addr_len != 0 && + memcmp(&p->addr_max, &in6addr_any, p->addr_len) && + memcmp(&p->addr_max, &p->addr_min, p->addr_len) < 0) || + (p->proto_max && p->proto_max < p->proto_min)) { + return -EINVAL; + } + if (p->proto_hash && p->proto_random) { + return -EINVAL; + } + n++; + } + } + return n; +} + +static void +nl_msg_put_ct_nat(struct ct_nat_params *p, struct ofpbuf *actions) +{ + size_t start = nl_msg_start_nested(actions, OVS_CT_ATTR_NAT); + + if (p->snat) { + nl_msg_put_flag(actions, OVS_NAT_ATTR_SRC); + } else if (p->dnat) { + nl_msg_put_flag(actions, OVS_NAT_ATTR_DST); + } else { + goto out; + } + if (p->addr_len != 0) { + nl_msg_put_unspec(actions, OVS_NAT_ATTR_IP_MIN, &p->addr_min, + p->addr_len); + if (memcmp(&p->addr_max, &p->addr_min, p->addr_len) > 0) { + nl_msg_put_unspec(actions, OVS_NAT_ATTR_IP_MAX, &p->addr_max, + p->addr_len); + } + if (p->proto_min) { + nl_msg_put_u16(actions, OVS_NAT_ATTR_PROTO_MIN, p->proto_min); + if (p->proto_max && p->proto_max > p->proto_min) { + nl_msg_put_u16(actions, OVS_NAT_ATTR_PROTO_MAX, p->proto_max); + } + } + if (p->persistent) { + nl_msg_put_flag(actions, OVS_NAT_ATTR_PERSISTENT); + } + if (p->proto_hash) { + nl_msg_put_flag(actions, OVS_NAT_ATTR_PROTO_HASH); + } + if (p->proto_random) { + nl_msg_put_flag(actions, OVS_NAT_ATTR_PROTO_RANDOM); + } + } +out: + nl_msg_end_nested(actions, start); +} + +static int +parse_conntrack_action(const char *s_, struct ofpbuf *actions) +{ + const char *s = s_; + + if (ovs_scan(s, "ct")) { + const char *helper = NULL; + size_t helper_len = 0; + bool commit = false; + uint16_t zone = 0; + struct { + uint32_t value; + uint32_t mask; + } ct_mark = { 0, 0 }; + struct { + ovs_u128 value; + ovs_u128 mask; + } ct_label; + struct ct_nat_params nat_params; + bool have_nat = false; + size_t start; + char *end; + + memset(&ct_label, 0, sizeof(ct_label)); + + s += 2; + if (ovs_scan(s, "(")) { + s++; +find_end: + end = strchr(s, ')'); + if (!end) { + return -EINVAL; + } + + while (s != end) { + int n; + + s += strspn(s, delimiters); + if (ovs_scan(s, "commit%n", &n)) { + commit = true; + s += n; + continue; + } + if (ovs_scan(s, "zone=%"SCNu16"%n", &zone, &n)) { + s += n; + continue; + } + if (ovs_scan(s, "mark=%"SCNx32"%n", &ct_mark.value, &n)) { + s += n; + n = -1; + if (ovs_scan(s, "/%"SCNx32"%n", &ct_mark.mask, &n)) { + s += n; + } else { + ct_mark.mask = UINT32_MAX; + } + continue; + } + if (ovs_scan(s, "label=%n", &n)) { + int retval; + + s += n; + retval = scan_u128(s, &ct_label.value, &ct_label.mask); + if (retval < 0) { + return retval; + } + s += retval; + continue; + } + if (ovs_scan(s, "helper=%n", &n)) { + s += n; + helper_len = strcspn(s, delimiters_end); + if (!helper_len || helper_len > 15) { + return -EINVAL; + } + helper = s; + s += helper_len; + continue; + } + + n = scan_ct_nat(s, &nat_params); + if (n > 0) { + s += n; + have_nat = true; + + /* end points to the end of the nested, nat action. + * find the real end. */ + goto find_end; + } + /* Nothing matched. */ + return -EINVAL; + } + s++; + } + + start = nl_msg_start_nested(actions, OVS_ACTION_ATTR_CT); + if (commit) { + nl_msg_put_flag(actions, OVS_CT_ATTR_COMMIT); + } + if (zone) { + nl_msg_put_u16(actions, OVS_CT_ATTR_ZONE, zone); + } + if (ct_mark.mask) { + nl_msg_put_unspec(actions, OVS_CT_ATTR_MARK, &ct_mark, + sizeof(ct_mark)); + } + if (!ovs_u128_is_zero(&ct_label.mask)) { + nl_msg_put_unspec(actions, OVS_CT_ATTR_LABELS, &ct_label, + sizeof ct_label); + } + if (helper) { + nl_msg_put_string__(actions, OVS_CT_ATTR_HELPER, helper, + helper_len); + } + if (have_nat) { + nl_msg_put_ct_nat(&nat_params, actions); + } + nl_msg_end_nested(actions, start); + } + + return s - s_; +} + +static int +parse_odp_action(const char *s, const struct simap *port_names, + struct ofpbuf *actions) +{ + { + uint32_t port; + int n; + + if (ovs_scan(s, "%"SCNi32"%n", &port, &n)) { + nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, port); + return n; + } + } + + if (port_names) { + int len = strcspn(s, delimiters); + struct simap_node *node; + + node = simap_find_len(port_names, s, len); + if (node) { + nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, node->data); + return len; + } + } + + { + uint32_t recirc_id; + int n = -1; + + if (ovs_scan(s, "recirc(%"PRIu32")%n", &recirc_id, &n)) { + nl_msg_put_u32(actions, OVS_ACTION_ATTR_RECIRC, recirc_id); + return n; + } + } + + if (!strncmp(s, "userspace(", 10)) { + return parse_odp_userspace_action(s, actions); + } + + if (!strncmp(s, "set(", 4)) { + size_t start_ofs; + int retval; + struct nlattr mask[128 / sizeof(struct nlattr)]; + struct ofpbuf maskbuf; + struct nlattr *nested, *key; + size_t size; + + /* 'mask' is big enough to hold any key. */ + ofpbuf_use_stack(&maskbuf, mask, sizeof mask); + + start_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SET); + retval = parse_odp_key_mask_attr(s + 4, port_names, actions, &maskbuf); + if (retval < 0) { + return retval; + } + if (s[retval + 4] != ')') { + return -EINVAL; } nested = ofpbuf_at_assert(actions, start_ofs, sizeof *nested); @@ -1124,6 +1666,15 @@ parse_odp_action(const char *s, const struct simap *port_names, } } + { + int retval; + + retval = parse_conntrack_action(s, actions); + if (retval) { + return retval; + } + } + { struct ovs_action_push_tnl data; int n; @@ -1154,7 +1705,7 @@ odp_actions_from_string(const char *s, const struct simap *port_names, return 0; } - old_size = ofpbuf_size(actions); + old_size = actions->size; for (;;) { int retval; @@ -1165,7 +1716,7 @@ odp_actions_from_string(const char *s, const struct simap *port_names, retval = parse_odp_action(s, port_names, actions); if (retval < 0 || !strchr(delimiters, s[retval])) { - ofpbuf_set_size(actions, old_size); + actions->size = old_size; return -retval; } s += retval; @@ -1174,45 +1725,71 @@ odp_actions_from_string(const char *s, const struct simap *port_names, return 0; } +static const struct attr_len_tbl ovs_vxlan_ext_attr_lens[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .len = 4 }, +}; + +static const struct attr_len_tbl ovs_tun_key_attr_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = 8 }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = 4 }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = 4 }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = 2 }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = 2 }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = ATTR_LEN_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = ATTR_LEN_NESTED, + .next = ovs_vxlan_ext_attr_lens , + .next_max = OVS_VXLAN_EXT_MAX}, + [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = 16 }, + [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = 16 }, +}; + +static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = ATTR_LEN_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = 4 }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = 4 }, + [OVS_KEY_ATTR_DP_HASH] = { .len = 4 }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = 4 }, + [OVS_KEY_ATTR_TUNNEL] = { .len = ATTR_LEN_NESTED, + .next = ovs_tun_key_attr_lens, + .next_max = OVS_TUNNEL_KEY_ATTR_MAX }, + [OVS_KEY_ATTR_IN_PORT] = { .len = 4 }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = 2 }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = 2 }, + [OVS_KEY_ATTR_MPLS] = { .len = ATTR_LEN_VARIABLE }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = 2 }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = 4 }, + [OVS_KEY_ATTR_CT_ZONE] = { .len = 2 }, + [OVS_KEY_ATTR_CT_MARK] = { .len = 4 }, + [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, +}; + /* Returns the correct length of the payload for a flow key attribute of the - * specified 'type', -1 if 'type' is unknown, or -2 if the attribute's payload - * is variable length. */ + * specified 'type', ATTR_LEN_INVALID if 'type' is unknown, ATTR_LEN_VARIABLE + * if the attribute's payload is variable length, or ATTR_LEN_NESTED if the + * payload is a nested type. */ static int -odp_flow_key_attr_len(uint16_t type) +odp_key_attr_len(const struct attr_len_tbl tbl[], int max_len, uint16_t type) { - if (type > OVS_KEY_ATTR_MAX) { - return -1; - } - - switch ((enum ovs_key_attr) type) { - case OVS_KEY_ATTR_ENCAP: return -2; - case OVS_KEY_ATTR_PRIORITY: return 4; - case OVS_KEY_ATTR_SKB_MARK: return 4; - case OVS_KEY_ATTR_DP_HASH: return 4; - case OVS_KEY_ATTR_RECIRC_ID: return 4; - case OVS_KEY_ATTR_TUNNEL: return -2; - case OVS_KEY_ATTR_IN_PORT: return 4; - case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); - case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); - case OVS_KEY_ATTR_ETHERTYPE: return 2; - case OVS_KEY_ATTR_MPLS: return -2; - case OVS_KEY_ATTR_IPV4: return sizeof(struct ovs_key_ipv4); - case OVS_KEY_ATTR_IPV6: return sizeof(struct ovs_key_ipv6); - case OVS_KEY_ATTR_TCP: return sizeof(struct ovs_key_tcp); - case OVS_KEY_ATTR_TCP_FLAGS: return 2; - case OVS_KEY_ATTR_UDP: return sizeof(struct ovs_key_udp); - case OVS_KEY_ATTR_SCTP: return sizeof(struct ovs_key_sctp); - case OVS_KEY_ATTR_ICMP: return sizeof(struct ovs_key_icmp); - case OVS_KEY_ATTR_ICMPV6: return sizeof(struct ovs_key_icmpv6); - case OVS_KEY_ATTR_ARP: return sizeof(struct ovs_key_arp); - case OVS_KEY_ATTR_ND: return sizeof(struct ovs_key_nd); - - case OVS_KEY_ATTR_UNSPEC: - case __OVS_KEY_ATTR_MAX: - return -1; + if (type > max_len) { + return ATTR_LEN_INVALID; } - return -1; + return tbl[type].len; } static void @@ -1249,62 +1826,11 @@ ovs_frag_type_to_string(enum ovs_frag_type type) } } -static int -tunnel_key_attr_len(int type) -{ - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: return 8; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: return 4; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: return 4; - case OVS_TUNNEL_KEY_ATTR_TOS: return 1; - case OVS_TUNNEL_KEY_ATTR_TTL: return 1; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; - case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; - case OVS_TUNNEL_KEY_ATTR_TP_SRC: return 2; - case OVS_TUNNEL_KEY_ATTR_TP_DST: return 2; - case OVS_TUNNEL_KEY_ATTR_OAM: return 0; - case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2; - case __OVS_TUNNEL_KEY_ATTR_MAX: - return -1; - } - return -1; -} - -#define GENEVE_OPT(class, type) ((OVS_FORCE uint32_t)(class) << 8 | (type)) -static int -parse_geneve_opts(const struct nlattr *attr) -{ - int opts_len = nl_attr_get_size(attr); - const struct geneve_opt *opt = nl_attr_get(attr); - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*opt)) { - return -EINVAL; - } - - len = sizeof(*opt) + opt->length * 4; - if (len > opts_len) { - return -EINVAL; - } - - switch (GENEVE_OPT(opt->opt_class, opt->type)) { - default: - if (opt->type & GENEVE_CRIT_OPT_TYPE) { - return -EINVAL; - } - }; - - opt = opt + len / sizeof(*opt); - opts_len -= len; - }; - - return 0; -} - -enum odp_key_fitness -odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) +static enum odp_key_fitness +odp_tun_key_from_attr__(const struct nlattr *attr, + const struct nlattr *flow_attrs, size_t flow_attr_len, + const struct flow_tnl *src_tun, struct flow_tnl *tun, + bool udpif) { unsigned int left; const struct nlattr *a; @@ -1314,7 +1840,8 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) NL_NESTED_FOR_EACH(a, left, attr) { uint16_t type = nl_attr_type(a); size_t len = nl_attr_get_size(a); - int expected_len = tunnel_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_tun_key_attr_lens, + OVS_TUNNEL_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { return ODP_FIT_ERROR; @@ -1331,6 +1858,12 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) case OVS_TUNNEL_KEY_ATTR_IPV4_DST: tun->ip_dst = nl_attr_get_be32(a); break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: + tun->ipv6_src = nl_attr_get_in6_addr(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: + tun->ipv6_dst = nl_attr_get_in6_addr(a); + break; case OVS_TUNNEL_KEY_ATTR_TOS: tun->ip_tos = nl_attr_get_u8(a); break; @@ -1353,15 +1886,32 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) case OVS_TUNNEL_KEY_ATTR_OAM: tun->flags |= FLOW_TNL_F_OAM; break; - case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: { - if (parse_geneve_opts(a)) { + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: { + static const struct nl_policy vxlan_opts_policy[] = { + [OVS_VXLAN_EXT_GBP] = { .type = NL_A_U32 }, + }; + struct nlattr *ext[ARRAY_SIZE(vxlan_opts_policy)]; + + if (!nl_parse_nested(a, vxlan_opts_policy, ext, ARRAY_SIZE(ext))) { return ODP_FIT_ERROR; } - /* It is necessary to reproduce options exactly (including order) - * so it's easiest to just echo them back. */ - unknown = true; + + if (ext[OVS_VXLAN_EXT_GBP]) { + uint32_t gbp = nl_attr_get_u32(ext[OVS_VXLAN_EXT_GBP]); + + tun->gbp_id = htons(gbp & 0xFFFF); + tun->gbp_flags = (gbp >> 16) & 0xFF; + } + break; } + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (tun_metadata_from_geneve_nlattr(a, flow_attrs, flow_attr_len, + src_tun, udpif, tun)) { + return ODP_FIT_ERROR; + } + break; + default: /* Allow this to show up as unexpected, if there are unknown * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */ @@ -1379,8 +1929,18 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) return ODP_FIT_PERFECT; } +enum odp_key_fitness +odp_tun_key_from_attr(const struct nlattr *attr, bool udpif, + struct flow_tnl *tun) +{ + memset(tun, 0, sizeof *tun); + return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun, udpif); +} + static void -tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, + const struct flow_tnl *tun_flow_key, + const struct ofpbuf *key_buf) { size_t tun_key_ofs; @@ -1396,6 +1956,12 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) if (tun_key->ip_dst) { nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst); } + if (ipv6_addr_is_set(&tun_key->ipv6_src)) { + nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->ipv6_src); + } + if (ipv6_addr_is_set(&tun_key->ipv6_dst)) { + nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->ipv6_dst); + } if (tun_key->ip_tos) { nl_msg_put_u8(a, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ip_tos); } @@ -1415,6 +1981,15 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) if (tun_key->flags & FLOW_TNL_F_OAM) { nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_OAM); } + if (tun_key->gbp_flags || tun_key->gbp_id) { + size_t vxlan_opts_ofs; + + vxlan_opts_ofs = nl_msg_start_nested(a, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + nl_msg_put_u32(a, OVS_VXLAN_EXT_GBP, + (tun_key->gbp_flags << 16) | ntohs(tun_key->gbp_id)); + nl_msg_end_nested(a, vxlan_opts_ofs); + } + tun_metadata_to_geneve_nlattr(tun_key, tun_flow_key, key_buf, a); nl_msg_end_nested(a, tun_key_ofs); } @@ -1445,16 +2020,7 @@ odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size) && ipv6_mask_is_exact((const struct in6_addr *)ipv6_mask->ipv6_dst); } if (attr == OVS_KEY_ATTR_TUNNEL) { - const struct flow_tnl *tun_mask = mask; - - return tun_mask->flags == FLOW_TNL_F_MASK - && tun_mask->tun_id == OVS_BE64_MAX - && tun_mask->ip_src == OVS_BE32_MAX - && tun_mask->ip_dst == OVS_BE32_MAX - && tun_mask->ip_tos == UINT8_MAX - && tun_mask->ip_ttl == UINT8_MAX - && tun_mask->tp_src == OVS_BE16_MAX - && tun_mask->tp_dst == OVS_BE16_MAX; + return false; } if (attr == OVS_KEY_ATTR_ARP) { @@ -1471,16 +2037,12 @@ odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size) static bool odp_mask_attr_is_exact(const struct nlattr *ma) { - struct flow_tnl tun_mask; enum ovs_key_attr attr = nl_attr_type(ma); const void *mask; size_t size; if (attr == OVS_KEY_ATTR_TUNNEL) { - memset(&tun_mask, 0, sizeof tun_mask); - odp_tun_key_from_attr(ma, &tun_mask); - mask = &tun_mask; - size = sizeof tun_mask; + return false; } else { mask = nl_attr_get(ma); size = nl_attr_get_size(ma); @@ -1531,8 +2093,8 @@ odp_portno_names_destroy(struct hmap *portno_names) /* Format helpers. */ static void -format_eth(struct ds *ds, const char *name, const uint8_t key[ETH_ADDR_LEN], - const uint8_t (*mask)[ETH_ADDR_LEN], bool verbose) +format_eth(struct ds *ds, const char *name, const struct eth_addr key, + const struct eth_addr *mask, bool verbose) { bool mask_empty = mask && eth_addr_is_zero(*mask); @@ -1543,7 +2105,7 @@ format_eth(struct ds *ds, const char *name, const uint8_t key[ETH_ADDR_LEN], ds_put_format(ds, "%s="ETH_ADDR_FMT",", name, ETH_ADDR_ARGS(key)); } else { ds_put_format(ds, "%s=", name); - eth_format_masked(key, *mask, ds); + eth_format_masked(key, mask, ds); ds_put_char(ds, ','); } } @@ -1584,13 +2146,12 @@ format_ipv4(struct ds *ds, const char *name, ovs_be32 key, } static void -format_ipv6(struct ds *ds, const char *name, const ovs_be32 key_[4], - const ovs_be32 (*mask_)[4], bool verbose) +format_in6_addr(struct ds *ds, const char *name, + const struct in6_addr *key, + const struct in6_addr *mask, + bool verbose) { char buf[INET6_ADDRSTRLEN]; - const struct in6_addr *key = (const struct in6_addr *)key_; - const struct in6_addr *mask = mask_ ? (const struct in6_addr *)*mask_ - : NULL; bool mask_empty = mask && ipv6_mask_is_any(mask); if (verbose || !mask_empty) { @@ -1606,6 +2167,16 @@ format_ipv6(struct ds *ds, const char *name, const ovs_be32 key_[4], } } +static void +format_ipv6(struct ds *ds, const char *name, const ovs_be32 key_[4], + const ovs_be32 (*mask_)[4], bool verbose) +{ + format_in6_addr(ds, name, + (const struct in6_addr *)key_, + mask_ ? (const struct in6_addr *)*mask_ : NULL, + verbose); +} + static void format_ipv6_label(struct ds *ds, const char *name, ovs_be32 key, const ovs_be32 *mask, bool verbose) @@ -1675,6 +2246,23 @@ format_be16(struct ds *ds, const char *name, ovs_be16 key, } } +static void +format_be16x(struct ds *ds, const char *name, ovs_be16 key, + const ovs_be16 *mask, bool verbose) +{ + bool mask_empty = mask && !*mask; + + if (verbose || !mask_empty) { + bool mask_full = !mask || *mask == OVS_BE16_MAX; + + ds_put_format(ds, "%s=%#"PRIx16, name, ntohs(key)); + if (!mask_full) { /* Partially masked. */ + ds_put_format(ds, "/%#"PRIx16, ntohs(*mask)); + } + ds_put_char(ds, ','); + } +} + static void format_tun_flags(struct ds *ds, const char *name, uint16_t key, const uint16_t *mask, bool verbose) @@ -1682,19 +2270,358 @@ format_tun_flags(struct ds *ds, const char *name, uint16_t key, bool mask_empty = mask && !*mask; if (verbose || !mask_empty) { - bool mask_full = !mask || (*mask & FLOW_TNL_F_MASK) == FLOW_TNL_F_MASK; - ds_put_cstr(ds, name); ds_put_char(ds, '('); - if (!mask_full) { /* Partially masked. */ - format_flags_masked(ds, NULL, flow_tun_flag_to_string, key, *mask); + if (mask) { + format_flags_masked(ds, NULL, flow_tun_flag_to_string, key, + *mask & FLOW_TNL_F_MASK, FLOW_TNL_F_MASK); } else { /* Fully masked. */ - format_flags(ds, flow_tun_flag_to_string, key, ','); + format_flags(ds, flow_tun_flag_to_string, key, '|'); } ds_put_cstr(ds, "),"); } } +static bool +check_attr_len(struct ds *ds, const struct nlattr *a, const struct nlattr *ma, + const struct attr_len_tbl tbl[], int max_len, bool need_key) +{ + int expected_len; + + expected_len = odp_key_attr_len(tbl, max_len, nl_attr_type(a)); + if (expected_len != ATTR_LEN_VARIABLE && + expected_len != ATTR_LEN_NESTED) { + + bool bad_key_len = nl_attr_get_size(a) != expected_len; + bool bad_mask_len = ma && nl_attr_get_size(ma) != expected_len; + + if (bad_key_len || bad_mask_len) { + if (need_key) { + ds_put_format(ds, "key%u", nl_attr_type(a)); + } + if (bad_key_len) { + ds_put_format(ds, "(bad key length %"PRIuSIZE", expected %d)(", + nl_attr_get_size(a), expected_len); + } + format_generic_odp_key(a, ds); + if (ma) { + ds_put_char(ds, '/'); + if (bad_mask_len) { + ds_put_format(ds, "(bad mask length %"PRIuSIZE", expected %d)(", + nl_attr_get_size(ma), expected_len); + } + format_generic_odp_key(ma, ds); + } + ds_put_char(ds, ')'); + return false; + } + } + + return true; +} + +static void +format_unknown_key(struct ds *ds, const struct nlattr *a, + const struct nlattr *ma) +{ + ds_put_format(ds, "key%u(", nl_attr_type(a)); + format_generic_odp_key(a, ds); + if (ma && !odp_mask_attr_is_exact(ma)) { + ds_put_char(ds, '/'); + format_generic_odp_key(ma, ds); + } + ds_put_cstr(ds, "),"); +} + +static void +format_odp_tun_vxlan_opt(const struct nlattr *attr, + const struct nlattr *mask_attr, struct ds *ds, + bool verbose) +{ + unsigned int left; + const struct nlattr *a; + struct ofpbuf ofp; + + ofpbuf_init(&ofp, 100); + NL_NESTED_FOR_EACH(a, left, attr) { + uint16_t type = nl_attr_type(a); + const struct nlattr *ma = NULL; + + if (mask_attr) { + ma = nl_attr_find__(nl_attr_get(mask_attr), + nl_attr_get_size(mask_attr), type); + if (!ma) { + ma = generate_all_wildcard_mask(ovs_vxlan_ext_attr_lens, + OVS_VXLAN_EXT_MAX, + &ofp, a); + } + } + + if (!check_attr_len(ds, a, ma, ovs_vxlan_ext_attr_lens, + OVS_VXLAN_EXT_MAX, true)) { + continue; + } + + switch (type) { + case OVS_VXLAN_EXT_GBP: { + uint32_t key = nl_attr_get_u32(a); + ovs_be16 id, id_mask; + uint8_t flags, flags_mask; + + id = htons(key & 0xFFFF); + flags = (key >> 16) & 0xFF; + if (ma) { + uint32_t mask = nl_attr_get_u32(ma); + id_mask = htons(mask & 0xFFFF); + flags_mask = (mask >> 16) & 0xFF; + } + + ds_put_cstr(ds, "gbp("); + format_be16(ds, "id", id, ma ? &id_mask : NULL, verbose); + format_u8x(ds, "flags", flags, ma ? &flags_mask : NULL, verbose); + ds_chomp(ds, ','); + ds_put_cstr(ds, "),"); + break; + } + + default: + format_unknown_key(ds, a, ma); + } + ofpbuf_clear(&ofp); + } + + ds_chomp(ds, ','); + ofpbuf_uninit(&ofp); +} + +#define MASK(PTR, FIELD) PTR ? &PTR->FIELD : NULL + +static void +format_geneve_opts(const struct geneve_opt *opt, + const struct geneve_opt *mask, int opts_len, + struct ds *ds, bool verbose) +{ + while (opts_len > 0) { + unsigned int len; + uint8_t data_len, data_len_mask; + + if (opts_len < sizeof *opt) { + ds_put_format(ds, "opt len %u less than minimum %"PRIuSIZE, + opts_len, sizeof *opt); + return; + } + + data_len = opt->length * 4; + if (mask) { + if (mask->length == 0x1f) { + data_len_mask = UINT8_MAX; + } else { + data_len_mask = mask->length; + } + } + len = sizeof *opt + data_len; + if (len > opts_len) { + ds_put_format(ds, "opt len %u greater than remaining %u", + len, opts_len); + return; + } + + ds_put_char(ds, '{'); + format_be16x(ds, "class", opt->opt_class, MASK(mask, opt_class), + verbose); + format_u8x(ds, "type", opt->type, MASK(mask, type), verbose); + format_u8u(ds, "len", data_len, mask ? &data_len_mask : NULL, verbose); + if (data_len && + (verbose || !mask || !is_all_zeros(mask + 1, data_len))) { + ds_put_hex(ds, opt + 1, data_len); + if (mask && !is_all_ones(mask + 1, data_len)) { + ds_put_char(ds, '/'); + ds_put_hex(ds, mask + 1, data_len); + } + } else { + ds_chomp(ds, ','); + } + ds_put_char(ds, '}'); + + opt += len / sizeof(*opt); + if (mask) { + mask += len / sizeof(*opt); + } + opts_len -= len; + }; +} + +static void +format_odp_tun_geneve(const struct nlattr *attr, + const struct nlattr *mask_attr, struct ds *ds, + bool verbose) +{ + int opts_len = nl_attr_get_size(attr); + const struct geneve_opt *opt = nl_attr_get(attr); + const struct geneve_opt *mask = mask_attr ? + nl_attr_get(mask_attr) : NULL; + + if (mask && nl_attr_get_size(attr) != nl_attr_get_size(mask_attr)) { + ds_put_format(ds, "value len %"PRIuSIZE" different from mask len %"PRIuSIZE, + nl_attr_get_size(attr), nl_attr_get_size(mask_attr)); + return; + } + + format_geneve_opts(opt, mask, opts_len, ds, verbose); +} + +static void +format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, + struct ds *ds, bool verbose) +{ + unsigned int left; + const struct nlattr *a; + uint16_t flags = 0; + uint16_t mask_flags = 0; + struct ofpbuf ofp; + + ofpbuf_init(&ofp, 100); + NL_NESTED_FOR_EACH(a, left, attr) { + enum ovs_tunnel_key_attr type = nl_attr_type(a); + const struct nlattr *ma = NULL; + + if (mask_attr) { + ma = nl_attr_find__(nl_attr_get(mask_attr), + nl_attr_get_size(mask_attr), type); + if (!ma) { + ma = generate_all_wildcard_mask(ovs_tun_key_attr_lens, + OVS_TUNNEL_KEY_ATTR_MAX, + &ofp, a); + } + } + + if (!check_attr_len(ds, a, ma, ovs_tun_key_attr_lens, + OVS_TUNNEL_KEY_ATTR_MAX, true)) { + continue; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + format_be64(ds, "tun_id", nl_attr_get_be64(a), + ma ? nl_attr_get(ma) : NULL, verbose); + flags |= FLOW_TNL_F_KEY; + if (ma) { + mask_flags |= FLOW_TNL_F_KEY; + } + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + format_ipv4(ds, "src", nl_attr_get_be32(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + format_ipv4(ds, "dst", nl_attr_get_be32(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: { + struct in6_addr ipv6_src; + ipv6_src = nl_attr_get_in6_addr(a); + format_in6_addr(ds, "ipv6_src", &ipv6_src, + ma ? nl_attr_get(ma) : NULL, verbose); + break; + } + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: { + struct in6_addr ipv6_dst; + ipv6_dst = nl_attr_get_in6_addr(a); + format_in6_addr(ds, "ipv6_dst", &ipv6_dst, + ma ? nl_attr_get(ma) : NULL, verbose); + break; + } + case OVS_TUNNEL_KEY_ATTR_TOS: + format_u8x(ds, "tos", nl_attr_get_u8(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + format_u8u(ds, "ttl", nl_attr_get_u8(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + flags |= FLOW_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + flags |= FLOW_TNL_F_CSUM; + break; + case OVS_TUNNEL_KEY_ATTR_TP_SRC: + format_be16(ds, "tp_src", nl_attr_get_be16(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_TP_DST: + format_be16(ds, "tp_dst", nl_attr_get_be16(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_OAM: + flags |= FLOW_TNL_F_OAM; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + ds_put_cstr(ds, "vxlan("); + format_odp_tun_vxlan_opt(a, ma, ds, verbose); + ds_put_cstr(ds, "),"); + break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + ds_put_cstr(ds, "geneve("); + format_odp_tun_geneve(a, ma, ds, verbose); + ds_put_cstr(ds, "),"); + break; + case __OVS_TUNNEL_KEY_ATTR_MAX: + default: + format_unknown_key(ds, a, ma); + } + ofpbuf_clear(&ofp); + } + + /* Flags can have a valid mask even if the attribute is not set, so + * we need to collect these separately. */ + if (mask_attr) { + NL_NESTED_FOR_EACH(a, left, mask_attr) { + switch (nl_attr_type(a)) { + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + mask_flags |= FLOW_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + mask_flags |= FLOW_TNL_F_CSUM; + break; + case OVS_TUNNEL_KEY_ATTR_OAM: + mask_flags |= FLOW_TNL_F_OAM; + break; + } + } + } + + format_tun_flags(ds, "flags", flags, mask_attr ? &mask_flags : NULL, + verbose); + ds_chomp(ds, ','); + ofpbuf_uninit(&ofp); +} + +static const char * +odp_ct_state_to_string(uint32_t flag) +{ + switch (flag) { + case OVS_CS_F_REPLY_DIR: + return "rpl"; + case OVS_CS_F_TRACKED: + return "trk"; + case OVS_CS_F_NEW: + return "new"; + case OVS_CS_F_ESTABLISHED: + return "est"; + case OVS_CS_F_RELATED: + return "rel"; + case OVS_CS_F_INVALID: + return "inv"; + case OVS_CS_F_SRC_NAT: + return "snat"; + case OVS_CS_F_DST_NAT: + return "dnat"; + default: + return NULL; + } +} + static void format_frag(struct ds *ds, const char *name, uint8_t key, const uint8_t *mask, bool verbose) @@ -1714,7 +2641,20 @@ format_frag(struct ds *ds, const char *name, uint8_t key, } } -#define MASK(PTR, FIELD) PTR ? &PTR->FIELD : NULL +static bool +mask_empty(const struct nlattr *ma) +{ + const void *mask; + size_t n; + + if (!ma) { + return true; + } + mask = nl_attr_get(ma); + n = nl_attr_get_size(ma); + + return is_all_zeros(mask, n); +} static void format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, @@ -1723,37 +2663,15 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, { enum ovs_key_attr attr = nl_attr_type(a); char namebuf[OVS_KEY_ATTR_BUFSIZE]; - int expected_len; bool is_exact; is_exact = ma ? odp_mask_attr_is_exact(ma) : true; ds_put_cstr(ds, ovs_key_attr_to_string(attr, namebuf, sizeof namebuf)); - { - expected_len = odp_flow_key_attr_len(nl_attr_type(a)); - if (expected_len != -2) { - bool bad_key_len = nl_attr_get_size(a) != expected_len; - bool bad_mask_len = ma && nl_attr_get_size(ma) != expected_len; - - if (bad_key_len || bad_mask_len) { - if (bad_key_len) { - ds_put_format(ds, "(bad key length %"PRIuSIZE", expected %d)(", - nl_attr_get_size(a), expected_len); - } - format_generic_odp_key(a, ds); - if (ma) { - ds_put_char(ds, '/'); - if (bad_mask_len) { - ds_put_format(ds, "(bad mask length %"PRIuSIZE", expected %d)(", - nl_attr_get_size(ma), expected_len); - } - format_generic_odp_key(ma, ds); - } - ds_put_char(ds, ')'); - return; - } - } + if (!check_attr_len(ds, a, ma, ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, false)) { + return; } ds_put_char(ds, '('); @@ -1779,30 +2697,53 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } break; - case OVS_KEY_ATTR_TUNNEL: { - struct flow_tnl key, mask_; - struct flow_tnl *mask = ma ? &mask_ : NULL; + case OVS_KEY_ATTR_CT_MARK: + if (verbose || !mask_empty(ma)) { + ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); + } + } + break; - if (mask) { - memset(mask, 0, sizeof *mask); - odp_tun_key_from_attr(ma, mask); + case OVS_KEY_ATTR_CT_STATE: + if (verbose) { + ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx32, + mask_empty(ma) ? 0 : nl_attr_get_u32(ma)); + } + } else if (!is_exact) { + format_flags_masked(ds, NULL, odp_ct_state_to_string, + nl_attr_get_u32(a), + mask_empty(ma) ? 0 : nl_attr_get_u32(ma), + UINT32_MAX); + } else { + format_flags(ds, odp_ct_state_to_string, nl_attr_get_u32(a), '|'); } - memset(&key, 0, sizeof key); - if (odp_tun_key_from_attr(a, &key) == ODP_FIT_ERROR) { - ds_put_format(ds, "error"); - return; + break; + + case OVS_KEY_ATTR_CT_ZONE: + if (verbose || !mask_empty(ma)) { + ds_put_format(ds, "%#"PRIx16, nl_attr_get_u16(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx16, nl_attr_get_u16(ma)); + } } - format_be64(ds, "tun_id", key.tun_id, MASK(mask, tun_id), verbose); - format_ipv4(ds, "src", key.ip_src, MASK(mask, ip_src), verbose); - format_ipv4(ds, "dst", key.ip_dst, MASK(mask, ip_dst), verbose); - format_u8x(ds, "tos", key.ip_tos, MASK(mask, ip_tos), verbose); - format_u8u(ds, "ttl", key.ip_ttl, MASK(mask, ip_ttl), verbose); - format_be16(ds, "tp_src", key.tp_src, MASK(mask, tp_src), verbose); - format_be16(ds, "tp_dst", key.tp_dst, MASK(mask, tp_dst), verbose); - format_tun_flags(ds, "flags", key.flags, MASK(mask, flags), verbose); - ds_chomp(ds, ','); + break; + + case OVS_KEY_ATTR_CT_LABELS: { + const ovs_u128 *value = nl_attr_get(a); + const ovs_u128 *mask = ma ? nl_attr_get(ma) : NULL; + + format_u128(ds, value, mask, verbose); break; } + + case OVS_KEY_ATTR_TUNNEL: + format_odp_tun_attr(a, ma, ds, verbose); + break; + case OVS_KEY_ATTR_IN_PORT: if (portno_names && verbose && is_exact) { char *name = odp_portno_names_get(portno_names, @@ -1912,10 +2853,11 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, if (!is_exact) { format_flags_masked(ds, NULL, packet_tcp_flag_to_string, ntohs(nl_attr_get_be16(a)), - ntohs(nl_attr_get_be16(ma))); + TCP_FLAGS(nl_attr_get_be16(ma)), + TCP_FLAGS(OVS_BE16_MAX)); } else { format_flags(ds, packet_tcp_flag_to_string, - ntohs(nl_attr_get_be16(a)), ','); + ntohs(nl_attr_get_be16(a)), '|'); } break; @@ -1977,26 +2919,82 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } static struct nlattr * -generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key) +generate_all_wildcard_mask(const struct attr_len_tbl tbl[], int max, + struct ofpbuf *ofp, const struct nlattr *key) { const struct nlattr *a; unsigned int left; int type = nl_attr_type(key); int size = nl_attr_get_size(key); - if (odp_flow_key_attr_len(type) >=0) { + if (odp_key_attr_len(tbl, max, type) != ATTR_LEN_NESTED) { nl_msg_put_unspec_zero(ofp, type, size); } else { size_t nested_mask; + if (tbl[type].next) { + tbl = tbl[type].next; + max = tbl[type].next_max; + } + nested_mask = nl_msg_start_nested(ofp, type); NL_ATTR_FOR_EACH(a, left, key, nl_attr_get_size(key)) { - generate_all_wildcard_mask(ofp, nl_attr_get(a)); + generate_all_wildcard_mask(tbl, max, ofp, nl_attr_get(a)); } nl_msg_end_nested(ofp, nested_mask); } - return ofpbuf_base(ofp); + return ofp->base; +} + +static void +format_u128(struct ds *ds, const ovs_u128 *key, const ovs_u128 *mask, + bool verbose) +{ + if (verbose || (mask && !ovs_u128_is_zero(mask))) { + ovs_be128 value; + + value = hton128(*key); + ds_put_hex(ds, &value, sizeof value); + if (mask && !(ovs_u128_is_ones(mask))) { + value = hton128(*mask); + ds_put_char(ds, '/'); + ds_put_hex(ds, &value, sizeof value); + } + } +} + +static int +scan_u128(const char *s_, ovs_u128 *value, ovs_u128 *mask) +{ + char *s = CONST_CAST(char *, s_); + ovs_be128 be_value; + ovs_be128 be_mask; + + if (!parse_int_string(s, (uint8_t *)&be_value, sizeof be_value, &s)) { + *value = ntoh128(be_value); + + if (mask) { + int n; + + if (ovs_scan(s, "/%n", &n)) { + int error; + + s += n; + error = parse_int_string(s, (uint8_t *)&be_mask, + sizeof be_mask, &s); + if (error) { + return error; + } + *mask = ntoh128(be_mask); + } else { + *mask = OVS_U128_MAX; + } + } + return s - s_; + } + + return 0; } int @@ -2005,24 +3003,12 @@ odp_ufid_from_string(const char *s_, ovs_u128 *ufid) const char *s = s_; if (ovs_scan(s, "ufid:")) { - size_t n; - s += 5; - if (ovs_scan(s, "0x")) { - s += 2; - } - - n = strspn(s, "0123456789abcdefABCDEF"); - if (n != 32) { - return -EINVAL; - } - if (!ovs_scan(s, "%16"SCNx64"%16"SCNx64, &ufid->u64.hi, - &ufid->u64.lo)) { + if (!uuid_from_string_prefix((struct uuid *)ufid, s)) { return -EINVAL; } - s += n; - s += strspn(s, delimiters); + s += UUID_LEN; return s - s_; } @@ -2033,8 +3019,7 @@ odp_ufid_from_string(const char *s_, ovs_u128 *ufid) void odp_format_ufid(const ovs_u128 *ufid, struct ds *ds) { - ds_put_format(ds, "ufid:%016"PRIx64"%016"PRIx64, ufid->u64.hi, - ufid->u64.lo); + ds_put_format(ds, "ufid:"UUID_FMT, UUID_ARGS((struct uuid *)ufid)); } /* Appends to 'ds' a string representation of the 'key_len' bytes of @@ -2064,7 +3049,9 @@ odp_flow_format(const struct nlattr *key, size_t key_len, has_ethtype_key = true; } - is_nested_attr = (odp_flow_key_attr_len(attr_type) == -2); + is_nested_attr = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, attr_type) == + ATTR_LEN_NESTED; if (mask && mask_len) { ma = nl_attr_find__(mask, mask_len, nl_attr_type(a)); @@ -2073,7 +3060,9 @@ odp_flow_format(const struct nlattr *key, size_t key_len, if (verbose || !is_wildcard || is_nested_attr) { if (is_wildcard && !ma) { - ma = generate_all_wildcard_mask(&ofp, a); + ma = generate_all_wildcard_mask(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, + &ofp, a); } if (!first_field) { ds_put_char(ds, ','); @@ -2136,12 +3125,12 @@ ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type) /* Parsing. */ static int -scan_eth(const char *s, uint8_t (*key)[ETH_ADDR_LEN], - uint8_t (*mask)[ETH_ADDR_LEN]) +scan_eth(const char *s, struct eth_addr *key, struct eth_addr *mask) { int n; - if (ovs_scan(s, ETH_ADDR_SCAN_FMT"%n", ETH_ADDR_SCAN_ARGS(*key), &n)) { + if (ovs_scan(s, ETH_ADDR_SCAN_FMT"%n", + ETH_ADDR_SCAN_ARGS(*key), &n)) { int len = n; if (mask) { @@ -2179,7 +3168,7 @@ scan_ipv4(const char *s, ovs_be32 *key, ovs_be32 *mask) } static int -scan_ipv6(const char *s, ovs_be32 (*key)[4], ovs_be32 (*mask)[4]) +scan_in6_addr(const char *s, struct in6_addr *key, struct in6_addr *mask) { int n; char ipv6_s[IPV6_SCAN_LEN + 1]; @@ -2201,6 +3190,13 @@ scan_ipv6(const char *s, ovs_be32 (*key)[4], ovs_be32 (*mask)[4]) return 0; } +static int +scan_ipv6(const char *s, ovs_be32 (*key)[4], ovs_be32 (*mask)[4]) +{ + return scan_in6_addr(s, key ? (struct in6_addr *) *key : NULL, + mask ? (struct in6_addr *) *mask : NULL); +} + static int scan_ipv6_label(const char *s, ovs_be32 *key, ovs_be32 *mask) { @@ -2227,18 +3223,38 @@ scan_ipv6_label(const char *s, ovs_be32 *key, ovs_be32 *mask) } static int -scan_u8(const char *s, uint8_t *key, uint8_t *mask) +scan_u8(const char *s, uint8_t *key, uint8_t *mask) +{ + int n; + + if (ovs_scan(s, "%"SCNi8"%n", key, &n)) { + int len = n; + + if (mask) { + if (ovs_scan(s + len, "/%"SCNi8"%n", mask, &n)) { + len += n; + } else { + *mask = UINT8_MAX; + } + } + return len; + } + return 0; +} + +static int +scan_u16(const char *s, uint16_t *key, uint16_t *mask) { int n; - if (ovs_scan(s, "%"SCNi8"%n", key, &n)) { + if (ovs_scan(s, "%"SCNi16"%n", key, &n)) { int len = n; if (mask) { - if (ovs_scan(s + len, "/%"SCNi8"%n", mask, &n)) { + if (ovs_scan(s + len, "/%"SCNi16"%n", mask, &n)) { len += n; } else { - *mask = UINT8_MAX; + *mask = UINT16_MAX; } } return len; @@ -2318,8 +3334,8 @@ scan_tun_flags(const char *s, uint16_t *key, uint16_t *mask) uint32_t flags, fmask; int n; - n = parse_flags(s, flow_tun_flag_to_string, &flags, - FLOW_TNL_F_MASK, mask ? &fmask : NULL); + n = parse_odp_flags(s, flow_tun_flag_to_string, &flags, + FLOW_TNL_F_MASK, mask ? &fmask : NULL); if (n >= 0 && s[n] == ')') { *key = flags; if (mask) { @@ -2336,8 +3352,8 @@ scan_tcp_flags(const char *s, ovs_be16 *key, ovs_be16 *mask) uint32_t flags, fmask; int n; - n = parse_flags(s, packet_tcp_flag_to_string, &flags, - TCP_FLAGS(OVS_BE16_MAX), mask ? &fmask : NULL); + n = parse_odp_flags(s, packet_tcp_flag_to_string, &flags, + TCP_FLAGS(OVS_BE16_MAX), mask ? &fmask : NULL); if (n >= 0) { *key = htons(flags); if (mask) { @@ -2348,6 +3364,92 @@ scan_tcp_flags(const char *s, ovs_be16 *key, ovs_be16 *mask) return 0; } +static uint32_t +ovs_to_odp_ct_state(uint8_t state) +{ + uint32_t odp = 0; + + if (state & CS_NEW) { + odp |= OVS_CS_F_NEW; + } + if (state & CS_ESTABLISHED) { + odp |= OVS_CS_F_ESTABLISHED; + } + if (state & CS_RELATED) { + odp |= OVS_CS_F_RELATED; + } + if (state & CS_INVALID) { + odp |= OVS_CS_F_INVALID; + } + if (state & CS_REPLY_DIR) { + odp |= OVS_CS_F_REPLY_DIR; + } + if (state & CS_TRACKED) { + odp |= OVS_CS_F_TRACKED; + } + if (state & CS_SRC_NAT) { + odp |= OVS_CS_F_SRC_NAT; + } + if (state & CS_DST_NAT) { + odp |= OVS_CS_F_DST_NAT; + } + + return odp; +} + +static uint8_t +odp_to_ovs_ct_state(uint32_t flags) +{ + uint32_t state = 0; + + if (flags & OVS_CS_F_NEW) { + state |= CS_NEW; + } + if (flags & OVS_CS_F_ESTABLISHED) { + state |= CS_ESTABLISHED; + } + if (flags & OVS_CS_F_RELATED) { + state |= CS_RELATED; + } + if (flags & OVS_CS_F_INVALID) { + state |= CS_INVALID; + } + if (flags & OVS_CS_F_REPLY_DIR) { + state |= CS_REPLY_DIR; + } + if (flags & OVS_CS_F_TRACKED) { + state |= CS_TRACKED; + } + if (flags & OVS_CS_F_SRC_NAT) { + state |= CS_SRC_NAT; + } + if (flags & OVS_CS_F_DST_NAT) { + state |= CS_DST_NAT; + } + + return state; +} + +static int +scan_ct_state(const char *s, uint32_t *key, uint32_t *mask) +{ + uint32_t flags, fmask; + int n; + + n = parse_flags(s, odp_ct_state_to_string, ')', NULL, NULL, &flags, + ovs_to_odp_ct_state(CS_SUPPORTED_MASK), + mask ? &fmask : NULL); + + if (n >= 0) { + *key = flags; + if (mask) { + *mask = fmask; + } + return n; + } + return 0; +} + static int scan_frag(const char *s, uint8_t *key, uint8_t *mask) { @@ -2533,14 +3635,185 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) return scan_be32_bf(s, key, mask, 1, MPLS_BOS_SHIFT); } -/* ATTR is compile-time constant, so only the case with correct data type - * will be used. However, the compiler complains about the data type for - * the other cases, so we must cast to make the compiler silent. */ -#define SCAN_PUT_ATTR(BUF, ATTR, DATA) \ - if ((ATTR) == OVS_KEY_ATTR_TUNNEL) { \ - tun_key_to_attr(BUF, (const struct flow_tnl *)(void *)&(DATA)); \ - } else { \ - nl_msg_put_unspec(BUF, ATTR, &(DATA), sizeof (DATA)); \ +static int +scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) +{ + const char *s_base = s; + ovs_be16 id = 0, id_mask = 0; + uint8_t flags = 0, flags_mask = 0; + + if (!strncmp(s, "id=", 3)) { + s += 3; + s += scan_be16(s, &id, mask ? &id_mask : NULL); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "flags=", 6)) { + s += 6; + s += scan_u8(s, &flags, mask ? &flags_mask : NULL); + } + + if (!strncmp(s, "))", 2)) { + s += 2; + + *key = (flags << 16) | ntohs(id); + if (mask) { + *mask = (flags_mask << 16) | ntohs(id_mask); + } + + return s - s_base; + } + + return 0; +} + +static int +scan_geneve(const char *s, struct geneve_scan *key, struct geneve_scan *mask) +{ + const char *s_base = s; + struct geneve_opt *opt = key->d; + struct geneve_opt *opt_mask = mask ? mask->d : NULL; + int len_remain = sizeof key->d; + + while (s[0] == '{' && len_remain >= sizeof *opt) { + int data_len = 0; + + s++; + len_remain -= sizeof *opt; + + if (!strncmp(s, "class=", 6)) { + s += 6; + s += scan_be16(s, &opt->opt_class, + mask ? &opt_mask->opt_class : NULL); + } else if (mask) { + memset(&opt_mask->opt_class, 0, sizeof opt_mask->opt_class); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "type=", 5)) { + s += 5; + s += scan_u8(s, &opt->type, mask ? &opt_mask->type : NULL); + } else if (mask) { + memset(&opt_mask->type, 0, sizeof opt_mask->type); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "len=", 4)) { + uint8_t opt_len, opt_len_mask; + s += 4; + s += scan_u8(s, &opt_len, mask ? &opt_len_mask : NULL); + + if (opt_len > 124 || opt_len % 4 || opt_len > len_remain) { + return 0; + } + opt->length = opt_len / 4; + if (mask) { + opt_mask->length = opt_len_mask; + } + data_len = opt_len; + } else if (mask) { + memset(&opt_mask->type, 0, sizeof opt_mask->type); + } + + if (s[0] == ',') { + s++; + } + if (parse_int_string(s, (uint8_t *)(opt + 1), data_len, (char **)&s)) { + return 0; + } + + if (mask) { + if (s[0] == '/') { + s++; + if (parse_int_string(s, (uint8_t *)(opt_mask + 1), + data_len, (char **)&s)) { + return 0; + } + } + opt_mask->r1 = 0; + opt_mask->r2 = 0; + opt_mask->r3 = 0; + } + + if (s[0] == '}') { + s++; + opt += 1 + data_len / 4; + if (mask) { + opt_mask += 1 + data_len / 4; + } + len_remain -= data_len; + } + } + + if (s[0] == ')') { + int len = sizeof key->d - len_remain; + + s++; + key->len = len; + if (mask) { + mask->len = len; + } + return s - s_base; + } + + return 0; +} + +static void +tun_flags_to_attr(struct ofpbuf *a, const void *data_) +{ + const uint16_t *flags = data_; + + if (*flags & FLOW_TNL_F_DONT_FRAGMENT) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT); + } + if (*flags & FLOW_TNL_F_CSUM) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM); + } + if (*flags & FLOW_TNL_F_OAM) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_OAM); + } +} + +static void +vxlan_gbp_to_attr(struct ofpbuf *a, const void *data_) +{ + const uint32_t *gbp = data_; + + if (*gbp) { + size_t vxlan_opts_ofs; + + vxlan_opts_ofs = nl_msg_start_nested(a, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + nl_msg_put_u32(a, OVS_VXLAN_EXT_GBP, *gbp); + nl_msg_end_nested(a, vxlan_opts_ofs); + } +} + +static void +geneve_to_attr(struct ofpbuf *a, const void *data_) +{ + const struct geneve_scan *geneve = data_; + + nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, geneve->d, + geneve->len); +} + +#define SCAN_PUT_ATTR(BUF, ATTR, DATA, FUNC) \ + { \ + unsigned long call_fn = (unsigned long)FUNC; \ + if (call_fn) { \ + typedef void (*fn)(struct ofpbuf *, const void *); \ + fn func = FUNC; \ + func(BUF, &(DATA)); \ + } else { \ + nl_msg_put_unspec(BUF, ATTR, &(DATA), sizeof (DATA)); \ + } \ } #define SCAN_IF(NAME) \ @@ -2559,6 +3832,15 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) do { \ len = 0; +/* Init as fully-masked as mask will not be scanned. */ +#define SCAN_BEGIN_FULLY_MASKED(NAME, TYPE) \ + SCAN_IF(NAME); \ + TYPE skey, smask; \ + memset(&skey, 0, sizeof skey); \ + memset(&smask, 0xff, sizeof smask); \ + do { \ + len = 0; + /* VLAN needs special initialization. */ #define SCAN_BEGIN_INIT(NAME, TYPE, KEY_INIT, MASK_INIT) \ SCAN_IF(NAME); \ @@ -2595,23 +3877,102 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) return -EINVAL; \ } -#define SCAN_PUT(ATTR) \ - if (!mask || !is_all_zeros(&smask, sizeof smask)) { \ - SCAN_PUT_ATTR(key, ATTR, skey); \ - if (mask) { \ - SCAN_PUT_ATTR(mask, ATTR, smask); \ - } \ - } +/* Beginning of nested attribute. */ +#define SCAN_BEGIN_NESTED(NAME, ATTR) \ + SCAN_IF(NAME); \ + size_t key_offset, mask_offset; \ + key_offset = nl_msg_start_nested(key, ATTR); \ + if (mask) { \ + mask_offset = nl_msg_start_nested(mask, ATTR); \ + } \ + do { \ + len = 0; + +#define SCAN_END_NESTED() \ + SCAN_FINISH(); \ + nl_msg_end_nested(key, key_offset); \ + if (mask) { \ + nl_msg_end_nested(mask, mask_offset); \ + } \ + return s - start; \ + } + +#define SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, FUNC) \ + if (strncmp(s, NAME, strlen(NAME)) == 0) { \ + TYPE skey, smask; \ + memset(&skey, 0, sizeof skey); \ + memset(&smask, 0xff, sizeof smask); \ + s += strlen(NAME); \ + SCAN_TYPE(SCAN_AS, &skey, &smask); \ + SCAN_PUT(ATTR, FUNC); \ + continue; \ + } + +#define SCAN_FIELD_NESTED(NAME, TYPE, SCAN_AS, ATTR) \ + SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, NULL) + +#define SCAN_FIELD_NESTED_FUNC(NAME, TYPE, SCAN_AS, FUNC) \ + SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, 0, FUNC) + +#define SCAN_PUT(ATTR, FUNC) \ + SCAN_PUT_ATTR(key, ATTR, skey, FUNC); \ + if (mask) \ + SCAN_PUT_ATTR(mask, ATTR, smask, FUNC); \ #define SCAN_END(ATTR) \ SCAN_FINISH(); \ - SCAN_PUT(ATTR); \ + SCAN_PUT(ATTR, NULL); \ return s - start; \ } +#define SCAN_BEGIN_ARRAY(NAME, TYPE, CNT) \ + SCAN_IF(NAME); \ + TYPE skey[CNT], smask[CNT]; \ + memset(&skey, 0, sizeof skey); \ + memset(&smask, 0, sizeof smask); \ + int idx = 0, cnt = CNT; \ + uint64_t fields = 0; \ + do { \ + int field = 0; \ + len = 0; + +/* Scan named ('NAME') entry 'FIELD' as 'TYPE'. */ +#define SCAN_FIELD_ARRAY(NAME, TYPE, FIELD) \ + if (strncmp(s, NAME, strlen(NAME)) == 0) { \ + if (fields & (1UL << field)) { \ + fields = 0; \ + if (++idx == cnt) { \ + break; \ + } \ + } \ + s += strlen(NAME); \ + SCAN_TYPE(TYPE, &skey[idx].FIELD, mask ? &smask[idx].FIELD : NULL); \ + fields |= 1UL << field; \ + continue; \ + } \ + field++; + +#define SCAN_PUT_ATTR_ARRAY(BUF, ATTR, DATA, CNT) \ + nl_msg_put_unspec(BUF, ATTR, &(DATA), sizeof (DATA)[0] * (CNT)); \ + +#define SCAN_PUT_ARRAY(ATTR, CNT) \ + SCAN_PUT_ATTR_ARRAY(key, ATTR, skey, CNT); \ + if (mask) { \ + SCAN_PUT_ATTR_ARRAY(mask, ATTR, smask, CNT); \ + } + +#define SCAN_END_ARRAY(ATTR) \ + SCAN_FINISH(); \ + if (idx == cnt) { \ + return -EINVAL; \ + } \ + SCAN_PUT_ARRAY(ATTR, idx + 1); \ + return s - start; \ + } + #define SCAN_END_SINGLE(ATTR) \ SCAN_FINISH_SINGLE(); \ - SCAN_PUT(ATTR); \ + SCAN_PUT(ATTR, NULL); \ return s - start; \ } @@ -2620,9 +3981,9 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) SCAN_TYPE(SCAN_AS, &skey, &smask); \ } SCAN_END_SINGLE(ATTR) -#define SCAN_SINGLE_NO_MASK(NAME, TYPE, SCAN_AS, ATTR) \ - SCAN_BEGIN(NAME, TYPE) { \ - SCAN_TYPE(SCAN_AS, &skey, NULL); \ +#define SCAN_SINGLE_FULLY_MASKED(NAME, TYPE, SCAN_AS, ATTR) \ + SCAN_BEGIN_FULLY_MASKED(NAME, TYPE) { \ + SCAN_TYPE(SCAN_AS, &skey, NULL); \ } SCAN_END_SINGLE(ATTR) /* scan_port needs one extra argument. */ @@ -2639,21 +4000,41 @@ static int parse_odp_key_mask_attr(const char *s, const struct simap *port_names, struct ofpbuf *key, struct ofpbuf *mask) { + ovs_u128 ufid; + int len; + + /* Skip UFID. */ + len = odp_ufid_from_string(s, &ufid); + if (len) { + return len; + } + SCAN_SINGLE("skb_priority(", uint32_t, u32, OVS_KEY_ATTR_PRIORITY); SCAN_SINGLE("skb_mark(", uint32_t, u32, OVS_KEY_ATTR_SKB_MARK); - SCAN_SINGLE_NO_MASK("recirc_id(", uint32_t, u32, OVS_KEY_ATTR_RECIRC_ID); + SCAN_SINGLE_FULLY_MASKED("recirc_id(", uint32_t, u32, + OVS_KEY_ATTR_RECIRC_ID); SCAN_SINGLE("dp_hash(", uint32_t, u32, OVS_KEY_ATTR_DP_HASH); - SCAN_BEGIN("tunnel(", struct flow_tnl) { - SCAN_FIELD("tun_id=", be64, tun_id); - SCAN_FIELD("src=", ipv4, ip_src); - SCAN_FIELD("dst=", ipv4, ip_dst); - SCAN_FIELD("tos=", u8, ip_tos); - SCAN_FIELD("ttl=", u8, ip_ttl); - SCAN_FIELD("tp_src=", be16, tp_src); - SCAN_FIELD("tp_dst=", be16, tp_dst); - SCAN_FIELD("flags(", tun_flags, flags); - } SCAN_END(OVS_KEY_ATTR_TUNNEL); + SCAN_SINGLE("ct_state(", uint32_t, ct_state, OVS_KEY_ATTR_CT_STATE); + SCAN_SINGLE("ct_zone(", uint16_t, u16, OVS_KEY_ATTR_CT_ZONE); + SCAN_SINGLE("ct_mark(", uint32_t, u32, OVS_KEY_ATTR_CT_MARK); + SCAN_SINGLE("ct_label(", ovs_u128, u128, OVS_KEY_ATTR_CT_LABELS); + + SCAN_BEGIN_NESTED("tunnel(", OVS_KEY_ATTR_TUNNEL) { + SCAN_FIELD_NESTED("tun_id=", ovs_be64, be64, OVS_TUNNEL_KEY_ATTR_ID); + SCAN_FIELD_NESTED("src=", ovs_be32, ipv4, OVS_TUNNEL_KEY_ATTR_IPV4_SRC); + SCAN_FIELD_NESTED("dst=", ovs_be32, ipv4, OVS_TUNNEL_KEY_ATTR_IPV4_DST); + SCAN_FIELD_NESTED("ipv6_src=", struct in6_addr, in6_addr, OVS_TUNNEL_KEY_ATTR_IPV6_SRC); + SCAN_FIELD_NESTED("ipv6_dst=", struct in6_addr, in6_addr, OVS_TUNNEL_KEY_ATTR_IPV6_DST); + SCAN_FIELD_NESTED("tos=", uint8_t, u8, OVS_TUNNEL_KEY_ATTR_TOS); + SCAN_FIELD_NESTED("ttl=", uint8_t, u8, OVS_TUNNEL_KEY_ATTR_TTL); + SCAN_FIELD_NESTED("tp_src=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_SRC); + SCAN_FIELD_NESTED("tp_dst=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_DST); + SCAN_FIELD_NESTED_FUNC("vxlan(gbp(", uint32_t, vxlan_gbp, vxlan_gbp_to_attr); + SCAN_FIELD_NESTED_FUNC("geneve(", struct geneve_scan, geneve, + geneve_to_attr); + SCAN_FIELD_NESTED_FUNC("flags(", uint16_t, tun_flags, tun_flags_to_attr); + } SCAN_END_NESTED(); SCAN_SINGLE_PORT("in_port(", uint32_t, OVS_KEY_ATTR_IN_PORT); @@ -2671,12 +4052,12 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, SCAN_SINGLE("eth_type(", ovs_be16, be16, OVS_KEY_ATTR_ETHERTYPE); - SCAN_BEGIN("mpls(", struct ovs_key_mpls) { - SCAN_FIELD("label=", mpls_label, mpls_lse); - SCAN_FIELD("tc=", mpls_tc, mpls_lse); - SCAN_FIELD("ttl=", mpls_ttl, mpls_lse); - SCAN_FIELD("bos=", mpls_bos, mpls_lse); - } SCAN_END(OVS_KEY_ATTR_MPLS); + SCAN_BEGIN_ARRAY("mpls(", struct ovs_key_mpls, FLOW_MAX_MPLS_LABELS) { + SCAN_FIELD_ARRAY("label=", mpls_label, mpls_lse); + SCAN_FIELD_ARRAY("tc=", mpls_tc, mpls_lse); + SCAN_FIELD_ARRAY("ttl=", mpls_ttl, mpls_lse); + SCAN_FIELD_ARRAY("bos=", mpls_bos, mpls_lse); + } SCAN_END_ARRAY(OVS_KEY_ATTR_MPLS); SCAN_BEGIN("ipv4(", struct ovs_key_ipv4) { SCAN_FIELD("src=", ipv4, ipv4_src); @@ -2752,7 +4133,7 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, for (;;) { int retval; - s += strspn(s, ", \t\r\n"); + s += strspn(s, delimiters); if (!*s) { return -EINVAL; } else if (*s == ')') { @@ -2796,7 +4177,7 @@ int odp_flow_from_string(const char *s, const struct simap *port_names, struct ofpbuf *key, struct ofpbuf *mask) { - const size_t old_size = ofpbuf_size(key); + const size_t old_size = key->size; for (;;) { int retval; @@ -2807,7 +4188,7 @@ odp_flow_from_string(const char *s, const struct simap *port_names, retval = parse_odp_key_mask_attr(s, port_names, key, mask); if (retval < 0) { - ofpbuf_set_size(key, old_size); + key->size = old_size; return -retval; } s += retval; @@ -2844,6 +4225,8 @@ static void put_ipv6_key(const struct ovs_key_ipv6 *, struct flow *, bool is_mask); static void get_arp_key(const struct flow *, struct ovs_key_arp *); static void put_arp_key(const struct ovs_key_arp *, struct flow *); +static void get_nd_key(const struct flow *, struct ovs_key_nd *); +static void put_nd_key(const struct ovs_key_nd *, struct flow *); /* These share the same layout. */ union ovs_key_tp { @@ -2856,31 +4239,46 @@ static void get_tp_key(const struct flow *, union ovs_key_tp *); static void put_tp_key(const union ovs_key_tp *, struct flow *); static void -odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, - const struct flow *mask, odp_port_t odp_in_port, - size_t max_mpls_depth, bool recirc, bool export_mask) +odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, + bool export_mask, struct ofpbuf *buf) { struct ovs_key_ethernet *eth_key; size_t encap; - const struct flow *data = export_mask ? mask : flow; + const struct flow *flow = parms->flow; + const struct flow *data = export_mask ? parms->mask : parms->flow; nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); - if (flow->tunnel.ip_dst || export_mask) { - tun_key_to_attr(buf, &data->tunnel); + if (flow_tnl_dst_is_set(&flow->tunnel) || export_mask) { + tun_key_to_attr(buf, &data->tunnel, &parms->flow->tunnel, + parms->key_buf); } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); - if (recirc) { + if (parms->support.ct_state) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_CT_STATE, + ovs_to_odp_ct_state(data->ct_state)); + } + if (parms->support.ct_zone) { + nl_msg_put_u16(buf, OVS_KEY_ATTR_CT_ZONE, data->ct_zone); + } + if (parms->support.ct_mark) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_CT_MARK, data->ct_mark); + } + if (parms->support.ct_label) { + nl_msg_put_unspec(buf, OVS_KEY_ATTR_CT_LABELS, &data->ct_label, + sizeof(data->ct_label)); + } + if (parms->support.recirc) { nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id); nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash); } /* Add an ingress port attribute if this is a mask or 'odp_in_port' * is not the magical value "ODPP_NONE". */ - if (export_mask || odp_in_port != ODPP_NONE) { - nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); + if (export_mask || parms->odp_in_port != ODPP_NONE) { + nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, parms->odp_in_port); } eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, @@ -2946,7 +4344,9 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, int i, n; n = flow_count_mpls_labels(flow, NULL); - n = MIN(n, max_mpls_depth); + if (export_mask) { + n = MIN(n, parms->support.max_mpls_depth); + } mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS, n * sizeof *mpls_key); for (i = 0; i < n; i++) { @@ -2996,8 +4396,12 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, if (flow->tp_dst == htons(0) && (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) - && (!export_mask || (data->tp_src == htons(0xffff) - && data->tp_dst == htons(0xffff)))) { + /* Even though 'tp_src' and 'tp_dst' are 16 bits wide, ICMP + * type and code are 8 bits wide. Therefore, an exact match + * looks like htons(0xff), not htons(0xffff). See + * xlate_wc_finish() for details. */ + && (!export_mask || (data->tp_src == htons(0xff) + && data->tp_dst == htons(0xff)))) { struct ovs_key_nd *nd_key; @@ -3005,8 +4409,8 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, sizeof *nd_key); memcpy(nd_key->nd_target, &data->nd_target, sizeof nd_key->nd_target); - memcpy(nd_key->nd_sll, data->arp_sha, ETH_ADDR_LEN); - memcpy(nd_key->nd_tll, data->arp_tha, ETH_ADDR_LEN); + nd_key->nd_sll = data->arp_sha; + nd_key->nd_tll = data->arp_tha; } } } @@ -3018,43 +4422,26 @@ unencap: } /* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. - * 'flow->in_port' is ignored (since it is likely to be an OpenFlow port - * number rather than a datapath port number). Instead, if 'odp_in_port' - * is anything other than ODPP_NONE, it is included in 'buf' as the input - * port. * * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be - * capable of being expanded to allow for that much space. - * - * 'recirc' indicates support for recirculation fields. If this is true, then - * these fields will always be serialised. */ + * capable of being expanded to allow for that much space. */ void -odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, - const struct flow *mask, odp_port_t odp_in_port, - bool recirc) +odp_flow_key_from_flow(const struct odp_flow_key_parms *parms, + struct ofpbuf *buf) { - odp_flow_key_from_flow__(buf, flow, mask, odp_in_port, SIZE_MAX, recirc, - false); + odp_flow_key_from_flow__(parms, false, buf); } /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to - * 'buf'. 'flow' is used as a template to determine how to interpret - * 'mask'. For example, the 'dl_type' of 'mask' describes the mask, but - * it doesn't indicate whether the other fields should be interpreted as - * ARP, IPv4, IPv6, etc. + * 'buf'. * * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be - * capable of being expanded to allow for that much space. - * - * 'recirc' indicates support for recirculation fields. If this is true, then - * these fields will always be serialised. */ + * capable of being expanded to allow for that much space. */ void -odp_flow_key_from_mask(struct ofpbuf *buf, const struct flow *mask, - const struct flow *flow, uint32_t odp_in_port_mask, - size_t max_mpls_depth, bool recirc) +odp_flow_key_from_mask(const struct odp_flow_key_parms *parms, + struct ofpbuf *buf) { - odp_flow_key_from_flow__(buf, flow, mask, u32_to_odp(odp_in_port_mask), - max_mpls_depth, recirc, true); + odp_flow_key_from_flow__(parms, true, buf); } /* Generate ODP flow key from the given packet metadata */ @@ -3063,12 +4450,27 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) { nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority); - if (md->tunnel.ip_dst) { - tun_key_to_attr(buf, &md->tunnel); + if (flow_tnl_dst_is_set(&md->tunnel)) { + tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL); } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, md->pkt_mark); + if (md->ct_state) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_CT_STATE, + ovs_to_odp_ct_state(md->ct_state)); + if (md->ct_zone) { + nl_msg_put_u16(buf, OVS_KEY_ATTR_CT_ZONE, md->ct_zone); + } + if (md->ct_mark) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_CT_MARK, md->ct_mark); + } + if (!ovs_u128_is_zero(&md->ct_label)) { + nl_msg_put_unspec(buf, OVS_KEY_ATTR_CT_LABELS, &md->ct_label, + sizeof(md->ct_label)); + } + } + /* Add an ingress port attribute if 'odp_in_port' is not the magical * value "ODPP_NONE". */ if (md->in_port.odp_port != ODPP_NONE) { @@ -3087,12 +4489,13 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, 1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL | 1u << OVS_KEY_ATTR_IN_PORT; - *md = PKT_METADATA_INITIALIZER(ODPP_NONE); + pkt_metadata_init(md, ODPP_NONE); NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); size_t len = nl_attr_get_size(nla); - int expected_len = odp_flow_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { continue; @@ -3115,10 +4518,29 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, md->pkt_mark = nl_attr_get_u32(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK); break; + case OVS_KEY_ATTR_CT_STATE: + md->ct_state = odp_to_ovs_ct_state(nl_attr_get_u32(nla)); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_STATE); + break; + case OVS_KEY_ATTR_CT_ZONE: + md->ct_zone = nl_attr_get_u16(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_ZONE); + break; + case OVS_KEY_ATTR_CT_MARK: + md->ct_mark = nl_attr_get_u32(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_MARK); + break; + case OVS_KEY_ATTR_CT_LABELS: { + const ovs_u128 *cl = nl_attr_get(nla); + + md->ct_label = *cl; + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CT_LABELS); + break; + } case OVS_KEY_ATTR_TUNNEL: { enum odp_key_fitness res; - res = odp_tun_key_from_attr(nla, &md->tunnel); + res = odp_tun_key_from_attr(nla, true, &md->tunnel); if (res == ODP_FIT_ERROR) { memset(&md->tunnel, 0, sizeof md->tunnel); } else if (res == ODP_FIT_PERFECT) { @@ -3144,8 +4566,7 @@ uint32_t odp_flow_key_hash(const struct nlattr *key, size_t key_len) { BUILD_ASSERT_DECL(!(NLA_ALIGNTO % sizeof(uint32_t))); - return hash_words(ALIGNED_CAST(const uint32_t *, key), - key_len / sizeof(uint32_t), 0); + return hash_bytes32(ALIGNED_CAST(const uint32_t *, key), key_len, 0); } static void @@ -3215,7 +4636,8 @@ parse_flow_nlattrs(const struct nlattr *key, size_t key_len, NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); size_t len = nl_attr_get_size(nla); - int expected_len = odp_flow_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { char namebuf[OVS_KEY_ATTR_BUFSIZE]; @@ -3520,12 +4942,17 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], nd_key = nl_attr_get(attrs[OVS_KEY_ATTR_ND]); memcpy(&flow->nd_target, nd_key->nd_target, sizeof flow->nd_target); - memcpy(flow->arp_sha, nd_key->nd_sll, ETH_ADDR_LEN); - memcpy(flow->arp_tha, nd_key->nd_tll, ETH_ADDR_LEN); + flow->arp_sha = nd_key->nd_sll; + flow->arp_tha = nd_key->nd_tll; if (is_mask) { + /* Even though 'tp_src' and 'tp_dst' are 16 bits wide, + * ICMP type and code are 8 bits wide. Therefore, an + * exact match looks like htons(0xff), not + * htons(0xffff). See xlate_wc_finish() for details. + * */ if (!is_all_zeros(nd_key, sizeof *nd_key) && - (flow->tp_src != htons(0xffff) || - flow->tp_dst != htons(0xffff))) { + (flow->tp_src != htons(0xff) || + flow->tp_dst != htons(0xff))) { return ODP_FIT_ERROR; } else { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ND; @@ -3626,7 +5053,9 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], static enum odp_key_fitness odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, - struct flow *flow, const struct flow *src_flow) + const struct nlattr *src_key, size_t src_key_len, + struct flow *flow, const struct flow *src_flow, + bool udpif) { const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; uint64_t expected_attrs; @@ -3666,10 +5095,34 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK; } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_STATE)) { + uint32_t odp_state = nl_attr_get_u32(attrs[OVS_KEY_ATTR_CT_STATE]); + + flow->ct_state = odp_to_ovs_ct_state(odp_state); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_STATE; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_ZONE)) { + flow->ct_zone = nl_attr_get_u16(attrs[OVS_KEY_ATTR_CT_ZONE]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_ZONE; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_MARK)) { + flow->ct_mark = nl_attr_get_u32(attrs[OVS_KEY_ATTR_CT_MARK]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_MARK; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CT_LABELS)) { + const ovs_u128 *cl = nl_attr_get(attrs[OVS_KEY_ATTR_CT_LABELS]); + + flow->ct_label = *cl; + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CT_LABELS; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { enum odp_key_fitness res; - res = odp_tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel); + res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], + is_mask ? src_key : NULL, + src_key_len, &src_flow->tunnel, + &flow->tunnel, udpif); if (res == ODP_FIT_ERROR) { return ODP_FIT_ERROR; } else if (res == ODP_FIT_PERFECT) { @@ -3712,6 +5165,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, expected_attrs, flow, key, key_len, src_flow); } if (is_mask) { + /* A missing VLAN mask means exact match on vlan_tci 0 (== no VLAN). */ flow->vlan_tci = htons(0xffff); if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN)) { flow->vlan_tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]); @@ -3741,18 +5195,69 @@ enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, struct flow *flow) { - return odp_flow_key_to_flow__(key, key_len, flow, flow); + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, false); +} + +static enum odp_key_fitness +odp_flow_key_to_mask__(const struct nlattr *mask_key, size_t mask_key_len, + const struct nlattr *flow_key, size_t flow_key_len, + struct flow_wildcards *mask, + const struct flow *src_flow, + bool udpif) +{ + if (mask_key_len) { + return odp_flow_key_to_flow__(mask_key, mask_key_len, + flow_key, flow_key_len, + &mask->masks, src_flow, udpif); + + } else { + /* A missing mask means that the flow should be exact matched. + * Generate an appropriate exact wildcard for the flow. */ + flow_wildcards_init_for_packet(mask, src_flow); + + return ODP_FIT_PERFECT; + } +} +/* Converts the 'mask_key_len' bytes of OVS_KEY_ATTR_* attributes in 'mask_key' + * to a mask structure in 'mask'. 'flow' must be a previously translated flow + * corresponding to 'mask' and similarly flow_key/flow_key_len must be the + * attributes from that flow. Returns an ODP_FIT_* value that indicates how + * well 'key' fits our expectations for what a flow key should contain. */ +enum odp_key_fitness +odp_flow_key_to_mask(const struct nlattr *mask_key, size_t mask_key_len, + const struct nlattr *flow_key, size_t flow_key_len, + struct flow_wildcards *mask, const struct flow *flow) +{ + return odp_flow_key_to_mask__(mask_key, mask_key_len, + flow_key, flow_key_len, + mask, flow, false); +} + +/* These functions are similar to their non-"_udpif" variants but output a + * 'flow' that is suitable for fast-path packet processing. + * + * Some fields have different representation for flow setup and per- + * packet processing (i.e. different between ofproto-dpif and userspace + * datapath). In particular, with the non-"_udpif" functions, struct + * tun_metadata is in the per-flow format (using 'present.map' and 'opts.u8'); + * with these functions, struct tun_metadata is in the per-packet format + * (using 'present.len' and 'opts.gnv'). */ +enum odp_key_fitness +odp_flow_key_to_flow_udpif(const struct nlattr *key, size_t key_len, + struct flow *flow) +{ + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, true); } -/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a mask - * structure in 'mask'. 'flow' must be a previously translated flow - * corresponding to 'mask'. Returns an ODP_FIT_* value that indicates how well - * 'key' fits our expectations for what a flow key should contain. */ enum odp_key_fitness -odp_flow_key_to_mask(const struct nlattr *key, size_t key_len, - struct flow *mask, const struct flow *flow) +odp_flow_key_to_mask_udpif(const struct nlattr *mask_key, size_t mask_key_len, + const struct nlattr *flow_key, size_t flow_key_len, + struct flow_wildcards *mask, + const struct flow *flow) { - return odp_flow_key_to_flow__(key, key_len, mask, flow); + return odp_flow_key_to_mask__(mask_key, mask_key_len, + flow_key, flow_key_len, + mask, flow, true); } /* Returns 'fitness' as a string, for use in debug messages. */ @@ -3782,6 +5287,7 @@ size_t odp_put_userspace_action(uint32_t pid, const void *userdata, size_t userdata_size, odp_port_t tunnel_out_port, + bool include_actions, struct ofpbuf *odp_actions) { size_t userdata_ofs; @@ -3790,7 +5296,7 @@ odp_put_userspace_action(uint32_t pid, offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); if (userdata) { - userdata_ofs = ofpbuf_size(odp_actions) + NLA_HDRLEN; + userdata_ofs = odp_actions->size + NLA_HDRLEN; /* The OVS kernel module before OVS 1.11 and the upstream Linux kernel * module before Linux 3.10 required the userdata to be exactly 8 bytes @@ -3812,6 +5318,9 @@ odp_put_userspace_action(uint32_t pid, nl_msg_put_odp_port(odp_actions, OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, tunnel_out_port); } + if (include_actions) { + nl_msg_put_flag(odp_actions, OVS_USERSPACE_ATTR_ACTIONS); + } nl_msg_end_nested(odp_actions, offset); return userdata_ofs; @@ -3822,7 +5331,7 @@ odp_put_tunnel_action(const struct flow_tnl *tunnel, struct ofpbuf *odp_actions) { size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); - tun_key_to_attr(odp_actions, tunnel); + tun_key_to_attr(odp_actions, tunnel, tunnel, NULL); nl_msg_end_nested(odp_actions, offset); } @@ -3879,8 +5388,9 @@ void commit_odp_tunnel_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions) { - /* A valid IPV4_TUNNEL must have non-zero ip_dst. */ - if (flow->tunnel.ip_dst) { + /* A valid IPV4_TUNNEL must have non-zero ip_dst; a valid IPv6 tunnel + * must have non-zero ipv6_dst. */ + if (flow_tnl_dst_is_set(&flow->tunnel)) { if (!memcmp(&base->tunnel, &flow->tunnel, sizeof base->tunnel)) { return; } @@ -3918,15 +5428,15 @@ commit(enum ovs_key_attr attr, bool use_masked_set, static void get_ethernet_key(const struct flow *flow, struct ovs_key_ethernet *eth) { - memcpy(eth->eth_src, flow->dl_src, ETH_ADDR_LEN); - memcpy(eth->eth_dst, flow->dl_dst, ETH_ADDR_LEN); + eth->eth_src = flow->dl_src; + eth->eth_dst = flow->dl_dst; } static void put_ethernet_key(const struct ovs_key_ethernet *eth, struct flow *flow) { - memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); - memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); + flow->dl_src = eth->eth_src; + flow->dl_dst = eth->eth_dst; } static void @@ -4154,8 +5664,8 @@ get_arp_key(const struct flow *flow, struct ovs_key_arp *arp) arp->arp_sip = flow->nw_src; arp->arp_tip = flow->nw_dst; arp->arp_op = htons(flow->nw_proto); - memcpy(arp->arp_sha, flow->arp_sha, ETH_ADDR_LEN); - memcpy(arp->arp_tha, flow->arp_tha, ETH_ADDR_LEN); + arp->arp_sha = flow->arp_sha; + arp->arp_tha = flow->arp_tha; } static void @@ -4164,8 +5674,8 @@ put_arp_key(const struct ovs_key_arp *arp, struct flow *flow) flow->nw_src = arp->arp_sip; flow->nw_dst = arp->arp_tip; flow->nw_proto = ntohs(arp->arp_op); - memcpy(flow->arp_sha, arp->arp_sha, ETH_ADDR_LEN); - memcpy(flow->arp_tha, arp->arp_tha, ETH_ADDR_LEN); + flow->arp_sha = arp->arp_sha; + flow->arp_tha = arp->arp_tha; } static enum slow_path_reason @@ -4187,6 +5697,88 @@ commit_set_arp_action(const struct flow *flow, struct flow *base_flow, return 0; } +static void +get_icmp_key(const struct flow *flow, struct ovs_key_icmp *icmp) +{ + /* icmp_type and icmp_code are stored in tp_src and tp_dst, respectively */ + icmp->icmp_type = ntohs(flow->tp_src); + icmp->icmp_code = ntohs(flow->tp_dst); +} + +static void +put_icmp_key(const struct ovs_key_icmp *icmp, struct flow *flow) +{ + /* icmp_type and icmp_code are stored in tp_src and tp_dst, respectively */ + flow->tp_src = htons(icmp->icmp_type); + flow->tp_dst = htons(icmp->icmp_code); +} + +static enum slow_path_reason +commit_set_icmp_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc) +{ + struct ovs_key_icmp key, mask, base; + enum ovs_key_attr attr; + + if (is_icmpv4(flow)) { + attr = OVS_KEY_ATTR_ICMP; + } else if (is_icmpv6(flow)) { + attr = OVS_KEY_ATTR_ICMPV6; + } else { + return 0; + } + + get_icmp_key(flow, &key); + get_icmp_key(base_flow, &base); + get_icmp_key(&wc->masks, &mask); + + if (commit(attr, false, &key, &base, &mask, sizeof key, odp_actions)) { + put_icmp_key(&base, base_flow); + put_icmp_key(&mask, &wc->masks); + return SLOW_ACTION; + } + return 0; +} + +static void +get_nd_key(const struct flow *flow, struct ovs_key_nd *nd) +{ + memcpy(nd->nd_target, &flow->nd_target, sizeof flow->nd_target); + /* nd_sll and nd_tll are stored in arp_sha and arp_tha, respectively */ + nd->nd_sll = flow->arp_sha; + nd->nd_tll = flow->arp_tha; +} + +static void +put_nd_key(const struct ovs_key_nd *nd, struct flow *flow) +{ + memcpy(&flow->nd_target, nd->nd_target, sizeof flow->nd_target); + /* nd_sll and nd_tll are stored in arp_sha and arp_tha, respectively */ + flow->arp_sha = nd->nd_sll; + flow->arp_tha = nd->nd_tll; +} + +static enum slow_path_reason +commit_set_nd_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, + struct flow_wildcards *wc, bool use_masked) +{ + struct ovs_key_nd key, mask, base; + + get_nd_key(flow, &key); + get_nd_key(base_flow, &base); + get_nd_key(&wc->masks, &mask); + + if (commit(OVS_KEY_ATTR_ND, use_masked, &key, &base, &mask, sizeof key, + odp_actions)) { + put_nd_key(&base, base_flow); + put_nd_key(&mask, &wc->masks); + return SLOW_ACTION; + } + + return 0; +} + static enum slow_path_reason commit_set_nw_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc, @@ -4204,7 +5796,7 @@ commit_set_nw_action(const struct flow *flow, struct flow *base, case ETH_TYPE_IPV6: commit_set_ipv6_action(flow, base, odp_actions, wc, use_masked); - break; + return commit_set_nd_action(flow, base, odp_actions, wc, use_masked); case ETH_TYPE_ARP: return commit_set_arp_action(flow, base, odp_actions, wc); @@ -4321,15 +5913,16 @@ commit_odp_actions(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc, bool use_masked) { - enum slow_path_reason slow; + enum slow_path_reason slow1, slow2; commit_set_ether_addr_action(flow, base, odp_actions, wc, use_masked); - slow = commit_set_nw_action(flow, base, odp_actions, wc, use_masked); + slow1 = commit_set_nw_action(flow, base, odp_actions, wc, use_masked); commit_set_port_action(flow, base, odp_actions, wc, use_masked); + slow2 = commit_set_icmp_action(flow, base, odp_actions, wc); commit_mpls_action(flow, base, odp_actions); commit_vlan_action(flow->vlan_tci, base, odp_actions, wc); commit_set_priority_action(flow, base, odp_actions, wc, use_masked); commit_set_pkt_mark_action(flow, base, odp_actions, wc, use_masked); - return slow; + return slow1 ? slow1 : slow2; }