69d19193e3753fbb42dde8ccf78a60ffbadbb067
[cascardo/ovs.git] / datapath / flow_netlink.c
1 /*
2  * Copyright (c) 2007-2014 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include "flow.h"
22 #include "datapath.h"
23 #include "mpls.h"
24 #include <linux/uaccess.h>
25 #include <linux/netdevice.h>
26 #include <linux/etherdevice.h>
27 #include <linux/if_ether.h>
28 #include <linux/if_vlan.h>
29 #include <net/llc_pdu.h>
30 #include <linux/kernel.h>
31 #include <linux/jhash.h>
32 #include <linux/jiffies.h>
33 #include <linux/llc.h>
34 #include <linux/module.h>
35 #include <linux/in.h>
36 #include <linux/rcupdate.h>
37 #include <linux/if_arp.h>
38 #include <linux/ip.h>
39 #include <linux/ipv6.h>
40 #include <linux/sctp.h>
41 #include <linux/tcp.h>
42 #include <linux/udp.h>
43 #include <linux/icmp.h>
44 #include <linux/icmpv6.h>
45 #include <linux/rculist.h>
46 #include <net/geneve.h>
47 #include <net/ip.h>
48 #include <net/ip_tunnels.h>
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51
52 #include "flow_netlink.h"
53
54 static void update_range(struct sw_flow_match *match,
55                          size_t offset, size_t size, bool is_mask)
56 {
57         struct sw_flow_key_range *range;
58         size_t start = rounddown(offset, sizeof(long));
59         size_t end = roundup(offset + size, sizeof(long));
60
61         if (!is_mask)
62                 range = &match->range;
63         else
64                 range = &match->mask->range;
65
66         if (range->start == range->end) {
67                 range->start = start;
68                 range->end = end;
69                 return;
70         }
71
72         if (range->start > start)
73                 range->start = start;
74
75         if (range->end < end)
76                 range->end = end;
77 }
78
/* Helpers that write into either the flow key or the mask's key of 'match'
 * (selected by 'is_mask') and extend the corresponding range via
 * update_range() to cover the bytes written.
 *
 * These are macros, so 'match' and 'is_mask' are evaluated more than once,
 * and 'offset' and 'len' are evaluated twice in the MEMCPY variants.  Do not
 * pass expressions with side effects for those arguments. */

/* Assign 'value' to 'field' of the key or mask. */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
        do { \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (is_mask)                                                \
                        (match)->mask->key.field = (value);                 \
                else                                                        \
                        (match)->key->field = (value);                      \
        } while (0)

/* Copy 'len' bytes from 'value_p' to byte 'offset' inside the key or mask. */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
        do {                                                                \
                update_range(match, (offset), (len), is_mask);              \
                if (is_mask)                                                \
                        memcpy((u8 *)&(match)->mask->key + (offset),        \
                               (value_p), (len));                           \
                else                                                        \
                        memcpy((u8 *)(match)->key + (offset),               \
                               (value_p), (len));                           \
        } while (0)

/* Copy 'len' bytes from 'value_p' to the start of 'field'. */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
                                  value_p, len, is_mask)

/* Fill every byte of 'field' with 'value'. */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
        do {                                                                \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (is_mask)                                                \
                        memset((u8 *)&(match)->mask->key.field, (value),    \
                               sizeof((match)->mask->key.field));           \
                else                                                        \
                        memset((u8 *)&(match)->key->field, (value),         \
                               sizeof((match)->key->field));                \
        } while (0)
113
114 static bool match_validate(const struct sw_flow_match *match,
115                            u64 key_attrs, u64 mask_attrs)
116 {
117         u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
118         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
119
120         /* The following mask attributes allowed only if they
121          * pass the validation tests. */
122         mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
123                         | (1ULL << OVS_KEY_ATTR_IPV6)
124                         | (1ULL << OVS_KEY_ATTR_TCP)
125                         | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
126                         | (1ULL << OVS_KEY_ATTR_UDP)
127                         | (1ULL << OVS_KEY_ATTR_SCTP)
128                         | (1ULL << OVS_KEY_ATTR_ICMP)
129                         | (1ULL << OVS_KEY_ATTR_ICMPV6)
130                         | (1ULL << OVS_KEY_ATTR_ARP)
131                         | (1ULL << OVS_KEY_ATTR_ND)
132                         | (1ULL << OVS_KEY_ATTR_MPLS));
133
134         /* Always allowed mask fields. */
135         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
136                        | (1ULL << OVS_KEY_ATTR_IN_PORT)
137                        | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
138
139         /* Check key attributes. */
140         if (match->key->eth.type == htons(ETH_P_ARP)
141                         || match->key->eth.type == htons(ETH_P_RARP)) {
142                 key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
143                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
144                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
145         }
146
147
148         if (eth_p_mpls(match->key->eth.type)) {
149                 key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
150                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
151                         mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
152         }
153
154         if (match->key->eth.type == htons(ETH_P_IP)) {
155                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
156                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
157                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
158
159                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
160                         if (match->key->ip.proto == IPPROTO_UDP) {
161                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
162                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
163                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
164                         }
165
166                         if (match->key->ip.proto == IPPROTO_SCTP) {
167                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
168                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
169                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
170                         }
171
172                         if (match->key->ip.proto == IPPROTO_TCP) {
173                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
174                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
175                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
176                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
177                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
178                                 }
179                         }
180
181                         if (match->key->ip.proto == IPPROTO_ICMP) {
182                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
183                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
184                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
185                         }
186                 }
187         }
188
189         if (match->key->eth.type == htons(ETH_P_IPV6)) {
190                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
191                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
192                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
193
194                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
195                         if (match->key->ip.proto == IPPROTO_UDP) {
196                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
197                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
198                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
199                         }
200
201                         if (match->key->ip.proto == IPPROTO_SCTP) {
202                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
203                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
204                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
205                         }
206
207                         if (match->key->ip.proto == IPPROTO_TCP) {
208                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
209                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
210                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
211                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
212                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
213                                 }
214                         }
215
216                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
217                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
218                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
219                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
220
221                                 if (match->key->tp.src ==
222                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
223                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
224                                         key_expected |= 1ULL << OVS_KEY_ATTR_ND;
225                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
226                                                 mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
227                                 }
228                         }
229                 }
230         }
231
232         if ((key_attrs & key_expected) != key_expected) {
233                 /* Key attributes check failed. */
234                 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
235                                 (unsigned long long)key_attrs, (unsigned long long)key_expected);
236                 return false;
237         }
238
239         if ((mask_attrs & mask_allowed) != mask_attrs) {
240                 /* Mask attributes check failed. */
241                 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
242                                 (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
243                 return false;
244         }
245
246         return true;
247 }
248
/* Return the total Netlink space reserved for the attributes nested inside
 * a tunnel key.  Revisit this whenever an OVS_TUNNEL_KEY_ATTR_* field is
 * added. */
size_t ovs_tun_key_attr_size(void)
{
        size_t total = 0;

        total += nla_total_size(8);    /* OVS_TUNNEL_KEY_ATTR_ID */
        total += nla_total_size(4);    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
        total += nla_total_size(4);    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
        total += nla_total_size(1);    /* OVS_TUNNEL_KEY_ATTR_TOS */
        total += nla_total_size(1);    /* OVS_TUNNEL_KEY_ATTR_TTL */
        total += nla_total_size(0);    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
        total += nla_total_size(0);    /* OVS_TUNNEL_KEY_ATTR_CSUM */
        total += nla_total_size(0);    /* OVS_TUNNEL_KEY_ATTR_OAM */
        total += nla_total_size(256);  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
        total += nla_total_size(2);    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
        total += nla_total_size(2);    /* OVS_TUNNEL_KEY_ATTR_TP_DST */

        return total;
}
265
266 size_t ovs_key_attr_size(void)
267 {
268         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
269          * updating this function.  */
270         BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
271
272         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
273                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
274                   + ovs_tun_key_attr_size()
275                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
276                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
277                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
278                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
279                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
280                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
281                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
282                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
283                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
284                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
285                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
286                 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
287 }
288
289 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
290 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
291         [OVS_KEY_ATTR_ENCAP] = -1,
292         [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
293         [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
294         [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
295         [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
296         [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
297         [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
298         [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
299         [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
300         [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
301         [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
302         [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
303         [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
304         [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
305         [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
306         [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
307         [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
308         [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
309         [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
310         [OVS_KEY_ATTR_TUNNEL] = -1,
311         [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
312 };
313
/* Report whether the 'size' bytes starting at 'fp' are all zero.
 *
 * Returns false for a NULL 'fp'.  A non-NULL buffer with 'size' == 0 counts
 * as all zero.  The index is a size_t so that it has the same type as
 * 'size' and cannot overflow for large buffers. */
static bool is_all_zero(const u8 *fp, size_t size)
{
        size_t i;

        if (!fp)
                return false;

        for (i = 0; i < size; i++)
                if (fp[i])
                        return false;

        return true;
}
327
328 static int __parse_flow_nlattrs(const struct nlattr *attr,
329                                 const struct nlattr *a[],
330                                 u64 *attrsp, bool nz)
331 {
332         const struct nlattr *nla;
333         u64 attrs;
334         int rem;
335
336         attrs = *attrsp;
337         nla_for_each_nested(nla, attr, rem) {
338                 u16 type = nla_type(nla);
339                 int expected_len;
340
341                 if (type > OVS_KEY_ATTR_MAX) {
342                         OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
343                                   type, OVS_KEY_ATTR_MAX);
344                         return -EINVAL;
345                 }
346
347                 if (attrs & (1ULL << type)) {
348                         OVS_NLERR("Duplicate key attribute (type %d).\n", type);
349                         return -EINVAL;
350                 }
351
352                 expected_len = ovs_key_lens[type];
353                 if (nla_len(nla) != expected_len && expected_len != -1) {
354                         OVS_NLERR("Key attribute has unexpected length (type=%d"
355                                   ", length=%d, expected=%d).\n", type,
356                                   nla_len(nla), expected_len);
357                         return -EINVAL;
358                 }
359
360                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
361                         attrs |= 1ULL << type;
362                         a[type] = nla;
363                 }
364         }
365         if (rem) {
366                 OVS_NLERR("Message has %d unknown bytes.\n", rem);
367                 return -EINVAL;
368         }
369
370         *attrsp = attrs;
371         return 0;
372 }
373
374 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
375                                    const struct nlattr *a[], u64 *attrsp)
376 {
377         return __parse_flow_nlattrs(attr, a, attrsp, true);
378 }
379
380 static int parse_flow_nlattrs(const struct nlattr *attr,
381                               const struct nlattr *a[], u64 *attrsp)
382 {
383         return __parse_flow_nlattrs(attr, a, attrsp, false);
384 }
385
386 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
387                                 struct sw_flow_match *match, bool is_mask)
388 {
389         struct nlattr *a;
390         int rem;
391         bool ttl = false;
392         __be16 tun_flags = 0;
393
394         nla_for_each_nested(a, attr, rem) {
395                 int type = nla_type(a);
396                 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
397                         [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
398                         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
399                         [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
400                         [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
401                         [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
402                         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
403                         [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
404                         [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
405                         [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
406                         [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
407                         [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
408                 };
409
410                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
411                         OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
412                         type, OVS_TUNNEL_KEY_ATTR_MAX);
413                         return -EINVAL;
414                 }
415
416                 if (ovs_tunnel_key_lens[type] != nla_len(a) &&
417                     ovs_tunnel_key_lens[type] != -1) {
418                         OVS_NLERR("IPv4 tunnel attribute type has unexpected "
419                                   " length (type=%d, length=%d, expected=%d).\n",
420                                   type, nla_len(a), ovs_tunnel_key_lens[type]);
421                         return -EINVAL;
422                 }
423
424                 switch (type) {
425                 case OVS_TUNNEL_KEY_ATTR_ID:
426                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
427                                         nla_get_be64(a), is_mask);
428                         tun_flags |= TUNNEL_KEY;
429                         break;
430                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
431                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
432                                         nla_get_be32(a), is_mask);
433                         break;
434                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
435                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
436                                         nla_get_be32(a), is_mask);
437                         break;
438                 case OVS_TUNNEL_KEY_ATTR_TOS:
439                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
440                                         nla_get_u8(a), is_mask);
441                         break;
442                 case OVS_TUNNEL_KEY_ATTR_TTL:
443                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
444                                         nla_get_u8(a), is_mask);
445                         ttl = true;
446                         break;
447                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
448                         tun_flags |= TUNNEL_DONT_FRAGMENT;
449                         break;
450                 case OVS_TUNNEL_KEY_ATTR_CSUM:
451                         tun_flags |= TUNNEL_CSUM;
452                         break;
453                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
454                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
455                                         nla_get_be16(a), is_mask);
456                         break;
457                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
458                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
459                                         nla_get_be16(a), is_mask);
460                         break;
461                 case OVS_TUNNEL_KEY_ATTR_OAM:
462                         tun_flags |= TUNNEL_OAM;
463                         break;
464                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
465                         tun_flags |= TUNNEL_OPTIONS_PRESENT;
466                         if (nla_len(a) > sizeof(match->key->tun_opts)) {
467                                 OVS_NLERR("Geneve option length exceeds "
468                                           "maximum size (len %d, max %zu).\n",
469                                           nla_len(a),
470                                           sizeof(match->key->tun_opts));
471                                 return -EINVAL;
472                         }
473
474                         if (nla_len(a) % 4 != 0) {
475                                 OVS_NLERR("Geneve option length is not "
476                                           "a multiple of 4 (len %d).\n",
477                                           nla_len(a));
478                                 return -EINVAL;
479                         }
480
481                         /* We need to record the length of the options passed
482                          * down, otherwise packets with the same format but
483                          * additional options will be silently matched.
484                          */
485                         if (!is_mask) {
486                                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
487                                                 false);
488                         } else {
489                                 /* This is somewhat unusual because it looks at
490                                  * both the key and mask while parsing the
491                                  * attributes (and by extension assumes the key
492                                  * is parsed first). Normally, we would verify
493                                  * that each is the correct length and that the
494                                  * attributes line up in the validate function.
495                                  * However, that is difficult because this is
496                                  * variable length and we won't have the
497                                  * information later.
498                                  */
499                                 if (match->key->tun_opts_len != nla_len(a)) {
500                                         OVS_NLERR("Geneve option key length (%d)"
501                                            " is different from mask length (%d).",
502                                            match->key->tun_opts_len, nla_len(a));
503                                         return -EINVAL;
504                                 }
505
506                                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
507                                                 true);
508                         }
509
510                         SW_FLOW_KEY_MEMCPY_OFFSET(match,
511                                 (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
512                                                            nla_len(a)),
513                                 nla_data(a), nla_len(a), is_mask);
514                         break;
515                 default:
516                         OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", type);
517                         return -EINVAL;
518                 }
519         }
520
521         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
522
523         if (rem > 0) {
524                 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
525                 return -EINVAL;
526         }
527
528         if (!is_mask) {
529                 if (!match->key->tun_key.ipv4_dst) {
530                         OVS_NLERR("IPv4 tunnel destination address is zero.\n");
531                         return -EINVAL;
532                 }
533
534                 if (!ttl) {
535                         OVS_NLERR("IPv4 tunnel TTL not specified.\n");
536                         return -EINVAL;
537                 }
538         }
539
540         return 0;
541 }
542
543 static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
544                                 const struct ovs_key_ipv4_tunnel *output,
545                                 const struct geneve_opt *tun_opts,
546                                 int swkey_tun_opts_len)
547 {
548         if (output->tun_flags & TUNNEL_KEY &&
549             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
550                 return -EMSGSIZE;
551         if (output->ipv4_src &&
552                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
553                 return -EMSGSIZE;
554         if (output->ipv4_dst &&
555                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
556                 return -EMSGSIZE;
557         if (output->ipv4_tos &&
558                 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
559                 return -EMSGSIZE;
560         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
561                 return -EMSGSIZE;
562         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
563                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
564                 return -EMSGSIZE;
565         if ((output->tun_flags & TUNNEL_CSUM) &&
566                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
567                 return -EMSGSIZE;
568         if (output->tp_src &&
569                 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
570                 return -EMSGSIZE;
571         if (output->tp_dst &&
572                 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
573                 return -EMSGSIZE;
574         if ((output->tun_flags & TUNNEL_OAM) &&
575                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
576                 return -EMSGSIZE;
577         if (tun_opts &&
578             nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
579                     swkey_tun_opts_len, tun_opts))
580                 return -EMSGSIZE;
581
582         return 0;
583 }
584
585
586 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
587                               const struct ovs_key_ipv4_tunnel *output,
588                               const struct geneve_opt *tun_opts,
589                               int swkey_tun_opts_len)
590 {
591         struct nlattr *nla;
592         int err;
593
594         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
595         if (!nla)
596                 return -EMSGSIZE;
597
598         err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
599         if (err)
600                 return err;
601
602         nla_nest_end(skb, nla);
603         return 0;
604 }
605
606 int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
607                                   const struct ovs_tunnel_info *egress_tun_info)
608 {
609         return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
610                                     egress_tun_info->options,
611                                     egress_tun_info->options_len);
612 }
613
614 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
615                                  const struct nlattr **a, bool is_mask)
616 {
617         if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
618                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
619
620                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
621                 *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
622         }
623
624         if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
625                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
626
627                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
628                 *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
629         }
630
631         if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
632                 SW_FLOW_KEY_PUT(match, phy.priority,
633                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
634                 *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
635         }
636
637         if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
638                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
639
640                 if (is_mask) {
641                         in_port = 0xffffffff; /* Always exact match in_port. */
642                 } else if (in_port >= DP_MAX_PORTS) {
643                         OVS_NLERR("Input port (%d) exceeds maximum allowable (%d).\n",
644                                   in_port, DP_MAX_PORTS);
645                         return -EINVAL;
646                 }
647
648                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
649                 *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
650         } else if (!is_mask) {
651                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
652         }
653
654         if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
655                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
656
657                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
658                 *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
659         }
660         if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
661                 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
662                                          is_mask))
663                         return -EINVAL;
664                 *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
665         }
666         return 0;
667 }
668
669 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
670                                 const struct nlattr **a, bool is_mask)
671 {
672         int err;
673
674         err = metadata_from_nlattrs(match, &attrs, a, is_mask);
675         if (err)
676                 return err;
677
678         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
679                 const struct ovs_key_ethernet *eth_key;
680
681                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
682                 SW_FLOW_KEY_MEMCPY(match, eth.src,
683                                 eth_key->eth_src, ETH_ALEN, is_mask);
684                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
685                                 eth_key->eth_dst, ETH_ALEN, is_mask);
686                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
687         }
688
689         if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
690                 __be16 tci;
691
692                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
693                 if (!(tci & htons(VLAN_TAG_PRESENT))) {
694                         if (is_mask)
695                                 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
696                         else
697                                 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
698
699                         return -EINVAL;
700                 }
701
702                 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
703                 attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
704         }
705
706         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
707                 __be16 eth_type;
708
709                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
710                 if (is_mask) {
711                         /* Always exact match EtherType. */
712                         eth_type = htons(0xffff);
713                 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
714                         OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
715                                         ntohs(eth_type), ETH_P_802_3_MIN);
716                         return -EINVAL;
717                 }
718
719                 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
720                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
721         } else if (!is_mask) {
722                 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
723         }
724
725         if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
726                 const struct ovs_key_ipv4 *ipv4_key;
727
728                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
729                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
730                         OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
731                                 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
732                         return -EINVAL;
733                 }
734                 SW_FLOW_KEY_PUT(match, ip.proto,
735                                 ipv4_key->ipv4_proto, is_mask);
736                 SW_FLOW_KEY_PUT(match, ip.tos,
737                                 ipv4_key->ipv4_tos, is_mask);
738                 SW_FLOW_KEY_PUT(match, ip.ttl,
739                                 ipv4_key->ipv4_ttl, is_mask);
740                 SW_FLOW_KEY_PUT(match, ip.frag,
741                                 ipv4_key->ipv4_frag, is_mask);
742                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
743                                 ipv4_key->ipv4_src, is_mask);
744                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
745                                 ipv4_key->ipv4_dst, is_mask);
746                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
747         }
748
749         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
750                 const struct ovs_key_ipv6 *ipv6_key;
751
752                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
753                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
754                         OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
755                                 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
756                         return -EINVAL;
757                 }
758                 if (ipv6_key->ipv6_label & htonl(0xFFF00000)) {
759                         OVS_NLERR("Invalid IPv6 flow label value (value=%x, max=%x).\n",
760                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
761                         return -EINVAL;
762                 }
763                 SW_FLOW_KEY_PUT(match, ipv6.label,
764                                 ipv6_key->ipv6_label, is_mask);
765                 SW_FLOW_KEY_PUT(match, ip.proto,
766                                 ipv6_key->ipv6_proto, is_mask);
767                 SW_FLOW_KEY_PUT(match, ip.tos,
768                                 ipv6_key->ipv6_tclass, is_mask);
769                 SW_FLOW_KEY_PUT(match, ip.ttl,
770                                 ipv6_key->ipv6_hlimit, is_mask);
771                 SW_FLOW_KEY_PUT(match, ip.frag,
772                                 ipv6_key->ipv6_frag, is_mask);
773                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
774                                 ipv6_key->ipv6_src,
775                                 sizeof(match->key->ipv6.addr.src),
776                                 is_mask);
777                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
778                                 ipv6_key->ipv6_dst,
779                                 sizeof(match->key->ipv6.addr.dst),
780                                 is_mask);
781
782                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
783         }
784
785         if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
786                 const struct ovs_key_arp *arp_key;
787
788                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
789                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
790                         OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
791                                   arp_key->arp_op);
792                         return -EINVAL;
793                 }
794
795                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
796                                 arp_key->arp_sip, is_mask);
797                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
798                         arp_key->arp_tip, is_mask);
799                 SW_FLOW_KEY_PUT(match, ip.proto,
800                                 ntohs(arp_key->arp_op), is_mask);
801                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
802                                 arp_key->arp_sha, ETH_ALEN, is_mask);
803                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
804                                 arp_key->arp_tha, ETH_ALEN, is_mask);
805
806                 attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
807         }
808
809         if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
810                 const struct ovs_key_mpls *mpls_key;
811
812                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
813                 SW_FLOW_KEY_PUT(match, mpls.top_lse,
814                                 mpls_key->mpls_lse, is_mask);
815
816                 attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
817         }
818
819         if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
820                 const struct ovs_key_tcp *tcp_key;
821
822                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
823                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
824                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
825                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
826         }
827
828         if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
829                 SW_FLOW_KEY_PUT(match, tp.flags,
830                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
831                                 is_mask);
832                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
833         }
834
835         if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
836                 const struct ovs_key_udp *udp_key;
837
838                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
839                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
840                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
841                 attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
842         }
843
844         if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
845                 const struct ovs_key_sctp *sctp_key;
846
847                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
848                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
849                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
850                 attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
851         }
852
853         if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
854                 const struct ovs_key_icmp *icmp_key;
855
856                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
857                 SW_FLOW_KEY_PUT(match, tp.src,
858                                 htons(icmp_key->icmp_type), is_mask);
859                 SW_FLOW_KEY_PUT(match, tp.dst,
860                                 htons(icmp_key->icmp_code), is_mask);
861                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
862         }
863
864         if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
865                 const struct ovs_key_icmpv6 *icmpv6_key;
866
867                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
868                 SW_FLOW_KEY_PUT(match, tp.src,
869                                 htons(icmpv6_key->icmpv6_type), is_mask);
870                 SW_FLOW_KEY_PUT(match, tp.dst,
871                                 htons(icmpv6_key->icmpv6_code), is_mask);
872                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
873         }
874
875         if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
876                 const struct ovs_key_nd *nd_key;
877
878                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
879                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
880                         nd_key->nd_target,
881                         sizeof(match->key->ipv6.nd.target),
882                         is_mask);
883                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
884                         nd_key->nd_sll, ETH_ALEN, is_mask);
885                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
886                                 nd_key->nd_tll, ETH_ALEN, is_mask);
887                 attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
888         }
889
890         if (attrs != 0) {
891                 OVS_NLERR("Unknown key attributes (%llx).\n",
892                           (unsigned long long)attrs);
893                 return -EINVAL;
894         }
895
896         return 0;
897 }
898
899 static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
900 {
901         struct nlattr *nla;
902         int rem;
903
904         /* The nlattr stream should already have been validated */
905         nla_for_each_nested(nla, attr, rem) {
906                 /* We assume that ovs_key_lens[type] == -1 means that type is a
907                  * nested attribute
908                  */
909                 if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
910                         nlattr_set(nla, val, false);
911                 else
912                         memset(nla_data(nla), val, nla_len(nla));
913         }
914 }
915
916 static void mask_set_nlattr(struct nlattr *attr, u8 val)
917 {
918         nlattr_set(attr, val, true);
919 }
920
/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. If 'match->mask' is set but 'nla_mask' is NULL, an exact-match mask
 * is generated from 'nla_key' (every field present in the key is fully
 * matched). Otherwise 'nla_mask' supplies the mask of a wildcarded flow.
 * If 'match->mask' is NULL no mask is parsed at all.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should be those of the packet that triggered the
 * creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute sequence that specifies the mask of the wildcarded flow.
 *
 * Returns 0 on success, -ENOMEM if the generated mask cannot be allocated,
 * -EINVAL if the key/mask combination fails a check in this function or
 * match_validate(), or the error returned by one of the parsing helpers.
 */
int ovs_nla_get_match(struct sw_flow_match *match,
                      const struct nlattr *nla_key,
                      const struct nlattr *nla_mask)
{
        /* Attribute table shared by key and mask parsing. */
        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
        const struct nlattr *encap;
        struct nlattr *newmask = NULL;
        u64 key_attrs = 0;
        u64 mask_attrs = 0;
        bool encap_valid = false;
        int err;

        err = parse_flow_nlattrs(nla_key, a, &key_attrs);
        if (err)
                return err;

        /* An 802.1Q outer EtherType means the real L2+ key is inside ENCAP. */
        if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
            (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
            (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
                __be16 tci;

                if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
                      (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
                        OVS_NLERR("Invalid Vlan frame.\n");
                        return -EINVAL;
                }

                /* Drop the outer ETHERTYPE and ENCAP bits; the encapsulated
                 * attributes are parsed into the same table below.
                 */
                key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
                encap = a[OVS_KEY_ATTR_ENCAP];
                key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
                encap_valid = true;

                if (tci & htons(VLAN_TAG_PRESENT)) {
                        err = parse_flow_nlattrs(encap, a, &key_attrs);
                        if (err)
                                return err;
                } else if (!tci) {
                        /* Corner case for truncated 802.1Q header. */
                        if (nla_len(encap)) {
                                OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
                                return -EINVAL;
                        }
                } else {
                        OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
                        return  -EINVAL;
                }
        }

        err = ovs_key_from_nlattrs(match, key_attrs, a, false);
        if (err)
                return err;

        if (match->mask) {

                if (!nla_mask) {
                        /* Create an exact match mask. We need to set to 0xff
                         * all the 'match->mask' fields that have been touched
                         * in 'match->key'. We cannot simply memset
                         * 'match->mask', because padding bytes and fields not
                         * specified in 'match->key' should be left to 0.
                         * Instead, we use a stream of netlink attributes,
                         * copied from 'key' and set to 0xff: ovs_key_from_nlattrs()
                         * will take care of filling 'match->mask'
                         * appropriately.
                         */
                        newmask = kmemdup(nla_key,
                                          nla_total_size(nla_len(nla_key)),
                                          GFP_KERNEL);
                        if (!newmask)
                                return -ENOMEM;

                        mask_set_nlattr(newmask, 0xff);

                        /* The userspace does not send tunnel attributes that
                         * are 0, but we should not wildcard them nonetheless.
                         */
                        if (match->key->tun_key.ipv4_dst)
                                SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
                                                         0xff, true);

                        nla_mask = newmask;
                }

                /* NOTE(review): 'a' still holds the pointers stored while
                 * parsing the key. Whether parse_flow_mask_nlattrs() clears
                 * entries absent from the mask is not visible here; confirm
                 * that the a[...] NULL checks below cannot be satisfied by a
                 * stale key attribute.
                 */
                err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs);
                if (err)
                        goto free_newmask;

                /* Always match on tci. */
                SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

                if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
                        __be16 eth_type = 0;
                        __be16 tci = 0;

                        /* An ENCAP mask is only valid if the key had one. */
                        if (!encap_valid) {
                                OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
                                err = -EINVAL;
                                goto free_newmask;
                        }

                        mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
                        if (a[OVS_KEY_ATTR_ETHERTYPE])
                                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

                        /* The outer EtherType (TPID) must be fully matched
                         * before the encapsulated mask is parsed.
                         */
                        if (eth_type == htons(0xffff)) {
                                mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
                                encap = a[OVS_KEY_ATTR_ENCAP];
                                err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
                                if (err)
                                        goto free_newmask;
                        } else {
                                OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
                                                ntohs(eth_type));
                                err = -EINVAL;
                                goto free_newmask;
                        }

                        if (a[OVS_KEY_ATTR_VLAN])
                                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

                        if (!(tci & htons(VLAN_TAG_PRESENT))) {
                                OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
                                err = -EINVAL;
                                goto free_newmask;
                        }
                }

                err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
                if (err)
                        goto free_newmask;
        }

        /* 'err' is 0 on every path that reaches this point. */
        if (!match_validate(match, key_attrs, mask_attrs))
                err = -EINVAL;

free_newmask:
        /* 'newmask' is NULL unless an exact-match mask was generated above. */
        kfree(newmask);
        return err;
}
1073
1074 /**
1075  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1076  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
1077  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1078  * sequence.
1079  *
1080  * This parses a series of Netlink attributes that form a flow key, which must
1081  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1082  * get the metadata, that is, the parts of the flow key that cannot be
1083  * extracted from the packet itself.
1084  */
1085 int ovs_nla_get_flow_metadata(const struct nlattr *attr,
1086                               struct sw_flow_key *key)
1087 {
1088         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1089         struct sw_flow_match match;
1090         u64 attrs = 0;
1091         int err;
1092
1093         err = parse_flow_nlattrs(attr, a, &attrs);
1094         if (err)
1095                 return -EINVAL;
1096
1097         memset(&match, 0, sizeof(match));
1098         match.key = key;
1099
1100         memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
1101         key->phy.in_port = DP_MAX_PORTS;
1102
1103         return metadata_from_nlattrs(&match, &attrs, a, false);
1104 }
1105
1106 int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
1107                      const struct sw_flow_key *output, struct sk_buff *skb)
1108 {
1109         struct ovs_key_ethernet *eth_key;
1110         struct nlattr *nla, *encap;
1111         bool is_mask = (swkey != output);
1112
1113         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1114                 goto nla_put_failure;
1115
1116         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1117                 goto nla_put_failure;
1118
1119         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1120                 goto nla_put_failure;
1121
1122         if ((swkey->tun_key.ipv4_dst || is_mask)) {
1123                 const struct geneve_opt *opts = NULL;
1124
1125                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1126                         opts = GENEVE_OPTS(output, swkey->tun_opts_len);
1127
1128                 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
1129                                         swkey->tun_opts_len))
1130                         goto nla_put_failure;
1131         }
1132
1133         if (swkey->phy.in_port == DP_MAX_PORTS) {
1134                 if (is_mask && (output->phy.in_port == 0xffff))
1135                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1136                                 goto nla_put_failure;
1137         } else {
1138                 u16 upper_u16;
1139                 upper_u16 = !is_mask ? 0 : 0xffff;
1140
1141                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1142                                 (upper_u16 << 16) | output->phy.in_port))
1143                         goto nla_put_failure;
1144         }
1145
1146         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1147                 goto nla_put_failure;
1148
1149         nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1150         if (!nla)
1151                 goto nla_put_failure;
1152
1153         eth_key = nla_data(nla);
1154         ether_addr_copy(eth_key->eth_src, output->eth.src);
1155         ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1156
1157         if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1158                 __be16 eth_type;
1159                 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1160                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1161                     nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1162                         goto nla_put_failure;
1163                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1164                 if (!swkey->eth.tci)
1165                         goto unencap;
1166         } else
1167                 encap = NULL;
1168
1169         if (swkey->eth.type == htons(ETH_P_802_2)) {
1170                 /*
1171                  * Ethertype 802.2 is represented in the netlink with omitted
1172                  * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1173                  * 0xffff in the mask attribute.  Ethertype can also
1174                  * be wildcarded.
1175                  */
1176                 if (is_mask && output->eth.type)
1177                         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1178                                                 output->eth.type))
1179                                 goto nla_put_failure;
1180                 goto unencap;
1181         }
1182
1183         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1184                 goto nla_put_failure;
1185
1186         if (swkey->eth.type == htons(ETH_P_IP)) {
1187                 struct ovs_key_ipv4 *ipv4_key;
1188
1189                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
1190                 if (!nla)
1191                         goto nla_put_failure;
1192                 ipv4_key = nla_data(nla);
1193                 ipv4_key->ipv4_src = output->ipv4.addr.src;
1194                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1195                 ipv4_key->ipv4_proto = output->ip.proto;
1196                 ipv4_key->ipv4_tos = output->ip.tos;
1197                 ipv4_key->ipv4_ttl = output->ip.ttl;
1198                 ipv4_key->ipv4_frag = output->ip.frag;
1199         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1200                 struct ovs_key_ipv6 *ipv6_key;
1201
1202                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
1203                 if (!nla)
1204                         goto nla_put_failure;
1205                 ipv6_key = nla_data(nla);
1206                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1207                                 sizeof(ipv6_key->ipv6_src));
1208                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1209                                 sizeof(ipv6_key->ipv6_dst));
1210                 ipv6_key->ipv6_label = output->ipv6.label;
1211                 ipv6_key->ipv6_proto = output->ip.proto;
1212                 ipv6_key->ipv6_tclass = output->ip.tos;
1213                 ipv6_key->ipv6_hlimit = output->ip.ttl;
1214                 ipv6_key->ipv6_frag = output->ip.frag;
1215         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1216                    swkey->eth.type == htons(ETH_P_RARP)) {
1217                 struct ovs_key_arp *arp_key;
1218
1219                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
1220                 if (!nla)
1221                         goto nla_put_failure;
1222                 arp_key = nla_data(nla);
1223                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1224                 arp_key->arp_sip = output->ipv4.addr.src;
1225                 arp_key->arp_tip = output->ipv4.addr.dst;
1226                 arp_key->arp_op = htons(output->ip.proto);
1227                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
1228                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
1229         } else if (eth_p_mpls(swkey->eth.type)) {
1230                 struct ovs_key_mpls *mpls_key;
1231
1232                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
1233                 if (!nla)
1234                         goto nla_put_failure;
1235                 mpls_key = nla_data(nla);
1236                 mpls_key->mpls_lse = output->mpls.top_lse;
1237         }
1238
1239         if ((swkey->eth.type == htons(ETH_P_IP) ||
1240              swkey->eth.type == htons(ETH_P_IPV6)) &&
1241              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1242
1243                 if (swkey->ip.proto == IPPROTO_TCP) {
1244                         struct ovs_key_tcp *tcp_key;
1245
1246                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1247                         if (!nla)
1248                                 goto nla_put_failure;
1249                         tcp_key = nla_data(nla);
1250                         tcp_key->tcp_src = output->tp.src;
1251                         tcp_key->tcp_dst = output->tp.dst;
1252                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1253                                          output->tp.flags))
1254                                 goto nla_put_failure;
1255                 } else if (swkey->ip.proto == IPPROTO_UDP) {
1256                         struct ovs_key_udp *udp_key;
1257
1258                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1259                         if (!nla)
1260                                 goto nla_put_failure;
1261                         udp_key = nla_data(nla);
1262                         udp_key->udp_src = output->tp.src;
1263                         udp_key->udp_dst = output->tp.dst;
1264                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1265                         struct ovs_key_sctp *sctp_key;
1266
1267                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1268                         if (!nla)
1269                                 goto nla_put_failure;
1270                         sctp_key = nla_data(nla);
1271                         sctp_key->sctp_src = output->tp.src;
1272                         sctp_key->sctp_dst = output->tp.dst;
1273                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1274                            swkey->ip.proto == IPPROTO_ICMP) {
1275                         struct ovs_key_icmp *icmp_key;
1276
1277                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1278                         if (!nla)
1279                                 goto nla_put_failure;
1280                         icmp_key = nla_data(nla);
1281                         icmp_key->icmp_type = ntohs(output->tp.src);
1282                         icmp_key->icmp_code = ntohs(output->tp.dst);
1283                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1284                            swkey->ip.proto == IPPROTO_ICMPV6) {
1285                         struct ovs_key_icmpv6 *icmpv6_key;
1286
1287                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1288                                                 sizeof(*icmpv6_key));
1289                         if (!nla)
1290                                 goto nla_put_failure;
1291                         icmpv6_key = nla_data(nla);
1292                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
1293                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
1294
1295                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1296                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1297                                 struct ovs_key_nd *nd_key;
1298
1299                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1300                                 if (!nla)
1301                                         goto nla_put_failure;
1302                                 nd_key = nla_data(nla);
1303                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1304                                                         sizeof(nd_key->nd_target));
1305                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
1306                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
1307                         }
1308                 }
1309         }
1310
1311 unencap:
1312         if (encap)
1313                 nla_nest_end(skb, encap);
1314
1315         return 0;
1316
1317 nla_put_failure:
1318         return -EMSGSIZE;
1319 }
1320
1321 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1322
1323 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
1324 {
1325         struct sw_flow_actions *sfa;
1326
1327         if (size > MAX_ACTIONS_BUFSIZE) {
1328                 OVS_NLERR("Flow action size (%u bytes) exceeds maximum "
1329                           "(%u bytes)\n", size, MAX_ACTIONS_BUFSIZE);
1330                 return ERR_PTR(-EINVAL);
1331         }
1332
1333         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1334         if (!sfa)
1335                 return ERR_PTR(-ENOMEM);
1336
1337         sfa->actions_len = 0;
1338         return sfa;
1339 }
1340
1341 /* RCU callback used by ovs_nla_free_flow_actions. */
1342 static void rcu_free_acts_callback(struct rcu_head *rcu)
1343 {
1344         struct sw_flow_actions *sf_acts = container_of(rcu,
1345                         struct sw_flow_actions, rcu);
1346         kfree(sf_acts);
1347 }
1348
1349 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
1350  * The caller must hold rcu_read_lock for this to be sensible. */
1351 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1352 {
1353         call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
1354 }
1355
1356 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1357                                        int attr_len)
1358 {
1359
1360         struct sw_flow_actions *acts;
1361         int new_acts_size;
1362         int req_size = NLA_ALIGN(attr_len);
1363         int next_offset = offsetof(struct sw_flow_actions, actions) +
1364                                         (*sfa)->actions_len;
1365
1366         if (req_size <= (ksize(*sfa) - next_offset))
1367                 goto out;
1368
1369         new_acts_size = ksize(*sfa) * 2;
1370
1371         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1372                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1373                         return ERR_PTR(-EMSGSIZE);
1374                 new_acts_size = MAX_ACTIONS_BUFSIZE;
1375         }
1376
1377         acts = nla_alloc_flow_actions(new_acts_size);
1378         if (IS_ERR(acts))
1379                 return (void *)acts;
1380
1381         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1382         acts->actions_len = (*sfa)->actions_len;
1383         kfree(*sfa);
1384         *sfa = acts;
1385
1386 out:
1387         (*sfa)->actions_len += req_size;
1388         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
1389 }
1390
1391 static struct nlattr *__add_action(struct sw_flow_actions **sfa, int attrtype,
1392                                    void *data, int len)
1393 {
1394         struct nlattr *a;
1395
1396         a = reserve_sfa_size(sfa, nla_attr_size(len));
1397         if (IS_ERR(a))
1398                 return a;
1399
1400         a->nla_type = attrtype;
1401         a->nla_len = nla_attr_size(len);
1402
1403         if (data)
1404                 memcpy(nla_data(a), data, len);
1405         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1406
1407         return a;
1408 }
1409
/* Same as __add_action(), but reports the outcome as an int: 0 on
 * success, or the negative errno carried by the ERR_PTR on failure. */
static int add_action(struct sw_flow_actions **sfa, int attrtype,
                      void *data, int len)
{
        struct nlattr *nla = __add_action(sfa, attrtype, data, len);

        return IS_ERR(nla) ? PTR_ERR(nla) : 0;
}
1421
1422 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1423                                           int attrtype)
1424 {
1425         int used = (*sfa)->actions_len;
1426         int err;
1427
1428         err = add_action(sfa, attrtype, NULL, 0);
1429         if (err)
1430                 return err;
1431
1432         return used;
1433 }
1434
1435 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1436                                          int st_offset)
1437 {
1438         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1439                                                                st_offset);
1440
1441         a->nla_len = sfa->actions_len - st_offset;
1442 }
1443
1444 static int __ovs_nla_copy_actions(const struct nlattr *attr,
1445                                   const struct sw_flow_key *key,
1446                                   int depth, struct sw_flow_actions **sfa,
1447                                   __be16 eth_type, __be16 vlan_tci);
1448
1449 static int validate_and_copy_sample(const struct nlattr *attr,
1450                                     const struct sw_flow_key *key, int depth,
1451                                     struct sw_flow_actions **sfa,
1452                                     __be16 eth_type, __be16 vlan_tci)
1453 {
1454         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1455         const struct nlattr *probability, *actions;
1456         const struct nlattr *a;
1457         int rem, start, err, st_acts;
1458
1459         memset(attrs, 0, sizeof(attrs));
1460         nla_for_each_nested(a, attr, rem) {
1461                 int type = nla_type(a);
1462                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1463                         return -EINVAL;
1464                 attrs[type] = a;
1465         }
1466         if (rem)
1467                 return -EINVAL;
1468
1469         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1470         if (!probability || nla_len(probability) != sizeof(u32))
1471                 return -EINVAL;
1472
1473         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1474         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1475                 return -EINVAL;
1476
1477         /* validation done, copy sample action. */
1478         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
1479         if (start < 0)
1480                 return start;
1481         err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1482                          nla_data(probability), sizeof(u32));
1483         if (err)
1484                 return err;
1485         st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
1486         if (st_acts < 0)
1487                 return st_acts;
1488
1489         err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
1490                                      eth_type, vlan_tci);
1491         if (err)
1492                 return err;
1493
1494         add_nested_action_end(*sfa, st_acts);
1495         add_nested_action_end(*sfa, start);
1496
1497         return 0;
1498 }
1499
1500 static int validate_tp_port(const struct sw_flow_key *flow_key,
1501                             __be16 eth_type)
1502 {
1503         if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
1504             (flow_key->tp.src || flow_key->tp.dst))
1505                 return 0;
1506
1507         return -EINVAL;
1508 }
1509
1510 void ovs_match_init(struct sw_flow_match *match,
1511                     struct sw_flow_key *key,
1512                     struct sw_flow_mask *mask)
1513 {
1514         memset(match, 0, sizeof(*match));
1515         match->key = key;
1516         match->mask = mask;
1517
1518         memset(key, 0, sizeof(*key));
1519
1520         if (mask) {
1521                 memset(&mask->key, 0, sizeof(mask->key));
1522                 mask->range.start = mask->range.end = 0;
1523         }
1524 }
1525
1526 static int validate_and_copy_set_tun(const struct nlattr *attr,
1527                                      struct sw_flow_actions **sfa)
1528 {
1529         struct sw_flow_match match;
1530         struct sw_flow_key key;
1531         struct ovs_tunnel_info *tun_info;
1532         struct nlattr *a;
1533         int err, start;
1534
1535         ovs_match_init(&match, &key, NULL);
1536         err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
1537         if (err)
1538                 return err;
1539
1540         if (key.tun_opts_len) {
1541                 struct geneve_opt *option = GENEVE_OPTS(&key,
1542                                                         key.tun_opts_len);
1543                 int opts_len = key.tun_opts_len;
1544                 bool crit_opt = false;
1545
1546                 while (opts_len > 0) {
1547                         int len;
1548
1549                         if (opts_len < sizeof(*option))
1550                                 return -EINVAL;
1551
1552                         len = sizeof(*option) + option->length * 4;
1553                         if (len > opts_len)
1554                                 return -EINVAL;
1555
1556                         crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
1557
1558                         option = (struct geneve_opt *)((u8 *)option + len);
1559                         opts_len -= len;
1560                 };
1561
1562                 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
1563         };
1564
1565         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
1566         if (start < 0)
1567                 return start;
1568
1569         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1570                         sizeof(*tun_info) + key.tun_opts_len);
1571         if (IS_ERR(a))
1572                 return PTR_ERR(a);
1573
1574         tun_info = nla_data(a);
1575         tun_info->tunnel = key.tun_key;
1576         tun_info->options_len = key.tun_opts_len;
1577
1578         if (tun_info->options_len) {
1579                 /* We need to store the options in the action itself since
1580                  * everything else will go away after flow setup. We can append
1581                  * it to tun_info and then point there.
1582                  */
1583                 tun_info->options = (struct geneve_opt *)(tun_info + 1);
1584                 memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len),
1585                         key.tun_opts_len);
1586         } else {
1587                 tun_info->options = NULL;
1588         }
1589
1590         add_nested_action_end(*sfa, start);
1591
1592         return err;
1593 }
1594
1595 static int validate_set(const struct nlattr *a,
1596                         const struct sw_flow_key *flow_key,
1597                         struct sw_flow_actions **sfa,
1598                         bool *set_tun, __be16 eth_type)
1599 {
1600         const struct nlattr *ovs_key = nla_data(a);
1601         int key_type = nla_type(ovs_key);
1602
1603         /* There can be only one key in a action */
1604         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1605                 return -EINVAL;
1606
1607         if (key_type > OVS_KEY_ATTR_MAX ||
1608             (ovs_key_lens[key_type] != nla_len(ovs_key) &&
1609              ovs_key_lens[key_type] != -1))
1610                 return -EINVAL;
1611
1612         switch (key_type) {
1613         const struct ovs_key_ipv4 *ipv4_key;
1614         const struct ovs_key_ipv6 *ipv6_key;
1615         int err;
1616
1617         case OVS_KEY_ATTR_PRIORITY:
1618         case OVS_KEY_ATTR_SKB_MARK:
1619         case OVS_KEY_ATTR_ETHERNET:
1620                 break;
1621
1622         case OVS_KEY_ATTR_TUNNEL:
1623                 *set_tun = true;
1624                 err = validate_and_copy_set_tun(a, sfa);
1625                 if (err)
1626                         return err;
1627                 break;
1628
1629         case OVS_KEY_ATTR_IPV4:
1630                 if (eth_type != htons(ETH_P_IP))
1631                         return -EINVAL;
1632
1633                 if (!flow_key->ip.proto)
1634                         return -EINVAL;
1635
1636                 ipv4_key = nla_data(ovs_key);
1637                 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1638                         return -EINVAL;
1639
1640                 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1641                         return -EINVAL;
1642
1643                 break;
1644
1645         case OVS_KEY_ATTR_IPV6:
1646                 if (eth_type != htons(ETH_P_IPV6))
1647                         return -EINVAL;
1648
1649                 if (!flow_key->ip.proto)
1650                         return -EINVAL;
1651
1652                 ipv6_key = nla_data(ovs_key);
1653                 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1654                         return -EINVAL;
1655
1656                 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1657                         return -EINVAL;
1658
1659                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1660                         return -EINVAL;
1661
1662                 break;
1663
1664         case OVS_KEY_ATTR_TCP:
1665                 if (flow_key->ip.proto != IPPROTO_TCP)
1666                         return -EINVAL;
1667
1668                 return validate_tp_port(flow_key, eth_type);
1669
1670         case OVS_KEY_ATTR_UDP:
1671                 if (flow_key->ip.proto != IPPROTO_UDP)
1672                         return -EINVAL;
1673
1674                 return validate_tp_port(flow_key, eth_type);
1675
1676         case OVS_KEY_ATTR_MPLS:
1677                 if (!eth_p_mpls(eth_type))
1678                         return -EINVAL;
1679                 break;
1680
1681         case OVS_KEY_ATTR_SCTP:
1682                 if (flow_key->ip.proto != IPPROTO_SCTP)
1683                         return -EINVAL;
1684
1685                 return validate_tp_port(flow_key, eth_type);
1686
1687         default:
1688                 return -EINVAL;
1689         }
1690
1691         return 0;
1692 }
1693
1694 static int validate_userspace(const struct nlattr *attr)
1695 {
1696         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1697                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1698                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1699                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
1700         };
1701         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1702         int error;
1703
1704         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1705                                  attr, userspace_policy);
1706         if (error)
1707                 return error;
1708
1709         if (!a[OVS_USERSPACE_ATTR_PID] ||
1710             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1711                 return -EINVAL;
1712
1713         return 0;
1714 }
1715
1716 static int copy_action(const struct nlattr *from,
1717                        struct sw_flow_actions **sfa)
1718 {
1719         int totlen = NLA_ALIGN(from->nla_len);
1720         struct nlattr *to;
1721
1722         to = reserve_sfa_size(sfa, from->nla_len);
1723         if (IS_ERR(to))
1724                 return PTR_ERR(to);
1725
1726         memcpy(to, from, totlen);
1727         return 0;
1728 }
1729
1730 static int __ovs_nla_copy_actions(const struct nlattr *attr,
1731                                   const struct sw_flow_key *key,
1732                                   int depth, struct sw_flow_actions **sfa,
1733                                   __be16 eth_type, __be16 vlan_tci)
1734 {
1735         const struct nlattr *a;
1736         int rem, err;
1737
1738         if (depth >= SAMPLE_ACTION_DEPTH)
1739                 return -EOVERFLOW;
1740
1741         nla_for_each_nested(a, attr, rem) {
1742                 /* Expected argument lengths, (u32)-1 for variable length. */
1743                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
1744                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1745                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
1746                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1747                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
1748                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
1749                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1750                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
1751                         [OVS_ACTION_ATTR_SET] = (u32)-1,
1752                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
1753                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
1754                 };
1755                 const struct ovs_action_push_vlan *vlan;
1756                 int type = nla_type(a);
1757                 bool skip_copy;
1758
1759                 if (type > OVS_ACTION_ATTR_MAX ||
1760                     (action_lens[type] != nla_len(a) &&
1761                      action_lens[type] != (u32)-1))
1762                         return -EINVAL;
1763
1764                 skip_copy = false;
1765                 switch (type) {
1766                 case OVS_ACTION_ATTR_UNSPEC:
1767                         return -EINVAL;
1768
1769                 case OVS_ACTION_ATTR_USERSPACE:
1770                         err = validate_userspace(a);
1771                         if (err)
1772                                 return err;
1773                         break;
1774
1775                 case OVS_ACTION_ATTR_OUTPUT:
1776                         if (nla_get_u32(a) >= DP_MAX_PORTS)
1777                                 return -EINVAL;
1778                         break;
1779
1780                 case OVS_ACTION_ATTR_HASH: {
1781                         const struct ovs_action_hash *act_hash = nla_data(a);
1782
1783                         switch (act_hash->hash_alg) {
1784                         case OVS_HASH_ALG_L4:
1785                                 break;
1786                         default:
1787                                 return  -EINVAL;
1788                         }
1789
1790                         break;
1791                 }
1792
1793                 case OVS_ACTION_ATTR_POP_VLAN:
1794                         vlan_tci = htons(0);
1795                         break;
1796
1797                 case OVS_ACTION_ATTR_PUSH_VLAN:
1798                         vlan = nla_data(a);
1799                         if (vlan->vlan_tpid != htons(ETH_P_8021Q))
1800                                 return -EINVAL;
1801                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1802                                 return -EINVAL;
1803                         vlan_tci = vlan->vlan_tci;
1804                         break;
1805
1806                 case OVS_ACTION_ATTR_RECIRC:
1807                         break;
1808
1809                 case OVS_ACTION_ATTR_PUSH_MPLS: {
1810                         const struct ovs_action_push_mpls *mpls = nla_data(a);
1811
1812                         if (!eth_p_mpls(mpls->mpls_ethertype))
1813                                 return -EINVAL;
1814                         /* Prohibit push MPLS other than to a white list
1815                          * for packets that have a known tag order.
1816                          */
1817                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1818                             (eth_type != htons(ETH_P_IP) &&
1819                              eth_type != htons(ETH_P_IPV6) &&
1820                              eth_type != htons(ETH_P_ARP) &&
1821                              eth_type != htons(ETH_P_RARP) &&
1822                              !eth_p_mpls(eth_type)))
1823                                 return -EINVAL;
1824                         eth_type = mpls->mpls_ethertype;
1825                         break;
1826                 }
1827
1828                 case OVS_ACTION_ATTR_POP_MPLS:
1829                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1830                             !eth_p_mpls(eth_type))
1831                                 return -EINVAL;
1832
1833                         /* Disallow subsequent L2.5+ set and mpls_pop actions
1834                          * as there is no check here to ensure that the new
1835                          * eth_type is valid and thus set actions could
1836                          * write off the end of the packet or otherwise
1837                          * corrupt it.
1838                          *
1839                          * Support for these actions is planned using packet
1840                          * recirculation.
1841                          */
1842                         eth_type = htons(0);
1843                         break;
1844
1845                 case OVS_ACTION_ATTR_SET:
1846                         err = validate_set(a, key, sfa, &skip_copy, eth_type);
1847                         if (err)
1848                                 return err;
1849                         break;
1850
1851                 case OVS_ACTION_ATTR_SAMPLE:
1852                         err = validate_and_copy_sample(a, key, depth, sfa,
1853                                                        eth_type, vlan_tci);
1854                         if (err)
1855                                 return err;
1856                         skip_copy = true;
1857                         break;
1858
1859                 default:
1860                         return -EINVAL;
1861                 }
1862                 if (!skip_copy) {
1863                         err = copy_action(a, sfa);
1864                         if (err)
1865                                 return err;
1866                 }
1867         }
1868
1869         if (rem > 0)
1870                 return -EINVAL;
1871
1872         return 0;
1873 }
1874
1875 int ovs_nla_copy_actions(const struct nlattr *attr,
1876                          const struct sw_flow_key *key,
1877                          struct sw_flow_actions **sfa)
1878 {
1879         int err;
1880
1881         *sfa = nla_alloc_flow_actions(nla_len(attr));
1882         if (IS_ERR(*sfa))
1883                 return PTR_ERR(*sfa);
1884
1885         err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
1886                                      key->eth.tci);
1887         if (err)
1888                 kfree(*sfa);
1889
1890         return err;
1891 }
1892
1893 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1894 {
1895         const struct nlattr *a;
1896         struct nlattr *start;
1897         int err = 0, rem;
1898
1899         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1900         if (!start)
1901                 return -EMSGSIZE;
1902
1903         nla_for_each_nested(a, attr, rem) {
1904                 int type = nla_type(a);
1905                 struct nlattr *st_sample;
1906
1907                 switch (type) {
1908                 case OVS_SAMPLE_ATTR_PROBABILITY:
1909                         if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
1910                                     sizeof(u32), nla_data(a)))
1911                                 return -EMSGSIZE;
1912                         break;
1913                 case OVS_SAMPLE_ATTR_ACTIONS:
1914                         st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1915                         if (!st_sample)
1916                                 return -EMSGSIZE;
1917                         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
1918                         if (err)
1919                                 return err;
1920                         nla_nest_end(skb, st_sample);
1921                         break;
1922                 }
1923         }
1924
1925         nla_nest_end(skb, start);
1926         return err;
1927 }
1928
1929 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1930 {
1931         const struct nlattr *ovs_key = nla_data(a);
1932         int key_type = nla_type(ovs_key);
1933         struct nlattr *start;
1934         int err;
1935
1936         switch (key_type) {
1937         case OVS_KEY_ATTR_TUNNEL_INFO: {
1938                 struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
1939
1940                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1941                 if (!start)
1942                         return -EMSGSIZE;
1943
1944                 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
1945                                          tun_info->options_len ?
1946                                                 tun_info->options : NULL,
1947                                          tun_info->options_len);
1948                 if (err)
1949                         return err;
1950                 nla_nest_end(skb, start);
1951                 break;
1952         }
1953         default:
1954                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1955                         return -EMSGSIZE;
1956                 break;
1957         }
1958
1959         return 0;
1960 }
1961
1962 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
1963 {
1964         const struct nlattr *a;
1965         int rem, err;
1966
1967         nla_for_each_attr(a, attr, len, rem) {
1968                 int type = nla_type(a);
1969
1970                 switch (type) {
1971                 case OVS_ACTION_ATTR_SET:
1972                         err = set_action_to_attr(a, skb);
1973                         if (err)
1974                                 return err;
1975                         break;
1976
1977                 case OVS_ACTION_ATTR_SAMPLE:
1978                         err = sample_action_to_attr(a, skb);
1979                         if (err)
1980                                 return err;
1981                         break;
1982                 default:
1983                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
1984                                 return -EMSGSIZE;
1985                         break;
1986                 }
1987         }
1988
1989         return 0;
1990 }