762ea035be6ea758f292460ccfd1e9533adf651a
[cascardo/ovs.git] / datapath / flow_netlink.c
1 /*
2  * Copyright (c) 2007-2014 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include "flow.h"
22 #include "datapath.h"
23 #include "mpls.h"
24 #include <linux/uaccess.h>
25 #include <linux/netdevice.h>
26 #include <linux/etherdevice.h>
27 #include <linux/if_ether.h>
28 #include <linux/if_vlan.h>
29 #include <net/llc_pdu.h>
30 #include <linux/kernel.h>
31 #include <linux/jhash.h>
32 #include <linux/jiffies.h>
33 #include <linux/llc.h>
34 #include <linux/module.h>
35 #include <linux/in.h>
36 #include <linux/rcupdate.h>
37 #include <linux/if_arp.h>
38 #include <linux/ip.h>
39 #include <linux/ipv6.h>
40 #include <linux/sctp.h>
41 #include <linux/tcp.h>
42 #include <linux/udp.h>
43 #include <linux/icmp.h>
44 #include <linux/icmpv6.h>
45 #include <linux/rculist.h>
46 #include <net/geneve.h>
47 #include <net/ip.h>
48 #include <net/ip_tunnels.h>
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51
52 #include "flow_netlink.h"
53
54 static void update_range(struct sw_flow_match *match,
55                          size_t offset, size_t size, bool is_mask)
56 {
57         struct sw_flow_key_range *range;
58         size_t start = rounddown(offset, sizeof(long));
59         size_t end = roundup(offset + size, sizeof(long));
60
61         if (!is_mask)
62                 range = &match->range;
63         else
64                 range = &match->mask->range;
65
66         if (range->start == range->end) {
67                 range->start = start;
68                 range->end = end;
69                 return;
70         }
71
72         if (range->start > start)
73                 range->start = start;
74
75         if (range->end < end)
76                 range->end = end;
77 }
78
/*
 * Assign @value to @field of the flow key (or, when @is_mask is true, of the
 * key embedded in the mask) and extend the corresponding range to cover the
 * field.  @is_mask and @match are expanded more than once, so they must be
 * free of side effects; @value is evaluated exactly once.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
        do { \
                update_range((match), offsetof(struct sw_flow_key, field),  \
                             sizeof((match)->key->field), (is_mask));       \
                if (is_mask)                                                \
                        (match)->mask->key.field = (value);                 \
                else                                                        \
                        (match)->key->field = (value);                      \
        } while (0)
88
/*
 * Copy @len bytes from @value_p to byte @offset inside the flow key (or the
 * key embedded in the mask when @is_mask is true) and extend the
 * corresponding range.  All arguments are expanded more than once and must
 * therefore be free of side effects.
 */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
        do {                                                                \
                update_range((match), (offset), (len), (is_mask));          \
                if (is_mask)                                                \
                        memcpy((u8 *)&(match)->mask->key + (offset),        \
                               (value_p), (len));                           \
                else                                                        \
                        memcpy((u8 *)(match)->key + (offset),               \
                               (value_p), (len));                           \
        } while (0)
97
/*
 * Field-name flavour of SW_FLOW_KEY_MEMCPY_OFFSET(): the destination is given
 * as a member of struct sw_flow_key instead of a raw byte offset.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)             \
        SW_FLOW_KEY_MEMCPY_OFFSET(match,                                    \
                                  offsetof(struct sw_flow_key, field),      \
                                  value_p, len, is_mask)
101
/*
 * Fill every byte of @field in the flow key (or in the key embedded in the
 * mask when @is_mask is true) with @value and extend the corresponding range
 * to cover the field.  @match and @is_mask are expanded more than once and
 * must be free of side effects.
 */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
        do {                                                                \
                update_range((match), offsetof(struct sw_flow_key, field),  \
                             sizeof((match)->key->field), (is_mask));       \
                if (is_mask)                                                \
                        memset((u8 *)&(match)->mask->key.field, (value),    \
                               sizeof((match)->mask->key.field));           \
                else                                                        \
                        memset((u8 *)&(match)->key->field, (value),         \
                               sizeof((match)->key->field));                \
        } while (0)
113
114 static bool match_validate(const struct sw_flow_match *match,
115                            u64 key_attrs, u64 mask_attrs, bool log)
116 {
117         u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
118         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
119
120         /* The following mask attributes allowed only if they
121          * pass the validation tests. */
122         mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
123                         | (1ULL << OVS_KEY_ATTR_IPV6)
124                         | (1ULL << OVS_KEY_ATTR_TCP)
125                         | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
126                         | (1ULL << OVS_KEY_ATTR_UDP)
127                         | (1ULL << OVS_KEY_ATTR_SCTP)
128                         | (1ULL << OVS_KEY_ATTR_ICMP)
129                         | (1ULL << OVS_KEY_ATTR_ICMPV6)
130                         | (1ULL << OVS_KEY_ATTR_ARP)
131                         | (1ULL << OVS_KEY_ATTR_ND)
132                         | (1ULL << OVS_KEY_ATTR_MPLS));
133
134         /* Always allowed mask fields. */
135         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
136                        | (1ULL << OVS_KEY_ATTR_IN_PORT)
137                        | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
138
139         /* Check key attributes. */
140         if (match->key->eth.type == htons(ETH_P_ARP)
141                         || match->key->eth.type == htons(ETH_P_RARP)) {
142                 key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
143                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
144                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
145         }
146
147
148         if (eth_p_mpls(match->key->eth.type)) {
149                 key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
150                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
151                         mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
152         }
153
154         if (match->key->eth.type == htons(ETH_P_IP)) {
155                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
156                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
157                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
158
159                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
160                         if (match->key->ip.proto == IPPROTO_UDP) {
161                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
162                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
163                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
164                         }
165
166                         if (match->key->ip.proto == IPPROTO_SCTP) {
167                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
168                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
169                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
170                         }
171
172                         if (match->key->ip.proto == IPPROTO_TCP) {
173                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
174                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
175                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
176                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
177                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
178                                 }
179                         }
180
181                         if (match->key->ip.proto == IPPROTO_ICMP) {
182                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
183                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
184                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
185                         }
186                 }
187         }
188
189         if (match->key->eth.type == htons(ETH_P_IPV6)) {
190                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
191                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
192                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
193
194                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
195                         if (match->key->ip.proto == IPPROTO_UDP) {
196                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
197                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
198                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
199                         }
200
201                         if (match->key->ip.proto == IPPROTO_SCTP) {
202                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
203                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
204                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
205                         }
206
207                         if (match->key->ip.proto == IPPROTO_TCP) {
208                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
209                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
210                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
211                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
212                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
213                                 }
214                         }
215
216                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
217                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
218                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
219                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
220
221                                 if (match->key->tp.src ==
222                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
223                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
224                                         key_expected |= 1ULL << OVS_KEY_ATTR_ND;
225                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
226                                                 mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
227                                 }
228                         }
229                 }
230         }
231
232         if ((key_attrs & key_expected) != key_expected) {
233                 /* Key attributes check failed. */
234                 OVS_NLERR(log,
235                           "Missing expected key attributes (key_attrs=%llx, expected=%llx).",
236                           (unsigned long long)key_attrs,
237                           (unsigned long long)key_expected);
238                 return false;
239         }
240
241         if ((mask_attrs & mask_allowed) != mask_attrs) {
242                 /* Mask attributes check failed. */
243                 OVS_NLERR(log,
244                           "Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).",
245                           (unsigned long long)mask_attrs,
246                           (unsigned long long)mask_allowed);
247                 return false;
248         }
249
250         return true;
251 }
252
253 size_t ovs_tun_key_attr_size(void)
254 {
255         /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
256          * updating this function.  */
257         return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */
258                 + nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
259                 + nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
260                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
261                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
262                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
263                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
264                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
265                 + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
266                 + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
267                 + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
268 }
269
270 size_t ovs_key_attr_size(void)
271 {
272         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
273          * updating this function.  */
274         BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
275
276         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
277                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
278                   + ovs_tun_key_attr_size()
279                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
280                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
281                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
282                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
283                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
284                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
285                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
286                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
287                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
288                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
289                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
290                 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
291 }
292
293 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
294 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
295         [OVS_KEY_ATTR_ENCAP] = -1,
296         [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
297         [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
298         [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
299         [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
300         [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
301         [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
302         [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
303         [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
304         [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
305         [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
306         [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
307         [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
308         [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
309         [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
310         [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
311         [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
312         [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
313         [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
314         [OVS_KEY_ATTR_TUNNEL] = -1,
315         [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
316 };
317
/*
 * Return true when the @size bytes starting at @fp are all zero.
 *
 * A NULL @fp yields false.  A @size of 0 with a non-NULL @fp yields true.
 * The index is a size_t so that it has the same type as @size; a signed
 * int index would overflow for sizes above INT_MAX.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
        size_t i;

        if (!fp)
                return false;

        for (i = 0; i < size; i++)
                if (fp[i])
                        return false;

        return true;
}
331
332 static int __parse_flow_nlattrs(const struct nlattr *attr,
333                                 const struct nlattr *a[],
334                                 u64 *attrsp, bool log, bool nz)
335 {
336         const struct nlattr *nla;
337         u64 attrs;
338         int rem;
339
340         attrs = *attrsp;
341         nla_for_each_nested(nla, attr, rem) {
342                 u16 type = nla_type(nla);
343                 int expected_len;
344
345                 if (type > OVS_KEY_ATTR_MAX) {
346                         OVS_NLERR(log,
347                                   "Unknown key attribute (type=%d, max=%d).",
348                                   type, OVS_KEY_ATTR_MAX);
349                         return -EINVAL;
350                 }
351
352                 if (attrs & (1ULL << type)) {
353                         OVS_NLERR(log, "Duplicate key attribute (type %d).",
354                                   type);
355                         return -EINVAL;
356                 }
357
358                 expected_len = ovs_key_lens[type];
359                 if (nla_len(nla) != expected_len && expected_len != -1) {
360                         OVS_NLERR(log, "Key attribute has unexpected length "
361                                   "(type=%d, length=%d, expected=%d).", type,
362                                   nla_len(nla), expected_len);
363                         return -EINVAL;
364                 }
365
366                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
367                         attrs |= 1ULL << type;
368                         a[type] = nla;
369                 }
370         }
371         if (rem) {
372                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
373                 return -EINVAL;
374         }
375
376         *attrsp = attrs;
377         return 0;
378 }
379
380 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
381                                    const struct nlattr *a[], u64 *attrsp,
382                                    bool log)
383 {
384         return __parse_flow_nlattrs(attr, a, attrsp, log, true);
385 }
386
387 static int parse_flow_nlattrs(const struct nlattr *attr,
388                               const struct nlattr *a[], u64 *attrsp,
389                               bool log)
390 {
391         return __parse_flow_nlattrs(attr, a, attrsp, log, false);
392 }
393
394 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
395                                 struct sw_flow_match *match, bool is_mask,
396                                 bool log)
397 {
398         struct nlattr *a;
399         int rem;
400         bool ttl = false;
401         __be16 tun_flags = 0;
402
403         nla_for_each_nested(a, attr, rem) {
404                 int type = nla_type(a);
405                 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
406                         [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
407                         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
408                         [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
409                         [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
410                         [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
411                         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
412                         [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
413                         [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
414                         [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
415                         [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
416                         [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
417                 };
418
419                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
420                         OVS_NLERR(log,
421                                   "Unknown tunnel attribute (type=%d, max=%d).",
422                                   type, OVS_TUNNEL_KEY_ATTR_MAX);
423                         return -EINVAL;
424                 }
425
426                 if (ovs_tunnel_key_lens[type] != nla_len(a) &&
427                     ovs_tunnel_key_lens[type] != -1) {
428                         OVS_NLERR(log,
429                                   "Tunnel attribute has unexpected length "
430                                   "(type=%d, length=%d, expected=%d).",
431                                   type, nla_len(a),
432                                   ovs_tunnel_key_lens[type]);
433                         return -EINVAL;
434                 }
435
436                 switch (type) {
437                 case OVS_TUNNEL_KEY_ATTR_ID:
438                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
439                                         nla_get_be64(a), is_mask);
440                         tun_flags |= TUNNEL_KEY;
441                         break;
442                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
443                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
444                                         nla_get_be32(a), is_mask);
445                         break;
446                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
447                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
448                                         nla_get_be32(a), is_mask);
449                         break;
450                 case OVS_TUNNEL_KEY_ATTR_TOS:
451                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
452                                         nla_get_u8(a), is_mask);
453                         break;
454                 case OVS_TUNNEL_KEY_ATTR_TTL:
455                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
456                                         nla_get_u8(a), is_mask);
457                         ttl = true;
458                         break;
459                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
460                         tun_flags |= TUNNEL_DONT_FRAGMENT;
461                         break;
462                 case OVS_TUNNEL_KEY_ATTR_CSUM:
463                         tun_flags |= TUNNEL_CSUM;
464                         break;
465                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
466                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
467                                         nla_get_be16(a), is_mask);
468                         break;
469                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
470                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
471                                         nla_get_be16(a), is_mask);
472                         break;
473                 case OVS_TUNNEL_KEY_ATTR_OAM:
474                         tun_flags |= TUNNEL_OAM;
475                         break;
476                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
477                         tun_flags |= TUNNEL_OPTIONS_PRESENT;
478                         if (nla_len(a) > sizeof(match->key->tun_opts)) {
479                                 OVS_NLERR(log, "Geneve option too long "
480                                           "(len %d, max %zu).",
481                                           nla_len(a),
482                                           sizeof(match->key->tun_opts));
483                                 return -EINVAL;
484                         }
485
486                         if (nla_len(a) % 4 != 0) {
487                                 OVS_NLERR(log, "Geneve option length is not "
488                                           "a multiple of 4 (len %d).",
489                                           nla_len(a));
490                                 return -EINVAL;
491                         }
492
493                         /* We need to record the length of the options passed
494                          * down, otherwise packets with the same format but
495                          * additional options will be silently matched.
496                          */
497                         if (!is_mask) {
498                                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
499                                                 false);
500                         } else {
501                                 /* This is somewhat unusual because it looks at
502                                  * both the key and mask while parsing the
503                                  * attributes (and by extension assumes the key
504                                  * is parsed first). Normally, we would verify
505                                  * that each is the correct length and that the
506                                  * attributes line up in the validate function.
507                                  * However, that is difficult because this is
508                                  * variable length and we won't have the
509                                  * information later.
510                                  */
511                                 if (match->key->tun_opts_len != nla_len(a)) {
512                                         OVS_NLERR(log,
513                                                   "Geneve option key length (%d)"
514                                                   " is different from mask length (%d).",
515                                                   match->key->tun_opts_len,
516                                                   nla_len(a));
517                                         return -EINVAL;
518                                 }
519
520                                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
521                                                 true);
522                         }
523
524                         SW_FLOW_KEY_MEMCPY_OFFSET(match,
525                                 (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
526                                                            nla_len(a)),
527                                 nla_data(a), nla_len(a), is_mask);
528                         break;
529                 default:
530                         OVS_NLERR(log, "Unknown IPv4 tunnel attribute (%d).",
531                                   type);
532                         return -EINVAL;
533                 }
534         }
535
536         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
537
538         if (rem > 0) {
539                 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
540                           rem);
541                 return -EINVAL;
542         }
543
544         if (!is_mask) {
545                 if (!match->key->tun_key.ipv4_dst) {
546                         OVS_NLERR(log,
547                                   "IPv4 tunnel destination address is zero.");
548                         return -EINVAL;
549                 }
550
551                 if (!ttl) {
552                         OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
553                         return -EINVAL;
554                 }
555         }
556
557         return 0;
558 }
559
560 static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
561                                 const struct ovs_key_ipv4_tunnel *output,
562                                 const struct geneve_opt *tun_opts,
563                                 int swkey_tun_opts_len)
564 {
565         if (output->tun_flags & TUNNEL_KEY &&
566             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
567                 return -EMSGSIZE;
568         if (output->ipv4_src &&
569                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
570                 return -EMSGSIZE;
571         if (output->ipv4_dst &&
572                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
573                 return -EMSGSIZE;
574         if (output->ipv4_tos &&
575                 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
576                 return -EMSGSIZE;
577         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
578                 return -EMSGSIZE;
579         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
580                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
581                 return -EMSGSIZE;
582         if ((output->tun_flags & TUNNEL_CSUM) &&
583                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
584                 return -EMSGSIZE;
585         if (output->tp_src &&
586                 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
587                 return -EMSGSIZE;
588         if (output->tp_dst &&
589                 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
590                 return -EMSGSIZE;
591         if ((output->tun_flags & TUNNEL_OAM) &&
592                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
593                 return -EMSGSIZE;
594         if (tun_opts &&
595             nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
596                     swkey_tun_opts_len, tun_opts))
597                 return -EMSGSIZE;
598
599         return 0;
600 }
601
602
603 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
604                               const struct ovs_key_ipv4_tunnel *output,
605                               const struct geneve_opt *tun_opts,
606                               int swkey_tun_opts_len)
607 {
608         struct nlattr *nla;
609         int err;
610
611         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
612         if (!nla)
613                 return -EMSGSIZE;
614
615         err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
616         if (err)
617                 return err;
618
619         nla_nest_end(skb, nla);
620         return 0;
621 }
622
623 int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
624                                   const struct ovs_tunnel_info *egress_tun_info)
625 {
626         return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
627                                     egress_tun_info->options,
628                                     egress_tun_info->options_len);
629 }
630
631 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
632                                  const struct nlattr **a, bool is_mask,
633                                  bool log)
634 {
635         if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
636                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
637
638                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
639                 *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
640         }
641
642         if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
643                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
644
645                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
646                 *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
647         }
648
649         if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
650                 SW_FLOW_KEY_PUT(match, phy.priority,
651                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
652                 *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
653         }
654
655         if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
656                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
657
658                 if (is_mask) {
659                         in_port = 0xffffffff; /* Always exact match in_port. */
660                 } else if (in_port >= DP_MAX_PORTS) {
661                         OVS_NLERR(log,
662                                   "Input port (%d) exceeds maximum allowable (%d).",
663                                   in_port, DP_MAX_PORTS);
664                         return -EINVAL;
665                 }
666
667                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
668                 *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
669         } else if (!is_mask) {
670                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
671         }
672
673         if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
674                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
675
676                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
677                 *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
678         }
679         if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
680                 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
681                                          is_mask, log))
682                         return -EINVAL;
683                 *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
684         }
685         return 0;
686 }
687
688 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
689                                 const struct nlattr **a, bool is_mask,
690                                 bool log)
691 {
692         int err;
693
694         err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
695         if (err)
696                 return err;
697
698         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
699                 const struct ovs_key_ethernet *eth_key;
700
701                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
702                 SW_FLOW_KEY_MEMCPY(match, eth.src,
703                                 eth_key->eth_src, ETH_ALEN, is_mask);
704                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
705                                 eth_key->eth_dst, ETH_ALEN, is_mask);
706                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
707         }
708
709         if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
710                 __be16 tci;
711
712                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
713                 if (!(tci & htons(VLAN_TAG_PRESENT))) {
714                         if (is_mask)
715                                 OVS_NLERR(log,
716                                           "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
717                         else
718                                 OVS_NLERR(log,
719                                           "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
720
721                         return -EINVAL;
722                 }
723
724                 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
725                 attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
726         }
727
728         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
729                 __be16 eth_type;
730
731                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
732                 if (is_mask) {
733                         /* Always exact match EtherType. */
734                         eth_type = htons(0xffff);
735                 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
736                         OVS_NLERR(log,
737                                   "EtherType is less than minimum (type=%x, min=%x).",
738                                   ntohs(eth_type), ETH_P_802_3_MIN);
739                         return -EINVAL;
740                 }
741
742                 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
743                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
744         } else if (!is_mask) {
745                 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
746         }
747
748         if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
749                 const struct ovs_key_ipv4 *ipv4_key;
750
751                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
752                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
753                         OVS_NLERR(log,
754                                   "Unknown IPv4 fragment type (value=%d, max=%d).",
755                                   ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
756                         return -EINVAL;
757                 }
758                 SW_FLOW_KEY_PUT(match, ip.proto,
759                                 ipv4_key->ipv4_proto, is_mask);
760                 SW_FLOW_KEY_PUT(match, ip.tos,
761                                 ipv4_key->ipv4_tos, is_mask);
762                 SW_FLOW_KEY_PUT(match, ip.ttl,
763                                 ipv4_key->ipv4_ttl, is_mask);
764                 SW_FLOW_KEY_PUT(match, ip.frag,
765                                 ipv4_key->ipv4_frag, is_mask);
766                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
767                                 ipv4_key->ipv4_src, is_mask);
768                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
769                                 ipv4_key->ipv4_dst, is_mask);
770                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
771         }
772
773         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
774                 const struct ovs_key_ipv6 *ipv6_key;
775
776                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
777                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
778                         OVS_NLERR(log,
779                                   "Unknown IPv6 fragment type (value=%d, max=%d).",
780                                   ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
781                         return -EINVAL;
782                 }
783                 if (ipv6_key->ipv6_label & htonl(0xFFF00000)) {
784                         OVS_NLERR(log,
785                                   "Invalid IPv6 flow label value (value=%x, max=%x).",
786                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
787                         return -EINVAL;
788                 }
789                 SW_FLOW_KEY_PUT(match, ipv6.label,
790                                 ipv6_key->ipv6_label, is_mask);
791                 SW_FLOW_KEY_PUT(match, ip.proto,
792                                 ipv6_key->ipv6_proto, is_mask);
793                 SW_FLOW_KEY_PUT(match, ip.tos,
794                                 ipv6_key->ipv6_tclass, is_mask);
795                 SW_FLOW_KEY_PUT(match, ip.ttl,
796                                 ipv6_key->ipv6_hlimit, is_mask);
797                 SW_FLOW_KEY_PUT(match, ip.frag,
798                                 ipv6_key->ipv6_frag, is_mask);
799                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
800                                 ipv6_key->ipv6_src,
801                                 sizeof(match->key->ipv6.addr.src),
802                                 is_mask);
803                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
804                                 ipv6_key->ipv6_dst,
805                                 sizeof(match->key->ipv6.addr.dst),
806                                 is_mask);
807
808                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
809         }
810
811         if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
812                 const struct ovs_key_arp *arp_key;
813
814                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
815                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
816                         OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
817                                   arp_key->arp_op);
818                         return -EINVAL;
819                 }
820
821                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
822                                 arp_key->arp_sip, is_mask);
823                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
824                         arp_key->arp_tip, is_mask);
825                 SW_FLOW_KEY_PUT(match, ip.proto,
826                                 ntohs(arp_key->arp_op), is_mask);
827                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
828                                 arp_key->arp_sha, ETH_ALEN, is_mask);
829                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
830                                 arp_key->arp_tha, ETH_ALEN, is_mask);
831
832                 attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
833         }
834
835         if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
836                 const struct ovs_key_mpls *mpls_key;
837
838                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
839                 SW_FLOW_KEY_PUT(match, mpls.top_lse,
840                                 mpls_key->mpls_lse, is_mask);
841
842                 attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
843         }
844
845         if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
846                 const struct ovs_key_tcp *tcp_key;
847
848                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
849                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
850                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
851                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
852         }
853
854         if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
855                 SW_FLOW_KEY_PUT(match, tp.flags,
856                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
857                                 is_mask);
858                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
859         }
860
861         if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
862                 const struct ovs_key_udp *udp_key;
863
864                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
865                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
866                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
867                 attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
868         }
869
870         if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
871                 const struct ovs_key_sctp *sctp_key;
872
873                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
874                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
875                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
876                 attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
877         }
878
879         if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
880                 const struct ovs_key_icmp *icmp_key;
881
882                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
883                 SW_FLOW_KEY_PUT(match, tp.src,
884                                 htons(icmp_key->icmp_type), is_mask);
885                 SW_FLOW_KEY_PUT(match, tp.dst,
886                                 htons(icmp_key->icmp_code), is_mask);
887                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
888         }
889
890         if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
891                 const struct ovs_key_icmpv6 *icmpv6_key;
892
893                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
894                 SW_FLOW_KEY_PUT(match, tp.src,
895                                 htons(icmpv6_key->icmpv6_type), is_mask);
896                 SW_FLOW_KEY_PUT(match, tp.dst,
897                                 htons(icmpv6_key->icmpv6_code), is_mask);
898                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
899         }
900
901         if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
902                 const struct ovs_key_nd *nd_key;
903
904                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
905                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
906                         nd_key->nd_target,
907                         sizeof(match->key->ipv6.nd.target),
908                         is_mask);
909                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
910                         nd_key->nd_sll, ETH_ALEN, is_mask);
911                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
912                                 nd_key->nd_tll, ETH_ALEN, is_mask);
913                 attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
914         }
915
916         if (attrs != 0) {
917                 OVS_NLERR(log, "Unknown key attributes (%llx).",
918                           (unsigned long long)attrs);
919                 return -EINVAL;
920         }
921
922         return 0;
923 }
924
925 static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
926 {
927         struct nlattr *nla;
928         int rem;
929
930         /* The nlattr stream should already have been validated */
931         nla_for_each_nested(nla, attr, rem) {
932                 /* We assume that ovs_key_lens[type] == -1 means that type is a
933                  * nested attribute
934                  */
935                 if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
936                         nlattr_set(nla, val, false);
937                 else
938                         memset(nla_data(nla), val, nla_len(nla));
939         }
940 }
941
942 static void mask_set_nlattr(struct nlattr *attr, u8 val)
943 {
944         nlattr_set(attr, val, true);
945 }
946
947 /**
948  * ovs_nla_get_match - parses Netlink attributes into a flow key and
949  * mask. In case the 'mask' is NULL, the flow is treated as exact match
950  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
951  * does not include any don't care bit.
952  * @match: receives the extracted flow match information.
953  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
954  * sequence. The fields should of the packet that triggered the creation
955  * of this flow.
956  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
957  * attribute specifies the mask field of the wildcarded flow.
958  * @log: Boolean to allow kernel error logging.  Normally true, but when
959  * probing for feature compatibility this should be passed in as false to
960  * suppress unnecessary error logging.
961  */
962 int ovs_nla_get_match(struct sw_flow_match *match,
963                       const struct nlattr *nla_key,
964                       const struct nlattr *nla_mask,
965                       bool log)
966 {
967         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
968         const struct nlattr *encap;
969         struct nlattr *newmask = NULL;
970         u64 key_attrs = 0;
971         u64 mask_attrs = 0;
972         bool encap_valid = false;
973         int err;
974
975         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
976         if (err)
977                 return err;
978
979         if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
980             (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
981             (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
982                 __be16 tci;
983
984                 if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
985                       (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
986                         OVS_NLERR(log, "Invalid Vlan frame.");
987                         return -EINVAL;
988                 }
989
990                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
991                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
992                 encap = a[OVS_KEY_ATTR_ENCAP];
993                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
994                 encap_valid = true;
995
996                 if (tci & htons(VLAN_TAG_PRESENT)) {
997                         err = parse_flow_nlattrs(encap, a, &key_attrs, log);
998                         if (err)
999                                 return err;
1000                 } else if (!tci) {
1001                         /* Corner case for truncated 802.1Q header. */
1002                         if (nla_len(encap)) {
1003                                 OVS_NLERR(log,
1004                                           "Truncated 802.1Q header has non-zero encap attribute.");
1005                                 return -EINVAL;
1006                         }
1007                 } else {
1008                         OVS_NLERR(log,
1009                                   "Encap attribute is set for a non-VLAN frame.");
1010                         return  -EINVAL;
1011                 }
1012         }
1013
1014         err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
1015         if (err)
1016                 return err;
1017
1018         if (match->mask) {
1019
1020                 if (!nla_mask) {
1021                         /* Create an exact match mask. We need to set to 0xff
1022                          * all the 'match->mask' fields that have been touched
1023                          * in 'match->key'. We cannot simply memset
1024                          * 'match->mask', because padding bytes and fields not
1025                          * specified in 'match->key' should be left to 0.
1026                          * Instead, we use a stream of netlink attributes,
1027                          * copied from 'key' and set to 0xff: ovs_key_from_nlattrs()
1028                          * will take care of filling 'match->mask'
1029                          * appropriately.
1030                          */
1031                         newmask = kmemdup(nla_key,
1032                                           nla_total_size(nla_len(nla_key)),
1033                                           GFP_KERNEL);
1034                         if (!newmask)
1035                                 return -ENOMEM;
1036
1037                         mask_set_nlattr(newmask, 0xff);
1038
1039                         /* The userspace does not send tunnel attributes that
1040                          * are 0, but we should not wildcard them nonetheless.
1041                          */
1042                         if (match->key->tun_key.ipv4_dst)
1043                                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1044                                                          0xff, true);
1045
1046                         nla_mask = newmask;
1047                 }
1048
1049                 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1050                 if (err)
1051                         goto free_newmask;
1052
1053                 /* Always match on tci. */
1054                 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1055
1056                 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
1057                         __be16 eth_type = 0;
1058                         __be16 tci = 0;
1059
1060                         if (!encap_valid) {
1061                                 OVS_NLERR(log,
1062                                           "Encap mask attribute is set for non-VLAN frame.");
1063                                 err = -EINVAL;
1064                                 goto free_newmask;
1065                         }
1066
1067                         mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
1068                         if (a[OVS_KEY_ATTR_ETHERTYPE])
1069                                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1070
1071                         if (eth_type == htons(0xffff)) {
1072                                 mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
1073                                 encap = a[OVS_KEY_ATTR_ENCAP];
1074                                 err = parse_flow_mask_nlattrs(encap, a,
1075                                                               &mask_attrs, log);
1076                                 if (err)
1077                                         goto free_newmask;
1078                         } else {
1079                                 OVS_NLERR(log,
1080                                           "VLAN frames must have an exact match on the TPID (mask=%x).",
1081                                           ntohs(eth_type));
1082                                 err = -EINVAL;
1083                                 goto free_newmask;
1084                         }
1085
1086                         if (a[OVS_KEY_ATTR_VLAN])
1087                                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1088
1089                         if (!(tci & htons(VLAN_TAG_PRESENT))) {
1090                                 OVS_NLERR(log,
1091                                           "VLAN tag present bit must have an exact match (tci_mask=%x).",
1092                                           ntohs(tci));
1093                                 err = -EINVAL;
1094                                 goto free_newmask;
1095                         }
1096                 }
1097
1098                 err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
1099                 if (err)
1100                         goto free_newmask;
1101         }
1102
1103         if (!match_validate(match, key_attrs, mask_attrs, log))
1104                 err = -EINVAL;
1105
1106 free_newmask:
1107         kfree(newmask);
1108         return err;
1109 }
1110
1111 /**
1112  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1113  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
1114  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1115  * sequence.
1116  * @log: Boolean to allow kernel error logging.  Normally true, but when
1117  * probing for feature compatibility this should be passed in as false to
1118  * suppress unnecessary error logging.
1119  *
1120  * This parses a series of Netlink attributes that form a flow key, which must
1121  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1122  * get the metadata, that is, the parts of the flow key that cannot be
1123  * extracted from the packet itself.
1124  */
1125 int ovs_nla_get_flow_metadata(const struct nlattr *attr,
1126                               struct sw_flow_key *key,
1127                               bool log)
1128 {
1129         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1130         struct sw_flow_match match;
1131         u64 attrs = 0;
1132         int err;
1133
1134         err = parse_flow_nlattrs(attr, a, &attrs, log);
1135         if (err)
1136                 return -EINVAL;
1137
1138         memset(&match, 0, sizeof(match));
1139         match.key = key;
1140
1141         memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
1142         key->phy.in_port = DP_MAX_PORTS;
1143
1144         return metadata_from_nlattrs(&match, &attrs, a, false, log);
1145 }
1146
1147 int ovs_nla_put_flow(const struct sw_flow_key *swkey,
1148                      const struct sw_flow_key *output, struct sk_buff *skb)
1149 {
1150         struct ovs_key_ethernet *eth_key;
1151         struct nlattr *nla, *encap;
1152         bool is_mask = (swkey != output);
1153
1154         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1155                 goto nla_put_failure;
1156
1157         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1158                 goto nla_put_failure;
1159
1160         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1161                 goto nla_put_failure;
1162
1163         if ((swkey->tun_key.ipv4_dst || is_mask)) {
1164                 const struct geneve_opt *opts = NULL;
1165
1166                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1167                         opts = GENEVE_OPTS(output, swkey->tun_opts_len);
1168
1169                 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
1170                                         swkey->tun_opts_len))
1171                         goto nla_put_failure;
1172         }
1173
1174         if (swkey->phy.in_port == DP_MAX_PORTS) {
1175                 if (is_mask && (output->phy.in_port == 0xffff))
1176                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1177                                 goto nla_put_failure;
1178         } else {
1179                 u16 upper_u16;
1180                 upper_u16 = !is_mask ? 0 : 0xffff;
1181
1182                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1183                                 (upper_u16 << 16) | output->phy.in_port))
1184                         goto nla_put_failure;
1185         }
1186
1187         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1188                 goto nla_put_failure;
1189
1190         nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1191         if (!nla)
1192                 goto nla_put_failure;
1193
1194         eth_key = nla_data(nla);
1195         ether_addr_copy(eth_key->eth_src, output->eth.src);
1196         ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1197
1198         if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1199                 __be16 eth_type;
1200                 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1201                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1202                     nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1203                         goto nla_put_failure;
1204                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1205                 if (!swkey->eth.tci)
1206                         goto unencap;
1207         } else
1208                 encap = NULL;
1209
1210         if (swkey->eth.type == htons(ETH_P_802_2)) {
1211                 /*
1212                  * Ethertype 802.2 is represented in the netlink with omitted
1213                  * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1214                  * 0xffff in the mask attribute.  Ethertype can also
1215                  * be wildcarded.
1216                  */
1217                 if (is_mask && output->eth.type)
1218                         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1219                                                 output->eth.type))
1220                                 goto nla_put_failure;
1221                 goto unencap;
1222         }
1223
1224         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1225                 goto nla_put_failure;
1226
1227         if (swkey->eth.type == htons(ETH_P_IP)) {
1228                 struct ovs_key_ipv4 *ipv4_key;
1229
1230                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
1231                 if (!nla)
1232                         goto nla_put_failure;
1233                 ipv4_key = nla_data(nla);
1234                 ipv4_key->ipv4_src = output->ipv4.addr.src;
1235                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1236                 ipv4_key->ipv4_proto = output->ip.proto;
1237                 ipv4_key->ipv4_tos = output->ip.tos;
1238                 ipv4_key->ipv4_ttl = output->ip.ttl;
1239                 ipv4_key->ipv4_frag = output->ip.frag;
1240         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1241                 struct ovs_key_ipv6 *ipv6_key;
1242
1243                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
1244                 if (!nla)
1245                         goto nla_put_failure;
1246                 ipv6_key = nla_data(nla);
1247                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1248                                 sizeof(ipv6_key->ipv6_src));
1249                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1250                                 sizeof(ipv6_key->ipv6_dst));
1251                 ipv6_key->ipv6_label = output->ipv6.label;
1252                 ipv6_key->ipv6_proto = output->ip.proto;
1253                 ipv6_key->ipv6_tclass = output->ip.tos;
1254                 ipv6_key->ipv6_hlimit = output->ip.ttl;
1255                 ipv6_key->ipv6_frag = output->ip.frag;
1256         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1257                    swkey->eth.type == htons(ETH_P_RARP)) {
1258                 struct ovs_key_arp *arp_key;
1259
1260                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
1261                 if (!nla)
1262                         goto nla_put_failure;
1263                 arp_key = nla_data(nla);
1264                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1265                 arp_key->arp_sip = output->ipv4.addr.src;
1266                 arp_key->arp_tip = output->ipv4.addr.dst;
1267                 arp_key->arp_op = htons(output->ip.proto);
1268                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
1269                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
1270         } else if (eth_p_mpls(swkey->eth.type)) {
1271                 struct ovs_key_mpls *mpls_key;
1272
1273                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
1274                 if (!nla)
1275                         goto nla_put_failure;
1276                 mpls_key = nla_data(nla);
1277                 mpls_key->mpls_lse = output->mpls.top_lse;
1278         }
1279
1280         if ((swkey->eth.type == htons(ETH_P_IP) ||
1281              swkey->eth.type == htons(ETH_P_IPV6)) &&
1282              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1283
1284                 if (swkey->ip.proto == IPPROTO_TCP) {
1285                         struct ovs_key_tcp *tcp_key;
1286
1287                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1288                         if (!nla)
1289                                 goto nla_put_failure;
1290                         tcp_key = nla_data(nla);
1291                         tcp_key->tcp_src = output->tp.src;
1292                         tcp_key->tcp_dst = output->tp.dst;
1293                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1294                                          output->tp.flags))
1295                                 goto nla_put_failure;
1296                 } else if (swkey->ip.proto == IPPROTO_UDP) {
1297                         struct ovs_key_udp *udp_key;
1298
1299                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1300                         if (!nla)
1301                                 goto nla_put_failure;
1302                         udp_key = nla_data(nla);
1303                         udp_key->udp_src = output->tp.src;
1304                         udp_key->udp_dst = output->tp.dst;
1305                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1306                         struct ovs_key_sctp *sctp_key;
1307
1308                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1309                         if (!nla)
1310                                 goto nla_put_failure;
1311                         sctp_key = nla_data(nla);
1312                         sctp_key->sctp_src = output->tp.src;
1313                         sctp_key->sctp_dst = output->tp.dst;
1314                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1315                            swkey->ip.proto == IPPROTO_ICMP) {
1316                         struct ovs_key_icmp *icmp_key;
1317
1318                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1319                         if (!nla)
1320                                 goto nla_put_failure;
1321                         icmp_key = nla_data(nla);
1322                         icmp_key->icmp_type = ntohs(output->tp.src);
1323                         icmp_key->icmp_code = ntohs(output->tp.dst);
1324                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1325                            swkey->ip.proto == IPPROTO_ICMPV6) {
1326                         struct ovs_key_icmpv6 *icmpv6_key;
1327
1328                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1329                                                 sizeof(*icmpv6_key));
1330                         if (!nla)
1331                                 goto nla_put_failure;
1332                         icmpv6_key = nla_data(nla);
1333                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
1334                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
1335
1336                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1337                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1338                                 struct ovs_key_nd *nd_key;
1339
1340                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1341                                 if (!nla)
1342                                         goto nla_put_failure;
1343                                 nd_key = nla_data(nla);
1344                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1345                                                         sizeof(nd_key->nd_target));
1346                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
1347                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
1348                         }
1349                 }
1350         }
1351
1352 unencap:
1353         if (encap)
1354                 nla_nest_end(skb, encap);
1355
1356         return 0;
1357
1358 nla_put_failure:
1359         return -EMSGSIZE;
1360 }
1361
1362 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1363
1364 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
1365 {
1366         struct sw_flow_actions *sfa;
1367
1368         if (size > MAX_ACTIONS_BUFSIZE) {
1369                 OVS_NLERR(log, "Flow action size (%u bytes) exceeds maximum "
1370                           "(%u bytes)", size, MAX_ACTIONS_BUFSIZE);
1371                 return ERR_PTR(-EINVAL);
1372         }
1373
1374         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1375         if (!sfa)
1376                 return ERR_PTR(-ENOMEM);
1377
1378         sfa->actions_len = 0;
1379         return sfa;
1380 }
1381
1382 /* RCU callback used by ovs_nla_free_flow_actions. */
1383 static void rcu_free_acts_callback(struct rcu_head *rcu)
1384 {
1385         struct sw_flow_actions *sf_acts = container_of(rcu,
1386                         struct sw_flow_actions, rcu);
1387         kfree(sf_acts);
1388 }
1389
1390 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
1391  * The caller must hold rcu_read_lock for this to be sensible. */
1392 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1393 {
1394         call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
1395 }
1396
1397 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1398                                        int attr_len, bool log)
1399 {
1400
1401         struct sw_flow_actions *acts;
1402         int new_acts_size;
1403         int req_size = NLA_ALIGN(attr_len);
1404         int next_offset = offsetof(struct sw_flow_actions, actions) +
1405                                         (*sfa)->actions_len;
1406
1407         if (req_size <= (ksize(*sfa) - next_offset))
1408                 goto out;
1409
1410         new_acts_size = ksize(*sfa) * 2;
1411
1412         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1413                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1414                         return ERR_PTR(-EMSGSIZE);
1415                 new_acts_size = MAX_ACTIONS_BUFSIZE;
1416         }
1417
1418         acts = nla_alloc_flow_actions(new_acts_size, log);
1419         if (IS_ERR(acts))
1420                 return (void *)acts;
1421
1422         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1423         acts->actions_len = (*sfa)->actions_len;
1424         kfree(*sfa);
1425         *sfa = acts;
1426
1427 out:
1428         (*sfa)->actions_len += req_size;
1429         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
1430 }
1431
1432 static struct nlattr *__add_action(struct sw_flow_actions **sfa, int attrtype,
1433                                    void *data, int len, bool log)
1434 {
1435         struct nlattr *a;
1436
1437         a = reserve_sfa_size(sfa, nla_attr_size(len), log);
1438         if (IS_ERR(a))
1439                 return a;
1440
1441         a->nla_type = attrtype;
1442         a->nla_len = nla_attr_size(len);
1443
1444         if (data)
1445                 memcpy(nla_data(a), data, len);
1446         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1447
1448         return a;
1449 }
1450
1451 static int add_action(struct sw_flow_actions **sfa, int attrtype,
1452                       void *data, int len, bool log)
1453 {
1454         struct nlattr *a;
1455
1456         a = __add_action(sfa, attrtype, data, len, log);
1457         if (IS_ERR(a))
1458                 return PTR_ERR(a);
1459
1460         return 0;
1461 }
1462
1463 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1464                                           int attrtype, bool log)
1465 {
1466         int used = (*sfa)->actions_len;
1467         int err;
1468
1469         err = add_action(sfa, attrtype, NULL, 0, log);
1470         if (err)
1471                 return err;
1472
1473         return used;
1474 }
1475
1476 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1477                                          int st_offset)
1478 {
1479         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1480                                                                st_offset);
1481
1482         a->nla_len = sfa->actions_len - st_offset;
1483 }
1484
1485 static int __ovs_nla_copy_actions(const struct nlattr *attr,
1486                                   const struct sw_flow_key *key,
1487                                   int depth, struct sw_flow_actions **sfa,
1488                                   __be16 eth_type, __be16 vlan_tci, bool log);
1489
1490 static int validate_and_copy_sample(const struct nlattr *attr,
1491                                     const struct sw_flow_key *key, int depth,
1492                                     struct sw_flow_actions **sfa,
1493                                     __be16 eth_type, __be16 vlan_tci, bool log)
1494 {
1495         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1496         const struct nlattr *probability, *actions;
1497         const struct nlattr *a;
1498         int rem, start, err, st_acts;
1499
1500         memset(attrs, 0, sizeof(attrs));
1501         nla_for_each_nested(a, attr, rem) {
1502                 int type = nla_type(a);
1503                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1504                         return -EINVAL;
1505                 attrs[type] = a;
1506         }
1507         if (rem)
1508                 return -EINVAL;
1509
1510         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1511         if (!probability || nla_len(probability) != sizeof(u32))
1512                 return -EINVAL;
1513
1514         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1515         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1516                 return -EINVAL;
1517
1518         /* validation done, copy sample action. */
1519         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
1520         if (start < 0)
1521                 return start;
1522         err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1523                          nla_data(probability), sizeof(u32), log);
1524         if (err)
1525                 return err;
1526         st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
1527         if (st_acts < 0)
1528                 return st_acts;
1529
1530         err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
1531                                      eth_type, vlan_tci, log);
1532         if (err)
1533                 return err;
1534
1535         add_nested_action_end(*sfa, st_acts);
1536         add_nested_action_end(*sfa, start);
1537
1538         return 0;
1539 }
1540
1541 static int validate_tp_port(const struct sw_flow_key *flow_key,
1542                             __be16 eth_type)
1543 {
1544         if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
1545             (flow_key->tp.src || flow_key->tp.dst))
1546                 return 0;
1547
1548         return -EINVAL;
1549 }
1550
1551 void ovs_match_init(struct sw_flow_match *match,
1552                     struct sw_flow_key *key,
1553                     struct sw_flow_mask *mask)
1554 {
1555         memset(match, 0, sizeof(*match));
1556         match->key = key;
1557         match->mask = mask;
1558
1559         memset(key, 0, sizeof(*key));
1560
1561         if (mask) {
1562                 memset(&mask->key, 0, sizeof(mask->key));
1563                 mask->range.start = mask->range.end = 0;
1564         }
1565 }
1566
1567 static int validate_and_copy_set_tun(const struct nlattr *attr,
1568                                      struct sw_flow_actions **sfa, bool log)
1569 {
1570         struct sw_flow_match match;
1571         struct sw_flow_key key;
1572         struct ovs_tunnel_info *tun_info;
1573         struct nlattr *a;
1574         int err, start;
1575
1576         ovs_match_init(&match, &key, NULL);
1577         err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
1578         if (err)
1579                 return err;
1580
1581         if (key.tun_opts_len) {
1582                 struct geneve_opt *option = GENEVE_OPTS(&key,
1583                                                         key.tun_opts_len);
1584                 int opts_len = key.tun_opts_len;
1585                 bool crit_opt = false;
1586
1587                 while (opts_len > 0) {
1588                         int len;
1589
1590                         if (opts_len < sizeof(*option))
1591                                 return -EINVAL;
1592
1593                         len = sizeof(*option) + option->length * 4;
1594                         if (len > opts_len)
1595                                 return -EINVAL;
1596
1597                         crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
1598
1599                         option = (struct geneve_opt *)((u8 *)option + len);
1600                         opts_len -= len;
1601                 };
1602
1603                 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
1604         };
1605
1606         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
1607         if (start < 0)
1608                 return start;
1609
1610         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1611                          sizeof(*tun_info) + key.tun_opts_len, log);
1612         if (IS_ERR(a))
1613                 return PTR_ERR(a);
1614
1615         tun_info = nla_data(a);
1616         tun_info->tunnel = key.tun_key;
1617         tun_info->options_len = key.tun_opts_len;
1618
1619         if (tun_info->options_len) {
1620                 /* We need to store the options in the action itself since
1621                  * everything else will go away after flow setup. We can append
1622                  * it to tun_info and then point there.
1623                  */
1624                 memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
1625                         key.tun_opts_len);
1626                 tun_info->options = (struct geneve_opt *)(tun_info + 1);
1627         } else {
1628                 tun_info->options = NULL;
1629         }
1630
1631         add_nested_action_end(*sfa, start);
1632
1633         return err;
1634 }
1635
1636 static int validate_set(const struct nlattr *a,
1637                         const struct sw_flow_key *flow_key,
1638                         struct sw_flow_actions **sfa,
1639                         bool *set_tun, __be16 eth_type, bool log)
1640 {
1641         const struct nlattr *ovs_key = nla_data(a);
1642         int key_type = nla_type(ovs_key);
1643
1644         /* There can be only one key in a action */
1645         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1646                 return -EINVAL;
1647
1648         if (key_type > OVS_KEY_ATTR_MAX ||
1649             (ovs_key_lens[key_type] != nla_len(ovs_key) &&
1650              ovs_key_lens[key_type] != -1))
1651                 return -EINVAL;
1652
1653         switch (key_type) {
1654         const struct ovs_key_ipv4 *ipv4_key;
1655         const struct ovs_key_ipv6 *ipv6_key;
1656         int err;
1657
1658         case OVS_KEY_ATTR_PRIORITY:
1659         case OVS_KEY_ATTR_SKB_MARK:
1660         case OVS_KEY_ATTR_ETHERNET:
1661                 break;
1662
1663         case OVS_KEY_ATTR_TUNNEL:
1664                 *set_tun = true;
1665                 err = validate_and_copy_set_tun(a, sfa, log);
1666                 if (err)
1667                         return err;
1668                 break;
1669
1670         case OVS_KEY_ATTR_IPV4:
1671                 if (eth_type != htons(ETH_P_IP))
1672                         return -EINVAL;
1673
1674                 if (!flow_key->ip.proto)
1675                         return -EINVAL;
1676
1677                 ipv4_key = nla_data(ovs_key);
1678                 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1679                         return -EINVAL;
1680
1681                 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1682                         return -EINVAL;
1683
1684                 break;
1685
1686         case OVS_KEY_ATTR_IPV6:
1687                 if (eth_type != htons(ETH_P_IPV6))
1688                         return -EINVAL;
1689
1690                 if (!flow_key->ip.proto)
1691                         return -EINVAL;
1692
1693                 ipv6_key = nla_data(ovs_key);
1694                 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1695                         return -EINVAL;
1696
1697                 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1698                         return -EINVAL;
1699
1700                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1701                         return -EINVAL;
1702
1703                 break;
1704
1705         case OVS_KEY_ATTR_TCP:
1706                 if (flow_key->ip.proto != IPPROTO_TCP)
1707                         return -EINVAL;
1708
1709                 return validate_tp_port(flow_key, eth_type);
1710
1711         case OVS_KEY_ATTR_UDP:
1712                 if (flow_key->ip.proto != IPPROTO_UDP)
1713                         return -EINVAL;
1714
1715                 return validate_tp_port(flow_key, eth_type);
1716
1717         case OVS_KEY_ATTR_MPLS:
1718                 if (!eth_p_mpls(eth_type))
1719                         return -EINVAL;
1720                 break;
1721
1722         case OVS_KEY_ATTR_SCTP:
1723                 if (flow_key->ip.proto != IPPROTO_SCTP)
1724                         return -EINVAL;
1725
1726                 return validate_tp_port(flow_key, eth_type);
1727
1728         default:
1729                 return -EINVAL;
1730         }
1731
1732         return 0;
1733 }
1734
1735 static int validate_userspace(const struct nlattr *attr)
1736 {
1737         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1738                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1739                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1740                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
1741         };
1742         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1743         int error;
1744
1745         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1746                                  attr, userspace_policy);
1747         if (error)
1748                 return error;
1749
1750         if (!a[OVS_USERSPACE_ATTR_PID] ||
1751             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1752                 return -EINVAL;
1753
1754         return 0;
1755 }
1756
1757 static int copy_action(const struct nlattr *from,
1758                        struct sw_flow_actions **sfa, bool log)
1759 {
1760         int totlen = NLA_ALIGN(from->nla_len);
1761         struct nlattr *to;
1762
1763         to = reserve_sfa_size(sfa, from->nla_len, log);
1764         if (IS_ERR(to))
1765                 return PTR_ERR(to);
1766
1767         memcpy(to, from, totlen);
1768         return 0;
1769 }
1770
1771 static int __ovs_nla_copy_actions(const struct nlattr *attr,
1772                                   const struct sw_flow_key *key,
1773                                   int depth, struct sw_flow_actions **sfa,
1774                                   __be16 eth_type, __be16 vlan_tci, bool log)
1775 {
1776         const struct nlattr *a;
1777         int rem, err;
1778
1779         if (depth >= SAMPLE_ACTION_DEPTH)
1780                 return -EOVERFLOW;
1781
1782         nla_for_each_nested(a, attr, rem) {
1783                 /* Expected argument lengths, (u32)-1 for variable length. */
1784                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
1785                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1786                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
1787                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1788                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
1789                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
1790                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1791                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
1792                         [OVS_ACTION_ATTR_SET] = (u32)-1,
1793                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
1794                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
1795                 };
1796                 const struct ovs_action_push_vlan *vlan;
1797                 int type = nla_type(a);
1798                 bool skip_copy;
1799
1800                 if (type > OVS_ACTION_ATTR_MAX ||
1801                     (action_lens[type] != nla_len(a) &&
1802                      action_lens[type] != (u32)-1))
1803                         return -EINVAL;
1804
1805                 skip_copy = false;
1806                 switch (type) {
1807                 case OVS_ACTION_ATTR_UNSPEC:
1808                         return -EINVAL;
1809
1810                 case OVS_ACTION_ATTR_USERSPACE:
1811                         err = validate_userspace(a);
1812                         if (err)
1813                                 return err;
1814                         break;
1815
1816                 case OVS_ACTION_ATTR_OUTPUT:
1817                         if (nla_get_u32(a) >= DP_MAX_PORTS)
1818                                 return -EINVAL;
1819                         break;
1820
1821                 case OVS_ACTION_ATTR_HASH: {
1822                         const struct ovs_action_hash *act_hash = nla_data(a);
1823
1824                         switch (act_hash->hash_alg) {
1825                         case OVS_HASH_ALG_L4:
1826                                 break;
1827                         default:
1828                                 return  -EINVAL;
1829                         }
1830
1831                         break;
1832                 }
1833
1834                 case OVS_ACTION_ATTR_POP_VLAN:
1835                         vlan_tci = htons(0);
1836                         break;
1837
1838                 case OVS_ACTION_ATTR_PUSH_VLAN:
1839                         vlan = nla_data(a);
1840                         if (vlan->vlan_tpid != htons(ETH_P_8021Q))
1841                                 return -EINVAL;
1842                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1843                                 return -EINVAL;
1844                         vlan_tci = vlan->vlan_tci;
1845                         break;
1846
1847                 case OVS_ACTION_ATTR_RECIRC:
1848                         break;
1849
1850                 case OVS_ACTION_ATTR_PUSH_MPLS: {
1851                         const struct ovs_action_push_mpls *mpls = nla_data(a);
1852
1853                         if (!eth_p_mpls(mpls->mpls_ethertype))
1854                                 return -EINVAL;
1855                         /* Prohibit push MPLS other than to a white list
1856                          * for packets that have a known tag order.
1857                          */
1858                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1859                             (eth_type != htons(ETH_P_IP) &&
1860                              eth_type != htons(ETH_P_IPV6) &&
1861                              eth_type != htons(ETH_P_ARP) &&
1862                              eth_type != htons(ETH_P_RARP) &&
1863                              !eth_p_mpls(eth_type)))
1864                                 return -EINVAL;
1865                         eth_type = mpls->mpls_ethertype;
1866                         break;
1867                 }
1868
1869                 case OVS_ACTION_ATTR_POP_MPLS:
1870                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1871                             !eth_p_mpls(eth_type))
1872                                 return -EINVAL;
1873
1874                         /* Disallow subsequent L2.5+ set and mpls_pop actions
1875                          * as there is no check here to ensure that the new
1876                          * eth_type is valid and thus set actions could
1877                          * write off the end of the packet or otherwise
1878                          * corrupt it.
1879                          *
1880                          * Support for these actions is planned using packet
1881                          * recirculation.
1882                          */
1883                         eth_type = htons(0);
1884                         break;
1885
1886                 case OVS_ACTION_ATTR_SET:
1887                         err = validate_set(a, key, sfa, &skip_copy, eth_type,
1888                                            log);
1889                         if (err)
1890                                 return err;
1891                         break;
1892
1893                 case OVS_ACTION_ATTR_SAMPLE:
1894                         err = validate_and_copy_sample(a, key, depth, sfa,
1895                                                        eth_type, vlan_tci, log);
1896                         if (err)
1897                                 return err;
1898                         skip_copy = true;
1899                         break;
1900
1901                 default:
1902                         return -EINVAL;
1903                 }
1904                 if (!skip_copy) {
1905                         err = copy_action(a, sfa, log);
1906                         if (err)
1907                                 return err;
1908                 }
1909         }
1910
1911         if (rem > 0)
1912                 return -EINVAL;
1913
1914         return 0;
1915 }
1916
1917 int ovs_nla_copy_actions(const struct nlattr *attr,
1918                          const struct sw_flow_key *key,
1919                          struct sw_flow_actions **sfa, bool log)
1920 {
1921         int err;
1922
1923         *sfa = nla_alloc_flow_actions(nla_len(attr), log);
1924         if (IS_ERR(*sfa))
1925                 return PTR_ERR(*sfa);
1926
1927         err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
1928                                      key->eth.tci, log);
1929         if (err)
1930                 kfree(*sfa);
1931
1932         return err;
1933 }
1934
1935 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1936 {
1937         const struct nlattr *a;
1938         struct nlattr *start;
1939         int err = 0, rem;
1940
1941         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1942         if (!start)
1943                 return -EMSGSIZE;
1944
1945         nla_for_each_nested(a, attr, rem) {
1946                 int type = nla_type(a);
1947                 struct nlattr *st_sample;
1948
1949                 switch (type) {
1950                 case OVS_SAMPLE_ATTR_PROBABILITY:
1951                         if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
1952                                     sizeof(u32), nla_data(a)))
1953                                 return -EMSGSIZE;
1954                         break;
1955                 case OVS_SAMPLE_ATTR_ACTIONS:
1956                         st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1957                         if (!st_sample)
1958                                 return -EMSGSIZE;
1959                         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
1960                         if (err)
1961                                 return err;
1962                         nla_nest_end(skb, st_sample);
1963                         break;
1964                 }
1965         }
1966
1967         nla_nest_end(skb, start);
1968         return err;
1969 }
1970
1971 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1972 {
1973         const struct nlattr *ovs_key = nla_data(a);
1974         int key_type = nla_type(ovs_key);
1975         struct nlattr *start;
1976         int err;
1977
1978         switch (key_type) {
1979         case OVS_KEY_ATTR_TUNNEL_INFO: {
1980                 struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
1981
1982                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1983                 if (!start)
1984                         return -EMSGSIZE;
1985
1986                 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
1987                                          tun_info->options_len ?
1988                                                 tun_info->options : NULL,
1989                                          tun_info->options_len);
1990                 if (err)
1991                         return err;
1992                 nla_nest_end(skb, start);
1993                 break;
1994         }
1995         default:
1996                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1997                         return -EMSGSIZE;
1998                 break;
1999         }
2000
2001         return 0;
2002 }
2003
2004 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
2005 {
2006         const struct nlattr *a;
2007         int rem, err;
2008
2009         nla_for_each_attr(a, attr, len, rem) {
2010                 int type = nla_type(a);
2011
2012                 switch (type) {
2013                 case OVS_ACTION_ATTR_SET:
2014                         err = set_action_to_attr(a, skb);
2015                         if (err)
2016                                 return err;
2017                         break;
2018
2019                 case OVS_ACTION_ATTR_SAMPLE:
2020                         err = sample_action_to_attr(a, skb);
2021                         if (err)
2022                                 return err;
2023                         break;
2024                 default:
2025                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
2026                                 return -EMSGSIZE;
2027                         break;
2028                 }
2029         }
2030
2031         return 0;
2032 }