2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
26 #include "odp-netlink.h"
28 #include "ovs-thread.h"
31 #include "tun-metadata.h"
/* One TUN_METADATA<i> slot of a struct tun_table: records which Geneve option
 * (class and type) the slot is mapped to and the location assigned to it. */
33 struct tun_meta_entry {
34 struct hmap_node node; /* In struct tun_table's key_hmap. */
35 uint32_t key; /* (class << 8) | type, as built by tun_meta_key(). */
36 struct tun_metadata_loc loc; /* Total length plus chain of (offset, len) chunks. */
37 bool valid; /* True if allocated to a class and type. */
40 /* Maps from Geneve option class+type to positions in a struct tun_metadata's
43 /* TUN_METADATA<i> is stored in element <i>. */
44 struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
46 /* Each bit represents 4 bytes of space, 0-bits are free space. */
47 unsigned long alloc_map[BITMAP_N_LONGS(TUN_METADATA_TOT_OPT_SIZE / 4)];
49 /* The valid elements in entries[], indexed by class+type. */
52 BUILD_ASSERT_DECL(TUN_METADATA_TOT_OPT_SIZE % 4 == 0);
54 static struct ovs_mutex tab_mutex = OVS_MUTEX_INITIALIZER;
55 static OVSRCU_TYPE(struct tun_table *) metadata_tab;
57 static enum ofperr tun_metadata_add_entry(struct tun_table *map, uint8_t idx,
58 uint16_t opt_class, uint8_t type,
59 uint8_t len) OVS_REQUIRES(tab_mutex);
60 static void tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
61 OVS_REQUIRES(tab_mutex);
62 static void memcpy_to_metadata(struct tun_metadata *dst, const void *src,
63 const struct tun_metadata_loc *);
64 static void memcpy_from_metadata(void *dst, const struct tun_metadata *src,
65 const struct tun_metadata_loc *);
68 tun_meta_key(ovs_be16 class, uint8_t type)
/* Lookup key layout: the 16-bit class (kept in its stored byte order, hence
 * the OVS_FORCE cast) sits above the low byte, the 8-bit type is the low
 * byte. */
70 return (OVS_FORCE uint16_t)class << 8 | type;
74 tun_key_class(uint32_t key)
/* Inverse of the class half of tun_meta_key(): drop the low 8 type bits and
 * reinterpret what is left as the big-endian option class. */
76 return (OVS_FORCE ovs_be16)(key >> 8);
80 tun_key_type(uint32_t key)
85 /* Returns a newly allocated tun_table. If 'old_map' is nonnull then the new
86 * tun_table is a deep copy of the old one. */
87 static struct tun_table *
88 table_alloc(const struct tun_table *old_map) OVS_REQUIRES(tab_mutex)
90 struct tun_table *new_map;
92 new_map = xzalloc(sizeof *new_map);
95 struct tun_meta_entry *entry;
98 hmap_init(&new_map->key_hmap);
100 HMAP_FOR_EACH (entry, node, &old_map->key_hmap) {
101 struct tun_meta_entry *new_entry;
102 struct tun_metadata_loc_chain *chain;
104 new_entry = &new_map->entries[entry - old_map->entries];
105 hmap_insert(&new_map->key_hmap, &new_entry->node, entry->node.hash);
107 chain = &new_entry->loc.c;
108 while (chain->next) {
109 chain->next = xmemdup(chain->next, sizeof *chain->next);
114 hmap_init(&new_map->key_hmap);
120 /* Frees 'map' and all the memory it owns. */
122 table_free(struct tun_table *map) OVS_REQUIRES(tab_mutex)
124 struct tun_meta_entry *entry;
130 HMAP_FOR_EACH (entry, node, &map->key_hmap) {
131 tun_metadata_del_entry(map, entry - map->entries);
137 /* Creates a global tunnel metadata mapping table, if none already exists. */
139 tun_metadata_init(void)
141 ovs_mutex_lock(&tab_mutex);
143 if (!ovsrcu_get_protected(struct tun_table *, &metadata_tab)) {
144 ovsrcu_set(&metadata_tab, table_alloc(NULL));
147 ovs_mutex_unlock(&tab_mutex);
151 tun_metadata_table_mod(struct ofputil_geneve_table_mod *gtm)
/* Applies the table modification described by 'gtm' (selected by
 * 'gtm->command', using the list 'gtm->mappings') to the global tunnel
 * metadata table.  The change is built in a fresh table and then swapped in,
 * all while holding 'tab_mutex'. */
153 struct tun_table *old_map, *new_map;
154 struct ofputil_geneve_map *ofp_map;
157 ovs_mutex_lock(&tab_mutex);
/* 'tab_mutex' is held from here on, so the protected read is used. */
159 old_map = ovsrcu_get_protected(struct tun_table *, &metadata_tab);
161 switch (gtm->command) {
/* Start from a deep copy of the current table and add each requested
 * mapping to the copy. */
163 new_map = table_alloc(old_map);
165 LIST_FOR_EACH (ofp_map, list_node, &gtm->mappings) {
166 err = tun_metadata_add_entry(new_map, ofp_map->index,
167 ofp_map->option_class,
168 ofp_map->option_type,
169 ofp_map->option_len);
/* Start from a deep copy of the current table and drop each listed index. */
178 new_map = table_alloc(old_map);
180 LIST_FOR_EACH (ofp_map, list_node, &gtm->mappings) {
181 tun_metadata_del_entry(new_map, ofp_map->index);
/* A table allocated from NULL starts with no mappings. */
186 new_map = table_alloc(NULL);
/* Publish the new table; freeing the old one is deferred through
 * ovsrcu_postpone() rather than done inline. */
193 ovsrcu_set(&metadata_tab, new_map);
194 ovsrcu_postpone(table_free, old_map);
197 ovs_mutex_unlock(&tab_mutex);
202 tun_metadata_table_request(struct ofputil_geneve_table_reply *gtr)
/* Fills in 'gtr' from the global tunnel metadata table: the fixed limits
 * plus a list of struct ofputil_geneve_map elements, each allocated with
 * xmalloc() and appended to 'gtr->mappings'. */
204 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
207 gtr->max_option_space = TUN_METADATA_TOT_OPT_SIZE;
208 gtr->max_fields = TUN_METADATA_NUM_OPTS;
209 list_init(&gtr->mappings);
211 for (i = 0; i < TUN_METADATA_NUM_OPTS; i++) {
212 struct tun_meta_entry *entry = &map->entries[i];
/* NOTE(review): this inner 'map' shadows the tun_table pointer 'map'
 * declared at the top of the function; 'entry' on the line above is
 * initialized from the outer one.  Consider renaming the inner variable. */
213 struct ofputil_geneve_map *map;
219 map = xmalloc(sizeof *map);
/* The key keeps the class in big-endian form, so convert it to host order
 * for the reply. */
220 map->option_class = ntohs(tun_key_class(entry->key));
221 map->option_type = tun_key_type(entry->key);
222 map->option_len = entry->loc.len;
225 list_push_back(&gtr->mappings, &map->list_node);
229 /* Copies the value of field 'mf' from 'metadata' into 'value'.
231 * 'mf' must be an MFF_TUN_METADATA* field.
233 * This uses the global tunnel metadata mapping table created by
234 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
235 * been allocated in it yet, this just zeros 'value'. */
237 tun_metadata_read(const struct tun_metadata *metadata,
238 const struct mf_field *mf, union mf_value *value)
240 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
241 unsigned int idx = mf->id - MFF_TUN_METADATA0;
242 struct tun_metadata_loc *loc;
245 memset(value->tun_metadata, 0, mf->n_bytes);
249 loc = &map->entries[idx].loc;
251 memset(value->tun_metadata, 0, mf->n_bytes - loc->len);
252 memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len,
256 /* Copies 'value' into field 'mf' in 'metadata'.
258 * 'mf' must be an MFF_TUN_METADATA* field.
260 * This uses the global tunnel metadata mapping table created by
261 * tun_metadata_init(). If no such table has been created or if 'mf' hasn't
262 * been allocated in it yet, this function does nothing. */
264 tun_metadata_write(struct tun_metadata *metadata,
265 const struct mf_field *mf, const union mf_value *value)
267 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
268 unsigned int idx = mf->id - MFF_TUN_METADATA0;
269 struct tun_metadata_loc *loc;
271 if (!map || !map->entries[idx].valid) {
275 loc = &map->entries[idx].loc;
277 ULLONG_SET1(metadata->opt_map, idx);
278 memcpy_to_metadata(metadata, value->tun_metadata + mf->n_bytes - loc->len,
/* Returns the location to use for TUN_METADATA<idx> when building 'match'.
 *
 * When the entry for 'idx' in 'map' is valid, the global table's location is
 * returned.  Otherwise a location is carved out of the per-match table in
 * 'match->tun_md': the next 'field_len' bytes starting at
 * 'match->tun_md.alloc_offset' are assigned to 'idx' as a single contiguous
 * chunk, and the per-match table is marked valid.
 *
 * NOTE(review): the statements guarded by the "no room / already assigned"
 * test below are not visible in this view; presumably they return NULL --
 * confirm against the full function. */
282 static const struct tun_metadata_loc *
283 metadata_loc_from_match(struct tun_table *map, struct match *match,
284 unsigned int idx, unsigned int field_len)
286 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
289 if (map->entries[idx].valid) {
290 return &map->entries[idx].loc;
/* The field would occupy bytes [alloc_offset, alloc_offset + field_len), so
 * it fits as long as its end does not exceed the total option space.  An
 * allocation that ends exactly at the limit is allowed. */
296 if (match->tun_md.alloc_offset + field_len > TUN_METADATA_TOT_OPT_SIZE ||
297 match->tun_md.loc[idx].len) {
301 match->tun_md.loc[idx].len = field_len;
302 match->tun_md.loc[idx].c.offset = match->tun_md.alloc_offset;
303 match->tun_md.loc[idx].c.len = field_len;
304 match->tun_md.loc[idx].c.next = NULL;
305 match->tun_md.alloc_offset += field_len;
306 match->tun_md.valid = true;
308 return &match->tun_md.loc[idx];
311 /* Makes 'match' match 'value'/'mask' on field 'mf'.
313 * 'mf' must be an MFF_TUN_METADATA* field.
315 * If there is a global tunnel metadata mapping table, this function is
316 * effective only if there is already a mapping for 'mf'. Otherwise, the
317 * metadata mapping table integrated into 'match' is used, adding 'mf' to its
318 * mapping table if it isn't already mapped (and if there is room). If 'mf'
319 * isn't or can't be mapped, this function returns without modifying 'match'.
321 * 'value' may be NULL; if so, then 'mf' is made to match on an all-zeros
324 * 'mask' may be NULL; if so, then 'mf' is made exact-match.
327 tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value,
328 const union mf_value *mask, struct match *match)
330 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
331 const struct tun_metadata_loc *loc;
332 unsigned int idx = mf->id - MFF_TUN_METADATA0;
333 unsigned int field_len;
334 unsigned int data_offset;
337 field_len = mf_field_len(mf, value, mask);
338 loc = metadata_loc_from_match(map, match, idx, field_len);
343 data_offset = mf->n_bytes - loc->len;
346 memset(data.tun_metadata, 0, loc->len);
348 memcpy(data.tun_metadata, value->tun_metadata + data_offset, loc->len);
351 for (i = 0; i < loc->len; i++) {
352 data.tun_metadata[i] = value->tun_metadata[data_offset + i] &
353 mask->tun_metadata[data_offset + i];
356 ULLONG_SET1(match->flow.tunnel.metadata.opt_map, idx);
357 memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata, loc);
360 memset(data.tun_metadata, 0, loc->len);
362 memset(data.tun_metadata, 0xff, loc->len);
364 memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len);
366 ULLONG_SET1(match->wc.masks.tunnel.metadata.opt_map, idx);
367 memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata, loc);
370 /* Copies all MFF_TUN_METADATA* fields from 'metadata' to 'flow_metadata'. */
372 tun_metadata_get_fmd(const struct tun_metadata *metadata,
373 struct match *flow_metadata)
375 struct tun_table *map;
380 map = ovsrcu_get(struct tun_table *, &metadata_tab);
383 ULLONG_FOR_EACH_1 (i, metadata->opt_map) {
385 const struct tun_metadata_loc *old_loc = &map->entries[i].loc;
386 const struct tun_metadata_loc *new_loc;
388 new_loc = metadata_loc_from_match(NULL, flow_metadata, i, old_loc->len);
390 memcpy_from_metadata(opts.tun_metadata, metadata, old_loc);
391 memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata,
392 opts.tun_metadata, new_loc);
394 memset(opts.tun_metadata, 0xff, old_loc->len);
395 memcpy_to_metadata(&flow_metadata->wc.masks.tunnel.metadata,
396 opts.tun_metadata, new_loc);
401 tun_meta_hash(uint32_t key)
403 return hash_int(key, 0);
406 static struct tun_meta_entry *
407 tun_meta_find_key(const struct hmap *hmap, uint32_t key)
409 struct tun_meta_entry *entry;
411 HMAP_FOR_EACH_IN_BUCKET (entry, node, tun_meta_hash(key), hmap) {
412 if (entry->key == key) {
420 memcpy_to_metadata(struct tun_metadata *dst, const void *src,
421 const struct tun_metadata_loc *loc)
423 const struct tun_metadata_loc_chain *chain = &loc->c;
/* Copies from the contiguous buffer 'src' into the option space of 'dst' at
 * the place described by 'loc'.  'addr' is the position within 'src'.
 *
 * Each chunk of the chain records its own offset, so the destination must be
 * taken from the current chunk.  Basing it on the first chunk's offset plus
 * 'addr' is only right when the location has a single chunk; the two forms
 * are identical in that case. */
427 memcpy(dst->opts + chain->offset, (uint8_t *)src + addr,
435 memcpy_from_metadata(void *dst, const struct tun_metadata *src,
436 const struct tun_metadata_loc *loc)
438 const struct tun_metadata_loc_chain *chain = &loc->c;
/* Copies out of the option space of 'src', from the place described by
 * 'loc', into the contiguous buffer 'dst'.  'addr' is the position within
 * 'dst'.
 *
 * Each chunk of the chain records its own offset, so the source must be
 * taken from the current chunk.  Basing it on the first chunk's offset plus
 * 'addr' is only right when the location has a single chunk; the two forms
 * are identical in that case. */
442 memcpy((uint8_t *)dst + addr, src->opts + chain->offset,
450 tun_metadata_alloc_chain(struct tun_table *map, uint8_t len,
451 struct tun_metadata_loc_chain *loc)
452 OVS_REQUIRES(tab_mutex)
454 int alloc_len = len / 4;
456 int scan_end = TUN_METADATA_TOT_OPT_SIZE / 4;
457 int pos_start, pos_end, pos_len;
458 int best_start = 0, best_len = 0;
461 pos_start = bitmap_scan(map->alloc_map, 0, scan_start, scan_end);
462 if (pos_start == scan_end) {
466 pos_end = bitmap_scan(map->alloc_map, 1, pos_start,
467 MIN(pos_start + alloc_len, scan_end));
468 pos_len = pos_end - pos_start;
469 if (pos_len == alloc_len) {
473 if (pos_len > best_len) {
474 best_start = pos_start;
477 scan_start = pos_end + 1;
484 pos_start = best_start;
488 bitmap_set_multiple(map->alloc_map, pos_start, pos_len, 1);
489 loc->offset = pos_start * 4;
490 loc->len = pos_len * 4;
496 tun_metadata_add_entry(struct tun_table *map, uint8_t idx, uint16_t opt_class,
497 uint8_t type, uint8_t len) OVS_REQUIRES(tab_mutex)
499 struct tun_meta_entry *entry;
500 struct tun_metadata_loc_chain *cur_chain, *prev_chain;
502 ovs_assert(idx < TUN_METADATA_NUM_OPTS);
504 entry = &map->entries[idx];
506 return OFPERR_NXGTMFC_ALREADY_MAPPED;
509 entry->key = tun_meta_key(htons(opt_class), type);
510 if (tun_meta_find_key(&map->key_hmap, entry->key)) {
511 return OFPERR_NXGTMFC_DUP_ENTRY;
515 hmap_insert(&map->key_hmap, &entry->node,
516 tun_meta_hash(entry->key));
518 entry->loc.len = len;
519 cur_chain = &entry->loc.c;
520 memset(cur_chain, 0, sizeof *cur_chain);
527 cur_chain = xzalloc(sizeof *cur_chain);
530 err = tun_metadata_alloc_chain(map, len, cur_chain);
532 tun_metadata_del_entry(map, idx);
533 return OFPERR_NXGTMFC_TABLE_FULL;
536 len -= cur_chain->len;
539 prev_chain->next = cur_chain;
541 prev_chain = cur_chain;
549 tun_metadata_del_entry(struct tun_table *map, uint8_t idx)
550 OVS_REQUIRES(tab_mutex)
552 struct tun_meta_entry *entry;
553 struct tun_metadata_loc_chain *chain;
555 if (idx >= TUN_METADATA_NUM_OPTS) {
559 entry = &map->entries[idx];
564 chain = &entry->loc.c;
566 struct tun_metadata_loc_chain *next = chain->next;
568 bitmap_set_multiple(map->alloc_map, chain->offset / 4,
570 if (chain != &entry->loc.c) {
576 entry->valid = false;
577 hmap_remove(&map->key_hmap, &entry->node);
578 memset(&entry->loc, 0, sizeof entry->loc);
582 tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt,
583 const struct geneve_opt *flow_opt, int opts_len,
584 struct tun_metadata *metadata)
590 while (opts_len > 0) {
592 struct tun_meta_entry *entry;
594 if (opts_len < sizeof(*opt)) {
598 len = sizeof(*opt) + flow_opt->length * 4;
599 if (len > opts_len) {
603 entry = tun_meta_find_key(&map->key_hmap,
604 tun_meta_key(flow_opt->opt_class,
607 if (entry->loc.len == flow_opt->length * 4) {
608 memcpy_to_metadata(metadata, opt + 1, &entry->loc);
609 ULLONG_SET1(metadata->opt_map, entry - map->entries);
613 } else if (flow_opt->type & GENEVE_CRIT_OPT_TYPE) {
617 opt = opt + len / sizeof(*opt);
618 flow_opt = flow_opt + len / sizeof(*opt);
626 tun_metadata_from_geneve_nlattr(const struct nlattr *attr,
627 const struct nlattr *flow_attrs,
628 size_t flow_attr_len,
629 const struct tun_metadata *flow_metadata,
630 struct tun_metadata *metadata)
632 struct tun_table *map;
633 bool is_mask = !!flow_attrs;
634 const struct nlattr *flow;
637 const struct nlattr *tnl_key;
638 int mask_len = nl_attr_get_size(attr);
640 tnl_key = nl_attr_find__(flow_attrs, flow_attr_len, OVS_KEY_ATTR_TUNNEL);
642 return mask_len ? EINVAL : 0;
645 flow = nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
647 return mask_len ? EINVAL : 0;
650 if (mask_len != nl_attr_get_size(flow)) {
658 map = ovsrcu_get(struct tun_table *, &metadata_tab);
661 map = flow_metadata->tab;
664 return tun_metadata_from_geneve__(map, nl_attr_get(attr), nl_attr_get(flow),
665 nl_attr_get_size(flow), metadata);
669 tun_metadata_from_geneve_header(const struct geneve_opt *opts, int opt_len,
670 struct tun_metadata *metadata)
672 struct tun_table *map;
674 map = ovsrcu_get(struct tun_table *, &metadata_tab);
677 return tun_metadata_from_geneve__(map, opts, opts, opt_len, metadata);
681 tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b,
684 struct tun_table *map;
689 map = ovsrcu_get(struct tun_table *, &metadata_tab);
694 ULLONG_FOR_EACH_1 (i, flow->opt_map) {
695 struct tun_meta_entry *entry = &map->entries[i];
696 struct geneve_opt *opt;
698 opt = ofpbuf_put_uninit(b, sizeof *opt + entry->loc.len);
700 opt->opt_class = tun_key_class(entry->key);
701 opt->type = tun_key_type(entry->key);
702 opt->length = entry->loc.len / 4;
707 memcpy_from_metadata(opt + 1, flow, &entry->loc);
708 *crit_opt |= !!(opt->type & GENEVE_CRIT_OPT_TYPE);
713 tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow,
716 size_t nlattr_offset;
719 if (!flow->opt_map) {
723 /* For all intents and purposes, the Geneve options are nested
724 * attributes even if this doesn't show up directly to netlink. It's
725 * similar enough that we can use the same mechanism. */
726 nlattr_offset = nl_msg_start_nested(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
728 tun_metadata_to_geneve__(flow, b, &crit_opt);
730 nl_msg_end_nested(b, nlattr_offset);
734 tun_metadata_to_geneve_header(const struct tun_metadata *flow,
735 struct geneve_opt *opts, bool *crit_opt)
739 ofpbuf_use_stack(&b, opts, GENEVE_TOT_OPT_SIZE);
740 tun_metadata_to_geneve__(flow, &b, crit_opt);
746 tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key,
747 const struct tun_metadata *mask,
748 const struct tun_metadata *flow,
751 struct tun_table *map = flow->tab;
752 const struct nlattr *tnl_key, *geneve_key;
753 struct nlattr *geneve_mask;
754 struct geneve_opt *opt;
761 tnl_key = nl_attr_find(key, 0, OVS_KEY_ATTR_TUNNEL);
766 geneve_key = nl_attr_find_nested(tnl_key,
767 OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS);
772 geneve_mask = ofpbuf_tail(b);
773 nl_msg_put(b, geneve_key, geneve_key->nla_len);
775 /* All of these options have already been validated, so no need
776 * for sanity checking. */
777 opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask));
778 opts_len = nl_attr_get_size(geneve_mask);
780 while (opts_len > 0) {
781 struct tun_meta_entry *entry;
782 int len = sizeof(*opt) + opt->length * 4;
784 entry = tun_meta_find_key(&map->key_hmap,
785 tun_meta_key(opt->opt_class, opt->type));
787 memcpy_from_metadata(opt + 1, mask, &entry->loc);
789 memset(opt + 1, 0, opt->length * 4);
792 opt->opt_class = htons(0xffff);
799 opt = opt + len / sizeof(*opt);
804 static const struct tun_metadata_loc *
805 metadata_loc_from_match_read(struct tun_table *map, const struct match *match,
808 if (match->tun_md.valid) {
809 return &match->tun_md.loc[idx];
812 return &map->entries[idx].loc;
816 tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm,
817 const struct match *match)
819 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
820 const struct tun_metadata *metadata = &match->flow.tunnel.metadata;
821 const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
824 ULLONG_FOR_EACH_1 (i, mask->opt_map) {
825 const struct tun_metadata_loc *loc;
827 union mf_value mask_opts;
829 loc = metadata_loc_from_match_read(map, match, i);
830 memcpy_from_metadata(opts.tun_metadata, metadata, loc);
831 memcpy_from_metadata(mask_opts.tun_metadata, mask, loc);
832 nxm_put(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata,
833 mask_opts.tun_metadata, loc->len);
838 tun_metadata_match_format(struct ds *s, const struct match *match)
840 struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab);
841 const struct tun_metadata *metadata = &match->flow.tunnel.metadata;
842 const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata;
845 ULLONG_FOR_EACH_1 (i, mask->opt_map) {
846 const struct tun_metadata_loc *loc;
849 loc = metadata_loc_from_match_read(map, match, i);
851 ds_put_format(s, "tun_metadata%u=", i);
852 memcpy_from_metadata(opts.tun_metadata, metadata, loc);
853 ds_put_hex(s, opts.tun_metadata, loc->len);
855 memcpy_from_metadata(opts.tun_metadata, mask, loc);
856 if (!is_all_ones(opts.tun_metadata, loc->len)) {
858 ds_put_hex(s, opts.tun_metadata, loc->len);