VLOG_DEFINE_THIS_MODULE(classifier);
struct trie_node;
+struct trie_ctx;
+
+/* The ports trie depends on 'tp_src' and 'tp_dst' sharing one ovs_be32. */
+#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
+BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
/* Prefix trie for a 'field' */
struct cls_trie {
uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
+ int ports_mask_len;
+ struct trie_node *ports_trie; /* NULL if none. */
struct minimask mask; /* Wildcards for fields. */
/* 'mask' must be the last field. */
};
return cls_match;
}
-struct trie_ctx;
static struct cls_subtable *find_subtable(const struct cls_classifier *,
const struct minimask *);
static struct cls_subtable *insert_subtable(struct cls_classifier *,
const struct mf_field *);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
unsigned int *checkbits);
-
+static unsigned int trie_lookup_value(const struct trie_node *,
+ const ovs_be32 value[],
+ unsigned int *checkbits);
static void trie_destroy(struct trie_node *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
+static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
+ int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
+static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
+ int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
unsigned int nbits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
array->subtables[array->size++] = a;
}
-/* Only for rearranging entries in the same cache. */
+/* Moves the subtable entry at 'from' to position 'to', shifting the elements
+ * in between (including the one at 'to') by one slot to make room.  Does
+ * nothing when 'to' == 'from'.
+ *
+ * 'to' and 'from' are subtracted from each other and handed to memmove(), so
+ * both must point into the same array of entries. */
static inline void
-cls_subtable_cache_splice(struct cls_subtable_entry *to,
- struct cls_subtable_entry *start,
- struct cls_subtable_entry *end)
-{
- if (to > end) {
- /* Same as splicing entries to (start) from [end, to). */
- struct cls_subtable_entry *temp = to;
- to = start; start = end; end = temp;
- }
- if (to < start) {
- while (start != end) {
- struct cls_subtable_entry temp = *start;
-
- memmove(to + 1, to, (start - to) * sizeof *to);
- *to = temp;
- start++;
+cls_subtable_cache_move(struct cls_subtable_entry *to,
+ struct cls_subtable_entry *from)
+{
+ if (to != from) {
+ struct cls_subtable_entry temp = *from; /* Saved: the shift overwrites '*from'. */
+
+ if (to > from) {
+ /* Shift entries (from,to] backwards, i.e. one slot toward the start
+  * of the array, to make space at 'to'. */
+ memmove(from, from + 1, (to - from) * sizeof *to);
+ } else {
+ /* Shift entries [to,from) forward, i.e. one slot toward the end of
+  * the array, to make space at 'to'. */
+ memmove(to + 1, to, (from - to) * sizeof *to);
}
- } /* Else nothing to be done. */
+
+ *to = temp; /* Drop the saved entry into the slot that was freed. */
+ }
}
/* Array removal. */
ITER > (ARRAY)->subtables \
&& OVS_LIKELY(SUBTABLE = (--ITER)->subtable);)
+/* Consistency check for 'array'.  Logs a warning for every entry whose
+ * cached 'max_priority' differs from the 'max_priority' of the subtable it
+ * refers to, and for every entry whose cached 'max_priority' is lower than
+ * that of the entry visited just before it (the walk goes from the back of
+ * the array toward the front).  Only reports; the cache contents are left
+ * as they are. */
+static void
+cls_subtable_cache_verify(struct cls_subtable_cache *array)
+{
+    struct cls_subtable_entry *entry;
+    struct cls_subtable *subtable;
+    unsigned int prev_priority = 0;     /* Cached priority seen last. */
+
+    CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (subtable, entry, array) {
+        /* The cached copy must agree with the subtable itself. */
+        if (subtable->max_priority != entry->max_priority) {
+            VLOG_WARN("Subtable %p has mismatching priority in cache (%u != %u)",
+                      subtable, entry->max_priority, subtable->max_priority);
+        }
+
+        /* Walking backwards, cached priorities must never decrease. */
+        if (prev_priority > entry->max_priority) {
+            VLOG_WARN("Subtable cache is out of order (%u < %u)",
+                      entry->max_priority, prev_priority);
+        }
+
+        prev_priority = entry->max_priority;
+    }
+}
+
+
+/* Rebuilds 'cls->subtables_priority' from the subtables found in
+ * 'cls->subtables', warning about every inconsistency noticed on the way:
+ *
+ *   - a subtable whose 'max_priority' or 'max_count' disagrees with what its
+ *     rules say (the fields are then corrected in place),
+ *
+ *   - a subtable that is missing from, duplicated in, or cached with the
+ *     wrong priority in the old cache,
+ *
+ *   - a size difference between the old and the new cache.
+ *
+ * The old cache is verified before it is replaced and destroyed at the end;
+ * the new cache is verified once it is complete. */
+static void
+cls_subtable_cache_reset(struct cls_classifier *cls)
+{
+    /* By-value copy of the old cache, kept around for cross-checking while
+     * the replacement is being built. */
+    struct cls_subtable_cache stale = cls->subtables_priority;
+    struct cls_subtable *subtable;
+
+    VLOG_WARN("Resetting subtable cache.");
+
+    /* Report what is wrong with the current cache before replacing it. */
+    cls_subtable_cache_verify(&cls->subtables_priority);
+
+    cls_subtable_cache_init(&cls->subtables_priority);
+
+    HMAP_FOR_EACH (subtable, hmap_node, &cls->subtables) {
+        struct cls_subtable_entry *pos, *src = NULL;
+        struct cls_subtable_entry entry;
+        struct cls_subtable *other;
+        struct cls_match *head;
+        unsigned int top_priority = 0;
+        unsigned int top_count = 0;
+        bool seen = false;
+
+        /* Recompute the highest priority among 'subtable->rules' and the
+         * number of entries that have it. */
+        HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
+            if (head->priority == top_priority) {
+                top_count++;
+            } else if (head->priority > top_priority) {
+                top_priority = head->priority;
+                top_count = 1;
+            }
+        }
+        if (subtable->max_priority != top_priority
+            || subtable->max_count != top_count) {
+            VLOG_WARN("subtable %p (%u rules) has mismatching max_priority "
+                      "(%u) or max_count (%u). Highest priority found was %u, "
+                      "count: %u",
+                      subtable, subtable->n_rules, subtable->max_priority,
+                      subtable->max_count, top_priority, top_count);
+            /* Repair the bookkeeping with the recomputed values. */
+            subtable->max_count = top_count;
+            subtable->max_priority = top_priority;
+        }
+
+        /* Cross-check 'subtable' against the old cache; the whole old cache
+         * is scanned so that duplicates are noticed too. */
+        CLS_SUBTABLE_CACHE_FOR_EACH (other, pos, &stale) {
+            if (other == subtable) {
+                if (pos->max_priority != top_priority) {
+                    VLOG_WARN("Subtable %p has wrong max priority (%u != %u) "
+                              "in the old cache.",
+                              subtable, pos->max_priority, top_priority);
+                }
+                if (seen) {
+                    VLOG_WARN("Subtable %p duplicated in the old cache.",
+                              subtable);
+                }
+                seen = true;
+            }
+        }
+        if (!seen) {
+            VLOG_WARN("Subtable %p not found from the old cache.", subtable);
+        }
+
+        /* Append 'subtable' to the new cache... */
+        entry.subtable = subtable;
+        entry.tag = subtable->tag;
+        entry.max_priority = subtable->max_priority;
+        cls_subtable_cache_push_back(&cls->subtables_priority, entry);
+
+        /* ...and then walk backwards from the end to find where it belongs.
+         * The entry just appended is visited first, which sets 'src'.  If an
+         * entry with an equal or higher priority is met, the destination is
+         * the slot right after it; if the loop runs to completion, 'pos' is
+         * at the first array element and the subtable moves to the front. */
+        CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (other, pos,
+                                             &cls->subtables_priority) {
+            if (other == subtable) {
+                src = pos;      /* Locate the subtable as we go. */
+            } else if (other->max_priority >= top_priority) {
+                ovs_assert(src != NULL);
+                pos++;          /* After this. */
+                break;
+            }
+        }
+
+        /* Move the subtable from 'src' to 'pos'. */
+        cls_subtable_cache_move(pos, src);
+    }
+
+    /* The old and the new cache should hold the same number of entries. */
+    if (stale.size != cls->subtables_priority.size) {
+        VLOG_WARN("subtables cache sizes differ: old (%"PRIuSIZE
+                  ") != new (%"PRIuSIZE").",
+                  stale.size, cls->subtables_priority.size);
+    }
+
+    cls_subtable_cache_destroy(&stale);
+
+    cls_subtable_cache_verify(&cls->subtables_priority);
+}
+
\f
/* flow/miniflow/minimask/minimatch utilities.
* These are only used by the classifier, so place them here to allow
return partition;
}
+/* Returns the value that MINIFLOW_GET_BE32() yields for 'tp_src' in
+ * 'match->flow', ANDed with the value it yields for 'tp_src' in
+ * 'match->mask.masks', so that bits outside the mask are zero in the
+ * result.  Presumably the ovs_be32 covers both 'tp_src' and 'tp_dst';
+ * verify against the MINIFLOW_GET_BE32() definition. */
+static inline ovs_be32
+minimatch_get_ports(const struct minimatch *match)
+{
+    /* Two separate miniflow reads; they could share the same map if this
+     * ever needs to be optimized for the fast path. */
+    const ovs_be32 ports = MINIFLOW_GET_BE32(&match->flow, tp_src);
+    const ovs_be32 ports_mask = MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
+
+    return ports & ports_mask;
+}
+
/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
* must not modify or free it.
*
trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
}
}
+
+ /* Ports trie. */
+ if (subtable->ports_mask_len) {
+ /* We mask the value to be inserted to always have the wildcarded
+ * bits in known (zero) state, so we can include them in comparison
+ * and they will always match (== their original value does not
+ * matter). */
+ ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
+
+ trie_insert_prefix(&subtable->ports_trie, &masked_ports,
+ subtable->ports_mask_len);
+ }
+
return NULL;
} else {
struct cls_rule *old_cls_rule = old_rule->cls_rule;
ovs_assert(cls_match);
subtable = find_subtable(cls, &rule->match.mask);
-
ovs_assert(subtable);
+ if (subtable->ports_mask_len) {
+ ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
+
+ trie_remove_prefix(&subtable->ports_trie,
+ &masked_ports, subtable->ports_mask_len);
+ }
for (i = 0; i < cls->n_tries; i++) {
if (subtable->trie_plen[i]) {
trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
cls->tries[i].field);
}
+ /* Ports trie. */
+ subtable->ports_trie = NULL;
+ subtable->ports_mask_len
+ = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));
+
hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
elem.subtable = subtable;
elem.tag = subtable->tag;
}
}
+ trie_destroy(subtable->ports_trie);
+
for (i = 0; i < subtable->n_indices; i++) {
hindex_destroy(&subtable->indices[i]);
}
++subtable->max_count;
} else if (new_priority > subtable->max_priority) {
struct cls_subtable *table;
- struct cls_subtable_entry *iter, *subtable_iter = NULL;
+ struct cls_subtable_entry *iter, *from = NULL;
subtable->max_priority = new_priority;
subtable->max_count = 1;
- /* Possibly move 'subtable' earlier in the priority list. If we break
- * out of the loop, then 'subtable_iter' should be moved just before
- * 'iter'. If the loop terminates normally, then 'iter' will be the
- * first list element and we'll move subtable just before that
- * (e.g. to the front of the list). */
- CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter, &cls->subtables_priority) {
+ /* Possibly move 'subtable' earlier in the priority array. If
+ * we break out of the loop, then the subtable (at 'from')
+ * should be moved to the position right after the current
+ * element. If the loop terminates normally, then 'iter' will
+ * be at the first array element and we'll move the subtable
+ * to the front of the array. */
+ CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter,
+ &cls->subtables_priority) {
if (table == subtable) {
- subtable_iter = iter; /* Locate the subtable as we go. */
+ from = iter; /* Locate the subtable as we go. */
iter->max_priority = new_priority;
} else if (table->max_priority >= new_priority) {
- ovs_assert(subtable_iter != NULL);
- iter++;
+ if (from == NULL) {
+ /* Corrupted cache? */
+ cls_subtable_cache_reset(cls);
+ VLOG_ABORT("update_subtables_after_insertion(): Subtable priority list corrupted.");
+ OVS_NOT_REACHED();
+ }
+ iter++; /* After this. */
break;
}
}
- /* Move 'subtable' just before 'iter' (unless it's already there). */
- if (iter != subtable_iter) {
- cls_subtable_cache_splice(iter, subtable_iter, subtable_iter + 1);
- }
+ /* Move subtable at 'from' to 'iter'. */
+ cls_subtable_cache_move(iter, from);
}
}
if (del_priority == subtable->max_priority && --subtable->max_count == 0) {
struct cls_match *head;
struct cls_subtable *table;
- struct cls_subtable_entry *iter, *subtable_iter = NULL;
+ struct cls_subtable_entry *iter, *from = NULL;
subtable->max_priority = 0;
HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
}
}
- /* Possibly move 'subtable' later in the priority list. If we break
- * out of the loop, then 'subtable' should be moved just before that
- * 'iter'. If the loop terminates normally, then 'iter' will be the
- * list head and we'll move subtable just before that (e.g. to the back
- * of the list). */
+ /* Possibly move 'subtable' later in the priority array.
+ * After the loop the 'iter' will point right after the position
+ * at which the subtable should be moved (either at a subtable
+ * with an equal or lower priority, or just past the array),
+ * so it is decremented once. */
CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &cls->subtables_priority) {
if (table == subtable) {
- subtable_iter = iter; /* Locate the subtable as we go. */
+ from = iter; /* Locate the subtable as we go. */
iter->max_priority = subtable->max_priority;
} else if (table->max_priority <= subtable->max_priority) {
- ovs_assert(subtable_iter != NULL);
+ if (from == NULL) {
+ /* Corrupted cache? */
+ cls_subtable_cache_reset(cls);
+ VLOG_ABORT("update_subtables_after_removal(): Subtable priority list corrupted.");
+ OVS_NOT_REACHED();
+ }
break;
}
}
+ /* Now at one past the destination. */
+ iter--;
- /* Move 'subtable' just before 'iter' (unless it's already there). */
- if (iter != subtable_iter) {
- cls_subtable_cache_splice(iter, subtable_iter, subtable_iter + 1);
- }
+ /* Move subtable at 'from' to 'iter'. */
+ cls_subtable_cache_move(iter, from);
}
}
* but it didn't match. */
rule = NULL;
}
+ if (!rule && subtable->ports_mask_len) {
+ /* Ports are always part of the final range, if any.
+ * No match was found for the ports. Use the ports trie to figure out
+ * which ports bits to unwildcard. */
+ unsigned int mbits;
+ ovs_be32 value, mask;
+
+ mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
+ value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
+ trie_lookup_value(subtable->ports_trie, &value, &mbits);
+
+ ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
+ mask & htonl(~0 << (32 - mbits));
+
+ ofs.start = TP_PORTS_OFS32;
+ goto range_out;
+ }
out:
/* Must unwildcard all the fields, as they were looked at. */
flow_wildcards_fold_minimask(wc, &subtable->mask);
trie_lookup_value(const struct trie_node *node, const ovs_be32 value[],
unsigned int *checkbits)
{
- unsigned int plen = 0, match_len = 0;
+ unsigned int ofs = 0, match_len = 0;
const struct trie_node *prev = NULL;
- for (; node; prev = node, node = trie_next_node(node, value, plen)) {
+ for (; node; prev = node, node = trie_next_node(node, value, ofs)) {
unsigned int eqbits;
/* Check if this edge can be followed. */
- eqbits = prefix_equal_bits(node->prefix, node->nbits, value, plen);
- plen += eqbits;
+ eqbits = prefix_equal_bits(node->prefix, node->nbits, value, ofs);
+ ofs += eqbits;
if (eqbits < node->nbits) { /* Mismatch, nothing more to be found. */
- /* Bit at offset 'plen' differed. */
- *checkbits = plen + 1; /* Includes the first mismatching bit. */
+ /* Bit at offset 'ofs' differed. */
+ *checkbits = ofs + 1; /* Includes the first mismatching bit. */
return match_len;
}
/* Full match, check if rules exist at this prefix length. */
if (node->n_rules > 0) {
- match_len = plen;
+ match_len = ofs;
}
}
/* Dead end, exclude the other branch if it exists. */
- *checkbits = !prev || trie_is_leaf(prev) ? plen : plen + 1;
+ *checkbits = !prev || trie_is_leaf(prev) ? ofs : ofs + 1;
return match_len;
}
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
- const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
+    /* Thin wrapper: fetch the prefix of 'trie->field' from the rule's match
+     * and insert it, with length 'mlen', into the trie at 'trie->root'. */
+    const ovs_be32 *field_prefix = minimatch_get_prefix(&rule->match,
+                                                        trie->field);
+
+    trie_insert_prefix(&trie->root, field_prefix, mlen);
+}
+
+static void
+trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen)
+{
struct trie_node *node;
- struct trie_node **edge;
int ofs = 0;
/* Walk the tree. */
- for (edge = &trie->root;
- (node = *edge) != NULL;
+ for (; (node = *edge) != NULL;
edge = trie_next_edge(node, prefix, ofs)) {
unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
ofs += eqbits;
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
- const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
+    /* Thin wrapper: fetch the prefix of 'trie->field' from the rule's match
+     * and remove it, with length 'mlen', from the trie at 'trie->root'. */
+    const ovs_be32 *field_prefix = minimatch_get_prefix(&rule->match,
+                                                        trie->field);
+
+    trie_remove_prefix(&trie->root, field_prefix, mlen);
+}
+
+/* 'mlen' must be the (non-zero) CIDR prefix length of 'prefix', which is
+ * looked up starting from the trie node that '*root' points to. */
+static void
+trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen)
+{
struct trie_node *node;
struct trie_node **edges[sizeof(union mf_value) * 8];
int depth = 0, ofs = 0;
/* Walk the tree. */
- for (edges[depth] = &trie->root;
+ for (edges[0] = root;
(node = *edges[depth]) != NULL;
edges[++depth] = trie_next_edge(node, prefix, ofs)) {
unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
+
if (eqbits < node->nbits) {
/* Mismatch, nothing to be removed. This should never happen, as
* only rules in the classifier are ever removed. */