#include "ofp-util.h"
#include "ovs-thread.h"
#include "packets.h"
+#include "pvector.h"
#include "tag.h"
#include "util.h"
#include "vlog.h"
struct trie_node *root; /* NULL if none. */
};
-struct cls_subtable_entry {
- struct cls_subtable *subtable;
- tag_type tag;
- unsigned int max_priority;
-};
-
-struct cls_subtables {
- size_t count; /* One past last valid array element. */
- size_t alloc_size; /* Number of allocated elements. */
- struct cls_subtable_entry *array;
-};
-
enum {
CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */
};
uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
* for staged lookup. */
struct hmap subtables_map; /* Contains "struct cls_subtable"s. */
- struct cls_subtables subtables;
+ struct pvector subtables;
struct hmap partitions; /* Contains "struct cls_partition"s. */
struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
unsigned int n_tries;
static void destroy_subtable(struct cls_classifier *, struct cls_subtable *);
-static void update_subtables_after_insertion(struct cls_classifier *,
- struct cls_subtable *,
- unsigned int new_priority);
-static void update_subtables_after_removal(struct cls_classifier *,
- struct cls_subtable *,
- unsigned int del_priority);
-
static struct cls_match *find_match_wc(const struct cls_subtable *,
const struct flow *, struct trie_ctx *,
unsigned int n_tries,
unsigned int nbits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
uint8_t be32ofs, unsigned int nbits);
-
-static void
-cls_subtables_init(struct cls_subtables *subtables)
-{
- memset(subtables, 0, sizeof *subtables);
-}
-
-static void
-cls_subtables_destroy(struct cls_subtables *subtables)
-{
- free(subtables->array);
- memset(subtables, 0, sizeof *subtables);
-}
-
-/* Subtables insertion. */
-static void
-cls_subtables_push_back(struct cls_subtables *subtables,
- struct cls_subtable_entry a)
-{
- if (subtables->count == subtables->alloc_size) {
- subtables->array = x2nrealloc(subtables->array, &subtables->alloc_size,
- sizeof a);
- }
-
- subtables->array[subtables->count++] = a;
-}
-
-/* Move subtable entry at 'from' to 'to', shifting the elements in between
- * (including the one at 'to') accordingly. */
-static inline void
-cls_subtables_move(struct cls_subtable_entry *to,
- struct cls_subtable_entry *from)
-{
- if (to != from) {
- struct cls_subtable_entry temp = *from;
-
- if (to > from) {
- /* Shift entries (from,to] backwards to make space at 'to'. */
- memmove(from, from + 1, (to - from) * sizeof *to);
- } else {
- /* Shift entries [to,from) forward to make space at 'to'. */
- memmove(to + 1, to, (from - to) * sizeof *to);
- }
-
- *to = temp;
- }
-}
-
-/* Subtables removal. */
-static inline void
-cls_subtables_remove(struct cls_subtables *subtables,
- struct cls_subtable_entry *elem)
-{
- ssize_t size = (&subtables->array[subtables->count]
- - (elem + 1)) * sizeof *elem;
- if (size > 0) {
- memmove(elem, elem + 1, size);
- }
- subtables->count--;
-}
-
-#define CLS_SUBTABLES_FOR_EACH(SUBTABLE, ITER, SUBTABLES) \
- for ((ITER) = (SUBTABLES)->array; \
- (ITER) < &(SUBTABLES)->array[(SUBTABLES)->count] \
- && OVS_LIKELY((SUBTABLE) = (ITER)->subtable); \
- ++(ITER))
-#define CLS_SUBTABLES_FOR_EACH_CONTINUE(SUBTABLE, ITER, SUBTABLES) \
- for (++(ITER); \
- (ITER) < &(SUBTABLES)->array[(SUBTABLES)->count] \
- && OVS_LIKELY((SUBTABLE) = (ITER)->subtable); \
- ++(ITER))
-#define CLS_SUBTABLES_FOR_EACH_REVERSE(SUBTABLE, ITER, SUBTABLES) \
- for ((ITER) = &(SUBTABLES)->array[(SUBTABLES)->count]; \
- (ITER) > (SUBTABLES)->array \
- && OVS_LIKELY((SUBTABLE) = (--(ITER))->subtable);)
-
-static void
-cls_subtables_verify(struct cls_subtables *subtables)
-{
- struct cls_subtable *table;
- struct cls_subtable_entry *iter;
- unsigned int priority = 0;
-
- CLS_SUBTABLES_FOR_EACH_REVERSE (table, iter, subtables) {
- if (iter->max_priority != table->max_priority) {
- VLOG_WARN("Subtable %p has mismatching priority in cache (%u != %u)",
- table, iter->max_priority, table->max_priority);
- }
- if (iter->max_priority < priority) {
- VLOG_WARN("Subtable cache is out of order (%u < %u)",
- iter->max_priority, priority);
- }
- priority = iter->max_priority;
- }
-}
-
-static void
-cls_subtables_reset(struct cls_classifier *cls)
-{
- struct cls_subtables old = cls->subtables;
- struct cls_subtable *subtable;
-
- VLOG_WARN("Resetting subtable cache.");
-
- cls_subtables_verify(&cls->subtables);
-
- cls_subtables_init(&cls->subtables);
-
- HMAP_FOR_EACH (subtable, hmap_node, &cls->subtables_map) {
- struct cls_match *head;
- struct cls_subtable_entry elem;
- struct cls_subtable *table;
- struct cls_subtable_entry *iter, *from = NULL;
- unsigned int new_max = 0;
- unsigned int max_count = 0;
- bool found;
-
- /* Verify max_priority. */
- HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
- if (head->priority > new_max) {
- new_max = head->priority;
- max_count = 1;
- } else if (head->priority == new_max) {
- max_count++;
- }
- }
- if (new_max != subtable->max_priority ||
- max_count != subtable->max_count) {
- VLOG_WARN("subtable %p (%u rules) has mismatching max_priority "
- "(%u) or max_count (%u). Highest priority found was %u, "
- "count: %u",
- subtable, subtable->n_rules, subtable->max_priority,
- subtable->max_count, new_max, max_count);
- subtable->max_priority = new_max;
- subtable->max_count = max_count;
- }
-
- /* Locate the subtable from the old cache. */
- found = false;
- CLS_SUBTABLES_FOR_EACH (table, iter, &old) {
- if (table == subtable) {
- if (iter->max_priority != new_max) {
- VLOG_WARN("Subtable %p has wrong max priority (%u != %u) "
- "in the old cache.",
- subtable, iter->max_priority, new_max);
- }
- if (found) {
- VLOG_WARN("Subtable %p duplicated in the old cache.",
- subtable);
- }
- found = true;
- }
- }
- if (!found) {
- VLOG_WARN("Subtable %p not found from the old cache.", subtable);
- }
-
- elem.subtable = subtable;
- elem.tag = subtable->tag;
- elem.max_priority = subtable->max_priority;
- cls_subtables_push_back(&cls->subtables, elem);
-
- /* Possibly move 'subtable' earlier in the priority array. If
- * we break out of the loop, then the subtable (at 'from')
- * should be moved to the position right after the current
- * element. If the loop terminates normally, then 'iter' will
- * be at the first array element and we'll move the subtable
- * to the front of the array. */
- CLS_SUBTABLES_FOR_EACH_REVERSE (table, iter, &cls->subtables) {
- if (table == subtable) {
- from = iter; /* Locate the subtable as we go. */
- } else if (table->max_priority >= new_max) {
- ovs_assert(from != NULL);
- iter++; /* After this. */
- break;
- }
- }
-
- /* Move subtable at 'from' to 'iter'. */
- cls_subtables_move(iter, from);
- }
-
- /* Verify that the old and the new have the same size. */
- if (old.count != cls->subtables.count) {
- VLOG_WARN("subtables cache sizes differ: old (%"PRIuSIZE
- ") != new (%"PRIuSIZE").",
- old.count, cls->subtables.count);
- }
-
- cls_subtables_destroy(&old);
-
- cls_subtables_verify(&cls->subtables);
-}
-
\f
/* flow/miniflow/minimask/minimatch utilities.
* These are only used by the classifier, so place them here to allow
cls->n_rules = 0;
hmap_init(&cls->subtables_map);
- cls_subtables_init(&cls->subtables);
+ pvector_init(&cls->subtables);
hmap_init(&cls->partitions);
cls->n_flow_segments = 0;
if (flow_segments) {
}
hmap_destroy(&cls->partitions);
- cls_subtables_destroy(&cls->subtables);
+ pvector_destroy(&cls->subtables);
free(cls);
}
}
{
struct cls_trie *trie = &cls->tries[trie_idx];
struct cls_subtable *subtable;
- struct cls_subtable_entry *iter;
if (trie_idx < cls->n_tries) {
trie_destroy(trie->root);
trie->field = field;
/* Add existing rules to the trie. */
- CLS_SUBTABLES_FOR_EACH (subtable, iter, &cls->subtables) {
+ HMAP_FOR_EACH (subtable, hmap_node, &cls->subtables_map) {
unsigned int plen;
plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
metadata);
}
- subtable->n_rules++;
cls->n_rules++;
for (i = 0; i < cls->n_tries; i++) {
if (--subtable->n_rules == 0) {
destroy_subtable(cls, subtable);
- } else {
- update_subtables_after_removal(cls, subtable, cls_match->priority);
+ } else if (subtable->max_priority == cls_match->priority
+ && --subtable->max_count == 0) {
+ /* Find the new 'max_priority' and 'max_count'. */
+ struct cls_match *head;
+ unsigned int max_priority = 0;
+
+ HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
+ if (head->priority > max_priority) {
+ max_priority = head->priority;
+ subtable->max_count = 1;
+ } else if (head->priority == max_priority) {
+ ++subtable->max_count;
+ }
+ }
+ subtable->max_priority = max_priority;
+ pvector_change_priority(&cls->subtables, subtable, max_priority);
}
cls->n_rules--;
ctx->lookup_done = false;
}
-static inline void
-lookahead_subtable(const struct cls_subtable_entry *subtables)
-{
- ovs_prefetch_range(subtables->subtable, sizeof *subtables->subtable);
-}
-
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
* Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules
* of equal priority match 'flow', returns one arbitrarily.
struct cls_classifier *cls = cls_->cls;
const struct cls_partition *partition;
tag_type tags;
- struct cls_match *best;
- struct trie_ctx trie_ctx[CLS_MAX_TRIES];
- int i;
- struct cls_subtable_entry *subtables = cls->subtables.array;
- int n_subtables = cls->subtables.count;
int64_t best_priority = -1;
-
- /* Prefetch the subtables array. */
- ovs_prefetch_range(subtables, n_subtables * sizeof *subtables);
+ const struct cls_match *best;
+ struct trie_ctx trie_ctx[CLS_MAX_TRIES];
+ struct cls_subtable *subtable;
/* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
* then 'flow' cannot possibly match in 'subtable':
tags = partition ? partition->tags : TAG_ARBITRARY;
/* Initialize trie contexts for match_find_wc(). */
- for (i = 0; i < cls->n_tries; i++) {
+ for (int i = 0; i < cls->n_tries; i++) {
trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
}
- /* Prefetch the first subtables. */
- if (n_subtables > 1) {
- lookahead_subtable(subtables);
- lookahead_subtable(subtables + 1);
- }
-
best = NULL;
- for (i = 0; OVS_LIKELY(i < n_subtables); i++) {
+ PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2,
+ sizeof(struct cls_subtable), &cls->subtables) {
struct cls_match *rule;
- if ((int64_t)subtables[i].max_priority <= best_priority) {
- /* Subtables are in descending priority order,
- * can not find anything better. */
- break;
- }
-
- /* Prefetch a forthcoming subtable. */
- if (i + 2 < n_subtables) {
- lookahead_subtable(&subtables[i + 2]);
- }
-
- if (!tag_intersects(tags, subtables[i].tag)) {
+ if (!tag_intersects(tags, subtable->tag)) {
continue;
}
- rule = find_match_wc(subtables[i].subtable, flow, trie_ctx,
- cls->n_tries, wc);
+ rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc);
if (rule && (int64_t)rule->priority > best_priority) {
best_priority = (int64_t)rule->priority;
best = rule;
{
struct cls_classifier *cls = cls_->cls;
struct cls_subtable *subtable;
- struct cls_subtable_entry *iter;
- CLS_SUBTABLES_FOR_EACH (subtable, iter, &cls->subtables) {
+ PVECTOR_FOR_EACH (subtable, &cls->subtables) {
struct cls_match *rule;
rule = find_match_miniflow(subtable, flow,
{
struct cls_classifier *cls = cls_->cls;
struct cls_subtable *subtable;
- struct cls_subtable_entry *iter;
+ int64_t stop_at_priority = (int64_t)target->priority - 1;
/* Iterate subtables in the descending max priority order. */
- CLS_SUBTABLES_FOR_EACH (subtable, iter, &cls->subtables) {
+ PVECTOR_FOR_EACH_PRIORITY (subtable, stop_at_priority, 2,
+ sizeof(struct cls_subtable), &cls->subtables) {
uint32_t storage[FLOW_U32S];
struct minimask mask;
struct cls_match *head;
- if (target->priority > iter->max_priority) {
- break; /* Can skip this and the rest of the subtables. */
- }
-
minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
struct cls_match *rule;
int i, index = 0;
struct flow_wildcards old, new;
uint8_t prev;
- struct cls_subtable_entry elem;
int count = count_1bits(mask->masks.map);
subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
= 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));
hmap_insert(&cls->subtables_map, &subtable->hmap_node, hash);
- elem.subtable = subtable;
- elem.tag = subtable->tag;
- elem.max_priority = subtable->max_priority;
- cls_subtables_push_back(&cls->subtables, elem);
return subtable;
}
destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable)
{
int i;
- struct cls_subtable *table = NULL;
- struct cls_subtable_entry *iter;
-
- CLS_SUBTABLES_FOR_EACH (table, iter, &cls->subtables) {
- if (table == subtable) {
- cls_subtables_remove(&cls->subtables, iter);
- break;
- }
- }
+ pvector_remove(&cls->subtables, subtable);
trie_destroy(subtable->ports_trie);
for (i = 0; i < subtable->n_indices; i++) {
hindex_destroy(&subtable->indices[i]);
}
- minimask_destroy(&subtable->mask);
hmap_remove(&cls->subtables_map, &subtable->hmap_node);
+ minimask_destroy(&subtable->mask);
hmap_destroy(&subtable->rules);
- free(subtable);
-}
-
-/* This function performs the following updates for 'subtable' in 'cls'
- * following the addition of a new rule with priority 'new_priority' to
- * 'subtable':
- *
- * - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
- *
- * - Update 'subtable''s position in 'cls->subtables' if necessary.
- *
- * This function should only be called after adding a new rule, not after
- * replacing a rule by an identical one or modifying a rule in-place. */
-static void
-update_subtables_after_insertion(struct cls_classifier *cls,
- struct cls_subtable *subtable,
- unsigned int new_priority)
-{
- if (new_priority == subtable->max_priority) {
- ++subtable->max_count;
- } else if (new_priority > subtable->max_priority) {
- struct cls_subtable *table;
- struct cls_subtable_entry *iter, *from = NULL;
-
- subtable->max_priority = new_priority;
- subtable->max_count = 1;
-
- /* Possibly move 'subtable' earlier in the priority array. If
- * we break out of the loop, then the subtable (at 'from')
- * should be moved to the position right after the current
- * element. If the loop terminates normally, then 'iter' will
- * be at the first array element and we'll move the subtable
- * to the front of the array. */
- CLS_SUBTABLES_FOR_EACH_REVERSE (table, iter, &cls->subtables) {
- if (table == subtable) {
- from = iter; /* Locate the subtable as we go. */
- iter->max_priority = new_priority;
- } else if (table->max_priority >= new_priority) {
- if (from == NULL) {
- /* Corrupted cache? */
- cls_subtables_reset(cls);
- VLOG_ABORT("update_subtables_after_insertion(): Subtable priority list corrupted.");
- OVS_NOT_REACHED();
- }
- iter++; /* After this. */
- break;
- }
- }
-
- /* Move subtable at 'from' to 'iter'. */
- cls_subtables_move(iter, from);
- }
-}
-
-/* This function performs the following updates for 'subtable' in 'cls'
- * following the deletion of a rule with priority 'del_priority' from
- * 'subtable':
- *
- * - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
- *
- * - Update 'subtable''s position in 'cls->subtables' if necessary.
- *
- * This function should only be called after removing a rule, not after
- * replacing a rule by an identical one or modifying a rule in-place. */
-static void
-update_subtables_after_removal(struct cls_classifier *cls,
- struct cls_subtable *subtable,
- unsigned int del_priority)
-{
- if (del_priority == subtable->max_priority && --subtable->max_count == 0) {
- struct cls_match *head;
- struct cls_subtable *table;
- struct cls_subtable_entry *iter, *from = NULL;
-
- subtable->max_priority = 0;
- HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
- if (head->priority > subtable->max_priority) {
- subtable->max_priority = head->priority;
- subtable->max_count = 1;
- } else if (head->priority == subtable->max_priority) {
- ++subtable->max_count;
- }
- }
-
- /* Possibly move 'subtable' later in the priority array.
- * After the loop the 'iter' will point right after the position
- * at which the subtable should be moved (either at a subtable
- * with an equal or lower priority, or just past the array),
- * so it is decremented once. */
- CLS_SUBTABLES_FOR_EACH (table, iter, &cls->subtables) {
- if (table == subtable) {
- from = iter; /* Locate the subtable as we go. */
- iter->max_priority = subtable->max_priority;
- } else if (table->max_priority <= subtable->max_priority) {
- if (from == NULL) {
- /* Corrupted cache? */
- cls_subtables_reset(cls);
- VLOG_ABORT("update_subtables_after_removal(): Subtable priority list corrupted.");
- OVS_NOT_REACHED();
- }
- break;
- }
- }
- /* Now at one past the destination. */
- iter--;
-
- /* Move subtable at 'from' to 'iter'. */
- cls_subtables_move(iter, from);
- }
+ ovsrcu_postpone(free, subtable);
}
struct range {
FOR_EACH_RULE_IN_LIST (rule, head) {
if (cls_match->priority >= rule->priority) {
if (rule == head) {
- /* 'new' is the new highest-priority flow in the list. */
+ /* 'cls_match' is the new highest-priority flow in the
+ * list. */
hmap_replace(&subtable->rules,
&rule->hmap_node, &cls_match->hmap_node);
}
if (cls_match->priority == rule->priority) {
list_replace(&cls_match->list, &rule->list);
old = rule;
- goto out;
} else {
list_insert(&rule->list, &cls_match->list);
- goto out;
}
+ goto out;
}
}
out:
if (!old) {
- update_subtables_after_insertion(cls, subtable, cls_match->priority);
+ subtable->n_rules++;
+
+ /* Rule was added, not replaced. Update 'subtable's 'max_priority'
+ * and 'max_count', if necessary. */
+ if (subtable->n_rules == 1) {
+ subtable->max_priority = cls_match->priority;
+ subtable->max_count = 1;
+ pvector_insert(&cls->subtables, subtable, cls_match->priority);
+ } else if (subtable->max_priority == cls_match->priority) {
+ ++subtable->max_count;
+ } else if (cls_match->priority > subtable->max_priority) {
+ subtable->max_priority = cls_match->priority;
+ subtable->max_count = 1;
+ pvector_change_priority(&cls->subtables, subtable, cls_match->priority);
+ }
} else {
/* Remove old node from indices. */
for (i = 0; i < subtable->n_indices; i++) {
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "pvector.h"
+
+/* Returns the implementation currently stored in 'pvec', as read with
+ * ovsrcu_get(). */
+static struct pvector_impl *
+pvector_impl_get(const struct pvector *pvec)
+{
+    struct pvector_impl *current;
+
+    current = ovsrcu_get(struct pvector_impl *, &pvec->impl);
+    return current;
+}
+
+/* Allocates, with xmalloc(), an implementation that has room for 'size'
+ * entries.  The result has no entries in use ('size' member is 0) and its
+ * 'allocated' member is set to 'size'.  The entry slots themselves are not
+ * initialized here. */
+static struct pvector_impl *
+pvector_impl_alloc(size_t size)
+{
+    size_t n_bytes = sizeof(struct pvector_impl)
+                     + size * sizeof(struct pvector_entry);
+    struct pvector_impl *fresh = xmalloc(n_bytes);
+
+    fresh->allocated = size;
+    fresh->size = 0;
+
+    return fresh;
+}
+
+/* Returns an xmemdup() copy of 'old': the header plus all 'old->allocated'
+ * entry slots, including any slots beyond 'old->size'. */
+static struct pvector_impl *
+pvector_impl_dup(struct pvector_impl *old)
+{
+    size_t n_bytes = sizeof *old + old->allocated * sizeof old->vector[0];
+
+    return xmemdup(old, n_bytes);
+}
+
+/* Initializes 'pvec' as an empty concurrent priority vector whose first
+ * implementation already has PVECTOR_EXTRA_ALLOC entry slots allocated. */
+void
+pvector_init(struct pvector *pvec)
+{
+    struct pvector_impl *empty = pvector_impl_alloc(PVECTOR_EXTRA_ALLOC);
+
+    ovsrcu_set(&pvec->impl, empty);
+}
+
+/* Destroys 'pvec'.
+ *
+ * Only the vector's own storage is released (deferred through
+ * ovsrcu_postpone()); the client is responsible for destroying any data
+ * previously held in 'pvec'. */
+void
+pvector_destroy(struct pvector *pvec)
+{
+    struct pvector_impl *doomed = pvector_impl_get(pvec);
+
+    ovsrcu_postpone(free, doomed);
+    ovsrcu_set(&pvec->impl, NULL); /* Poison. */
+}
+
+/* Iterates over the entries of 'IMPL' from index 0 up to, but not including,
+ * 'IMPL->size'.  On each iteration 'ENTRY' points to '&IMPL->vector[INDEX]'.
+ *
+ * 'INDEX' is a caller-supplied variable, so callers that need the 'index'
+ * afterward can read it once the loop ends: after a 'break' it is the index
+ * of the current entry, after normal termination it equals 'IMPL->size'.
+ *
+ * The ', true' makes the assignment to 'ENTRY' always count as a true loop
+ * condition. */
+#define PVECTOR_IMPL_FOR_EACH(ENTRY, INDEX, IMPL) \
+ for ((INDEX) = 0; \
+ (INDEX) < (IMPL)->size \
+ && ((ENTRY) = &(IMPL)->vector[INDEX], true); \
+ (INDEX)++)
+
+/* qsort() comparison function for 'struct pvector_entry'.  Orders entries by
+ * descending 'priority': returns -1 if 'a_' has the higher priority, 1 if
+ * 'b_' has the higher priority, and 0 if they are equal. */
+static int
+pvector_entry_cmp(const void *a_, const void *b_)
+{
+    const struct pvector_entry *lhs = a_;
+    const struct pvector_entry *rhs = b_;
+
+    if (lhs->priority > rhs->priority) {
+        return -1;
+    }
+    return lhs->priority < rhs->priority ? 1 : 0;
+}
+
+/* Sorts the 'impl->size' entries in use into descending priority order, as
+ * defined by pvector_entry_cmp(). */
+static void
+pvector_impl_sort(struct pvector_impl *impl)
+{
+    const size_t entry_size = sizeof impl->vector[0];
+
+    qsort(impl->vector, impl->size, entry_size, pvector_entry_cmp);
+}
+
+/* Returns the index of the first entry in 'impl' whose priority is equal to
+ * or lower than 'target_priority'.  If there is no such entry, returns
+ * 'impl->size', that is, one past the last entry in use. */
+static int
+pvector_impl_find_priority(struct pvector_impl *impl,
+                           unsigned int target_priority)
+{
+    int index = 0;
+
+    /* Skip every entry that still has a strictly higher priority. */
+    while (index < impl->size
+           && impl->vector[index].priority > target_priority) {
+        index++;
+    }
+    return index;
+}
+
+/* Returns the index of the first entry in 'impl' whose 'ptr' equals 'target',
+ * or -1 if no entry in use holds 'target'. */
+static int
+pvector_impl_find(struct pvector_impl *impl, void *target)
+{
+    for (int index = 0; index < impl->size; index++) {
+        if (impl->vector[index].ptr == target) {
+            return index;
+        }
+    }
+    return -1;
+}
+
+/* Inserts 'ptr', which must not be NULL (asserted), into 'pvec' with
+ * 'priority'.
+ *
+ * If the current implementation has a spare slot and 'priority' is not higher
+ * than the priority of the last entry (or there are no entries), the new
+ * entry is appended in place.  Otherwise a new implementation is allocated
+ * with the entry copied in at its sorted position, published with
+ * ovsrcu_set(), and the old implementation is freed via ovsrcu_postpone().
+ *
+ * Among entries of equal priority, the in-place path puts the new entry
+ * last, while the reallocating path puts it before the existing ones. */
+void
+pvector_insert(struct pvector *pvec, void *ptr, unsigned int priority)
+{
+ struct pvector_impl *old, *new;
+ int index;
+
+ ovs_assert(ptr != NULL);
+
+ old = pvector_impl_get(pvec);
+
+ /* Check if can add to the end without reallocation. */
+ if (old->allocated > old->size &&
+ (!old->size || priority <= old->vector[old->size - 1].priority)) {
+ old->vector[old->size].ptr = ptr;
+ old->vector[old->size].priority = priority;
+ /* Size increment must not be visible to the readers before the new
+ * entry is stored. */
+ atomic_thread_fence(memory_order_release);
+ ++old->size;
+ } else {
+ /* Room for the old entries, the new one, and some spare slots. */
+ new = pvector_impl_alloc(old->size + 1 + PVECTOR_EXTRA_ALLOC);
+
+ index = pvector_impl_find_priority(old, priority);
+ /* Now at the insertion index. */
+ /* Copy the entries before 'index', then the new entry, then the rest
+ * shifted one slot toward the end. */
+ memcpy(new->vector, old->vector, index * sizeof old->vector[0]);
+ new->vector[index].ptr = ptr;
+ new->vector[index].priority = priority;
+ memcpy(&new->vector[index + 1], &old->vector[index],
+ (old->size - index) * sizeof old->vector[0]);
+ new->size = old->size + 1;
+
+ /* Publish the new copy before scheduling the old one to be freed. */
+ ovsrcu_set(&pvec->impl, new);
+ ovsrcu_postpone(free, old);
+ }
+}
+
+/* Removes 'ptr' from 'pvec'.  'pvec' must not be empty and must contain
+ * 'ptr' (both are asserted).
+ *
+ * The removal is never done in place: a new implementation without the entry
+ * is built and published with ovsrcu_set(), and the old one is freed via
+ * ovsrcu_postpone().  This way 'size' never decreases in a copy that readers
+ * can see, so that the readers can read the 'size' once in the beginning of
+ * each iteration. */
+void
+pvector_remove(struct pvector *pvec, void *ptr)
+{
+    struct pvector_impl *old = pvector_impl_get(pvec);
+    struct pvector_impl *new;
+    size_t n_before, n_after;
+    int index;
+
+    ovs_assert(old->size > 0);
+
+    /* Locate the entry to be deleted. */
+    index = pvector_impl_find(old, ptr);
+    ovs_assert(index >= 0);
+
+    n_before = index;
+    n_after = old->size - (index + 1);
+
+    /* Keep extra space for insertions to the end. */
+    new = pvector_impl_alloc(old->size - 1 + PVECTOR_EXTRA_ALLOC);
+
+    /* Copy everything except the entry at 'index'. */
+    memcpy(new->vector, old->vector, n_before * sizeof old->vector[0]);
+    memcpy(&new->vector[index], &old->vector[index + 1],
+           n_after * sizeof old->vector[0]);
+    new->size = old->size - 1;
+
+    ovsrcu_set(&pvec->impl, new);
+    ovsrcu_postpone(free, old);
+}
+
+/* Change entry's 'priority' and keep the vector ordered.
+ *
+ * 'ptr' must already be in 'pvec' (asserted).  If the new 'priority' would
+ * put the entry out of order relative to its immediate neighbor, a duplicate
+ * of the implementation is updated, re-sorted with qsort(), published with
+ * ovsrcu_set(), and the old one is freed via ovsrcu_postpone().  Otherwise
+ * the priority is simply overwritten in the implementation that readers may
+ * be using. */
+void
+pvector_change_priority(struct pvector *pvec, void *ptr, unsigned int priority)
+{
+ struct pvector_impl *old = pvector_impl_get(pvec);
+ int index = pvector_impl_find(old, ptr);
+
+ ovs_assert(index >= 0);
+ /* Now at the index of the entry to be updated. */
+
+ /* Reordering is needed only when the priority is raised above that of the
+ * preceding entry, or lowered below that of the following entry. */
+ if ((priority > old->vector[index].priority && index > 0
+ && priority > old->vector[index - 1].priority)
+ || (priority < old->vector[index].priority && index < old->size - 1
+ && priority < old->vector[index + 1].priority)) {
+ /* Have to reallocate to reorder. */
+ struct pvector_impl *new = pvector_impl_dup(old);
+
+ new->vector[index].priority = priority;
+ pvector_impl_sort(new);
+
+ ovsrcu_set(&pvec->impl, new);
+ ovsrcu_postpone(free, old);
+ } else {
+ /* Can update in place. Readers are free to use either value,
+ * so we do not try to synchronize here. */
+ old->vector[index].priority = priority;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PVECTOR_H
+#define PVECTOR_H 1
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "ovs-rcu.h"
+#include "util.h"
+
+/* Concurrent Priority Vector
+ * ==========================
+ *
+ * Concurrent priority vector holds non-NULL pointers to objects in a
+ * decreasing priority order (highest priority first) and allows readers to
+ * traverse the vector without being concerned about writers modifying the
+ * vector as they are traversing it.
+ *
+ * The priority order is maintained as a linear vector of elements to allow
+ * for efficient memory prefetching.
+ *
+ * Concurrency is implemented with OVS RCU so that the readers can assume
+ * that once they have taken a pointer to the vector with
+ * pvector_cursor_init(), the 'size' member will not decrease, so that
+ * they can safely read 'size' entries from 'vector', and find that each
+ * entry has a valid, non-NULL 'ptr', and the vector is in order from highest
+ * to lowest 'priority'. The 'priority' values can change any time, but only
+ * so that the order of the entries does not change, so readers can use
+ * 'priority' values read at any time after acquisition of the vector pointer.
+ *
+ * Writers can concurrently add entries to the end of the vector, incrementing
+ * 'size', or update the 'priority' value of an entry, but only if that does
+ * not change the ordering of the entries. Writers will never change the 'ptr'
+ * values, or decrement the 'size' on a copy that readers have access to.
+ */
+
+/* One element of a priority vector: a client pointer and its priority. */
+struct pvector_entry {
+ unsigned int priority; /* Sort key; higher priorities come first. */
+ void *ptr; /* Client object; pvector_insert() asserts non-NULL. */
+};
+
+/* Writers will preallocate space for some entries at the end to avoid future
+ * reallocations.  This is the number of spare entry slots added whenever a
+ * new implementation is allocated by pvector_init(), pvector_insert() or
+ * pvector_remove(). */
+enum { PVECTOR_EXTRA_ALLOC = 4 };
+
+/* The storage behind a 'struct pvector': a small header followed by
+ * 'allocated' entry slots, of which the first 'size' are in use.  Writers
+ * replace the whole object when a change cannot be made in place. */
+struct pvector_impl {
+ size_t size; /* Number of entries in the vector. */
+ size_t allocated; /* Number of allocated entries. */
+ struct pvector_entry vector[]; /* Entries, highest priority first. */
+};
+
+/* Concurrent priority vector.
+ *
+ * Holds only an RCU-protected pointer to the current 'struct pvector_impl';
+ * pvector_destroy() leaves it set to NULL. */
+struct pvector {
+ OVSRCU_TYPE(struct pvector_impl *) impl;
+};
+
+/* Initialization.
+ *
+ * pvector_destroy() releases only the vector's own storage; the client
+ * remains responsible for the objects whose pointers were stored. */
+void pvector_init(struct pvector *);
+void pvector_destroy(struct pvector *);
+
+/* Count.
+ *
+ * Both read the 'size' of the implementation that is current at the time of
+ * the call. */
+static inline size_t pvector_count(const struct pvector *);
+static inline bool pvector_is_empty(const struct pvector *);
+
+/* Insertion and deletion.
+ *
+ * In each of these the 'void *' argument is the object pointer stored in (or
+ * to be stored in) the vector, and the 'unsigned int' argument is its (new)
+ * priority.  pvector_change_priority() and pvector_remove() assert that the
+ * pointer is present in the vector. */
+void pvector_insert(struct pvector *, void *, unsigned int);
+void pvector_change_priority(struct pvector *, void *, unsigned int);
+void pvector_remove(struct pvector *, void *);
+
+/* Iteration.
+ *
+ *
+ * Thread-safety
+ * =============
+ *
+ * Iteration is safe even in a pvector that is changing concurrently.
+ * Multiple writers must exclude each other via e.g., a mutex.
+ *
+ * Example
+ * =======
+ *
+ * struct my_node {
+ * int data;
+ * };
+ *
+ * struct my_node elem1, elem2, *iter;
+ * struct pvector my_pvector;
+ *
+ * pvector_init(&my_pvector);
+ * ...add data...
+ * pvector_insert(&my_pvector, &elem1, 1);
+ * pvector_insert(&my_pvector, &elem2, 2);
+ * ...
+ * PVECTOR_FOR_EACH (iter, &my_pvector) {
+ * ...operate on '*iter'...
+ * ...elem2 to be seen before elem1...
+ * }
+ * ...
+ * pvector_destroy(&my_pvector);
+ *
+ * There is no PVECTOR_FOR_EACH_SAFE variant as iteration is performed on RCU
+ * protected instance of the priority vector. Any concurrent modifications
+ * that would be disruptive for readers (such as deletions), will be performed
+ * on a new instance. To see any of the modifications, a new iteration loop
+ * has to be started.
+ *
+ * The PVECTOR_FOR_EACH_PRIORITY limits the iteration to entries with higher
+ * than given priority and allows for object lookahead.
+ *
+ * The iteration loop must be completed without entering the OVS RCU quiescent
+ * period. That is, an old iteration loop must not be continued after any
+ * blocking IO (VLOG is non-blocking, so that is OK).
+ */
+/* Reader-side iteration state.  pvector_cursor_init() fills it in from the
+ * implementation that is current at that time, and pvector_cursor_next()
+ * advances it. */
+struct pvector_cursor {
+ size_t size; /* Number of entries in the vector. */
+ size_t entry_idx; /* Current index; (size_t) -1 before the first entry. */
+ const struct pvector_entry *vector; /* Entries being iterated. */
+};
+
+/* Cursor primitives used by the PVECTOR_FOR_EACH* macros.
+ *
+ * 'n_ahead' is how many entries ahead of the current one to prefetch and
+ * 'obj_size' is the number of bytes of each pointed-to object to prefetch.
+ * pvector_cursor_next() returns the next entry's pointer, or NULL when the
+ * entries run out or the next entry's priority is not higher than
+ * 'stop_at_priority'. */
+static inline struct pvector_cursor pvector_cursor_init(const struct pvector *,
+ size_t n_ahead,
+ size_t obj_size);
+static inline void *pvector_cursor_next(struct pvector_cursor *,
+ int64_t stop_at_priority,
+ size_t n_ahead, size_t obj_size);
+static inline void pvector_cursor_lookahead(const struct pvector_cursor *,
+ int n, size_t size);
+
+/* Iterates over every entry of 'PVECTOR' in vector order, assigning each
+ * stored pointer to 'PTR' in turn.  No object prefetching is requested, and
+ * the stop priority of -1 is below every (unsigned) entry priority, so the
+ * loop ends only when the entries run out.
+ *
+ * The loop declares a cursor variable named 'cursor__' in its own scope. */
+#define PVECTOR_FOR_EACH(PTR, PVECTOR) \
+ for (struct pvector_cursor cursor__ = pvector_cursor_init(PVECTOR, 0, 0); \
+ ((PTR) = pvector_cursor_next(&cursor__, -1, 0, 0)) != NULL; )
+
+/* Loop while priority is higher than 'PRIORITY' and prefetch objects
+ * of size 'SZ' 'N' objects ahead from the current object.
+ *
+ * 'PRIORITY' is evaluated again before every iteration, so a loop body that
+ * raises the variable passed as 'PRIORITY' can end the iteration early. */
+#define PVECTOR_FOR_EACH_PRIORITY(PTR, PRIORITY, N, SZ, PVECTOR) \
+ for (struct pvector_cursor cursor__ = pvector_cursor_init(PVECTOR, N, SZ); \
+ ((PTR) = pvector_cursor_next(&cursor__, PRIORITY, N, SZ)) != NULL; )
+
+\f
+/* Inline implementations. */
+
+/* Returns a cursor positioned just before the first entry of the
+ * implementation that is current in 'pvec'.  The returned cursor is advanced
+ * with pvector_cursor_next().
+ *
+ * The whole entry array is prefetched, as are the first 'n_ahead' pointed-to
+ * objects ('obj_size' bytes each), so that together with the lookahead done
+ * by pvector_cursor_next() every object has been prefetched before it is
+ * visited. */
+static inline struct pvector_cursor
+pvector_cursor_init(const struct pvector *pvec,
+                    size_t n_ahead, size_t obj_size)
+{
+    const struct pvector_impl *impl;
+    struct pvector_cursor cursor;
+    size_t size;
+
+    impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
+
+    /* Read 'size' exactly once: a writer may append an entry in place and
+     * increment it at any time.  The acquire fence pairs with the release
+     * fence that pvector_insert() issues between storing a new entry and
+     * incrementing 'size', so that the entries counted by 'size' are read
+     * only after 'size' itself. */
+    size = impl->size;
+    atomic_thread_fence(memory_order_acquire);
+
+    ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]);
+
+    cursor.size = size;
+    cursor.vector = impl->vector;
+    cursor.entry_idx = -1;
+
+    /* Prefetch the first objects.  'entry_idx' is one before the first entry
+     * here, so a lookahead of 'i' reaches entry 'i - 1'; starting from 1
+     * covers entries 0...n_ahead-1.  The first pvector_cursor_next() call
+     * then continues from entry 'n_ahead'. */
+    for (size_t i = 1; i <= n_ahead; i++) {
+        pvector_cursor_lookahead(&cursor, i, obj_size);
+    }
+    return cursor;
+}
+
+/* Advances 'cursor' by one entry and returns that entry's pointer.  Returns
+ * NULL instead if the cursor has moved past the last entry, or if the new
+ * entry's priority is not higher than 'stop_at_priority'.
+ *
+ * When an entry is returned and 'n_ahead' is nonzero, 'obj_size' bytes of the
+ * object 'n_ahead' entries further on are prefetched. */
+static inline void *pvector_cursor_next(struct pvector_cursor *cursor,
+                                        int64_t stop_at_priority,
+                                        size_t n_ahead, size_t obj_size)
+{
+    const struct pvector_entry *entry;
+
+    if (++cursor->entry_idx >= cursor->size) {
+        return NULL;
+    }
+
+    entry = &cursor->vector[cursor->entry_idx];
+    if (entry->priority <= stop_at_priority) {
+        return NULL;
+    }
+
+    if (n_ahead) {
+        pvector_cursor_lookahead(cursor, n_ahead, obj_size);
+    }
+    return entry->ptr;
+}
+
+/* Prefetches 'size' bytes of the object that is 'n' entries ahead of the
+ * cursor's current entry, if there is such an entry.
+ *
+ * The target index is computed in 'size_t' arithmetic, so when the cursor is
+ * still before the first entry ('entry_idx' of (size_t) -1) a lookahead of
+ * 'n' refers to entry 'n - 1'. */
+static inline void pvector_cursor_lookahead(const struct pvector_cursor *cursor,
+                                            int n, size_t size)
+{
+    size_t ahead_idx = cursor->entry_idx + n;
+
+    if (ahead_idx >= cursor->size) {
+        return;
+    }
+    ovs_prefetch_range(cursor->vector[ahead_idx].ptr, size);
+}
+
+/* Returns the number of entries in the implementation that is current in
+ * 'pvec' at the time of the call. */
+static inline size_t pvector_count(const struct pvector *pvec)
+{
+    const struct pvector_impl *impl;
+
+    impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
+    return impl->size;
+}
+
+/* Returns true if 'pvec' currently holds no entries, false otherwise. */
+static inline bool pvector_is_empty(const struct pvector *pvec)
+{
+    return !pvector_count(pvec);
+}
+
+#endif /* pvector.h */