/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "cmap.h"
#include "bitmap.h"
#include "hash.h"
#include "ovs-rcu.h"
#include "random.h"
#include "util.h"

/* Optimistic Concurrent Cuckoo Hash
 * =================================
 *
 * A "cuckoo hash" is an open addressing hash table schema, designed such that
 * a given element can be in one of only a small number of buckets 'd', each of
 * which holds up to a small number 'k' elements.  Thus, the expected and
 * worst-case lookup times are O(1) because they require comparing no more than
 * a fixed number of elements (k * d).  Inserting a new element can require
 * moving around existing elements, but it is also O(1) amortized expected
 * time.
 *
 * An optimistic concurrent hash table goes one step further, making it
 * possible for a single writer to execute concurrently with any number of
 * readers without requiring the readers to take any locks.
 *
 * This cuckoo hash implementation uses:
 *
 *    - Two hash functions (d=2).  More hash functions allow for a higher load
 *      factor, but increasing 'k' is easier and the benefits of increasing 'd'
 *      quickly fall off with the 'k' values used here.  Also, the method of
 *      generating hashes used in this implementation is hard to reasonably
 *      extend beyond d=2.  Finally, each additional hash function means that a
 *      lookup has to look at least one extra cache line.
 *
 *    - 5 or 7 elements per bucket (k=5 or k=7), chosen to make buckets
 *      exactly one cache line in size.
 *
 * According to Erlingsson [4], these parameters suggest a maximum load factor
 * of about 93%.  The current implementation is conservative, expanding the
 * hash table when it is over 85% full.
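 *
 * As a concrete illustration (assuming a 64-byte cache line and 64-bit
 * pointers, as in the macros below): an entry is a 4-byte hash plus an
 * 8-byte pointer, so k = (64 - 4) / 12 = 5 entries fit in one bucket, and a
 * lookup examines at most k * d = 10 entries spread over 2 cache lines.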
 *
 *
 * Hash Functions
 * ==============
 *
 * A cuckoo hash requires multiple hash functions.  When reorganizing the hash
 * becomes too difficult, it also requires the ability to change the hash
 * functions.  Requiring the client to provide multiple hashes and to be able
 * to change them to new hashes upon insertion is inconvenient.
 *
 * This implementation takes another approach.  The client provides a single,
 * fixed hash.  The cuckoo hash internally "rehashes" this hash against a
 * randomly selected basis value (see rehash()).  This rehashed value is one of
 * the two hashes.  The other hash is computed by 16-bit circular rotation of
 * the rehashed value.  Updating the basis changes the hash functions.
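 *
 * For illustration, here is roughly how a lookup derives its two candidate
 * buckets from the client-provided 'hash' (a condensed sketch of what
 * rehash(), other_hash(), and cmap_find() below actually do):
 *
 *     uint32_t h1 = hash_finish(impl->basis, hash);
 *     uint32_t h2 = (h1 << 16) | (h1 >> 16);
 *     struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
 *     struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];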
 *
 * To work properly, the hash functions used by a cuckoo hash must be
 * independent.  If one hash function is a function of the other (e.g. h2(x) =
 * h1(x) + 1, or h2(x) = hash(h1(x))), then insertion will eventually fail
 * catastrophically (loop forever) because of collisions.  With this rehashing
 * technique, the two hashes are completely independent for masks up to 16 bits
 * wide.  For masks wider than 16 bits, only 32-n bits are independent between
 * the two hashes.  Thus, it becomes risky to grow a cuckoo hash table beyond
 * about 2**24 buckets (about 71 million elements with k=5 and maximum load
 * 85%).  Fortunately, Open vSwitch does not normally deal with hash tables
 * this large.
 *
 *
 * Handling Duplicates
 * ===================
 *
 * This cuckoo hash table implementation deals with duplicate client-provided
 * hash values by chaining: the second and subsequent cmap_nodes with a given
 * hash are chained off the initially inserted node's 'next' member.  The hash
 * table maintains the invariant that a single client-provided hash value
 * exists in only a single chain in a single bucket (even though that hash
 * could be stored in two buckets).
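 *
 * Consequently, cmap_find() returns the head of that chain, and a caller
 * that can see hash collisions walks the chain and compares keys itself.  A
 * sketch, using a hypothetical client struct 'my_node' that embeds a
 * 'struct cmap_node node' member and carries a 'key' field:
 *
 *     struct my_node *n;
 *
 *     CMAP_NODE_FOR_EACH (n, node, cmap_find(&cmap, hash)) {
 *         if (n->key == key) {
 *             return n;
 *         }
 *     }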
 *
 *
 * References
 * ==========
 *
 * [1] D. Zhou, B. Fan, H. Lim, M. Kaminsky, D. G. Andersen, "Scalable, High
 *     Performance Ethernet Forwarding with CuckooSwitch".  In Proc. 9th
 *     CoNEXT, Dec. 2013.
 *
 * [2] B. Fan, D. G. Andersen, and M. Kaminsky. "MemC3: Compact and concurrent
 *     memcache with dumber caching and smarter hashing".  In Proc. 10th USENIX
 *     NSDI, Apr. 2013.
 *
 * [3] R. Pagh and F. Rodler. "Cuckoo hashing". Journal of Algorithms, 51(2):
 *     122-144, May 2004.
 *
 * [4] U. Erlingsson, M. Manasse, F. McSherry, "A Cool and Practical
 *     Alternative to Traditional Hash Tables".  In Proc. 7th Workshop on
 *     Distributed Data and Structures (WDAS'06), 2006.
 */

/* An entry is an int and a pointer: 8 bytes on 32-bit, 12 bytes on 64-bit. */
#define CMAP_ENTRY_SIZE (4 + (UINTPTR_MAX == UINT32_MAX ? 4 : 8))

/* Number of entries per bucket: 7 on 32-bit, 5 on 64-bit. */
#define CMAP_K ((CACHE_LINE_SIZE - 4) / CMAP_ENTRY_SIZE)

/* Pad to make a bucket a full cache line in size: 4 on 32-bit, 0 on 64-bit. */
#define CMAP_PADDING ((CACHE_LINE_SIZE - 4) - (CMAP_K * CMAP_ENTRY_SIZE))

/* A cuckoo hash bucket.  Designed to be cache-aligned and exactly one cache
 * line long. */
struct cmap_bucket {
    /* Allows readers to track in-progress changes.  Initially zero, each
     * writer increments this value just before and just after each change (see
     * cmap_set_bucket()).  Thus, a reader can ensure that it gets a consistent
     * snapshot by waiting for the counter to become even (see
     * read_even_counter()), then checking that its value does not change while
     * examining the bucket (see cmap_find()). */
    atomic_uint32_t counter;

    /* (hash, node) slots.  They are parallel arrays instead of an array of
     * structs to reduce the amount of space lost to padding.
     *
     * The slots are in no particular order.  A null pointer indicates that a
     * pair is unused.  In-use slots are not necessarily in the earliest
     * slots. */
    uint32_t hashes[CMAP_K];
    struct cmap_node nodes[CMAP_K];

    /* Padding to make cmap_bucket exactly one cache line long. */
#if CMAP_PADDING > 0
    uint8_t pad[CMAP_PADDING];
#endif
};
BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == CACHE_LINE_SIZE);
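
/* For example, with 64-byte cache lines and 64-bit pointers: 4 bytes of
 * counter + 5 hashes (20 bytes) + 5 pointers (40 bytes) = 64 bytes exactly,
 * so CMAP_PADDING is 0 and the assertion above holds with no pad[] at all. */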

/* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
 * enlarging a cmap.  Reasonable values lie between about 75% and 93%.  Smaller
 * values waste memory; larger values increase the average insertion time. */
#define CMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))

/* The implementation of a concurrent hash map. */
struct cmap_impl {
    unsigned int n;             /* Number of in-use elements. */
    unsigned int max_n;         /* Max elements before enlarging. */
    uint32_t mask;              /* Number of 'buckets', minus one. */
    uint32_t basis;             /* Basis for rehashing client's hash values. */

    /* Padding to make cmap_impl exactly one cache line long. */
    uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 4];

    struct cmap_bucket buckets[];
};
BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE);

static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);

/* Explicit inline keywords in utility functions seem to be necessary
 * to prevent performance regression on cmap_find(). */

/* Given a rehashed value 'hash', returns the other hash for that rehashed
 * value.  This is symmetric: other_hash(other_hash(x)) == x.  (See also "Hash
 * Functions" at the top of this file.) */
static inline uint32_t
other_hash(uint32_t hash)
{
    return (hash << 16) | (hash >> 16);
}

/* Returns the rehashed value for 'hash' within 'impl'.  (See also "Hash
 * Functions" at the top of this file.) */
static inline uint32_t
rehash(const struct cmap_impl *impl, uint32_t hash)
{
    return hash_finish(impl->basis, hash);
}

/* Not always inlined without the inline keyword. */
static inline struct cmap_impl *
cmap_get_impl(const struct cmap *cmap)
{
    return ovsrcu_get(struct cmap_impl *, &cmap->impl);
}

static uint32_t
calc_max_n(uint32_t mask)
{
    return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MAX_LOAD) >> 32;
}
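
/* For instance, with k=5 and the default 85% load factor, a single-bucket
 * table (mask=0) yields (1 * 5 * 0.85 * 2**32) >> 32 = 4 elements, and each
 * doubling of the bucket count doubles the threshold from there. */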

static struct cmap_impl *
cmap_impl_create(uint32_t mask)
{
    struct cmap_impl *impl;

    ovs_assert(is_pow2(mask + 1));

    impl = xzalloc_cacheline(sizeof *impl
                             + (mask + 1) * sizeof *impl->buckets);
    impl->n = 0;
    impl->max_n = calc_max_n(mask);
    impl->mask = mask;
    impl->basis = random_uint32();

    return impl;
}

/* Initializes 'cmap' as an empty concurrent hash map. */
void
cmap_init(struct cmap *cmap)
{
    ovsrcu_set(&cmap->impl, cmap_impl_create(0));
}

/* Destroys 'cmap'.
 *
 * The client is responsible for destroying any data previously held in
 * 'cmap'. */
void
cmap_destroy(struct cmap *cmap)
{
    if (cmap) {
        ovsrcu_postpone(free_cacheline, cmap_get_impl(cmap));
    }
}

/* Returns the number of elements in 'cmap'. */
size_t
cmap_count(const struct cmap *cmap)
{
    return cmap_get_impl(cmap)->n;
}

/* Returns true if 'cmap' is empty, false otherwise. */
bool
cmap_is_empty(const struct cmap *cmap)
{
    return cmap_count(cmap) == 0;
}

static inline uint32_t
read_counter(const struct cmap_bucket *bucket_)
{
    struct cmap_bucket *bucket = CONST_CAST(struct cmap_bucket *, bucket_);
    uint32_t counter;

    atomic_read_explicit(&bucket->counter, &counter, memory_order_acquire);

    return counter;
}

static inline uint32_t
read_even_counter(const struct cmap_bucket *bucket)
{
    uint32_t counter;

    do {
        counter = read_counter(bucket);
    } while (OVS_UNLIKELY(counter & 1));

    return counter;
}

static inline bool
counter_changed(const struct cmap_bucket *b_, uint32_t c)
{
    struct cmap_bucket *b = CONST_CAST(struct cmap_bucket *, b_);
    uint32_t counter;

    /* Need to make sure the counter read is not moved up, before the hash and
     * cmap_node_next().  Using atomic_read_explicit() with
     * memory_order_acquire would allow prior reads to be moved after the
     * barrier.  atomic_thread_fence prevents all following memory accesses
     * from moving prior to preceding loads. */
    atomic_thread_fence(memory_order_acquire);
    atomic_read_relaxed(&b->counter, &counter);

    return OVS_UNLIKELY(counter != c);
}

static inline const struct cmap_node *
cmap_find_in_bucket(const struct cmap_bucket *bucket, uint32_t hash)
{
    for (int i = 0; i < CMAP_K; i++) {
        if (bucket->hashes[i] == hash) {
            return cmap_node_next(&bucket->nodes[i]);
        }
    }
    return NULL;
}

static inline const struct cmap_node *
cmap_find__(const struct cmap_bucket *b1, const struct cmap_bucket *b2,
            uint32_t hash)
{
    uint32_t c1, c2;
    const struct cmap_node *node;

    do {
        do {
            c1 = read_even_counter(b1);
            node = cmap_find_in_bucket(b1, hash);
        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
        if (node) {
            break;
        }
        do {
            c2 = read_even_counter(b2);
            node = cmap_find_in_bucket(b2, hash);
        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
    } while (OVS_UNLIKELY(counter_changed(b1, c1)));

    return node;
}

/* Searches 'cmap' for an element with the specified 'hash'.  If one or more is
 * found, returns a pointer to the first one, otherwise a null pointer.  All of
 * the nodes on the returned list are guaranteed to have exactly the given
 * 'hash'.
 *
 * This function works even if 'cmap' is changing concurrently.  If 'cmap' is
 * not changing, then cmap_find_protected() is slightly faster.
 *
 * CMAP_FOR_EACH_WITH_HASH is usually more convenient. */
const struct cmap_node *
cmap_find(const struct cmap *cmap, uint32_t hash)
{
    const struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);

    return cmap_find__(&impl->buckets[h1 & impl->mask],
                       &impl->buckets[h2 & impl->mask],
                       hash);
}

/* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
 * and sets the corresponding pointer in 'nodes', if the hash value was
 * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
 * i.e., no NULL pointers are stored there.
 * Returns a map where a bit is set to 1 if the corresponding 'nodes' pointer
 * was stored, 0 otherwise.
 * Generally, the caller wants to use CMAP_NODE_FOR_EACH to verify for
 * hash collisions. */
unsigned long
cmap_find_batch(const struct cmap *cmap, unsigned long map,
                uint32_t hashes[], const struct cmap_node *nodes[])
{
    const struct cmap_impl *impl = cmap_get_impl(cmap);
    unsigned long result = map;
    int i;
    uint32_t h1s[sizeof map * CHAR_BIT];
    const struct cmap_bucket *b1s[sizeof map * CHAR_BIT];
    const struct cmap_bucket *b2s[sizeof map * CHAR_BIT];
    uint32_t c1s[sizeof map * CHAR_BIT];

    /* Compute hashes and prefetch 1st buckets. */
    ULONG_FOR_EACH_1(i, map) {
        h1s[i] = rehash(impl, hashes[i]);
        b1s[i] = &impl->buckets[h1s[i] & impl->mask];
        OVS_PREFETCH(b1s[i]);
    }
    /* Lookups, Round 1. Only look up at the first bucket. */
    ULONG_FOR_EACH_1(i, map) {
        uint32_t c1;
        const struct cmap_bucket *b1 = b1s[i];
        const struct cmap_node *node;

        do {
            c1 = read_even_counter(b1);
            node = cmap_find_in_bucket(b1, hashes[i]);
        } while (OVS_UNLIKELY(counter_changed(b1, c1)));

        if (!node) {
            /* Not found (yet); Prefetch the 2nd bucket. */
            b2s[i] = &impl->buckets[other_hash(h1s[i]) & impl->mask];
            OVS_PREFETCH(b2s[i]);
            c1s[i] = c1; /* We may need to check this after Round 2. */
            continue;
        }
        /* Found. */
        ULONG_SET0(map, i); /* Ignore this on round 2. */
        OVS_PREFETCH(node);
        nodes[i] = node;
    }
    /* Round 2. Look into the 2nd bucket, if needed. */
    ULONG_FOR_EACH_1(i, map) {
        uint32_t c2;
        const struct cmap_bucket *b2 = b2s[i];
        const struct cmap_node *node;

        do {
            c2 = read_even_counter(b2);
            node = cmap_find_in_bucket(b2, hashes[i]);
        } while (OVS_UNLIKELY(counter_changed(b2, c2)));

        if (!node) {
            /* Not found, but the node may have been moved from b2 to b1 right
             * after we finished with b1 earlier.  We just got a clean reading
             * of the 2nd bucket, so we check the counter of the 1st bucket
             * only.  However, we need to check both buckets again, as the
             * entry may be moved again to the 2nd bucket.  Basically, we
             * need to loop as long as it takes to get stable readings of
             * both buckets.  cmap_find__() does that, and now that we have
             * fetched both buckets we can just use it. */
            if (OVS_UNLIKELY(counter_changed(b1s[i], c1s[i]))) {
                node = cmap_find__(b1s[i], b2s[i], hashes[i]);
                if (node) {
                    goto found;
                }
            }
            /* Not found. */
            ULONG_SET0(result, i); /* Fix the result. */
            continue;
        }
found:
        OVS_PREFETCH(node);
        nodes[i] = node;
    }
    return result;
}
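
/* A usage sketch (illustrative, with hypothetical caller-side names): given
 * parallel arrays 'keys[]' and 'hashes[]' and a bitmap 'map' of pending
 * lookups, a caller might post-process the batch like this:
 *
 *     const struct cmap_node *nodes[sizeof map * CHAR_BIT];
 *     unsigned long found = cmap_find_batch(&cmap, map, hashes, nodes);
 *     int i;
 *
 *     ULONG_FOR_EACH_1(i, found) {
 *         struct my_node *n;
 *
 *         CMAP_NODE_FOR_EACH (n, node, nodes[i]) {
 *             if (n->key == keys[i]) {
 *                 ...use 'n'...
 *             }
 *         }
 *     }
 */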

static int
cmap_find_slot_protected(struct cmap_bucket *b, uint32_t hash)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (b->hashes[i] == hash && cmap_node_next_protected(&b->nodes[i])) {
            return i;
        }
    }
    return -1;
}

static struct cmap_node *
cmap_find_bucket_protected(struct cmap_impl *impl, uint32_t hash, uint32_t h)
{
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (b->hashes[i] == hash) {
            return cmap_node_next_protected(&b->nodes[i]);
        }
    }
    return NULL;
}

/* Like cmap_find(), but only for use if 'cmap' cannot change concurrently.
 *
 * CMAP_FOR_EACH_WITH_HASH_PROTECTED is usually more convenient. */
struct cmap_node *
cmap_find_protected(const struct cmap *cmap, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    struct cmap_node *node;

    node = cmap_find_bucket_protected(impl, hash, h1);
    if (node) {
        return node;
    }
    return cmap_find_bucket_protected(impl, hash, h2);
}

static int
cmap_find_empty_slot_protected(const struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (!cmap_node_next_protected(&b->nodes[i])) {
            return i;
        }
    }
    return -1;
}

static void
cmap_set_bucket(struct cmap_bucket *b, int i,
                struct cmap_node *node, uint32_t hash)
{
    uint32_t c;

    atomic_read_explicit(&b->counter, &c, memory_order_acquire);
    atomic_store_explicit(&b->counter, c + 1, memory_order_release);
    ovsrcu_set(&b->nodes[i].next, node); /* Also atomic. */
    b->hashes[i] = hash;
    atomic_store_explicit(&b->counter, c + 2, memory_order_release);
}

/* Searches 'b' for a node with the given 'hash'.  If it finds one, adds
 * 'new_node' to the node's linked list and returns true.  If it does not find
 * one, returns false. */
static bool
cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
                struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (b->hashes[i] == hash) {
            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

            if (node) {
                struct cmap_node *p;

                /* The common case is that 'new_node' is a singleton,
                 * with a null 'next' pointer.  Rehashing can add a
                 * longer chain, but due to our invariant of always
                 * having all nodes with the same (user) hash value at
                 * a single chain, rehashing will always insert the
                 * chain to an empty node.  The only way we can end up
                 * here is by the user inserting a chain of nodes at
                 * once.  Find the end of the chain starting at
                 * 'new_node', then splice 'node' to the end of that
                 * chain. */
                p = new_node;
                for (;;) {
                    struct cmap_node *next = cmap_node_next_protected(p);

                    if (!next) {
                        break;
                    }
                    p = next;
                }
                ovsrcu_set_hidden(&p->next, node);
            } else {
                /* The hash value is there from some previous insertion, but
                 * the associated node has been removed.  We're not really
                 * inserting a duplicate, but we can still reuse the slot. */
            }

            /* Change the bucket to point to 'new_node'.  This is a degenerate
             * form of cmap_set_bucket() that doesn't update the counter since
             * we're only touching one field and in a way that doesn't change
             * the bucket's meaning for readers. */
            ovsrcu_set(&b->nodes[i].next, new_node);

            return true;
        }
    }
    return false;
}

/* Searches 'b' for an empty slot.  If successful, stores 'node' and 'hash' in
 * the slot and returns true.  Otherwise, returns false. */
static bool
cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
                   struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (!cmap_node_next_protected(&b->nodes[i])) {
            cmap_set_bucket(b, i, node, hash);
            return true;
        }
    }
    return false;
}

/* Returns the other bucket that b->nodes[slot] could occupy in 'impl'.  (This
 * might be the same as 'b'.) */
static struct cmap_bucket *
other_bucket_protected(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
{
    uint32_t h1 = rehash(impl, b->hashes[slot]);
    uint32_t h2 = other_hash(h1);
    uint32_t b_idx = b - impl->buckets;
    uint32_t other_h = (h1 & impl->mask) == b_idx ? h2 : h1;

    return &impl->buckets[other_h & impl->mask];
}

/* 'new_node' is to be inserted into 'impl', but both candidate buckets 'b1'
 * and 'b2' are full.  This function attempts to rearrange buckets within
 * 'impl' to make room for 'new_node'.
 *
 * The implementation is a general-purpose breadth-first search.  At first
 * glance, this is more complex than a random walk through 'impl' (suggested by
 * some references), but random walks have a tendency to loop back through a
 * single bucket.  We have to move nodes backward along the path that we find,
 * so that no node actually disappears from the hash table, which means a
 * random walk would have to be careful to deal with loops.  By contrast, a
 * successful breadth-first search always finds a *shortest* path through the
 * hash table, and a shortest path will never contain loops, so it avoids that
 * problem. */
static bool
cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                uint32_t hash, struct cmap_bucket *b1, struct cmap_bucket *b2)
{
    enum { MAX_DEPTH = 4 };

    /* A path from 'start' to 'end' via the 'n' steps in 'slots[]'.
     *
     * One can follow the path via:
     *
     *     struct cmap_bucket *b;
     *     int i;
     *
     *     b = path->start;
     *     for (i = 0; i < path->n; i++) {
     *         b = other_bucket_protected(impl, b, path->slots[i]);
     *     }
     *     ovs_assert(b == path->end);
     */
    struct cmap_path {
        struct cmap_bucket *start; /* First bucket along the path. */
        struct cmap_bucket *end;   /* Last bucket on the path. */
        uint8_t slots[MAX_DEPTH];  /* Slots used for each hop. */
        int n;                     /* Number of slots[]. */
    };

    /* We need to limit the amount of work we do trying to find a path.  It
     * might actually be impossible to rearrange the cmap, and after some time
     * it is likely to be easier to rehash the entire cmap.
     *
     * This value of MAX_QUEUE is an arbitrary limit suggested by one of the
     * references.  Empirically, it seems to work OK. */
    enum { MAX_QUEUE = 500 };
    struct cmap_path queue[MAX_QUEUE];
    int head = 0, tail = 0;

    /* Add 'b1' and 'b2' as starting points for the search. */
    queue[head].start = b1;
    queue[head].end = b1;
    queue[head].n = 0;
    head++;
    if (b1 != b2) {
        queue[head].start = b2;
        queue[head].end = b2;
        queue[head].n = 0;
        head++;
    }

    while (tail < head) {
        const struct cmap_path *path = &queue[tail++];
        struct cmap_bucket *this = path->end;
        int i;

        for (i = 0; i < CMAP_K; i++) {
            struct cmap_bucket *next = other_bucket_protected(impl, this, i);
            int j;

            if (this == next) {
                continue;
            }

            j = cmap_find_empty_slot_protected(next);
            if (j >= 0) {
                /* We've found a path along which we can rearrange the hash
                 * table:  Start at path->start, follow all the slots in
                 * path->slots[], then follow slot 'i', then the bucket you
                 * arrive at has slot 'j' empty. */
                struct cmap_bucket *buckets[MAX_DEPTH + 2];
                int slots[MAX_DEPTH + 2];
                int k;

                /* Figure out the full sequence of slots. */
                for (k = 0; k < path->n; k++) {
                    slots[k] = path->slots[k];
                }
                slots[path->n] = i;
                slots[path->n + 1] = j;

                /* Figure out the full sequence of buckets. */
                buckets[0] = path->start;
                for (k = 0; k <= path->n; k++) {
                    buckets[k + 1] = other_bucket_protected(impl, buckets[k],
                                                            slots[k]);
                }

                /* Now the path is fully expressed.  One can start from
                 * buckets[0], go via slots[0] to buckets[1], via slots[1] to
                 * buckets[2], and so on.
                 *
                 * Move all the nodes across the path "backward".  After each
                 * step some node appears in two buckets.  Thus, every node is
                 * always visible to a concurrent search. */
                for (k = path->n + 1; k > 0; k--) {
                    int slot = slots[k - 1];

                    cmap_set_bucket(
                        buckets[k], slots[k],
                        cmap_node_next_protected(&buckets[k - 1]->nodes[slot]),
                        buckets[k - 1]->hashes[slot]);
                }

                /* Finally, replace the first node on the path by
                 * 'new_node'. */
                cmap_set_bucket(buckets[0], slots[0], new_node, hash);

                return true;
            }

            if (path->n < MAX_DEPTH && head < MAX_QUEUE) {
                struct cmap_path *new_path = &queue[head++];

                *new_path = *path;
                new_path->end = next;
                new_path->slots[new_path->n++] = i;
            }
        }
    }

    return false;
}

/* Adds 'node', with the given 'hash', to 'impl'.
 *
 * 'node' is ordinarily a single node, with a null 'next' pointer.  When
 * rehashing, however, it may be a longer chain of nodes. */
static bool
cmap_try_insert(struct cmap_impl *impl, struct cmap_node *node, uint32_t hash)
{
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
    struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];

    return (OVS_UNLIKELY(cmap_insert_dup(node, hash, b1) ||
                         cmap_insert_dup(node, hash, b2)) ||
            OVS_LIKELY(cmap_insert_bucket(node, hash, b1) ||
                       cmap_insert_bucket(node, hash, b2)) ||
            cmap_insert_bfs(impl, node, hash, b1, b2));
}

/* Inserts 'node', with the given 'hash', into 'cmap'.  The caller must ensure
 * that 'cmap' cannot change concurrently (from another thread).  If duplicates
 * are undesirable, the caller must have already verified that 'cmap' does not
 * contain a duplicate of 'node'.
 *
 * Returns the current number of nodes in the cmap after the insertion. */
size_t
cmap_insert(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);

    ovsrcu_set_hidden(&node->next, NULL);

    if (OVS_UNLIKELY(impl->n >= impl->max_n)) {
        impl = cmap_rehash(cmap, (impl->mask << 1) | 1);
    }

    while (OVS_UNLIKELY(!cmap_try_insert(impl, node, hash))) {
        impl = cmap_rehash(cmap, impl->mask);
    }
    return ++impl->n;
}
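
/* A usage sketch (illustrative; 'my_node', 'key', and the choice of
 * hash_int() are the caller's, not part of this API): the caller computes
 * the hash itself and passes the embedded cmap_node:
 *
 *     struct my_node *n = xmalloc(sizeof *n);
 *
 *     n->key = key;
 *     cmap_insert(&cmap, &n->node, hash_int(key, 0));
 */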

static bool
cmap_replace__(struct cmap_impl *impl, struct cmap_node *node,
               struct cmap_node *replacement, uint32_t hash, uint32_t h)
{
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
    int slot;

    slot = cmap_find_slot_protected(b, hash);
    if (slot < 0) {
        return false;
    }

    /* The pointer to 'node' is changed to point to 'replacement',
     * which is the next node if no replacement node is given. */
    if (!replacement) {
        replacement = cmap_node_next_protected(node);
    } else {
        /* 'replacement' takes the position of 'node' in the list. */
        ovsrcu_set_hidden(&replacement->next, cmap_node_next_protected(node));
    }

    struct cmap_node *iter = &b->nodes[slot];
    for (;;) {
        struct cmap_node *next = cmap_node_next_protected(iter);

        if (next == node) {
            ovsrcu_set(&iter->next, replacement);
            return true;
        }
        iter = next;
    }
}

/* Replaces 'old_node' in 'cmap' with 'new_node'.  The caller must
 * ensure that 'cmap' cannot change concurrently (from another thread).
 *
 * 'old_node' must not be destroyed or modified or inserted back into 'cmap' or
 * into any other concurrent hash map while any other thread might be accessing
 * it.  One correct way to do this is to free it from an RCU callback with
 * ovsrcu_postpone().
 *
 * Returns the current number of nodes in the cmap after the replacement.  The
 * number of nodes decreases by one if 'new_node' is NULL. */
size_t
cmap_replace(struct cmap *cmap, struct cmap_node *old_node,
             struct cmap_node *new_node, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    bool ok;

    ok = cmap_replace__(impl, old_node, new_node, hash, h1)
        || cmap_replace__(impl, old_node, new_node, hash, h2);
    ovs_assert(ok);

    if (!new_node) {
        impl->n--;
    }
    return impl->n;
}
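
/* A removal sketch (illustrative): cmap_remove(), a cmap.h wrapper that
 * passes a NULL 'new_node' to cmap_replace(), unlinks a node, after which
 * the memory must outlive any concurrent readers, e.g.:
 *
 *     cmap_remove(&cmap, &n->node, hash_int(n->key, 0));
 *     ovsrcu_postpone(free, n);
 */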

static bool
cmap_try_rehash(const struct cmap_impl *old, struct cmap_impl *new)
{
    const struct cmap_bucket *b;

    for (b = old->buckets; b <= &old->buckets[old->mask]; b++) {
        int i;

        for (i = 0; i < CMAP_K; i++) {
            /* possible optimization here because we know the hashes are
             * unique */
            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

            if (node && !cmap_try_insert(new, node, b->hashes[i])) {
                return false;
            }
        }
    }
    return true;
}

static struct cmap_impl *
cmap_rehash(struct cmap *cmap, uint32_t mask)
{
    struct cmap_impl *old = cmap_get_impl(cmap);
    struct cmap_impl *new;

    new = cmap_impl_create(mask);
    ovs_assert(old->n < new->max_n);

    while (!cmap_try_rehash(old, new)) {
        memset(new->buckets, 0, (mask + 1) * sizeof *new->buckets);
        new->basis = random_uint32();
    }

    new->n = old->n;
    ovsrcu_set(&cmap->impl, new);
    ovsrcu_postpone(free_cacheline, old);

    return new;
}

struct cmap_cursor
cmap_cursor_start(const struct cmap *cmap)
{
    struct cmap_cursor cursor;

    cursor.impl = cmap_get_impl(cmap);
    cursor.bucket_idx = 0;
    cursor.entry_idx = 0;
    cursor.node = NULL;
    cmap_cursor_advance(&cursor);

    return cursor;
}

void
cmap_cursor_advance(struct cmap_cursor *cursor)
{
    const struct cmap_impl *impl = cursor->impl;

    if (cursor->node) {
        cursor->node = cmap_node_next(cursor->node);
        if (cursor->node) {
            return;
        }
    }

    while (cursor->bucket_idx <= impl->mask) {
        const struct cmap_bucket *b = &impl->buckets[cursor->bucket_idx];

        while (cursor->entry_idx < CMAP_K) {
            cursor->node = cmap_node_next(&b->nodes[cursor->entry_idx++]);
            if (cursor->node) {
                return;
            }
        }

        cursor->bucket_idx++;
        cursor->entry_idx = 0;
    }
}

/* Returns the next node in 'cmap' in hash order, or NULL if no nodes remain in
 * 'cmap'.  Uses '*pos' to determine where to begin iteration, and updates
 * '*pos' so that the next call continues from where this one left off.
 *
 * It's better to use plain CMAP_FOR_EACH and related functions, since they are
 * faster and better at dealing with cmaps that change during iteration.
 *
 * Before beginning iteration, set '*pos' to all zeros. */
struct cmap_node *
cmap_next_position(const struct cmap *cmap,
                   struct cmap_position *pos)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    unsigned int bucket = pos->bucket;
    unsigned int entry = pos->entry;
    unsigned int offset = pos->offset;

    while (bucket <= impl->mask) {
        const struct cmap_bucket *b = &impl->buckets[bucket];

        while (entry < CMAP_K) {
            const struct cmap_node *node = cmap_node_next(&b->nodes[entry]);
            unsigned int i;

            for (i = 0; node; i++, node = cmap_node_next(node)) {
                if (i == offset) {
                    if (cmap_node_next(node)) {
                        offset++;
                    } else {
                        entry++;
                        offset = 0;
                    }
                    pos->bucket = bucket;
                    pos->entry = entry;
                    pos->offset = offset;
                    return CONST_CAST(struct cmap_node *, node);
                }
            }

            entry++;
            offset = 0;
        }

        bucket++;
        entry = offset = 0;
    }

    pos->bucket = pos->entry = pos->offset = 0;
    return NULL;
}
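
/* An iteration sketch (illustrative, with a hypothetical 'struct my_node'
 * embedding 'struct cmap_node node'):
 *
 *     struct my_node *n;
 *
 *     CMAP_FOR_EACH (n, node, &cmap) {
 *         ...use 'n'...
 *     }
 */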