ovsdb-server: Fix a reference count leak bug

[cascardo/ovs.git] / lib / cmap.c
diff --git a/lib/cmap.c b/lib/cmap.c

index 1eb79b4..7a54ea6 100644 (file)
--- a/lib/cmap.c
+++ b/lib/cmap.c
@@ -16,11 +16,16 @@
  
  #include <config.h>
  #include "cmap.h"
+#include "coverage.h"
+#include "bitmap.h"
  #include "hash.h"
  #include "ovs-rcu.h"
  #include "random.h"
  #include "util.h"
  
+COVERAGE_DEFINE(cmap_expand);
+COVERAGE_DEFINE(cmap_shrink);
+
  /* Optimistic Concurrent Cuckoo Hash
   * =================================
   *
@@ -52,6 +57,10 @@
   * of about 93%.  The current implementation is conservative, expanding the
   * hash table when it is over 85% full.
   *
+ * When the load factor is below 20%, the hash table will be shrinked by half.
+ * This is to reduce the memory utilization of the hash table and to avoid
+ * the hash table occupying the top of heap chunk which prevents the trimming
+ * of heap.
   *
   * Hash Functions
   * ==============
@@ -135,7 +144,7 @@ struct cmap_bucket {
       * pair is unused.  In-use slots are not necessarily in the earliest
       * slots. */
      uint32_t hashes[CMAP_K];
-    struct cmap_node *nodes[CMAP_K];
+    struct cmap_node nodes[CMAP_K];
  
      /* Padding to make cmap_bucket exactly one cache line long. */
  #if CMAP_PADDING > 0
@@ -149,15 +158,21 @@ BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == CACHE_LINE_SIZE);
   * values waste memory; larger values increase the average insertion time. */
  #define CMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))
  
+/* Default minimum load factor (as a fraction of UINT32_MAX + 1) before
+ * shrinking a cmap.  Currently, the value is chosen to be 20%, this
+ * means cmap will have a 40% load factor after shrink. */
+#define CMAP_MIN_LOAD ((uint32_t) (UINT32_MAX * .20))
+
  /* The implementation of a concurrent hash map. */
  struct cmap_impl {
      unsigned int n;             /* Number of in-use elements. */
      unsigned int max_n;         /* Max elements before enlarging. */
+    unsigned int min_n;         /* Min elements before shrinking. */
      uint32_t mask;              /* Number of 'buckets', minus one. */
      uint32_t basis;             /* Basis for rehashing client's hash values. */
  
      /* Padding to make cmap_impl exactly one cache line long. */
-    uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 4];
+    uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 5];
  
      struct cmap_bucket buckets[];
  };
@@ -165,10 +180,13 @@ BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE);
  
  static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);
  
+/* Explicit inline keywords in utility functions seem to be necessary
+ * to prevent performance regression on cmap_find(). */
+
  /* Given a rehashed value 'hash', returns the other hash for that rehashed
   * value.  This is symmetric: other_hash(other_hash(x)) == x.  (See also "Hash
   * Functions" at the top of this file.) */
-static uint32_t
+static inline uint32_t
  other_hash(uint32_t hash)
  {
      return (hash << 16) | (hash >> 16);
@@ -176,13 +194,14 @@ other_hash(uint32_t hash)
  
  /* Returns the rehashed value for 'hash' within 'impl'.  (See also "Hash
   * Functions" at the top of this file.) */
-static uint32_t
+static inline uint32_t
  rehash(const struct cmap_impl *impl, uint32_t hash)
  {
-    return mhash_finish(impl->basis, hash);
+    return hash_finish(impl->basis, hash);
  }
  
-static struct cmap_impl *
+/* Not always without the inline keyword. */
+static inline struct cmap_impl *
  cmap_get_impl(const struct cmap *cmap)
  {
      return ovsrcu_get(struct cmap_impl *, &cmap->impl);
@@ -194,6 +213,12 @@ calc_max_n(uint32_t mask)
      return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MAX_LOAD) >> 32;
  }
  
+static uint32_t
+calc_min_n(uint32_t mask)
+{
+    return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MIN_LOAD) >> 32;
+}
+
  static struct cmap_impl *
  cmap_impl_create(uint32_t mask)
  {
@@ -205,6 +230,7 @@ cmap_impl_create(uint32_t mask)
                               + (mask + 1) * sizeof *impl->buckets);
      impl->n = 0;
      impl->max_n = calc_max_n(mask);
+    impl->min_n = calc_min_n(mask);
      impl->mask = mask;
      impl->basis = random_uint32();
  
@@ -226,7 +252,7 @@ void
  cmap_destroy(struct cmap *cmap)
  {
      if (cmap) {
-        free_cacheline(cmap_get_impl(cmap));
+        ovsrcu_postpone(free_cacheline, cmap_get_impl(cmap));
      }
  }
  
@@ -244,17 +270,19 @@ cmap_is_empty(const struct cmap *cmap)
      return cmap_count(cmap) == 0;
  }
  
-static uint32_t
-read_counter(struct cmap_bucket *bucket)
+static inline uint32_t
+read_counter(const struct cmap_bucket *bucket_)
  {
+    struct cmap_bucket *bucket = CONST_CAST(struct cmap_bucket *, bucket_);
      uint32_t counter;
  
      atomic_read_explicit(&bucket->counter, &counter, memory_order_acquire);
+
      return counter;
  }
  
-static uint32_t
-read_even_counter(struct cmap_bucket *bucket)
+static inline uint32_t
+read_even_counter(const struct cmap_bucket *bucket)
  {
      uint32_t counter;
  
@@ -265,10 +293,59 @@ read_even_counter(struct cmap_bucket *bucket)
      return counter;
  }
  
-static bool
-counter_changed(struct cmap_bucket *b, uint32_t c)
+static inline bool
+counter_changed(const struct cmap_bucket *b_, uint32_t c)
  {
-    return OVS_UNLIKELY(read_counter(b) != c);
+    struct cmap_bucket *b = CONST_CAST(struct cmap_bucket *, b_);
+    uint32_t counter;
+
+    /* Need to make sure the counter read is not moved up, before the hash and
+     * cmap_node_next().  Using atomic_read_explicit with memory_order_acquire
+     * would allow prior reads to be moved after the barrier.
+     * atomic_thread_fence prevents all following memory accesses from moving
+     * prior to preceding loads. */
+    atomic_thread_fence(memory_order_acquire);
+    atomic_read_relaxed(&b->counter, &counter);
+
+    return OVS_UNLIKELY(counter != c);
+}
+
+static inline const struct cmap_node *
+cmap_find_in_bucket(const struct cmap_bucket *bucket, uint32_t hash)
+{
+    for (int i = 0; i < CMAP_K; i++) {
+        if (bucket->hashes[i] == hash) {
+            return cmap_node_next(&bucket->nodes[i]);
+        }
+    }
+    return NULL;
+}
+
+static inline const struct cmap_node *
+cmap_find__(const struct cmap_bucket *b1, const struct cmap_bucket *b2,
+            uint32_t hash)
+{
+    uint32_t c1, c2;
+    const struct cmap_node *node;
+
+    do {
+        do {
+            c1 = read_even_counter(b1);
+            node = cmap_find_in_bucket(b1, hash);
+        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+        if (node) {
+            break;
+        }
+        do {
+            c2 = read_even_counter(b2);
+            node = cmap_find_in_bucket(b2, hash);
+        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
+        if (node) {
+            break;
+        }
+    } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+
+    return node;
  }
  
  /* Searches 'cmap' for an element with the specified 'hash'.  If one or more is
@@ -280,56 +357,111 @@ counter_changed(struct cmap_bucket *b, uint32_t c)
   * not changing, then cmap_find_protected() is slightly faster.
   *
   * CMAP_FOR_EACH_WITH_HASH is usually more convenient. */
-struct cmap_node *
+const struct cmap_node *
  cmap_find(const struct cmap *cmap, uint32_t hash)
  {
-    struct cmap_impl *impl = cmap_get_impl(cmap);
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
      uint32_t h1 = rehash(impl, hash);
      uint32_t h2 = other_hash(h1);
-    struct cmap_bucket *b1;
-    struct cmap_bucket *b2;
-    uint32_t c1, c2;
-    int i;
  
-retry:
-    b1 = &impl->buckets[h1 & impl->mask];
-    c1 = read_even_counter(b1);
-    for (i = 0; i < CMAP_K; i++) {
-        struct cmap_node *node = b1->nodes[i];
-        if (node && b1->hashes[i] == hash) {
-            if (counter_changed(b1, c1)) {
-                goto retry;
-            }
-            return node;
+    return cmap_find__(&impl->buckets[h1 & impl->mask],
+                       &impl->buckets[h2 & impl->mask],
+                       hash);
+}
+
+/* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
+ * and sets the corresponding pointer in 'nodes', if the hash value was
+ * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
+ * i.e., no NULL pointers are stored there.
+ * Returns a map where a bit is set to 1 if the corresponding 'nodes' pointer
+ * was stored, 0 otherwise.
+ * Generally, the caller wants to use CMAP_NODE_FOR_EACH to verify for
+ * hash collisions. */
+unsigned long
+cmap_find_batch(const struct cmap *cmap, unsigned long map,
+                uint32_t hashes[], const struct cmap_node *nodes[])
+{
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
+    unsigned long result = map;
+    int i;
+    uint32_t h1s[sizeof map * CHAR_BIT];
+    const struct cmap_bucket *b1s[sizeof map * CHAR_BIT];
+    const struct cmap_bucket *b2s[sizeof map * CHAR_BIT];
+    uint32_t c1s[sizeof map * CHAR_BIT];
+
+    /* Compute hashes and prefetch 1st buckets. */
+    ULLONG_FOR_EACH_1(i, map) {
+        h1s[i] = rehash(impl, hashes[i]);
+        b1s[i] = &impl->buckets[h1s[i] & impl->mask];
+        OVS_PREFETCH(b1s[i]);
+    }
+    /* Lookups, Round 1. Only look up at the first bucket. */
+    ULLONG_FOR_EACH_1(i, map) {
+        uint32_t c1;
+        const struct cmap_bucket *b1 = b1s[i];
+        const struct cmap_node *node;
+
+        do {
+            c1 = read_even_counter(b1);
+            node = cmap_find_in_bucket(b1, hashes[i]);
+        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+
+        if (!node) {
+            /* Not found (yet); Prefetch the 2nd bucket. */
+            b2s[i] = &impl->buckets[other_hash(h1s[i]) & impl->mask];
+            OVS_PREFETCH(b2s[i]);
+            c1s[i] = c1; /* We may need to check this after Round 2. */
+            continue;
          }
+        /* Found. */
+        ULLONG_SET0(map, i); /* Ignore this on round 2. */
+        OVS_PREFETCH(node);
+        nodes[i] = node;
      }
-
-    b2 = &impl->buckets[h2 & impl->mask];
-    c2 = read_even_counter(b2);
-    for (i = 0; i < CMAP_K; i++) {
-        struct cmap_node *node = b2->nodes[i];
-        if (node && b2->hashes[i] == hash) {
-            if (counter_changed(b2, c2)) {
-                goto retry;
+    /* Round 2. Look into the 2nd bucket, if needed. */
+    ULLONG_FOR_EACH_1(i, map) {
+        uint32_t c2;
+        const struct cmap_bucket *b2 = b2s[i];
+        const struct cmap_node *node;
+
+        do {
+            c2 = read_even_counter(b2);
+            node = cmap_find_in_bucket(b2, hashes[i]);
+        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
+
+        if (!node) {
+            /* Not found, but the node may have been moved from b2 to b1 right
+             * after we finished with b1 earlier.  We just got a clean reading
+             * of the 2nd bucket, so we check the counter of the 1st bucket
+             * only.  However, we need to check both buckets again, as the
+             * entry may be moved again to the 2nd bucket.  Basically, we
+             * need to loop as long as it takes to get stable readings of
+             * both buckets.  cmap_find__() does that, and now that we have
+             * fetched both buckets we can just use it. */
+            if (OVS_UNLIKELY(counter_changed(b1s[i], c1s[i]))) {
+                node = cmap_find__(b1s[i], b2s[i], hashes[i]);
+                if (node) {
+                    goto found;
+                }
              }
-            return node;
+            /* Not found. */
+            ULLONG_SET0(result, i); /* Fix the result. */
+            continue;
          }
+found:
+        OVS_PREFETCH(node);
+        nodes[i] = node;
      }
-
-    if (counter_changed(b1, c1) || counter_changed(b2, c2)) {
-        goto retry;
-    }
-    return NULL;
+    return result;
  }
  
  static int
-cmap_find_slot(struct cmap_bucket *b, uint32_t hash)
+cmap_find_slot_protected(struct cmap_bucket *b, uint32_t hash)
  {
      int i;
  
      for (i = 0; i < CMAP_K; i++) {
-        struct cmap_node *node = b->nodes[i];
-        if (node && b->hashes[i] == hash) {
+        if (b->hashes[i] == hash && cmap_node_next_protected(&b->nodes[i])) {
              return i;
          }
      }
@@ -343,9 +475,8 @@ cmap_find_bucket_protected(struct cmap_impl *impl, uint32_t hash, uint32_t h)
      int i;
  
      for (i = 0; i < CMAP_K; i++) {
-        struct cmap_node *node = b->nodes[i];
-        if (node && b->hashes[i] == hash) {
-            return node;
+        if (b->hashes[i] == hash) {
+            return cmap_node_next_protected(&b->nodes[i]);
          }
      }
      return NULL;
@@ -370,12 +501,12 @@ cmap_find_protected(const struct cmap *cmap, uint32_t hash)
  }
  
  static int
-cmap_find_empty_slot(const struct cmap_bucket *b)
+cmap_find_empty_slot_protected(const struct cmap_bucket *b)
  {
      int i;
  
      for (i = 0; i < CMAP_K; i++) {
-        if (!b->nodes[i]) {
+        if (!cmap_node_next_protected(&b->nodes[i])) {
              return i;
          }
      }
@@ -390,7 +521,7 @@ cmap_set_bucket(struct cmap_bucket *b, int i,
  
      atomic_read_explicit(&b->counter, &c, memory_order_acquire);
      atomic_store_explicit(&b->counter, c + 1, memory_order_release);
-    b->nodes[i] = node;
+    ovsrcu_set(&b->nodes[i].next, node); /* Also atomic. */
      b->hashes[i] = hash;
      atomic_store_explicit(&b->counter, c + 2, memory_order_release);
  }
@@ -405,24 +536,32 @@ cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
      int i;
  
      for (i = 0; i < CMAP_K; i++) {
-        struct cmap_node *node = b->nodes[i];
          if (b->hashes[i] == hash) {
+            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
+
              if (node) {
                  struct cmap_node *p;
  
-                /* The common case is that 'new_node' is a singleton, with a
-                 * null 'next' pointer, but rehashing can add a longer chain.
-                 * Find the end of the chain starting at 'new_node', then
-                 * splice 'node' to the end of that chain. */
+                /* The common case is that 'new_node' is a singleton,
+                 * with a null 'next' pointer.  Rehashing can add a
+                 * longer chain, but due to our invariant of always
+                 * having all nodes with the same (user) hash value at
+                 * a single chain, rehashing will always insert the
+                 * chain to an empty node.  The only way we can end up
+                 * here is by the user inserting a chain of nodes at
+                 * once.  Find the end of the chain starting at
+                 * 'new_node', then splice 'node' to the end of that
+                 * chain. */
                  p = new_node;
                  for (;;) {
                      struct cmap_node *next = cmap_node_next_protected(p);
+
                      if (!next) {
                          break;
                      }
                      p = next;
                  }
-                ovsrcu_set(&p->next, node);
+                ovsrcu_set_hidden(&p->next, node);
              } else {
                  /* The hash value is there from some previous insertion, but
                   * the associated node has been removed.  We're not really
@@ -434,8 +573,7 @@ cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
               * form of cmap_set_bucket() that doesn't update the counter since
               * we're only touching one field and in a way that doesn't change
               * the bucket's meaning for readers. */
-            b->nodes[i] = new_node;
-            atomic_thread_fence(memory_order_release);
+            ovsrcu_set(&b->nodes[i].next, new_node);
  
              return true;
          }
@@ -452,7 +590,7 @@ cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
      int i;
  
      for (i = 0; i < CMAP_K; i++) {
-        if (!b->nodes[i]) {
+        if (!cmap_node_next_protected(&b->nodes[i])) {
              cmap_set_bucket(b, i, node, hash);
              return true;
          }
@@ -463,7 +601,7 @@ cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
  /* Returns the other bucket that b->nodes[slot] could occupy in 'impl'.  (This
   * might be the same as 'b'.) */
  static struct cmap_bucket *
-other_bucket(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
+other_bucket_protected(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
  {
      uint32_t h1 = rehash(impl, b->hashes[slot]);
      uint32_t h2 = other_hash(h1);
@@ -491,7 +629,7 @@ static bool
  cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                  uint32_t hash, struct cmap_bucket *b1, struct cmap_bucket *b2)
  {
-    enum { MAX_PATH = 4 };
+    enum { MAX_DEPTH = 4 };
  
      /* A path from 'start' to 'end' via the 'n' steps in 'slots[]'.
       *
@@ -502,14 +640,14 @@ cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
       *
       *     b = path->start;
       *     for (i = 0; i < path->n; i++) {
-     *         b = other_bucket(impl, b, path->slots[i]);
+     *         b = other_bucket_protected(impl, b, path->slots[i]);
       *     }
       *     ovs_assert(b == path->end);
       */
      struct cmap_path {
          struct cmap_bucket *start; /* First bucket along the path. */
          struct cmap_bucket *end;   /* Last bucket on the path. */
-        uint8_t slots[MAX_PATH];   /* Slots used for each hop. */
+        uint8_t slots[MAX_DEPTH];  /* Slots used for each hop. */
          int n;                     /* Number of slots[]. */
      };
  
@@ -542,21 +680,21 @@ cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
          int i;
  
          for (i = 0; i < CMAP_K; i++) {
-            struct cmap_bucket *next = other_bucket(impl, this, i);
+            struct cmap_bucket *next = other_bucket_protected(impl, this, i);
              int j;
  
              if (this == next) {
                  continue;
              }
  
-            j = cmap_find_empty_slot(next);
+            j = cmap_find_empty_slot_protected(next);
              if (j >= 0) {
                  /* We've found a path along which we can rearrange the hash
                   * table:  Start at path->start, follow all the slots in
                   * path->slots[], then follow slot 'i', then the bucket you
                   * arrive at has slot 'j' empty. */
-                struct cmap_bucket *buckets[MAX_PATH + 2];
-                int slots[MAX_PATH + 2];
+                struct cmap_bucket *buckets[MAX_DEPTH + 2];
+                int slots[MAX_DEPTH + 2];
                  int k;
  
                  /* Figure out the full sequence of slots. */
@@ -569,7 +707,7 @@ cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                  /* Figure out the full sequence of buckets. */
                  buckets[0] = path->start;
                  for (k = 0; k <= path->n; k++) {
-                    buckets[k + 1] = other_bucket(impl, buckets[k], slots[k]);
+                    buckets[k + 1] = other_bucket_protected(impl, buckets[k], slots[k]);
                  }
  
                  /* Now the path is fully expressed.  One can start from
@@ -582,9 +720,10 @@ cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                  for (k = path->n + 1; k > 0; k--) {
                      int slot = slots[k - 1];
  
-                    cmap_set_bucket(buckets[k], slots[k],
-                                    buckets[k - 1]->nodes[slot],
-                                    buckets[k - 1]->hashes[slot]);
+                    cmap_set_bucket(
+                        buckets[k], slots[k],
+                        cmap_node_next_protected(&buckets[k - 1]->nodes[slot]),
+                        buckets[k - 1]->hashes[slot]);
                  }
  
                  /* Finally, replace the first node on the path by
@@ -594,7 +733,7 @@ cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                  return true;
              }
  
-            if (path->n < MAX_PATH && head < MAX_QUEUE) {
+            if (path->n < MAX_DEPTH && head < MAX_QUEUE) {
                  struct cmap_path *new_path = &queue[head++];
  
                  *new_path = *path;
@@ -629,72 +768,91 @@ cmap_try_insert(struct cmap_impl *impl, struct cmap_node *node, uint32_t hash)
  /* Inserts 'node', with the given 'hash', into 'cmap'.  The caller must ensure
   * that 'cmap' cannot change concurrently (from another thread).  If duplicates
   * are undesirable, the caller must have already verified that 'cmap' does not
- * contain a duplicate of 'node'. */
-void
+ * contain a duplicate of 'node'.
+ *
+ * Returns the current number of nodes in the cmap after the insertion. */
+size_t
  cmap_insert(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
  {
      struct cmap_impl *impl = cmap_get_impl(cmap);
  
-    ovsrcu_set(&node->next, NULL);
+    ovsrcu_set_hidden(&node->next, NULL);
  
      if (OVS_UNLIKELY(impl->n >= impl->max_n)) {
+        COVERAGE_INC(cmap_expand);
          impl = cmap_rehash(cmap, (impl->mask << 1) | 1);
      }
  
      while (OVS_UNLIKELY(!cmap_try_insert(impl, node, hash))) {
          impl = cmap_rehash(cmap, impl->mask);
      }
-    impl->n++;
+    return ++impl->n;
  }
  
  static bool
-cmap_remove__(struct cmap_impl *impl, struct cmap_node *node,
-              uint32_t hash, uint32_t h)
+cmap_replace__(struct cmap_impl *impl, struct cmap_node *node,
+               struct cmap_node *replacement, uint32_t hash, uint32_t h)
  {
      struct cmap_bucket *b = &impl->buckets[h & impl->mask];
      int slot;
  
-    slot = cmap_find_slot(b, hash);
+    slot = cmap_find_slot_protected(b, hash);
      if (slot < 0) {
          return false;
      }
  
-    if (b->nodes[slot] == node) {
-        b->nodes[slot] = cmap_node_next_protected(node);
-        atomic_thread_fence(memory_order_release);
+    /* The pointer to 'node' is changed to point to 'replacement',
+     * which is the next node if no replacement node is given. */
+    if (!replacement) {
+        replacement = cmap_node_next_protected(node);
      } else {
-        struct cmap_node *iter = b->nodes[slot];
-        for (;;) {
-            struct cmap_node *next = cmap_node_next_protected(iter);
-            if (next == node) {
-                break;
-            }
-            iter = next;
+        /* 'replacement' takes the position of 'node' in the list. */
+        ovsrcu_set_hidden(&replacement->next, cmap_node_next_protected(node));
+    }
+
+    struct cmap_node *iter = &b->nodes[slot];
+    for (;;) {
+        struct cmap_node *next = cmap_node_next_protected(iter);
+
+        if (next == node) {
+            ovsrcu_set(&iter->next, replacement);
+            return true;
          }
-        ovsrcu_set(&iter->next, cmap_node_next_protected(node));
+        iter = next;
      }
-    return true;
  }
  
-/* Removes 'node' from 'cmap'.  The caller must ensure that 'cmap' cannot
- * change concurrently (from another thread).
+/* Replaces 'old_node' in 'cmap' with 'new_node'.  The caller must
+ * ensure that 'cmap' cannot change concurrently (from another thread).
   *
- * 'node' must not be destroyed or modified or inserted back into 'cmap' or
+ * 'old_node' must not be destroyed or modified or inserted back into 'cmap' or
   * into any other concurrent hash map while any other thread might be accessing
   * it.  One correct way to do this is to free it from an RCU callback with
- * ovsrcu_postpone(). */
-void
-cmap_remove(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
+ * ovsrcu_postpone().
+ *
+ * Returns the current number of nodes in the cmap after the replacement.  The
+ * number of nodes decreases by one if 'new_node' is NULL. */
+size_t
+cmap_replace(struct cmap *cmap, struct cmap_node *old_node,
+             struct cmap_node *new_node, uint32_t hash)
  {
      struct cmap_impl *impl = cmap_get_impl(cmap);
      uint32_t h1 = rehash(impl, hash);
      uint32_t h2 = other_hash(h1);
      bool ok;
  
-    ok = (cmap_remove__(impl, node, hash, h1) ||
-          cmap_remove__(impl, node, hash, h2));
+    ok = cmap_replace__(impl, old_node, new_node, hash, h1)
+        || cmap_replace__(impl, old_node, new_node, hash, h2);
      ovs_assert(ok);
-    impl->n--;
+
+    if (!new_node) {
+        impl->n--;
+        if (OVS_UNLIKELY(impl->n < impl->min_n)) {
+            COVERAGE_INC(cmap_shrink);
+            impl = cmap_rehash(cmap, impl->mask >> 1);
+        }
+    }
+    return impl->n;
  }
  
  static bool
@@ -708,7 +866,7 @@ cmap_try_rehash(const struct cmap_impl *old, struct cmap_impl *new)
          for (i = 0; i < CMAP_K; i++) {
              /* possible optimization here because we know the hashes are
               * unique */
-            struct cmap_node *node = b->nodes[i];
+            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
  
              if (node && !cmap_try_insert(new, node, b->hashes[i])) {
                  return false;
@@ -739,30 +897,29 @@ cmap_rehash(struct cmap *cmap, uint32_t mask)
      return new;
  }
  
-/* Initializes 'cursor' for iterating through 'cmap'.
- *
- * Use via CMAP_FOR_EACH. */
-void
-cmap_cursor_init(struct cmap_cursor *cursor, const struct cmap *cmap)
+struct cmap_cursor
+cmap_cursor_start(const struct cmap *cmap)
  {
-    cursor->impl = cmap_get_impl(cmap);
-    cursor->bucket_idx = 0;
-    cursor->entry_idx = 0;
+    struct cmap_cursor cursor;
+
+    cursor.impl = cmap_get_impl(cmap);
+    cursor.bucket_idx = 0;
+    cursor.entry_idx = 0;
+    cursor.node = NULL;
+    cmap_cursor_advance(&cursor);
+
+    return cursor;
  }
  
-/* Returns the next node for 'cursor' to visit, following 'node', or NULL if
- * the last node has been visited.
- *
- * Use via CMAP_FOR_EACH. */
-struct cmap_node *
-cmap_cursor_next(struct cmap_cursor *cursor, const struct cmap_node *node)
+void
+cmap_cursor_advance(struct cmap_cursor *cursor)
  {
      const struct cmap_impl *impl = cursor->impl;
  
-    if (node) {
-        struct cmap_node *next = cmap_node_next(node);
-        if (next) {
-            return next;
+    if (cursor->node) {
+        cursor->node = cmap_node_next(cursor->node);
+        if (cursor->node) {
+            return;
          }
      }
  
@@ -770,17 +927,15 @@ cmap_cursor_next(struct cmap_cursor *cursor, const struct cmap_node *node)
          const struct cmap_bucket *b = &impl->buckets[cursor->bucket_idx];
  
          while (cursor->entry_idx < CMAP_K) {
-            struct cmap_node *node = b->nodes[cursor->entry_idx++];
-            if (node) {
-                return node;
+            cursor->node = cmap_node_next(&b->nodes[cursor->entry_idx++]);
+            if (cursor->node) {
+                return;
              }
          }
  
          cursor->bucket_idx++;
          cursor->entry_idx = 0;
      }
-
-    return NULL;
  }
  
  /* Returns the next node in 'cmap' in hash order, or NULL if no nodes remain in
@@ -804,10 +959,10 @@ cmap_next_position(const struct cmap *cmap,
          const struct cmap_bucket *b = &impl->buckets[bucket];
  
          while (entry < CMAP_K) {
-            const struct cmap_node *node = b->nodes[entry];
+            const struct cmap_node *node = cmap_node_next(&b->nodes[entry]);
              unsigned int i;
  
-            for (i = 0; node; i++) {
+            for (i = 0; node; i++, node = cmap_node_next(node)) {
                  if (i == offset) {
                      if (cmap_node_next(node)) {
                          offset++;