From: Ben Pfaff
Date: Mon, 13 Jan 2014 19:17:55 +0000 (-0800)
Subject: fat-rwlock: New big but fast synchronization primitive.
X-Git-Tag: v2.1.0~40
X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=3a5a7d79989cdad38372257ef6399f2d4e47cbec

fat-rwlock: New big but fast synchronization primitive.

This implements a reader-writer lock that uses a lot of memory (128 to
192 bytes per thread that takes the lock) but avoids cache line bouncing
when taking the read side.  Thus, a fat_rwlock is a good choice for
rwlocks taken frequently by readers.

Signed-off-by: Ben Pfaff
Acked-by: Ethan Jackson
---

diff --git a/lib/automake.mk b/lib/automake.mk
index 5eece1c2a..aa50db81b 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -1,4 +1,4 @@
-# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
@@ -57,6 +57,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dynamic-string.h \
 	lib/entropy.c \
 	lib/entropy.h \
+	lib/fat-rwlock.c \
+	lib/fat-rwlock.h \
 	lib/fatal-signal.c \
 	lib/fatal-signal.h \
 	lib/flow.c \
diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c
new file mode 100644
index 000000000..49dbc7247
--- /dev/null
+++ b/lib/fat-rwlock.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2013, 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "fat-rwlock.h"
+
+#include <errno.h>
+
+#include "hmap.h"
+#include "list.h"
+#include "ovs-thread.h"
+#include "random.h"
+
+/* This system's cache line size, in bytes.
+ * Being wrong hurts performance but not correctness. */
+#define CACHE_LINE_SIZE 64  /* Correct for most CPUs. */
+BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
+
+struct fat_rwlock_slot {
+    /* Membership in rwlock's list of "struct fat_rwlock_slot"s.
+     *
+     * free_slot() removes the slot from this list, whether the slot is
+     * being destroyed along with the rwlock or by its own thread's exit. */
+    struct list list_node;      /* In struct rwlock's 'threads' list. */
+    struct fat_rwlock *rwlock;  /* Owner. */
+
+    /* Mutex.
+     *
+     * A thread holding the read-lock holds its own mutex.
+     *
+     * A thread holding the write-lock holds every thread's mutex, plus
+     * 'rwlock->mutex'. */
+    struct ovs_mutex mutex;
+
+    /* This thread's locking status for 'rwlock':
+     *
+     *     - 0: This thread does not have any lock on 'rwlock'.  This thread
+     *       does not have 'mutex' locked.
+     *
+     *     - 1: This thread has a read-lock on 'rwlock' and holds 'mutex'.
+     *
+     *     - 2...UINT_MAX-1: This thread has recursively taken the read-lock
+     *       on 'rwlock' to the level of 'depth'.  This thread holds 'mutex'.
+     *
+     *     - UINT_MAX: This thread has the write-lock on 'rwlock' and holds
+     *       'mutex' (plus the 'mutex' of all of 'rwlock''s other slots).
+     *
+     * Accessed only by the slot's own thread, so no synchronization is
+     * needed. */
+    unsigned int depth;
+
+    /* To prevent two of these structures from accidentally occupying the
+     * same cache line (causing "false sharing"), we cache-align each of
+     * these data structures.  That requires malloc()ing extra space and
+     * throwing away some space at the beginning, which means that the
+     * pointer to this struct isn't necessarily the pointer to the beginning
+     * of the block, and so we need to retain the original pointer to free
+     * later.
+     *
+     * Accessed only by a single thread, so no synchronization is needed. */
+    void *base;                 /* Pointer to pass to free() for this block. */
+};
+
+static void
+free_slot(struct fat_rwlock_slot *slot)
+{
+    if (slot->depth) {
+        abort();
+    }
+
+    list_remove(&slot->list_node);
+    free(slot->base);
+}
+
+static void
+slot_destructor(void *slot_)
+{
+    struct fat_rwlock_slot *slot = slot_;
+    struct fat_rwlock *rwlock = slot->rwlock;
+
+    ovs_mutex_lock(&rwlock->mutex);
+    free_slot(slot);
+    ovs_mutex_unlock(&rwlock->mutex);
+}
+
+/* Initialize 'rwlock' as a new fat_rwlock. */
+void
+fat_rwlock_init(struct fat_rwlock *rwlock)
+{
+    ovsthread_key_create(&rwlock->key, slot_destructor);
+    ovs_mutex_init(&rwlock->mutex);
+    ovs_mutex_lock(&rwlock->mutex);
+    list_init(&rwlock->threads);
+    ovs_mutex_unlock(&rwlock->mutex);
+}
+
+/* Destroys 'rwlock', which must not be locked or otherwise in use by any
+ * thread. */
+void
+fat_rwlock_destroy(struct fat_rwlock *rwlock)
+{
+    struct fat_rwlock_slot *slot, *next;
+
+    /* Order is important here.  By destroying the thread-specific data
+     * first, before we destroy the slots, we ensure that the thread-specific
+     * data destructor can't race with our loop below. */
+    ovsthread_key_delete(rwlock->key);
+
+    LIST_FOR_EACH_SAFE (slot, next, list_node, &rwlock->threads) {
+        free_slot(slot);
+    }
+}
+
+static struct fat_rwlock_slot *
+fat_rwlock_get_slot__(struct fat_rwlock *rwlock)
+{
+    struct fat_rwlock_slot *slot;
+    void *base;
+
+    /* Fast path. */
+    slot = ovsthread_getspecific(rwlock->key);
+    if (slot) {
+        return slot;
+    }
+
+    /* Slow path: create a new slot for 'rwlock' in this thread. */
+
+    /* Allocate room for:
+     *
+     *     - Up to CACHE_LINE_SIZE - 1 bytes before the per-thread slot, so
+     *       that the start of the slot doesn't potentially share a cache
+     *       line.
+     *
+     *     - The slot itself.
+     *
+     *     - Space following the slot up to the end of the cache line, so
+     *       that the end of the slot doesn't potentially share a cache
+     *       line. */
+    base = xmalloc((CACHE_LINE_SIZE - 1)
+                   + ROUND_UP(sizeof *slot, CACHE_LINE_SIZE));
+    slot = (void *) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
+
+    slot->base = base;
+    slot->rwlock = rwlock;
+    ovs_mutex_init(&slot->mutex);
+    slot->depth = 0;
+
+    ovs_mutex_lock(&rwlock->mutex);
+    list_push_back(&rwlock->threads, &slot->list_node);
+    ovs_mutex_unlock(&rwlock->mutex);
+
+    ovsthread_setspecific(rwlock->key, slot);
+
+    return slot;
+}
+
+/* Locks 'rwlock' for reading.  The read-lock is recursive: it may be acquired
+ * any number of times by a single thread (which must then release it the same
+ * number of times for it to truly be released). */
+void
+fat_rwlock_rdlock(const struct fat_rwlock *rwlock_)
+    OVS_ACQ_RDLOCK(rwlock_)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
+    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
+
+    switch (this->depth) {
+    case UINT_MAX:
+        /* This thread already holds the write-lock. */
+        abort();
+
+    case 0:
+        ovs_mutex_lock(&this->mutex);
+        /* fall through */
+    default:
+        this->depth++;
+        break;
+    }
+}
+
+/* Tries to lock 'rwlock' for reading.  If successful, returns 0.  If taking
+ * the lock would require blocking, returns EBUSY (without blocking). */
+int
+fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock_)
+    OVS_TRY_RDLOCK(0, rwlock_)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
+    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
+    int error;
+
+    switch (this->depth) {
+    case UINT_MAX:
+        return EBUSY;
+
+    case 0:
+        error = ovs_mutex_trylock(&this->mutex);
+        if (error) {
+            return error;
+        }
+        /* fall through */
+    default:
+        this->depth++;
+        break;
+    }
+
+    return 0;
+}
+
+/* Locks 'rwlock' for writing.
+ *
+ * The write lock is not recursive. */
+void
+fat_rwlock_wrlock(const struct fat_rwlock *rwlock_)
+    OVS_ACQ_WRLOCK(rwlock_)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
+    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
+    struct fat_rwlock_slot *slot;
+
+    ovs_assert(!this->depth);
+    this->depth = UINT_MAX;
+
+    ovs_mutex_lock(&rwlock->mutex);
+    LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
+        ovs_mutex_lock(&slot->mutex);
+    }
+}
+
+/* Unlocks 'rwlock', which the current thread must have locked for reading or
+ * for writing.  If the read lock has been taken recursively, it must be
+ * released the same number of times to be truly released. */
+void
+fat_rwlock_unlock(const struct fat_rwlock *rwlock_)
+    OVS_RELEASES(rwlock_)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
+    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
+    struct fat_rwlock_slot *slot;
+
+    switch (this->depth) {
+    case UINT_MAX:
+        LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
+            ovs_mutex_unlock(&slot->mutex);
+        }
+        ovs_mutex_unlock(&rwlock->mutex);
+        this->depth = 0;
+        break;
+
+    case 0:
+        /* This thread doesn't hold any lock. */
+        abort();
+
+    case 1:
+        ovs_mutex_unlock(&this->mutex);
+        /* fall through */
+    default:
+        this->depth--;
+        break;
+    }
+}
diff --git a/lib/fat-rwlock.h b/lib/fat-rwlock.h
new file mode 100644
index 000000000..27e3c2288
--- /dev/null
+++ b/lib/fat-rwlock.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2013, 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FAT_RWLOCK_H
+#define FAT_RWLOCK_H 1
+
+#include "compiler.h"
+#include "list.h"
+#include "ovs-thread.h"
+
+/* "Fat rwlock".
+ *
+ * This implements a reader-writer lock that uses a lot of memory (128 to 192
+ * bytes per thread that takes the lock) but avoids cache line bouncing when
+ * taking the read side.  Thus, a fat_rwlock is a good choice for rwlocks taken
+ * frequently by readers.
+ */
+struct OVS_LOCKABLE fat_rwlock {
+    ovsthread_key_t key;
+
+    /* Contains "struct fat_rwlock_slot"s, one for each thread that has taken
+     * this lock.  Guarded by 'mutex'. */
+    struct list threads OVS_GUARDED;
+    struct ovs_mutex mutex;
+};
+
+void fat_rwlock_init(struct fat_rwlock *);
+void fat_rwlock_destroy(struct fat_rwlock *);
+
+void fat_rwlock_rdlock(const struct fat_rwlock *rwlock) OVS_ACQ_RDLOCK(rwlock);
+int fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock)
+    OVS_TRY_RDLOCK(0, rwlock);
+void fat_rwlock_wrlock(const struct fat_rwlock *rwlock) OVS_ACQ_WRLOCK(rwlock);
+void fat_rwlock_unlock(const struct fat_rwlock *rwlock) OVS_RELEASES(rwlock);
+
+#endif /* fat-rwlock.h */
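
For reference, a minimal usage sketch of the interface this patch adds.  It
is not part of the patch: the "counter" structure and its functions are
hypothetical scaffolding, and it assumes an OVS build environment that
provides "fat-rwlock.h" and its dependencies.

    /* Usage sketch (hypothetical, not part of the patch). */
    #include <stdio.h>

    #include "fat-rwlock.h"

    struct counter {
        struct fat_rwlock rwlock;   /* Protects 'value'. */
        long long int value;
    };

    static void
    counter_init(struct counter *c)
    {
        fat_rwlock_init(&c->rwlock);
        c->value = 0;
    }

    /* Reader: takes only the calling thread's own slot mutex, so concurrent
     * readers do not bounce a shared cache line. */
    static long long int
    counter_get(const struct counter *c)
    {
        long long int value;

        fat_rwlock_rdlock(&c->rwlock);
        value = c->value;
        fat_rwlock_unlock(&c->rwlock);

        return value;
    }

    /* Writer: acquires every thread's slot mutex, so this is the expensive
     * side of the lock. */
    static void
    counter_add(struct counter *c, long long int n)
    {
        fat_rwlock_wrlock(&c->rwlock);
        c->value += n;
        fat_rwlock_unlock(&c->rwlock);
    }

    int
    main(void)
    {
        struct counter c;

        counter_init(&c);
        counter_add(&c, 5);
        printf("%lld\n", counter_get(&c));
        fat_rwlock_destroy(&c.rwlock);

        return 0;
    }

Note that the locking functions take a const struct fat_rwlock *, which is
why counter_get() can lock through a const pointer; and since the read-lock
is recursive, fat_rwlock_unlock() must be called once per acquisition.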
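The allocation comment in fat_rwlock_get_slot__() describes a portable way to
cache-align a heap object without an aligned allocator.  Here is a standalone
sketch of the same arithmetic; it assumes a 64-byte cache line and uses a
simplified power-of-two ROUND_UP in place of OVS's macro, so it is an
illustration of the technique rather than the patch's code.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define CACHE_LINE_SIZE 64  /* Assumption, as in the patch. */

    /* Round X up to the next multiple of Y, which must be a power of 2.
     * (Simplified stand-in for OVS's ROUND_UP macro.) */
    #define ROUND_UP(X, Y) \
        ((((uintptr_t) (X)) + ((Y) - 1)) & ~((uintptr_t) (Y) - 1))

    int
    main(void)
    {
        size_t size = 40;   /* Stand-in for sizeof(struct fat_rwlock_slot). */
        void *base, *obj;

        /* Over-allocate by up to CACHE_LINE_SIZE - 1 bytes, then round the
         * pointer up so the object starts on a cache line boundary and
         * occupies whole cache lines.  'base', not 'obj', must be freed. */
        base = malloc((CACHE_LINE_SIZE - 1)
                      + ROUND_UP(size, CACHE_LINE_SIZE));
        if (!base) {
            return 1;
        }
        obj = (void *) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);

        printf("base=%p obj=%p offset_into_line=%lu\n", base, obj,
               (unsigned long) ((uintptr_t) obj % CACHE_LINE_SIZE));

        free(base);         /* Free the original pointer, as slot->base does. */
        return 0;
    }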