2 * Copyright (c) 2013, 2014, 2015, 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "ovs-thread.h"
27 #include "fatal-signal.h"
29 #include "openvswitch/list.h"
30 #include "netdev-dpdk.h"
32 #include "poll-loop.h"
34 #include "socket-util.h"
38 /* Omit the definitions in this file because they are somewhat difficult to
39 * write without prompting "sparse" complaints, without ugliness or
40 * cut-and-paste. Since "sparse" is just a checker, not a compiler, it
41 * doesn't matter that we don't define them. */
43 #include "openvswitch/vlog.h"
45 VLOG_DEFINE_THIS_MODULE(ovs_thread);
/* If there is a reason that we cannot fork anymore (unless the fork will be
 * immediately followed by an exec), then this points to a string that
 * explains why. */
50 static const char *must_not_fork;
52 /* True if we created any threads beyond the main initial thread. */
53 static bool multithreaded;
/* Defines ovs_<TYPE>_<FUN>_at(), a checked wrapper around
 * pthread_<TYPE>_<FUN>().  The generated function calls ovs_abort() when the
 * lock's 'where' member is null (the lock was never initialized) or when the
 * pthread call returns a nonzero error.  The abort message for a pthread
 * failure is prefixed with 'where'.
 *
 * Instantiated below for the blocking mutex and rwlock lock operations. */
55 #define LOCK_FUNCTION(TYPE, FUN) \
57 ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
59 OVS_NO_THREAD_SAFETY_ANALYSIS \
61 struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
64 /* Verify that 'l' was initialized. */ \
65 if (OVS_UNLIKELY(!l->where)) { \
66 ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
70 error = pthread_##TYPE##_##FUN(&l->lock); \
71 if (OVS_UNLIKELY(error)) { \
72 ovs_abort(error, "%s: pthread_%s_%s failed", where, #TYPE, #FUN); \
76 LOCK_FUNCTION(mutex, lock);
77 LOCK_FUNCTION(rwlock, rdlock);
78 LOCK_FUNCTION(rwlock, wrlock);
/* Defines ovs_<TYPE>_<FUN>_at() for the non-blocking pthread_<TYPE>_<FUN>()
 * operations.  As with LOCK_FUNCTION, an uninitialized lock (null 'where')
 * is fatal.  A pthread error is fatal too, except EBUSY, which does not
 * trigger ovs_abort().
 *
 * NOTE(review): what the generated function returns is on lines not visible
 * here; confirm before relying on it. */
80 #define TRY_LOCK_FUNCTION(TYPE, FUN) \
82 ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
84 OVS_NO_THREAD_SAFETY_ANALYSIS \
86 struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
89 /* Verify that 'l' was initialized. */ \
90 if (OVS_UNLIKELY(!l->where)) { \
91 ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
95 error = pthread_##TYPE##_##FUN(&l->lock); \
96 if (OVS_UNLIKELY(error) && error != EBUSY) { \
97 ovs_abort(error, "%s: pthread_%s_%s failed", where, #TYPE, #FUN); \
104 TRY_LOCK_FUNCTION(mutex, trylock);
105 TRY_LOCK_FUNCTION(rwlock, tryrdlock);
106 TRY_LOCK_FUNCTION(rwlock, trywrlock);
/* Defines ovs_<TYPE>_<FUN>(), a checked wrapper around pthread_<TYPE>_<FUN>()
 * used for the unlock and destroy operations.  The generated function asserts
 * that the lock's 'where' member is non-null and calls ovs_abort() if the
 * pthread call returns an error.
 *
 * NOTE(review): the use of the WHERE argument is on a line not visible here;
 * presumably it becomes the new value of the lock's 'where' member
 * ("<unlocked>" after unlock, NULL after destroy) -- confirm. */
108 #define UNLOCK_FUNCTION(TYPE, FUN, WHERE) \
110 ovs_##TYPE##_##FUN(const struct ovs_##TYPE *l_) \
111 OVS_NO_THREAD_SAFETY_ANALYSIS \
113 struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
116 /* Verify that 'l' was initialized. */ \
117 ovs_assert(l->where); \
120 error = pthread_##TYPE##_##FUN(&l->lock); \
121 if (OVS_UNLIKELY(error)) { \
122 ovs_abort(error, "pthread_%s_%s failed", #TYPE, #FUN); \
125 UNLOCK_FUNCTION(mutex, unlock, "<unlocked>");
126 UNLOCK_FUNCTION(mutex, destroy, NULL);
127 UNLOCK_FUNCTION(rwlock, unlock, "<unlocked>");
128 UNLOCK_FUNCTION(rwlock, destroy, NULL);
/* XPTHREAD_FUNC1, XPTHREAD_FUNC2 and XPTHREAD_FUNC3 each define a function
 * named x<FUNCTION> that takes one, two or three arguments of the given
 * parameter types, forwards them unchanged to FUNCTION, and calls ovs_abort()
 * with "<FUNCTION> failed" if FUNCTION returns a nonzero error code. */
130 #define XPTHREAD_FUNC1(FUNCTION, PARAM1) \
132 x##FUNCTION(PARAM1 arg1) \
134 int error = FUNCTION(arg1); \
135 if (OVS_UNLIKELY(error)) { \
136 ovs_abort(error, "%s failed", #FUNCTION); \
139 #define XPTHREAD_FUNC2(FUNCTION, PARAM1, PARAM2) \
141 x##FUNCTION(PARAM1 arg1, PARAM2 arg2) \
143 int error = FUNCTION(arg1, arg2); \
144 if (OVS_UNLIKELY(error)) { \
145 ovs_abort(error, "%s failed", #FUNCTION); \
148 #define XPTHREAD_FUNC3(FUNCTION, PARAM1, PARAM2, PARAM3)\
150 x##FUNCTION(PARAM1 arg1, PARAM2 arg2, PARAM3 arg3) \
152 int error = FUNCTION(arg1, arg2, arg3); \
153 if (OVS_UNLIKELY(error)) { \
154 ovs_abort(error, "%s failed", #FUNCTION); \
/* Aborting wrappers (xpthread_mutex_lock(), xpthread_cond_init(), ...) for
 * the pthread calls used by this library, generated by the XPTHREAD_FUNC*
 * macros. */
158 XPTHREAD_FUNC1(pthread_mutex_lock, pthread_mutex_t *);
159 XPTHREAD_FUNC1(pthread_mutex_unlock, pthread_mutex_t *);
160 XPTHREAD_FUNC1(pthread_mutexattr_init, pthread_mutexattr_t *);
161 XPTHREAD_FUNC1(pthread_mutexattr_destroy, pthread_mutexattr_t *);
162 XPTHREAD_FUNC2(pthread_mutexattr_settype, pthread_mutexattr_t *, int);
163 XPTHREAD_FUNC2(pthread_mutexattr_gettype, pthread_mutexattr_t *, int *);
165 XPTHREAD_FUNC1(pthread_rwlockattr_init, pthread_rwlockattr_t *);
166 XPTHREAD_FUNC1(pthread_rwlockattr_destroy, pthread_rwlockattr_t *);
/* The setkind_np wrapper exists only where the non-portable writer-preferring
 * rwlock kind is available. */
167 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
168 XPTHREAD_FUNC2(pthread_rwlockattr_setkind_np, pthread_rwlockattr_t *, int);
171 XPTHREAD_FUNC2(pthread_cond_init, pthread_cond_t *, pthread_condattr_t *);
172 XPTHREAD_FUNC1(pthread_cond_destroy, pthread_cond_t *);
173 XPTHREAD_FUNC1(pthread_cond_signal, pthread_cond_t *);
174 XPTHREAD_FUNC1(pthread_cond_broadcast, pthread_cond_t *);
176 XPTHREAD_FUNC2(pthread_join, pthread_t, void **);
/* Function type of a thread-specific-data destructor, so that it can be
 * named as a single macro argument below. */
178 typedef void destructor_func(void *);
179 XPTHREAD_FUNC2(pthread_key_create, pthread_key_t *, destructor_func *);
180 XPTHREAD_FUNC1(pthread_key_delete, pthread_key_t);
181 XPTHREAD_FUNC2(pthread_setspecific, pthread_key_t, const void *);
184 XPTHREAD_FUNC3(pthread_sigmask, int, const sigset_t *, sigset_t *);
/* Initializes 'l_' as a pthread mutex of the given 'type' (one of the
 * PTHREAD_MUTEX_* constants) and marks it as "<unlocked>".  Calls ovs_abort()
 * if pthread_mutex_init() fails. */
188 ovs_mutex_init__(const struct ovs_mutex *l_, int type)
190 struct ovs_mutex *l = CONST_CAST(struct ovs_mutex *, l_);
191 pthread_mutexattr_t attr;
/* A non-null 'where' is what the lock and unlock wrappers treat as proof
 * that the mutex has been initialized. */
194 l->where = "<unlocked>";
195 xpthread_mutexattr_init(&attr);
196 xpthread_mutexattr_settype(&attr, type);
197 error = pthread_mutex_init(&l->lock, &attr);
198 if (OVS_UNLIKELY(error)) {
199 ovs_abort(error, "pthread_mutex_init failed");
/* The attribute object is only needed while the mutex is being created. */
201 xpthread_mutexattr_destroy(&attr);
204 /* Initializes 'mutex' as a normal (non-recursive) mutex. */
/* The underlying pthread mutex type is PTHREAD_MUTEX_ERRORCHECK. */
206 ovs_mutex_init(const struct ovs_mutex *mutex)
208 ovs_mutex_init__(mutex, PTHREAD_MUTEX_ERRORCHECK);
211 /* Initializes 'mutex' as a recursive mutex. */
/* The underlying pthread mutex type is PTHREAD_MUTEX_RECURSIVE. */
213 ovs_mutex_init_recursive(const struct ovs_mutex *mutex)
215 ovs_mutex_init__(mutex, PTHREAD_MUTEX_RECURSIVE);
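/* Initializes 'mutex' as an adaptive mutex where the platform provides
 * PTHREAD_MUTEX_ADAPTIVE_NP, and as a normal (non-recursive) mutex
 * otherwise. */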
220 ovs_mutex_init_adaptive(const struct ovs_mutex *mutex)
222 #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
/* The adaptive mutex type is available: request it. */
223 ovs_mutex_init__(mutex, PTHREAD_MUTEX_ADAPTIVE_NP);
/* NOTE(review): presumably the fallback for platforms without adaptive
 * mutexes; the preprocessor line that separates the two calls is not visible
 * here. */
225 ovs_mutex_init(mutex);
230 ovs_rwlock_init(const struct ovs_rwlock *l_)
232 struct ovs_rwlock *l = CONST_CAST(struct ovs_rwlock *, l_);
233 pthread_rwlockattr_t attr;
236 l->where = "<unlocked>";
238 xpthread_rwlockattr_init(&attr);
239 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
240 xpthread_rwlockattr_setkind_np(
241 &attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
243 error = pthread_rwlock_init(&l->lock, NULL);
244 if (OVS_UNLIKELY(error)) {
245 ovs_abort(error, "pthread_rwlock_init failed");
247 xpthread_rwlockattr_destroy(&attr);
250 /* Provides an error-checking wrapper around pthread_cond_wait().
252 * If the wait can take a significant amount of time, consider bracketing this
253 * call with calls to ovsrcu_quiesce_start() and ovsrcu_quiesce_end(). */
255 ovs_mutex_cond_wait(pthread_cond_t *cond, const struct ovs_mutex *mutex_)
/* The mutex is taken as const in the interface, but pthread_cond_wait() needs
 * a mutable pthread_mutex_t, so the qualifier is cast away here. */
257 struct ovs_mutex *mutex = CONST_CAST(struct ovs_mutex *, mutex_);
260 error = pthread_cond_wait(cond, &mutex->lock);
/* Any error from pthread_cond_wait() is treated as fatal. */
262 if (OVS_UNLIKELY(error)) {
263 ovs_abort(error, "pthread_cond_wait failed");
267 /* Initializes the 'barrier'. 'size' is the number of threads
268 * expected to hit the barrier. */
270 ovs_barrier_init(struct ovs_barrier *barrier, uint32_t size)
272 barrier->size = size;
/* No thread has arrived at the barrier yet. */
273 atomic_count_init(&barrier->count, 0);
/* 'seq' is what threads blocked in ovs_barrier_block() wait on. */
274 barrier->seq = seq_create();
277 /* Destroys the 'barrier'. */
279 ovs_barrier_destroy(struct ovs_barrier *barrier)
/* Releases the seq that ovs_barrier_init() created. */
281 seq_destroy(barrier->seq);
284 /* Makes the calling thread block on the 'barrier' until all
285 * 'barrier->size' threads hit the barrier.
286 * ovs_barrier provides the necessary acquire-release semantics to make
287 * the effects of prior memory accesses of all the participating threads
288 * visible on return and to prevent the following memory accesses to be
289 * reordered before the ovs_barrier_block(). */
291 ovs_barrier_block(struct ovs_barrier *barrier)
/* The sequence number is sampled before this thread announces its arrival;
 * the wait loop below compares against this sample. */
293 uint64_t seq = seq_read(barrier->seq);
/* 'orig' is presumably the count before the increment, so 'orig + 1' is the
 * number of threads that have arrived, this one included. */
296 orig = atomic_count_inc(&barrier->count);
297 if (orig + 1 == barrier->size) {
/* Last thread to arrive: reset the count for the next use of the barrier
 * and release everybody else by changing 'seq'. */
298 atomic_count_set(&barrier->count, 0);
299 /* seq_change() serves as a release barrier against the other threads,
300 * so the zeroed count is visible to them as they continue. */
301 seq_change(barrier->seq);
303 /* To prevent thread from waking up by other event,
304 * keeps waiting for the change of 'barrier->seq'. */
305 while (seq == seq_read(barrier->seq)) {
306 seq_wait(barrier->seq, seq);
312 DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, 0);
/* Arguments handed from ovs_thread_create() to ovsthread_wrapper() for a new
 * thread.  Besides 'start', the code in this file also uses members named
 * 'arg' and 'name' (their declarations are not visible here). */
314 struct ovsthread_aux {
315 void *(*start)(void *);
/* Thread entry point that ovs_thread_create() passes to pthread_create().
 * 'aux_' is the struct ovsthread_aux describing the thread to run.  Gives the
 * thread an id and a name, ends its quiescent state, then runs the
 * caller-supplied start function and returns that function's result. */
321 ovsthread_wrapper(void *aux_)
/* Source of thread ids; the counter starts at 1. */
323 static atomic_count next_id = ATOMIC_COUNT_INIT(1);
325 struct ovsthread_aux *auxp = aux_;
326 struct ovsthread_aux aux;
/* Take an id from the counter and record it in this thread's 'ovsthread_id'
 * per-thread variable. */
329 id = atomic_count_inc(&next_id);
330 *ovsthread_id_get() = id;
335 /* The order of the following calls is important, because
336 * ovsrcu_quiesce_end() saves a copy of the thread name. */
/* The thread name is the caller's name followed by the numeric id. */
337 char *subprogram_name = xasprintf("%s%u", aux.name, id);
338 set_subprogram_name(subprogram_name);
339 free(subprogram_name);
340 ovsrcu_quiesce_end();
342 return aux.start(aux.arg);
/* Raises the stack size recorded in 'attr' to 'min_stacksize' if it is
 * currently smaller; a stack size that is already large enough is left
 * alone.  Failures of the pthread_attr_*() calls are reported through
 * ovs_abort(). */
346 set_min_stack_size(pthread_attr_t *attr, size_t min_stacksize)
351 error = pthread_attr_getstacksize(attr, &stacksize);
353 ovs_abort(error, "pthread_attr_getstacksize failed");
356 if (stacksize < min_stacksize) {
357 error = pthread_attr_setstacksize(attr, min_stacksize);
359 ovs_abort(error, "pthread_attr_setstacksize failed");
364 /* Starts a thread that calls 'start(arg)'. Sets the thread's name to 'name'
365 * (suffixed by its ovsthread_id()). Returns the new thread's pthread_t. */
367 ovs_thread_create(const char *name, void *(*start)(void *), void *arg)
369 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
370 struct ovsthread_aux *aux;
/* From here on xfork_at() refuses to fork, reporting this reason. */
374 forbid_forking("multiple threads exist");
376 if (ovsthread_once_start(&once)) {
377 /* The first call to this function has to happen in the main thread.
378 * Before the process becomes multithreaded we make sure that the
379 * main thread is considered non quiescent.
381 * For other threads this is done in ovs_thread_wrapper(), but the
382 * main thread has no such wrapper.
384 * There's no reason to call ovsrcu_quiesce_end() in subsequent
385 * invocations of this function and it might introduce problems
386 * for other threads. */
387 ovsrcu_quiesce_end();
388 ovsthread_once_done(&once);
/* single_threaded() reports false from now on. */
391 multithreaded = true;
/* 'aux' is heap-allocated because it is consumed by the new thread, in
 * ovsthread_wrapper(), rather than by this function. */
392 aux = xmalloc(sizeof *aux);
395 ovs_strlcpy(aux->name, name, sizeof aux->name);
397 /* Some small systems use a default stack size as small as 80 kB, but OVS
398 * requires approximately 384 kB according to the following analysis:
399 * http://openvswitch.org/pipermail/dev/2016-January/065049.html
401 * We use 512 kB to give us some margin of error. */
/* NOTE(review): the return value of pthread_attr_init() is not checked. */
403 pthread_attr_init(&attr);
404 set_min_stack_size(&attr, 512 * 1024);
406 error = pthread_create(&thread, &attr, ovsthread_wrapper, aux);
408 ovs_abort(error, "pthread_create failed");
/* NOTE(review): the return value of pthread_attr_destroy() is not checked. */
410 pthread_attr_destroy(&attr);
/* Helper for ovsthread_once_start() that serializes callers on 'once->mutex'.
 *
 * NOTE(review): the test of 'done' and the return statements are on lines not
 * visible here.  Since ovsthread_once_done() unlocks 'once->mutex', presumably
 * the mutex stays held on the path that tells the caller to perform the
 * initialization, and the unlock below belongs to the already-done path --
 * confirm. */
415 ovsthread_once_start__(struct ovsthread_once *once)
417 ovs_mutex_lock(&once->mutex);
418 /* Mutex synchronizes memory, so we get the current value of 'done'. */
422 ovs_mutex_unlock(&once->mutex);
/* Called by the thread that performed the one-time initialization guarded by
 * 'once', after that initialization is complete (see the
 * ovsthread_once_start() / ovsthread_once_done() pairs in this file). */
427 ovsthread_once_done(struct ovsthread_once *once)
429 /* We need release semantics here, so that the following store may not
430 * be moved ahead of any of the preceding initialization operations.
431 * A release atomic_thread_fence provides that prior memory accesses
432 * will not be reordered to take place after the following store. */
433 atomic_thread_fence(memory_order_release);
/* Drops 'once->mutex', which ovsthread_once_start__() acquires. */
435 ovs_mutex_unlock(&once->mutex);
/* Returns true as long as 'multithreaded' is unset, that is, until the first
 * call to ovs_thread_create(), which sets it. */
439 single_threaded(void)
441 return !multithreaded;
/* Asserts that the process has not yet created any threads (beyond the initial
 * thread).
 *
447 * ('where' is used in logging. Commonly one would use
448 * assert_single_threaded() to automatically provide the caller's source file
449 * and line number for 'where'.) */
451 assert_single_threaded_at(const char *where)
/* Fatal error path; the log message is prefixed with 'where'.
 * NOTE(review): the condition guarding this call is on a line not visible
 * here; presumably it fires only once the process is multithreaded. */
454 VLOG_FATAL("%s: attempted operation not allowed when multithreaded",
460 /* Forks the current process (checking that this is allowed). Aborts with
461 * VLOG_FATAL if fork() returns an error, and otherwise returns the value
462 * returned by fork().
 * ('where' is used in logging.  Commonly one would use xfork() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
468 xfork_at(const char *where)
/* Refusal path: 'must_not_fork' holds the reason recorded by
 * forbid_forking(), which is included in the message. */
473 VLOG_FATAL("%s: attempted to fork but forking not allowed (%s)",
474 where, must_not_fork);
/* Failure path: the message reports the 'errno' value as text. */
479 VLOG_FATAL("%s: fork failed (%s)", where, ovs_strerror(errno));
/* Notes that the process must not call fork() from now on, for the specified
 * 'reason'.  (The process may still fork() if it execs itself immediately
 * afterward.) */
489 forbid_forking(const char *reason)
491 ovs_assert(reason != NULL);
/* Only the pointer is stored, not a copy of the string, so 'reason' must
 * remain valid for as long as it may be reported. */
492 must_not_fork = reason;
495 /* Returns true if the process is allowed to fork, false otherwise. */
499 return !must_not_fork;
502 /* ovsthread_stats. */
/* Initializes 'stats': sets up its mutex and starts with every bucket
 * pointer null. */
505 ovsthread_stats_init(struct ovsthread_stats *stats)
509 ovs_mutex_init(&stats->mutex);
510 for (i = 0; i < ARRAY_SIZE(stats->buckets); i++) {
511 stats->buckets[i] = NULL;
/* Destroys the mutex in 'stats'.  The buckets themselves are not touched
 * here. */
516 ovsthread_stats_destroy(struct ovsthread_stats *stats)
518 ovs_mutex_destroy(&stats->mutex);
/* Looks up the bucket in 'stats' that corresponds to the calling thread,
 * creating it with 'new_bucket' when needed. */
522 ovsthread_stats_bucket_get(struct ovsthread_stats *stats,
523 void *(*new_bucket)(void))
/* The thread id is masked with the bucket count minus one, which assumes
 * that the number of buckets is a power of two. */
525 unsigned int idx = ovsthread_id_self() & (ARRAY_SIZE(stats->buckets) - 1);
/* NOTE(review): this first read happens without holding 'stats->mutex'. */
526 void *bucket = stats->buckets[idx];
528 ovs_mutex_lock(&stats->mutex);
/* Read the slot again now that the mutex is held. */
529 bucket = stats->buckets[idx];
531 bucket = stats->buckets[idx] = new_bucket();
533 ovs_mutex_unlock(&stats->mutex);
/* Scans the buckets of 'stats' starting at index 'i', looking for one that is
 * non-null.
 * NOTE(review): the return statements are on lines not visible here;
 * presumably the index of the bucket found is returned -- confirm. */
539 ovs_thread_stats_next_bucket(const struct ovsthread_stats *stats, size_t i)
541 for (; i < ARRAY_SIZE(stats->buckets); i++) {
542 if (stats->buckets[i]) {
550 /* Returns the total number of cores available to this process, or 0 if the
551 * number cannot be determined. */
553 count_cpu_cores(void)
/* The count is computed on the first call only and cached in 'n_cores'. */
555 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
556 static long int n_cores;
558 if (ovsthread_once_start(&once)) {
/* Start from the number of processors currently online. */
560 n_cores = sysconf(_SC_NPROCESSORS_ONLN);
/* Narrow that down to the CPUs in this process's affinity mask, when the
 * mask can be obtained. */
563 cpu_set_t *set = CPU_ALLOC(n_cores);
566 size_t size = CPU_ALLOC_SIZE(n_cores);
568 if (!sched_getaffinity(0, size, set)) {
569 n_cores = CPU_COUNT_S(size, set);
/* NOTE(review): presumably the Windows variant; the preprocessor guards that
 * select between the two implementations are not visible here. */
577 GetSystemInfo(&sysinfo);
578 n_cores = sysinfo.dwNumberOfProcessors;
580 ovsthread_once_done(&once);
/* A zero or negative count is reported as 0. */
583 return n_cores > 0 ? n_cores : 0;
586 /* Returns 'true' if current thread is PMD thread. */
590 const char *name = get_subprogram_name();
591 return !strncmp(name, "pmd", 3);
/* Upper bound on the number of thread-specific data keys: key data lives in a
 * two-level table indexed as p1[index / L2_SIZE][index % L2_SIZE]. */
599 #define MAX_KEYS (L1_SIZE * L2_SIZE)
601 /* A piece of thread-specific data. */
602 struct ovsthread_key {
603 struct ovs_list list_node; /* In 'inuse_keys' or 'free_keys'. */
604 void (*destructor)(void *); /* Called at thread exit. */
606 /* Indexes into the per-thread array in struct ovsthread_key_slots.
607 * This key's data is stored in p1[index / L2_SIZE][index % L2_SIZE]. */
/* (The comment above describes the 'index' member, which
 * ovsthread_key_create() assigns from 'n_keys'; its declaration is not
 * visible here.) */
611 /* Per-thread data structure. */
612 struct ovsthread_key_slots {
613 struct ovs_list list_node; /* In 'slots_list'. */
/* The code in this file also uses a member 'p1', a first-level array of
 * pointers to second-level arrays of L2_SIZE "void *" values (its
 * declaration is not visible here). */
617 /* Contains "struct ovsthread_key_slots *". */
618 static pthread_key_t tsd_key;
620 /* Guards data structures below. */
621 static struct ovs_mutex key_mutex = OVS_MUTEX_INITIALIZER;
/* 'inuse_keys' holds "struct ovsthread_key"s that have been created and not
 * yet destroyed.
 *
 * 'free_keys' holds "struct ovsthread_key"s that have been deleted and are
 * ready for reuse.  (We keep them around only to be able to easily locate
 * free indexes.)
 *
 * Together, 'inuse_keys' and 'free_keys' hold an ovsthread_key for every index
 * from 0 to n_keys - 1, inclusive. */
632 static struct ovs_list inuse_keys OVS_GUARDED_BY(key_mutex)
633 = OVS_LIST_INITIALIZER(&inuse_keys);
634 static struct ovs_list free_keys OVS_GUARDED_BY(key_mutex)
635 = OVS_LIST_INITIALIZER(&free_keys);
636 static unsigned int n_keys OVS_GUARDED_BY(key_mutex);
638 /* All existing struct ovsthread_key_slots. */
639 static struct ovs_list slots_list OVS_GUARDED_BY(key_mutex)
640 = OVS_LIST_INITIALIZER(&slots_list);
/* Locates the value stored for key 'index' in 'slots' and hands back the value
 * found there (callers use the result as the old value).
 * NOTE(review): the null check on 'p2', the store that resets the slot, and
 * the return statement are on lines not visible here; presumably the slot is
 * set to NULL -- confirm. */
643 clear_slot(struct ovsthread_key_slots *slots, unsigned int index)
/* First level: pick the second-level array that holds 'index'. */
645 void **p2 = slots->p1[index / L2_SIZE];
/* Second level: the slot for 'index' within that array. */
647 void **valuep = &p2[index % L2_SIZE];
648 void *value = *valuep;
/* Destructor registered for 'tsd_key' by ovsthread_key_create().  'slots_' is
 * the struct ovsthread_key_slots stored under 'tsd_key' for a thread.
 * Unlinks it from 'slots_list' and runs the destructor of every in-use key
 * that still has a non-null value in it. */
657 ovsthread_key_destruct__(void *slots_)
659 struct ovsthread_key_slots *slots = slots_;
660 struct ovsthread_key *key;
664 ovs_mutex_lock(&key_mutex);
665 ovs_list_remove(&slots->list_node);
/* NOTE(review): the key destructors below run with 'key_mutex' held. */
666 LIST_FOR_EACH (key, list_node, &inuse_keys) {
667 void *value = clear_slot(slots, key->index);
668 if (value && key->destructor) {
669 key->destructor(value);
673 ovs_mutex_unlock(&key_mutex);
/* Walks the second-level arrays that can hold the first 'n' keys.
 * NOTE(review): the assignment of 'n' and the loop body are on lines not
 * visible here. */
675 for (i = 0; i < DIV_ROUND_UP(n, L2_SIZE); i++) {
681 /* Cancels the callback to ovsthread_key_destruct__().
683 * Cancelling the call to the destructor during the main thread exit
684 * is needed while using pthreads-win32 library in Windows. It has been
685 * observed that in pthreads-win32, a call to the destructor during
686 * main thread exit causes undefined behavior. */
688 ovsthread_cancel_ovsthread_key_destruct__(void *aux OVS_UNUSED)
/* Clears the calling thread's value for 'tsd_key'.
 * NOTE(review): the return value of pthread_setspecific() is ignored. */
690 pthread_setspecific(tsd_key, NULL);
693 /* Initializes '*keyp' as a thread-specific data key. The data items are
694 * initially null in all threads.
696 * If a thread exits with non-null data, then 'destructor', if nonnull, will be
697 * called passing the final data value as its argument. 'destructor' must not
698 * call any thread-specific data functions in this API.
700 * This function is similar to xpthread_key_create(). */
702 ovsthread_key_create(ovsthread_key_t *keyp, void (*destructor)(void *))
704 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
705 struct ovsthread_key *key;
/* One-time setup: create the single pthread key that backs every
 * ovsthread_key, and register the hook that cancels its destructor. */
707 if (ovsthread_once_start(&once)) {
708 xpthread_key_create(&tsd_key, ovsthread_key_destruct__);
709 fatal_signal_add_hook(ovsthread_cancel_ovsthread_key_destruct__,
711 ovsthread_once_done(&once);
714 ovs_mutex_lock(&key_mutex);
715 if (ovs_list_is_empty(&free_keys)) {
/* Nothing to recycle: allocate a key and give it the next unused index. */
716 key = xmalloc(sizeof *key);
717 key->index = n_keys++;
/* NOTE(review): the handling of running out of indexes is on a line not
 * visible here. */
718 if (key->index >= MAX_KEYS) {
/* Otherwise reuse a previously deleted key, and with it its index. */
722 key = CONTAINER_OF(ovs_list_pop_back(&free_keys),
723 struct ovsthread_key, list_node);
725 ovs_list_push_back(&inuse_keys, &key->list_node);
726 key->destructor = destructor;
727 ovs_mutex_unlock(&key_mutex);
/* Frees 'key'.  The destructor supplied to ovsthread_key_create(), if any, is
 * not called.
 *
735 * This function is similar to xpthread_key_delete(). */
737 ovsthread_key_delete(ovsthread_key_t key)
739 struct ovsthread_key_slots *slots;
741 ovs_mutex_lock(&key_mutex);
743 /* Move 'key' from 'inuse_keys' to 'free_keys'. */
744 ovs_list_remove(&key->list_node);
745 ovs_list_push_back(&free_keys, &key->list_node);
747 /* Clear this slot in all threads. */
/* The value returned by clear_slot() is discarded, so no destructor runs. */
748 LIST_FOR_EACH (slots, list_node, &slots_list) {
749 clear_slot(slots, key->index);
752 ovs_mutex_unlock(&key_mutex);
/* Returns the address of the calling thread's slot for 'key', so that the
 * caller can read or write the value in place.
 * NOTE(review): the conditions guarding the two allocations below are on
 * lines not visible here; presumably each runs only when the corresponding
 * pointer is still null. */
756 ovsthread_key_lookup__(const struct ovsthread_key *key)
758 struct ovsthread_key_slots *slots;
761 slots = pthread_getspecific(tsd_key);
/* Per-thread slots structure, zero-filled so that every value starts null. */
763 slots = xzalloc(sizeof *slots);
765 ovs_mutex_lock(&key_mutex);
/* NOTE(review): the return value of pthread_setspecific() is not checked,
 * although this file also defines the aborting xpthread_setspecific(). */
766 pthread_setspecific(tsd_key, slots);
767 ovs_list_push_back(&slots_list, &slots->list_node);
768 ovs_mutex_unlock(&key_mutex);
771 p2 = slots->p1[key->index / L2_SIZE];
/* Second-level array, also zero-filled. */
773 p2 = xzalloc(L2_SIZE * sizeof *p2);
774 slots->p1[key->index / L2_SIZE] = p2;
777 return &p2[key->index % L2_SIZE];
/* Sets the value of thread-specific data item 'key', in the current thread, to
 * 'value'.
 *
783 * This function is similar to pthread_setspecific(). */
785 ovsthread_setspecific(ovsthread_key_t key, const void *value)
/* Slots hold plain "void *", so the const qualifier on 'value' is cast away
 * for storage. */
787 *ovsthread_key_lookup__(key) = CONST_CAST(void *, value);
790 /* Returns the value of thread-specific data item 'key' in the current thread.
792 * This function is similar to pthread_getspecific(). */
794 ovsthread_getspecific(ovsthread_key_t key)
/* Reads the calling thread's slot for 'key' via ovsthread_key_lookup__(). */
796 return *ovsthread_key_lookup__(key);