/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC
 * on i586 or greater platforms (32 bit). */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From MSDN documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation stating that anything newer than
 * VS 2005 has the same behavior as described above, looking through MSVC's
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT)                      \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
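
/* Usage sketch (illustrative only, not part of the original header): with the
 * definitions above, an atomic object is declared and initialized like any
 * other ovs-atomic type.  The variable name 'n_packets' is hypothetical.
 *
 *     static ATOMIC(uint32_t) n_packets = ATOMIC_VAR_INIT(0);
 *
 *     void
 *     reset_counter(void)
 *     {
 *         atomic_init(&n_packets, 0);   // plain assignment, no ordering
 *     }
 *
 * Under this implementation the object is simply a volatile uint32_t. */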

static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have a data dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}
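
/* Illustrative note (not part of the original header): only a sequentially
 * consistent fence emits a hardware barrier here; weaker orders only prevent
 * compiler reordering.
 *
 *     atomic_thread_fence(memory_order_release);  // compiler barrier only
 *     atomic_thread_fence(memory_order_seq_cst);  // full hardware barrier
 */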

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory. In addition,
 * since the compiler automatically takes acquire and release semantics on
 * volatile variables, for any order weaker than 'memory_order_seq_cst' we
 * can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                   \
    if (ORDER == memory_order_seq_cst) {                                  \
        InterlockedExchange((int32_t volatile *) (DST), (int32_t) (SRC)); \
    } else {                                                              \
        *(DST) = (SRC);                                                   \
    }

/* 64 bit writes are atomic on i586 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                     \
    if (((size_t) (DST) & (sizeof *(DST) - 1))                              \
        || ORDER == memory_order_seq_cst) {                                 \
        InterlockedExchange64((int64_t volatile *) (DST), (int64_t) (SRC)); \
    } else {                                                                \
        *(DST) = (SRC);                                                     \
    }

/* Used for 8 and 16 bit variations. */
#define atomic_storeX(X, DST, SRC, ORDER)                               \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange##X((int##X##_t volatile *) (DST),           \
                               (int##X##_t) (SRC));                     \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                                          \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                          \
    if (sizeof *(DST) == 1) {                                           \
        atomic_storeX(8, DST, SRC, ORDER)                               \
    } else if (sizeof *(DST) == 2) {                                    \
        atomic_storeX(16, DST, SRC, ORDER)                              \
    } else if (sizeof *(DST) == 4) {                                    \
        atomic_store32(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_store64(DST, SRC, ORDER)                                 \
    } else {                                                            \
        abort();                                                        \
    }
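
/* Usage sketch (illustrative only, not part of the original header); the
 * variable 'counter' is hypothetical:
 *
 *     static ATOMIC(uint32_t) counter;
 *
 *     atomic_store(&counter, 1);                                // locked
 *     atomic_store_explicit(&counter, 2, memory_order_release); // volatile
 *                                                               // store
 *
 * The first call expands to InterlockedExchange(); the second compiles to a
 * plain volatile store because its order is weaker than seq_cst. */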

/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked (based on the following from the Intel
 * Developer's Manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                   \
    *(DST) = *(SRC);

/* 64 bit reads are atomic on i586 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                  \
    if (((size_t) (SRC) & (sizeof *(SRC) - 1)) == 0) {                  \
        *(DST) = *(SRC);                                                \
    } else {                                                            \
        __pragma (warning(push))                                        \
        __pragma (warning(disable:4047))                                \
        *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);        \
        __pragma (warning(pop))                                         \
    }

#define atomic_read(SRC, DST)                                           \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                             \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
        atomic_readX(SRC, DST, ORDER)                                     \
    } else if (sizeof *(DST) == 8) {                                      \
        atomic_read64(SRC, DST, ORDER)                                    \
    } else {                                                              \
        abort();                                                          \
    }
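
/* Usage sketch (illustrative only, not part of the original header); the
 * variables 'counter' and 'value' are hypothetical:
 *
 *     uint32_t value;
 *
 *     atomic_read(&counter, &value);
 *     atomic_read_explicit(&counter, &value, memory_order_acquire);
 *
 * Both expand to a plain volatile read for 1, 2 and 4 byte types; only a
 * misaligned 64 bit object falls back to InterlockedOr64(). */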

/* For add, sub, and logical operations on 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist (though
 * documentation exists for some of them). The MSVC C++ library that
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)

/* Arithmetic addition calls. */

#define atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW),        \
                                     (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW), \
                                         (int##X##_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(add, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(add, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(add, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }
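
/* Usage sketch (illustrative only, not part of the original header):
 * atomic_add() stores the value the object held before the addition into
 * *ORIG. The variable 'counter' is hypothetical:
 *
 *     uint32_t orig;
 *
 *     atomic_add(&counter, 5, &orig);   // counter += 5, orig = old value
 */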

/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                                      \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                     \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),           \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                                       \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)                       \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                         \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)                        \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'xor' calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                                      \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

#define atomic_compare_exchange_strong(DST, EXP, SRC)                   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,              \
                                            memory_order_seq_cst,       \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit   \
    atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h) shows that memory orders are ignored on the
 * x86 platform. Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable()
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)    \
    (sizeof *(DST) == 1                                                       \
     ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP),  \
                                (int8_t) (SRC))                               \
     : (sizeof *(DST) == 2                                                    \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),               \
                                    (int16_t *) (EXP), (int16_t) (SRC))       \
        : (sizeof *(DST) == 4                                                 \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),            \
                                       (int32_t *) (EXP), (int32_t) (SRC))    \
           : (sizeof *(DST) == 8                                              \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),         \
                                          (int64_t *) (EXP), (int64_t) (SRC)) \
              : ovs_fatal(0, "atomic operation with size greater than 8 bytes"), \
                atomic_compare_unreachable()))))
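
/* Usage sketch (illustrative only, not part of the original header): a
 * typical compare-and-swap retry loop; the variable 'value' is hypothetical:
 *
 *     static ATOMIC(uint32_t) value;
 *     uint32_t expected = 0;
 *
 *     while (!atomic_compare_exchange_strong(&value, &expected,
 *                                            expected + 1)) {
 *         // On failure, 'expected' has been updated to the current
 *         // contents of 'value'; retry with it.
 *     }
 */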

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                  \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER)  \
    atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
    atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                 \
    InterlockedBitTestAndReset(FLAG, 0)
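
/* Usage sketch (illustrative only, not part of the original header):
 * atomic_flag as a minimal spinlock; the variable 'lock' is hypothetical:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         // spin until the previous value was 0
 *     }
 *     // ... critical section ...
 *     atomic_flag_clear(&lock);
 */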