lib/ovs-atomic-msvc.h

   1 /*
   2  * Copyright (c) 2014 Nicira, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /* This header implements atomic operation primitives for MSVC
  18  * on i586 or greater platforms (32 bit). */
  19 #ifndef IN_OVS_ATOMIC_H
  20 #error "This header should only be included indirectly via ovs-atomic.h."
  21 #endif
  22
/* From MSDN documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation that states that anything greater
 * than VS 2005 has the same behavior as described above, looking through
 * MSVC's C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables.
 *
 * ATOMIC(TYPE) therefore declares an atomic object simply as a
 * volatile-qualified TYPE. */
#define ATOMIC(TYPE) TYPE volatile
  34
/* Memory ordering constraints accepted by the *_explicit() operations and the
 * fence functions in this header.
 *
 * The declaration order is significant: atomic_compiler_barrier() compares
 * its argument against 'memory_order_consume' with '>', so 'relaxed' and
 * 'consume' must stay the two lowest-valued enumerators. */
typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;
  43
/* Lock-free property macros.  Every one is defined as 2, which in C11's
 * <stdatomic.h> convention means "always lock-free" for that type. */
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2
  51
  52 #define IS_LOCKLESS_ATOMIC(OBJECT)                      \
  53     (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
  54
  55 #define ATOMIC_VAR_INIT(VALUE) (VALUE)
  56 #define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
  57
  58 static inline void
  59 atomic_compiler_barrier(memory_order order)
  60 {
  61     /* In case of 'memory_order_consume', it is implicitly assumed that
  62      * the compiler will not move instructions that have data-dependency
  63      * on the variable in question before the barrier. */
  64     if (order > memory_order_consume) {
  65         _ReadWriteBarrier();
  66     }
  67 }
  68
  69 static inline void
  70 atomic_thread_fence(memory_order order)
  71 {
  72     /* x86 is strongly ordered and acquire/release semantics come
  73      * automatically. */
  74     atomic_compiler_barrier(order);
  75     if (order == memory_order_seq_cst) {
  76         MemoryBarrier();
  77         atomic_compiler_barrier(order);
  78     }
  79 }
  80
/* Signal fence for memory order 'order'.  Only delegates to
 * atomic_compiler_barrier(); no hardware barrier is issued here. */
static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}
  86
  87 /* 1, 2 and 4 bytes loads and stores are atomic on aligned memory. In addition,
  88  * since the compiler automatically takes acquire and release semantics on
  89  * volatile variables, for any order lesser than 'memory_order_seq_cst', we
  90  * can directly assign or read values. */
  91
  92 #define atomic_store32(DST, SRC, ORDER)                                 \
  93     if (ORDER == memory_order_seq_cst) {                                \
  94         InterlockedExchange((int32_t volatile *) (DST),                 \
  95                                (int32_t) (SRC));                        \
  96     } else {                                                            \
  97         *(DST) = (SRC);                                                 \
  98     }
  99
 100 /* 64 bit writes are atomic on i586 if 64 bit aligned. */
 101 #define atomic_store64(DST, SRC, ORDER)                                    \
 102     if (((size_t) (DST) & (sizeof *(DST) - 1))                             \
 103         || ORDER == memory_order_seq_cst) {                                \
 104         InterlockedExchange64((int64_t volatile *) (DST),                  \
 105                               (int64_t) (SRC));                            \
 106     } else {                                                               \
 107         *(DST) = (SRC);                                                    \
 108     }
 109
 110 /* Used for 8 and 16 bit variations. */
 111 #define atomic_storeX(X, DST, SRC, ORDER)                               \
 112     if (ORDER == memory_order_seq_cst) {                                \
 113         InterlockedExchange##X((int##X##_t volatile *) (DST),           \
 114                                (int##X##_t) (SRC));                     \
 115     } else {                                                            \
 116         *(DST) = (SRC);                                                 \
 117     }
 118
 119 #define atomic_store(DST, SRC)                               \
 120         atomic_store_explicit(DST, SRC, memory_order_seq_cst)
 121
 122 #define atomic_store_explicit(DST, SRC, ORDER)                           \
 123     if (sizeof *(DST) == 1) {                                            \
 124         atomic_storeX(8, DST, SRC, ORDER)                                \
 125     } else if (sizeof *(DST) == 2) {                                     \
 126         atomic_storeX(16, DST, SRC, ORDER)                               \
 127     } else if (sizeof *(DST) == 4) {                                     \
 128         atomic_store32(DST, SRC, ORDER)                                  \
 129     } else if (sizeof *(DST) == 8) {                                     \
 130         atomic_store64(DST, SRC, ORDER)                                  \
 131     } else {                                                             \
 132         abort();                                                         \
 133     }
 134
/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked.  This is based on the following passage from
 * the Intel Developer's Manual:
 *
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized." */
 147
/* For 8, 16 and 32 bit variations: copies *(SRC) into *(DST) with a plain
 * assignment.  ORDER is accepted but not used.  The expansion already ends
 * with a semicolon. */
#define atomic_readX(SRC, DST, ORDER)                                      \
    *(DST) = *(SRC);
 151
 152 /* 64 bit reads are atomic on i586 if 64 bit aligned. */
 153 #define atomic_read64(SRC, DST, ORDER)                                     \
 154     if (((size_t) (SRC) & (sizeof *(SRC) - 1)) == 0) {                     \
 155         *(DST) = *(SRC);                                                   \
 156     } else {                                                               \
 157        *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);            \
 158     }
 159
 160 #define atomic_read(SRC, DST)                               \
 161         atomic_read_explicit(SRC, DST, memory_order_seq_cst)
 162
 163 #define atomic_read_explicit(SRC, DST, ORDER)                             \
 164     if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
 165         atomic_readX(SRC, DST, ORDER)                                     \
 166     } else if (sizeof *(DST) == 8) {                                      \
 167         atomic_read64(SRC, DST, ORDER)                                    \
 168     } else {                                                              \
 169         abort();                                                          \
 170     }
 171
/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist
 * (though documentation exists for some of them).  The MSVC C++ library which
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h).  So do the same here. */
 177
/* For 8, 16 and 64 bit variations: forwards to atomic_<OP>_generic(), where
 * OP is pasted into the macro name (e.g. OP 'add' selects
 * atomic_add_generic()).  All remaining arguments are passed through
 * unchanged. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)
 181
 182 /* Arithmetic addition calls. */
 183
 184 #define atomic_add32(RMW, ARG, ORIG, ORDER)                        \
 185     *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW),   \
 186                                       (int32_t) (ARG));
 187
/* For 8, 16 and 64 bit variations: X is pasted into the intrinsic name
 * (_InterlockedExchangeAdd##X, note the leading underscore) and into the cast
 * type (int##X##_t).  The intrinsic's return value is stored in *(ORIG).
 * ORDER is not used.  The expansion already ends with a semicolon. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW),     \
                                      (int##X##_t) (ARG));
 192
 193 #define atomic_add(RMW, ARG, ORIG)                               \
 194         atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 195
 196 #define atomic_add_explicit(RMW, ARG, ORIG, ORDER)             \
 197     if (sizeof *(RMW) == 1) {                                  \
 198         atomic_op(add, 8, RMW, ARG, ORIG, ORDER)               \
 199     } else if (sizeof *(RMW) == 2) {                           \
 200         atomic_op(add, 16, RMW, ARG, ORIG, ORDER)              \
 201     } else if (sizeof *(RMW) == 4) {                           \
 202         atomic_add32(RMW, ARG, ORIG, ORDER)                    \
 203     } else if (sizeof *(RMW) == 8) {                           \
 204         atomic_op(add, 64, RMW, ARG, ORIG, ORDER)              \
 205     } else {                                                   \
 206         abort();                                               \
 207     }
 208
 209 /* Arithmetic subtraction calls. */
 210
 211 #define atomic_sub(RMW, ARG, ORIG)                             \
 212         atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)
 213
 214 #define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)           \
 215         atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)
 216
 217 /* Logical 'and' calls. */
 218
 219 #define atomic_and32(RMW, ARG, ORIG, ORDER)                        \
 220     *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));
 221
/* For 8, 16 and 64 bit variations: X is pasted into the intrinsic name
 * (InterlockedAnd##X) and into the cast type (int##X##_t).  The intrinsic's
 * return value is stored in *(ORIG).  ORDER is not used.  The expansion
 * already ends with a semicolon. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),              \
                                (int##X##_t) (ARG));
 226
 227 #define atomic_and(RMW, ARG, ORIG)                               \
 228         atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 229
 230 #define atomic_and_explicit(RMW, ARG, ORIG, ORDER)             \
 231     if (sizeof *(RMW) == 1) {                                  \
 232         atomic_op(and, 8, RMW, ARG, ORIG, ORDER)               \
 233     } else if (sizeof *(RMW) == 2) {                           \
 234         atomic_op(and, 16, RMW, ARG, ORIG, ORDER)              \
 235     } else if (sizeof *(RMW) == 4) {                           \
 236         atomic_and32(RMW, ARG, ORIG, ORDER)                    \
 237     } else if (sizeof *(RMW) == 8) {                           \
 238         atomic_op(and, 64, RMW, ARG, ORIG, ORDER)              \
 239     } else {                                                   \
 240         abort();                                               \
 241     }
 242
 243 /* Logical 'Or' calls. */
 244
 245 #define atomic_or32(RMW, ARG, ORIG, ORDER)                        \
 246     *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));
 247
/* For 8, 16 and 64 bit variations: X is pasted into the intrinsic name
 * (InterlockedOr##X) and into the cast type (int##X##_t).  The intrinsic's
 * return value is stored in *(ORIG).  ORDER is not used.  The expansion
 * already ends with a semicolon. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),              \
                               (int##X##_t) (ARG));
 252
 253 #define atomic_or(RMW, ARG, ORIG)                               \
 254         atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 255
 256 #define atomic_or_explicit(RMW, ARG, ORIG, ORDER)              \
 257     if (sizeof *(RMW) == 1) {                                  \
 258         atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                \
 259     } else if (sizeof *(RMW) == 2) {                           \
 260         atomic_op(or, 16, RMW, ARG, ORIG, ORDER)               \
 261     } else if (sizeof *(RMW) == 4) {                           \
 262         atomic_or32(RMW, ARG, ORIG, ORDER)                     \
 263     } else if (sizeof *(RMW) == 8) {                           \
 264         atomic_op(or, 64, RMW, ARG, ORIG, ORDER)               \
 265     } else {                                                   \
 266         abort();                                               \
 267     }
 268
 269 /* Logical Xor calls. */
 270
 271 #define atomic_xor32(RMW, ARG, ORIG, ORDER)                        \
 272     *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));
 273
/* For 8, 16 and 64 bit variations: X is pasted into the intrinsic name
 * (InterlockedXor##X) and into the cast type (int##X##_t).  The intrinsic's
 * return value is stored in *(ORIG).  ORDER is not used.  The expansion
 * already ends with a semicolon. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),              \
                                (int##X##_t) (ARG));
 278
 279 #define atomic_xor(RMW, ARG, ORIG)                               \
 280         atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 281
 282 #define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)             \
 283     if (sizeof *(RMW) == 1) {                                  \
 284         atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)               \
 285     } else if (sizeof *(RMW) == 2) {                           \
 286         atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)              \
 287     } else if (sizeof *(RMW) == 4) {                           \
 288         atomic_xor32(RMW, ARG, ORIG, ORDER);                   \
 289     } else if (sizeof *(RMW) == 8) {                           \
 290         atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)              \
 291     } else {                                                   \
 292         abort();                                               \
 293     }
 294
/* Compare-and-exchange with 'memory_order_seq_cst' passed for both order
 * arguments of atomic_compare_exchange_strong_explicit(). */
#define atomic_compare_exchange_strong(DST, EXP, SRC)   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC, \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)

/* The weak variants are plain aliases of the strong ones. */
#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
        atomic_compare_exchange_strong_explicit
 303
 304 /* MSVCs c++ compiler implements c11 atomics and looking through its
 305  * implementation (in xatomic.h), orders are ignored for x86 platform.
 306  * Do the same here. */
 307 static inline bool
 308 atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
 309 {
 310     int8_t previous = _InterlockedCompareExchange8(dst, src, *expected);
 311     if (previous == *expected) {
 312         return true;
 313     } else {
 314         *expected = previous;
 315         return false;
 316     }
 317 }
 318
 319 static inline bool
 320 atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
 321                           int16_t src)
 322 {
 323     int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
 324     if (previous == *expected) {
 325         return true;
 326     } else {
 327         *expected = previous;
 328         return false;
 329     }
 330 }
 331
 332 static inline bool
 333 atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
 334                           int32_t src)
 335 {
 336     int32_t previous = InterlockedCompareExchange(dst, src, *expected);
 337     if (previous == *expected) {
 338         return true;
 339     } else {
 340         *expected = previous;
 341         return false;
 342     }
 343 }
 344
 345 static inline bool
 346 atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
 347                           int64_t src)
 348 {
 349     int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
 350     if (previous == *expected) {
 351         return true;
 352     } else {
 353         *expected = previous;
 354         return false;
 355     }
 356 }
 357
 358 static inline bool
 359 atomic_compare_unreachable()
 360 {
 361     return true;
 362 }
 363
 364 #define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)    \
 365     (sizeof *(DST) == 1                                                       \
 366      ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP),  \
 367                                 (int8_t) (SRC))                               \
 368      : (sizeof *(DST) == 2                                                    \
 369      ? atomic_compare_exchange16((int16_t volatile *) (DST),                  \
 370                                  (int16_t *) (EXP), (int16_t) (SRC))          \
 371      : (sizeof *(DST) == 4                                                    \
 372      ? atomic_compare_exchange32((int32_t volatile *) (DST),                  \
 373                                  (int32_t *) (EXP), (int32_t) (SRC))          \
 374      : (sizeof *(DST) == 8                                                    \
 375      ? atomic_compare_exchange64((int64_t volatile *) (DST),                  \
 376                                  (int64_t *) (EXP), (int64_t) (SRC))          \
 377      : ovs_fatal(0, "atomic operation with size greater than 8 bytes"),       \
 378        atomic_compare_unreachable()))))
 379
 380 \f
/* atomic_flag
 *
 * The flag is an ATOMIC(int32_t).  The test-and-set and clear macros below
 * operate on bit 0 of that object, and ATOMIC_FLAG_INIT (0) leaves the bit
 * clear. */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0
 385
 386 #define atomic_flag_test_and_set(FLAG)                 \
 387     (bool) InterlockedBitTestAndSet(FLAG, 0)
 388
 389 #define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
 390         atomic_flag_test_and_set(FLAG)
 391
 392 #define atomic_flag_clear_explicit(FLAG, ORDER) \
 393         atomic_flag_clear()
 394 #define atomic_flag_clear(FLAG)                 \
 395     InterlockedBitTestAndReset(FLAG, 0)