/* atomic_ops.h */
  1. /*
  2. * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. * SOFTWARE.
  21. */
  22. #ifndef AO_ATOMIC_OPS_H
  23. #define AO_ATOMIC_OPS_H
  24. #include "atomic_ops/ao_version.h"
  25. /* Define version numbers here to allow */
  26. /* test on build machines for cross-builds. */
  27. #include <assert.h>
  28. #include <stddef.h>
  29. /* We define various atomic operations on memory in a */
  30. /* machine-specific way. Unfortunately, this is complicated */
  31. /* by the fact that these may or may not be combined with */
  32. /* various memory barriers. Thus the actual operations we */
  33. /* define have the form AO_<atomic-op>_<barrier>, for all */
  34. /* plausible combinations of <atomic-op> and <barrier>. */
  35. /* This of course results in a mild combinatorial explosion. */
  36. /* To deal with it, we try to generate derived */
  37. /* definitions for as many of the combinations as we can, as */
  38. /* automatically as possible. */
  39. /* */
  40. /* Our assumption throughout is that the programmer will */
  41. /* specify the least demanding operation and memory barrier */
  42. /* that will guarantee correctness for the implementation. */
  43. /* Our job is to find the least expensive way to implement it */
  44. /* on the applicable hardware. In many cases that will */
  45. /* involve, for example, a stronger memory barrier, or a */
  46. /* combination of hardware primitives. */
  47. /* */
  48. /* Conventions: */
  49. /* "plain" atomic operations are not guaranteed to include */
  50. /* a barrier. The suffix in the name specifies the barrier */
  51. /* type. Suffixes are: */
  52. /* _release: Earlier operations may not be delayed past it. */
  53. /* _acquire: Later operations may not move ahead of it. */
  54. /* _read: Subsequent reads must follow this operation and */
  55. /* preceding reads. */
  56. /* _write: Earlier writes precede both this operation and */
  57. /* later writes. */
  58. /* _full: Ordered with respect to both earlier and later memory */
  59. /* operations. */
  60. /* _release_write: Ordered with respect to earlier writes. */
  61. /* _acquire_read: Ordered with respect to later reads. */
  62. /* */
  63. /* Currently we try to define the following atomic memory */
  64. /* operations, in combination with the above barriers: */
  65. /* AO_nop */
  66. /* AO_load */
  67. /* AO_store */
  68. /* AO_test_and_set (binary) */
  69. /* AO_fetch_and_add */
  70. /* AO_fetch_and_add1 */
  71. /* AO_fetch_and_sub1 */
  72. /* AO_and */
  73. /* AO_or */
  74. /* AO_xor */
  75. /* AO_compare_and_swap */
  76. /* AO_fetch_compare_and_swap */
  77. /* */
  78. /* Note that atomicity guarantees are valid only if both */
  79. /* readers and writers use AO_ operations to access the */
  80. /* shared value, while ordering constraints are intended to */
  81. /* apply all memory operations. If a location can potentially */
  82. /* be accessed simultaneously from multiple threads, and one of */
  83. /* those accesses may be a write access, then all such */
  84. /* accesses to that location should be through AO_ primitives. */
  85. /* However if AO_ operations enforce sufficient ordering to */
  86. /* ensure that a location x cannot be accessed concurrently, */
  87. /* or can only be read concurrently, then x can be accessed */
  88. /* via ordinary references and assignments. */
  89. /* */
  90. /* AO_compare_and_swap takes an address and an expected old */
  91. /* value and a new value, and returns an int. Non-zero result */
  92. /* indicates that it succeeded. */
  93. /* AO_fetch_compare_and_swap takes an address and an expected */
  94. /* old value and a new value, and returns the real old value. */
  95. /* The operation succeeded if and only if the expected old */
  96. /* value matches the old value returned. */
  97. /* */
  98. /* Test_and_set takes an address, atomically replaces it by */
  99. /* AO_TS_SET, and returns the prior value. */
  100. /* An AO_TS_t location can be reset with the */
  101. /* AO_CLEAR macro, which normally uses AO_store_release. */
  102. /* AO_fetch_and_add takes an address and an AO_t increment */
  103. /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
  104. /* are provided, since they allow faster implementations on */
  105. /* some hardware. AO_and, AO_or, AO_xor do atomically and, or, */
  106. /* xor (respectively) an AO_t value into a memory location, */
  107. /* but do not provide access to the original. */
  108. /* */
  109. /* We expect this list to grow slowly over time. */
  110. /* */
  111. /* Note that AO_nop_full is a full memory barrier. */
  112. /* */
  113. /* Note that if some data is initialized with */
  114. /* data.x = ...; data.y = ...; ... */
  115. /* AO_store_release_write(&data_is_initialized, 1) */
  116. /* then data is guaranteed to be initialized after the test */
  117. /* if (AO_load_acquire_read(&data_is_initialized)) ... */
  118. /* succeeds. Furthermore, this should generate near-optimal */
  119. /* code on all common platforms. */
  120. /* */
  121. /* All operations operate on unsigned AO_t, which */
  122. /* is the natural word size, and usually unsigned long. */
  123. /* It is possible to check whether a particular operation op */
  124. /* is available on a particular platform by checking whether */
  125. /* AO_HAVE_op is defined. We make heavy use of these macros */
  126. /* internally. */
  127. /* The rest of this file basically has three sections: */
  128. /* */
  129. /* Some utility and default definitions. */
  130. /* */
  131. /* The architecture dependent section: */
  132. /* This defines atomic operations that have direct hardware */
  133. /* support on a particular platform, mostly by including the */
  134. /* appropriate compiler- and hardware-dependent file. */
  135. /* */
  136. /* The synthesis section: */
  137. /* This tries to define other atomic operations in terms of */
  138. /* those that are explicitly available on the platform. */
  139. /* This section is hardware independent. */
  140. /* We make no attempt to synthesize operations in ways that */
  141. /* effectively introduce locks, except for the debugging/demo */
  142. /* pthread-based implementation at the beginning. A more */
  143. /* realistic implementation that falls back to locks could be */
  144. /* added as a higher layer. But that would sacrifice */
  145. /* usability from signal handlers. */
  146. /* The synthesis section is implemented almost entirely in */
  147. /* atomic_ops/generalize.h. */
  148. /* Some common defaults. Overridden for some architectures. */
  149. #define AO_t size_t
  150. /* The test_and_set primitive returns an AO_TS_VAL_t value. */
  151. /* AO_TS_t is the type of an in-memory test-and-set location. */
  152. #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR
  153. /* Platform-dependent stuff: */
  154. #if (defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
  155. || defined(__DMC__) || defined(__WATCOMC__)) && !defined(AO_NO_INLINE)
  156. # define AO_INLINE static __inline
  157. #elif defined(__sun) && !defined(AO_NO_INLINE)
  158. # define AO_INLINE static inline
  159. #else
  160. # define AO_INLINE static
  161. #endif
  162. #if __GNUC__ >= 3 && !defined(LINT2)
  163. # define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
  164. /* Equivalent to (expr) but predict that usually (expr) == 0. */
  165. #else
  166. # define AO_EXPECT_FALSE(expr) (expr)
  167. #endif /* !__GNUC__ */
  168. #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
  169. # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
  170. #elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
  171. || defined(__WATCOMC__)
  172. # if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
  173. # if defined(_WIN32_WCE)
  174. /* # include <cmnintrin.h> */
  175. # elif defined(_MSC_VER)
  176. # include <intrin.h>
  177. # endif
  178. # pragma intrinsic(_ReadWriteBarrier)
  179. # define AO_compiler_barrier() _ReadWriteBarrier()
  180. /* We assume this does not generate a fence instruction. */
  181. /* The documentation is a bit unclear. */
  182. # else
  183. # define AO_compiler_barrier() __asm { }
  184. /* The preceding implementation may be preferable here too. */
  185. /* But the documentation warns about VC++ 2003 and earlier. */
  186. # endif
  187. #elif defined(__INTEL_COMPILER)
  188. # define AO_compiler_barrier() __memory_barrier()
  189. /* FIXME: Too strong? IA64-only? */
  190. #elif defined(_HPUX_SOURCE)
  191. # if defined(__ia64)
  192. # include <machine/sys/inline.h>
  193. # define AO_compiler_barrier() _Asm_sched_fence()
  194. # else
  195. /* FIXME - We dont know how to do this. This is a guess. */
  196. /* And probably a bad one. */
  197. static volatile int AO_barrier_dummy;
  198. # define AO_compiler_barrier() (void)(AO_barrier_dummy = AO_barrier_dummy)
  199. # endif
  200. #else
  201. /* We conjecture that the following usually gives us the right */
  202. /* semantics or an error. */
  203. # define AO_compiler_barrier() asm("")
  204. #endif
  205. #if defined(AO_USE_PTHREAD_DEFS)
  206. # include "atomic_ops/sysdeps/generic_pthread.h"
  207. #endif /* AO_USE_PTHREAD_DEFS */
  208. #if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \
  209. && !defined(AO_USE_PTHREAD_DEFS)
  210. # include "atomic_ops/sysdeps/armcc/arm_v6.h"
  211. # define AO_GENERALIZE_TWICE
  212. #endif
  213. #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
  214. && !defined(__INTEL_COMPILER)
  215. # if defined(__i386__)
  216. /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */
  217. /* it might require specifying additional options (like -march) */
  218. /* or additional link libraries (if -march is not specified). */
  219. # include "atomic_ops/sysdeps/gcc/x86.h"
  220. # endif /* __i386__ */
  221. # if defined(__x86_64__)
  222. # if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) \
  223. && !defined(AO_USE_SYNC_CAS_BUILTIN)
  224. /* It is safe to use __sync CAS built-in on this architecture. */
  225. # define AO_USE_SYNC_CAS_BUILTIN
  226. # endif
  227. # include "atomic_ops/sysdeps/gcc/x86.h"
  228. # endif /* __x86_64__ */
  229. # if defined(__ia64__)
  230. # include "atomic_ops/sysdeps/gcc/ia64.h"
  231. # define AO_GENERALIZE_TWICE
  232. # endif /* __ia64__ */
  233. # if defined(__hppa__)
  234. # include "atomic_ops/sysdeps/gcc/hppa.h"
  235. # define AO_CAN_EMUL_CAS
  236. # endif /* __hppa__ */
  237. # if defined(__alpha__)
  238. # include "atomic_ops/sysdeps/gcc/alpha.h"
  239. # define AO_GENERALIZE_TWICE
  240. # endif /* __alpha__ */
  241. # if defined(__s390__)
  242. # include "atomic_ops/sysdeps/gcc/s390.h"
  243. # endif /* __s390__ */
  244. # if defined(__sparc__)
  245. # include "atomic_ops/sysdeps/gcc/sparc.h"
  246. # define AO_CAN_EMUL_CAS
  247. # endif /* __sparc__ */
  248. # if defined(__m68k__)
  249. # include "atomic_ops/sysdeps/gcc/m68k.h"
  250. # endif /* __m68k__ */
  251. # if defined(__nios2__)
  252. # include "atomic_ops/sysdeps/gcc/nios2.h"
  253. # endif /* __nios2__ */
  254. # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
  255. || defined(__powerpc64__) || defined(__ppc64__)
  256. # include "atomic_ops/sysdeps/gcc/powerpc.h"
  257. # endif /* __powerpc__ */
  258. # if defined(__aarch64__)
  259. # include "atomic_ops/sysdeps/gcc/aarch64.h"
  260. # define AO_CAN_EMUL_CAS
  261. # endif /* __aarch64__ */
  262. # if defined(__arm__)
  263. # include "atomic_ops/sysdeps/gcc/arm.h"
  264. # define AO_CAN_EMUL_CAS
  265. # endif /* __arm__ */
  266. # if defined(__cris__) || defined(CRIS)
  267. # include "atomic_ops/sysdeps/gcc/cris.h"
  268. # define AO_GENERALIZE_TWICE
  269. # endif
  270. # if defined(__mips__)
  271. # include "atomic_ops/sysdeps/gcc/mips.h"
  272. # endif /* __mips__ */
  273. # if defined(__sh__) || defined(SH4)
  274. # include "atomic_ops/sysdeps/gcc/sh.h"
  275. # define AO_CAN_EMUL_CAS
  276. # endif /* __sh__ */
  277. # if defined(__avr32__)
  278. # include "atomic_ops/sysdeps/gcc/avr32.h"
  279. # endif
  280. # if defined(__hexagon__)
  281. # include "atomic_ops/sysdeps/gcc/hexagon.h"
  282. # endif
  283. #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */
  284. #if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \
  285. && !defined(AO_USE_PTHREAD_DEFS)
  286. # if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \
  287. || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \
  288. || defined(_ARCH_PWR)
  289. # include "atomic_ops/sysdeps/ibmc/powerpc.h"
  290. # define AO_GENERALIZE_TWICE
  291. # endif
  292. #endif
  293. #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
  294. # if defined(__ia64__)
  295. # include "atomic_ops/sysdeps/icc/ia64.h"
  296. # define AO_GENERALIZE_TWICE
  297. # endif
  298. # if defined(__GNUC__)
  299. /* Intel Compiler in GCC compatible mode */
  300. # if defined(__i386__)
  301. # include "atomic_ops/sysdeps/gcc/x86.h"
  302. # endif /* __i386__ */
  303. # if defined(__x86_64__)
  304. # if (__INTEL_COMPILER > 1110) && !defined(AO_USE_SYNC_CAS_BUILTIN)
  305. # define AO_USE_SYNC_CAS_BUILTIN
  306. # endif
  307. # include "atomic_ops/sysdeps/gcc/x86.h"
  308. # endif /* __x86_64__ */
  309. # endif
  310. #endif
  311. #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
  312. # if defined(__ia64)
  313. # include "atomic_ops/sysdeps/hpc/ia64.h"
  314. # define AO_GENERALIZE_TWICE
  315. # else
  316. # include "atomic_ops/sysdeps/hpc/hppa.h"
  317. # define AO_CAN_EMUL_CAS
  318. # endif
  319. #endif
  320. #if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
  321. || (defined(__WATCOMC__) && defined(__NT__))
  322. # if defined(_AMD64_) || defined(_M_X64)
  323. # include "atomic_ops/sysdeps/msftc/x86_64.h"
  324. # elif defined(_M_IX86) || defined(x86)
  325. # include "atomic_ops/sysdeps/msftc/x86.h"
  326. # elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
  327. # include "atomic_ops/sysdeps/msftc/arm.h"
  328. # define AO_GENERALIZE_TWICE
  329. # endif
  330. #endif
  331. #if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
  332. /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
  333. # if defined(__i386) || defined(__x86_64) || defined(__amd64)
  334. # include "atomic_ops/sysdeps/sunc/x86.h"
  335. # endif
  336. #endif
  337. #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
  338. && !defined(AO_USE_PTHREAD_DEFS)
  339. # include "atomic_ops/sysdeps/sunc/sparc.h"
  340. # define AO_CAN_EMUL_CAS
  341. #endif
  342. #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
  343. && !defined(AO_HAVE_fetch_compare_and_swap) \
  344. && !defined(AO_HAVE_compare_and_swap_full) \
  345. && !defined(AO_HAVE_fetch_compare_and_swap_full) \
  346. && !defined(AO_HAVE_compare_and_swap_acquire) \
  347. && !defined(AO_HAVE_fetch_compare_and_swap_acquire)
  348. # if defined(AO_CAN_EMUL_CAS)
  349. # include "atomic_ops/sysdeps/emul_cas.h"
  350. # else
  351. # error Cannot implement AO_compare_and_swap_full on this architecture.
  352. # endif
  353. #endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
  354. /* The most common way to clear a test-and-set location */
  355. /* at the end of a critical section. */
  356. #if AO_AO_TS_T && !defined(AO_CLEAR)
  357. # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
  358. #endif
  359. #if AO_CHAR_TS_T && !defined(AO_CLEAR)
  360. # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
  361. #endif
  362. /* The generalization section. */
  363. #if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \
  364. && !defined(AO_HAVE_compare_and_swap_full) \
  365. && !defined(AO_HAVE_fetch_compare_and_swap_full)
  366. # define AO_GENERALIZE_TWICE
  367. #endif
  368. /* Theoretically we should repeatedly include atomic_ops/generalize.h. */
  369. /* In fact, we observe that this converges after a small fixed number */
  370. /* of iterations, usually one. */
  371. #include "atomic_ops/generalize.h"
  372. #if !defined(AO_GENERALIZE_TWICE) \
  373. && defined(AO_HAVE_compare_double_and_swap_double) \
  374. && (!defined(AO_HAVE_double_load) || !defined(AO_HAVE_double_store))
  375. # define AO_GENERALIZE_TWICE
  376. #endif
  377. #ifdef AO_T_IS_INT
  378. /* Included after the first generalization pass. */
  379. # include "atomic_ops/sysdeps/ao_t_is_int.h"
  380. # ifndef AO_GENERALIZE_TWICE
  381. /* Always generalize again. */
  382. # define AO_GENERALIZE_TWICE
  383. # endif
  384. #endif /* AO_T_IS_INT */
  385. #ifdef AO_GENERALIZE_TWICE
  386. # include "atomic_ops/generalize.h"
  387. #endif
  388. /* For compatibility with version 0.4 and earlier */
  389. #define AO_TS_T AO_TS_t
  390. #define AO_T AO_t
  391. #define AO_TS_VAL AO_TS_VAL_t
  392. #endif /* !AO_ATOMIC_OPS_H */