123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421 |
- /*
- * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #ifndef AO_ATOMIC_OPS_H
- #define AO_ATOMIC_OPS_H
- #include "atomic_ops/ao_version.h"
- /* Define version numbers here to allow */
- /* test on build machines for cross-builds. */
- #include <assert.h>
- #include <stddef.h>
- /* We define various atomic operations on memory in a */
- /* machine-specific way. Unfortunately, this is complicated */
- /* by the fact that these may or may not be combined with */
- /* various memory barriers. Thus the actual operations we */
- /* define have the form AO_<atomic-op>_<barrier>, for all */
- /* plausible combinations of <atomic-op> and <barrier>. */
- /* This of course results in a mild combinatorial explosion. */
- /* To deal with it, we try to generate derived */
- /* definitions for as many of the combinations as we can, as */
- /* automatically as possible. */
- /* */
- /* Our assumption throughout is that the programmer will */
- /* specify the least demanding operation and memory barrier */
- /* that will guarantee correctness for the implementation. */
- /* Our job is to find the least expensive way to implement it */
- /* on the applicable hardware. In many cases that will */
- /* involve, for example, a stronger memory barrier, or a */
- /* combination of hardware primitives. */
- /* */
- /* Conventions: */
- /* "plain" atomic operations are not guaranteed to include */
- /* a barrier. The suffix in the name specifies the barrier */
- /* type. Suffixes are: */
- /* _release: Earlier operations may not be delayed past it. */
- /* _acquire: Later operations may not move ahead of it. */
- /* _read: Subsequent reads must follow this operation and */
- /* preceding reads. */
- /* _write: Earlier writes precede both this operation and */
- /* later writes. */
- /* _full: Ordered with respect to both earlier and later memory */
- /* operations. */
- /* _release_write: Ordered with respect to earlier writes. */
- /* _acquire_read: Ordered with respect to later reads. */
- /* */
- /* Currently we try to define the following atomic memory */
- /* operations, in combination with the above barriers: */
- /* AO_nop */
- /* AO_load */
- /* AO_store */
- /* AO_test_and_set (binary) */
- /* AO_fetch_and_add */
- /* AO_fetch_and_add1 */
- /* AO_fetch_and_sub1 */
- /* AO_and */
- /* AO_or */
- /* AO_xor */
- /* AO_compare_and_swap */
- /* AO_fetch_compare_and_swap */
- /* */
- /* Note that atomicity guarantees are valid only if both */
- /* readers and writers use AO_ operations to access the */
- /* shared value, while ordering constraints are intended to */
- /* apply to all memory operations. If a location can potentially */
- /* be accessed simultaneously from multiple threads, and one of */
- /* those accesses may be a write access, then all such */
- /* accesses to that location should be through AO_ primitives. */
- /* However if AO_ operations enforce sufficient ordering to */
- /* ensure that a location x cannot be accessed concurrently, */
- /* or can only be read concurrently, then x can be accessed */
- /* via ordinary references and assignments. */
- /* */
- /* AO_compare_and_swap takes an address and an expected old */
- /* value and a new value, and returns an int. Non-zero result */
- /* indicates that it succeeded. */
- /* AO_fetch_compare_and_swap takes an address and an expected */
- /* old value and a new value, and returns the real old value. */
- /* The operation succeeded if and only if the expected old */
- /* value matches the old value returned. */
- /* */
- /* Test_and_set takes an address, atomically replaces it by */
- /* AO_TS_SET, and returns the prior value. */
- /* An AO_TS_t location can be reset with the */
- /* AO_CLEAR macro, which normally uses AO_store_release. */
- /* AO_fetch_and_add takes an address and an AO_t increment */
- /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
- /* are provided, since they allow faster implementations on */
- /* some hardware. AO_and, AO_or, AO_xor do atomically and, or, */
- /* xor (respectively) an AO_t value into a memory location, */
- /* but do not provide access to the original. */
- /* */
- /* We expect this list to grow slowly over time. */
- /* */
- /* Note that AO_nop_full is a full memory barrier. */
- /* */
- /* Note that if some data is initialized with */
- /* data.x = ...; data.y = ...; ... */
- /* AO_store_release_write(&data_is_initialized, 1) */
- /* then data is guaranteed to be initialized after the test */
- /* if (AO_load_acquire_read(&data_is_initialized)) ... */
- /* succeeds. Furthermore, this should generate near-optimal */
- /* code on all common platforms. */
- /* */
- /* All operations operate on unsigned AO_t, which */
- /* is the natural word size, and usually unsigned long. */
- /* It is possible to check whether a particular operation op */
- /* is available on a particular platform by checking whether */
- /* AO_HAVE_op is defined. We make heavy use of these macros */
- /* internally. */
- /* The rest of this file basically has three sections: */
- /* */
- /* Some utility and default definitions. */
- /* */
- /* The architecture dependent section: */
- /* This defines atomic operations that have direct hardware */
- /* support on a particular platform, mostly by including the */
- /* appropriate compiler- and hardware-dependent file. */
- /* */
- /* The synthesis section: */
- /* This tries to define other atomic operations in terms of */
- /* those that are explicitly available on the platform. */
- /* This section is hardware independent. */
- /* We make no attempt to synthesize operations in ways that */
- /* effectively introduce locks, except for the debugging/demo */
- /* pthread-based implementation at the beginning. A more */
- /* realistic implementation that falls back to locks could be */
- /* added as a higher layer. But that would sacrifice */
- /* usability from signal handlers. */
- /* The synthesis section is implemented almost entirely in */
- /* atomic_ops/generalize.h. */
- /* Some common defaults. Overridden for some architectures.
-    AO_t, the word type used by the AO_ operations, is by default
-    an alias for size_t. */
- #define AO_t size_t
- /* The test_and_set primitive returns an AO_TS_VAL_t value. */
- /* AO_TS_t is the type of an in-memory test-and-set location.
-    AO_TS_INITIALIZER expands to AO_TS_CLEAR cast to AO_t.
-    AO_TS_CLEAR is not defined in this file; presumably it comes
-    from the platform-specific headers included below. */
- #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR
- /* Platform-dependent stuff:
-    AO_INLINE is the declaration prefix selected per compiler:
-    "static __inline" when one of the GCC, MSVC, Intel, Digital Mars
-    or Watcom compiler macros is defined, "static inline" when __sun
-    is defined, and plain "static" otherwise.  Defining AO_NO_INLINE
-    always selects plain "static". */
- #if (defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
- || defined(__DMC__) || defined(__WATCOMC__)) && !defined(AO_NO_INLINE)
- # define AO_INLINE static __inline
- #elif defined(__sun) && !defined(AO_NO_INLINE)
- # define AO_INLINE static inline
- #else
- # define AO_INLINE static
- #endif
- #if __GNUC__ >= 3 && !defined(LINT2) /* An undefined __GNUC__ evaluates to 0 here. */
- # define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
- /* Equivalent to (expr) but predict that usually (expr) == 0. */
- #else /* No prediction hint: AO_EXPECT_FALSE(expr) is just (expr). */
- # define AO_EXPECT_FALSE(expr) (expr)
- #endif /* __GNUC__ < 3 || LINT2 */
- #if defined(__GNUC__) && !defined(__INTEL_COMPILER) /* GCC: empty asm with a "memory" clobber. */
- # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
- #elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
- || defined(__WATCOMC__)
- # if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
- # if defined(_WIN32_WCE)
- /* # include <cmnintrin.h> */
- # elif defined(_MSC_VER)
- # include <intrin.h>
- # endif
- # pragma intrinsic(_ReadWriteBarrier)
- # define AO_compiler_barrier() _ReadWriteBarrier()
- /* We assume this does not generate a fence instruction. */
- /* The documentation is a bit unclear. */
- # else /* Neither a 64-bit target nor _MSC_VER >= 1400: use an empty __asm block. */
- # define AO_compiler_barrier() __asm { }
- /* The preceding implementation may be preferable here too. */
- /* But the documentation warns about VC++ 2003 and earlier. */
- # endif
- #elif defined(__INTEL_COMPILER)
- # define AO_compiler_barrier() __memory_barrier()
- /* FIXME: Too strong? IA64-only? */
- #elif defined(_HPUX_SOURCE)
- # if defined(__ia64)
- # include <machine/sys/inline.h>
- # define AO_compiler_barrier() _Asm_sched_fence()
- # else
- /* FIXME - We don't know how to do this. This is a guess. */
- /* And probably a bad one: the macro below merely assigns a
-    file-local volatile dummy variable to itself. */
- static volatile int AO_barrier_dummy;
- # define AO_compiler_barrier() (void)(AO_barrier_dummy = AO_barrier_dummy)
- # endif
- #else
- /* We conjecture that the following usually gives us the right */
- /* semantics or an error. */
- # define AO_compiler_barrier() asm("")
- #endif /* AO_compiler_barrier() selection */
- #if defined(AO_USE_PTHREAD_DEFS) /* Opt-in: use the generic pthread-based definitions. */
- # include "atomic_ops/sysdeps/generic_pthread.h"
- #endif /* AO_USE_PTHREAD_DEFS */
- #if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \
- && !defined(AO_USE_PTHREAD_DEFS)
- # include "atomic_ops/sysdeps/armcc/arm_v6.h"
- # define AO_GENERALIZE_TWICE
- #endif /* (__CC_ARM || __ARMCC__) && !__GNUC__ && !AO_USE_PTHREAD_DEFS */
- #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
- && !defined(__INTEL_COMPILER)
- # if defined(__i386__)
- /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */
- /* it might require specifying additional options (like -march) */
- /* or additional link libraries (if -march is not specified). */
- # include "atomic_ops/sysdeps/gcc/x86.h"
- # endif /* __i386__ */
- # if defined(__x86_64__)
- # if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) \
- && !defined(AO_USE_SYNC_CAS_BUILTIN)
- /* It is safe to use __sync CAS built-in on this architecture.
-    The macro is only set here for GCC 4.2 or later, and only if
-    the client has not defined it already. */
- # define AO_USE_SYNC_CAS_BUILTIN
- # endif /* GCC >= 4.2 && !AO_USE_SYNC_CAS_BUILTIN */
- # include "atomic_ops/sysdeps/gcc/x86.h"
- # endif /* __x86_64__ */
- # if defined(__ia64__)
- # include "atomic_ops/sysdeps/gcc/ia64.h"
- # define AO_GENERALIZE_TWICE
- # endif /* __ia64__ */
- # if defined(__hppa__)
- # include "atomic_ops/sysdeps/gcc/hppa.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __hppa__ */
- # if defined(__alpha__)
- # include "atomic_ops/sysdeps/gcc/alpha.h"
- # define AO_GENERALIZE_TWICE
- # endif /* __alpha__ */
- # if defined(__s390__)
- # include "atomic_ops/sysdeps/gcc/s390.h"
- # endif /* __s390__ */
- # if defined(__sparc__)
- # include "atomic_ops/sysdeps/gcc/sparc.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __sparc__ */
- # if defined(__m68k__)
- # include "atomic_ops/sysdeps/gcc/m68k.h"
- # endif /* __m68k__ */
- # if defined(__nios2__)
- # include "atomic_ops/sysdeps/gcc/nios2.h"
- # endif /* __nios2__ */
- # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
- || defined(__powerpc64__) || defined(__ppc64__)
- # include "atomic_ops/sysdeps/gcc/powerpc.h"
- # endif /* __powerpc__ */
- # if defined(__aarch64__)
- # include "atomic_ops/sysdeps/gcc/aarch64.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __aarch64__ */
- # if defined(__arm__)
- # include "atomic_ops/sysdeps/gcc/arm.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __arm__ */
- # if defined(__cris__) || defined(CRIS)
- # include "atomic_ops/sysdeps/gcc/cris.h"
- # define AO_GENERALIZE_TWICE
- # endif /* __cris__ || CRIS */
- # if defined(__mips__)
- # include "atomic_ops/sysdeps/gcc/mips.h"
- # endif /* __mips__ */
- # if defined(__sh__) || defined(SH4)
- # include "atomic_ops/sysdeps/gcc/sh.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __sh__ */
- # if defined(__avr32__)
- # include "atomic_ops/sysdeps/gcc/avr32.h"
- # endif /* __avr32__ */
- # if defined(__hexagon__)
- # include "atomic_ops/sysdeps/gcc/hexagon.h"
- # endif /* __hexagon__ */
- #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS && !__INTEL_COMPILER */
- #if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \
- && !defined(AO_USE_PTHREAD_DEFS)
- # if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \
- || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \
- || defined(_ARCH_PWR)
- # include "atomic_ops/sysdeps/ibmc/powerpc.h"
- # define AO_GENERALIZE_TWICE
- # endif /* any of the PowerPC macros tested above */
- #endif /* (__IBMC__ || __IBMCPP__) && !__GNUC__ && !AO_USE_PTHREAD_DEFS */
- #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
- # if defined(__ia64__)
- # include "atomic_ops/sysdeps/icc/ia64.h"
- # define AO_GENERALIZE_TWICE
- # endif /* __ia64__ */
- # if defined(__GNUC__)
- /* Intel Compiler in GCC compatible mode.  The GCC section of this
-    file is skipped when __INTEL_COMPILER is defined, so the GCC x86
-    header is included from here instead. */
- # if defined(__i386__)
- # include "atomic_ops/sysdeps/gcc/x86.h"
- # endif /* __i386__ */
- # if defined(__x86_64__)
- # if (__INTEL_COMPILER > 1110) && !defined(AO_USE_SYNC_CAS_BUILTIN)
- # define AO_USE_SYNC_CAS_BUILTIN
- # endif /* __INTEL_COMPILER > 1110 && !AO_USE_SYNC_CAS_BUILTIN */
- # include "atomic_ops/sysdeps/gcc/x86.h"
- # endif /* __x86_64__ */
- # endif /* __GNUC__ */
- #endif /* __INTEL_COMPILER && !AO_USE_PTHREAD_DEFS */
- #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
- # if defined(__ia64)
- # include "atomic_ops/sysdeps/hpc/ia64.h"
- # define AO_GENERALIZE_TWICE
- # else /* Every non-__ia64 HP-UX build takes the hppa header. */
- # include "atomic_ops/sysdeps/hpc/hppa.h"
- # define AO_CAN_EMUL_CAS
- # endif /* __ia64 */
- #endif /* _HPUX_SOURCE && !__GNUC__ && !AO_USE_PTHREAD_DEFS */
- #if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
- || (defined(__WATCOMC__) && defined(__NT__))
- # if defined(_AMD64_) || defined(_M_X64)
- # include "atomic_ops/sysdeps/msftc/x86_64.h"
- # elif defined(_M_IX86) || defined(x86)
- # include "atomic_ops/sysdeps/msftc/x86.h"
- # elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
- # include "atomic_ops/sysdeps/msftc/arm.h"
- # define AO_GENERALIZE_TWICE
- # endif /* No header is included for any other target. */
- #endif /* _MSC_VER || __DMC__ || __BORLANDC__ || (__WATCOMC__ && __NT__).
-    NOTE(review): unlike the other compiler sections, this one is
-    not guarded by !AO_USE_PTHREAD_DEFS - confirm that is intended. */
- #if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
- /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
- # if defined(__i386) || defined(__x86_64) || defined(__amd64)
- # include "atomic_ops/sysdeps/sunc/x86.h"
- # endif /* __i386 || __x86_64 || __amd64 */
- #endif /* __sun && !__GNUC__ && !AO_USE_PTHREAD_DEFS */
- #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
- && !defined(AO_USE_PTHREAD_DEFS)
- # include "atomic_ops/sysdeps/sunc/sparc.h"
- # define AO_CAN_EMUL_CAS
- #endif /* !__GNUC__ && (sparc || __sparc) && !AO_USE_PTHREAD_DEFS;
-    note that this test does not require __sun. */
- #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
- && !defined(AO_HAVE_fetch_compare_and_swap) \
- && !defined(AO_HAVE_compare_and_swap_full) \
- && !defined(AO_HAVE_fetch_compare_and_swap_full) \
- && !defined(AO_HAVE_compare_and_swap_acquire) \
- && !defined(AO_HAVE_fetch_compare_and_swap_acquire)
- # if defined(AO_CAN_EMUL_CAS) /* Set by some of the platform sections above. */
- # include "atomic_ops/sysdeps/emul_cas.h"
- # else /* CAS was requested but is neither available nor emulable. */
- # error Cannot implement AO_compare_and_swap_full on this architecture.
- # endif
- #endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
- /* The most common way to clear a test-and-set location */
- /* at the end of a critical section.
-    AO_CLEAR is only defined here if it is not defined already.
-    The tests below use the values of AO_AO_TS_T and AO_CHAR_TS_T
-    (not defined()), so each must expand to a nonzero integer to
-    select its branch; an undefined macro counts as 0.  Neither
-    macro is defined in this file; presumably the platform headers
-    set them. */
- #if AO_AO_TS_T && !defined(AO_CLEAR)
- # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
- #endif /* AO_AO_TS_T && !AO_CLEAR */
- #if AO_CHAR_TS_T && !defined(AO_CLEAR)
- # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
- #endif /* AO_CHAR_TS_T && !AO_CLEAR */
- /* The generalization section.
-    atomic_ops/generalize.h is always included once.  It is included
-    a second time when AO_GENERALIZE_TWICE is defined, which happens
-    in a platform section above or in one of three cases below:
-    CAS emulation is possible but no _full CAS variant is defined;
-    after the first pass, compare_double_and_swap_double is defined
-    but double_load or double_store is not; AO_T_IS_INT is defined. */
- #if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \
- && !defined(AO_HAVE_compare_and_swap_full) \
- && !defined(AO_HAVE_fetch_compare_and_swap_full)
- # define AO_GENERALIZE_TWICE
- #endif
- /* Theoretically we should repeatedly include atomic_ops/generalize.h. */
- /* In fact, we observe that this converges after a small fixed number */
- /* of iterations, usually one. */
- #include "atomic_ops/generalize.h"
- #if !defined(AO_GENERALIZE_TWICE) \
- && defined(AO_HAVE_compare_double_and_swap_double) \
- && (!defined(AO_HAVE_double_load) || !defined(AO_HAVE_double_store))
- # define AO_GENERALIZE_TWICE
- #endif
- #ifdef AO_T_IS_INT
- /* Included after the first generalization pass. */
- # include "atomic_ops/sysdeps/ao_t_is_int.h"
- # ifndef AO_GENERALIZE_TWICE
- /* Always generalize again. */
- # define AO_GENERALIZE_TWICE
- # endif
- #endif /* AO_T_IS_INT */
- #ifdef AO_GENERALIZE_TWICE /* Second generalization pass. */
- # include "atomic_ops/generalize.h"
- #endif /* AO_GENERALIZE_TWICE */
- /* For compatibility with version 0.4 and earlier:
-    upper-case aliases for the current type names. */
- #define AO_TS_T AO_TS_t
- #define AO_T AO_t
- #define AO_TS_VAL AO_TS_VAL_t
- #endif /* !AO_ATOMIC_OPS_H */
|