/*
    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
/*
    This is the TBB implementation for the ARMv7-a architecture.
*/
#ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead
#endif

//TODO: Is ARMv7-A the only ARM architecture version this port will ever need to support?
#if !(__ARM_ARCH_7A__)
#error Threading Building Blocks ARM port requires an ARMv7-a architecture.
#endif
#include <sys/param.h>
#include <unistd.h>

#define __TBB_WORDSIZE 4

#ifndef __BYTE_ORDER__
    // Hopefully endianness can be validly determined at runtime.
    // This may silently fail in some embedded systems with page-specific endianness.
#elif __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
    #define __TBB_BIG_ENDIAN 1
#elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
    #define __TBB_BIG_ENDIAN 0
#else
    #define __TBB_BIG_ENDIAN -1 // not currently supported
#endif
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()

// "dmb ish" is a data memory barrier over the inner shareable domain, which
// covers all cores of one system and therefore serves as a full fence here.
#define __TBB_armv7_inner_shareable_barrier() __asm__ __volatile__("dmb ish": : :"memory")

#define __TBB_acquire_consistency_helper() __TBB_armv7_inner_shareable_barrier()
#define __TBB_release_consistency_helper() __TBB_armv7_inner_shareable_barrier()
#define __TBB_full_memory_fence() __TBB_armv7_inner_shareable_barrier()
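
/*
 * Illustrative sketch (not part of the original header): a release/acquire
 * publication pattern built from the helpers above. The names
 * example_payload and example_ready are hypothetical.
 */
static int32_t example_payload;
static volatile int32_t example_ready;

static inline void example_publish( int32_t v )
{
    example_payload = v;                   // write the data
    __TBB_release_consistency_helper();    // dmb ish: data visible before the flag
    example_ready = 1;                     // signal the consumer
}

static inline int32_t example_consume( void )
{
    while( !example_ready ) continue;      // spin until published
    __TBB_acquire_consistency_helper();    // dmb ish: flag read before the data
    return example_payload;
}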
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------

/**
 * Atomic CAS for 32 bit values: if *ptr==comparand, then *ptr=value, returning the original *ptr
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
    int32_t oldval, res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
        "ldrex      %1, [%3]\n"        // load-exclusive the current value
        "mov        %0, #0\n"          // assume the store will succeed
        "cmp        %1, %4\n"          // compare against the comparand
        "strexeq    %0, %5, [%3]\n"    // store-exclusive only on equality; sets %0 nonzero on failure
        : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
        : "r" ((int32_t *)ptr), "Ir" (comparand), "r" (value)
        : "cc");
    } while (res);                     // retry if another core broke the exclusive monitor

    __TBB_full_memory_fence();

    return oldval;
}
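
/*
 * Usage sketch (illustrative addition, not in the original TBB header): any
 * read-modify-write operation can be built from the CAS above by retrying
 * until the update lands. The helper name example_fetch_or4 is hypothetical.
 */
static inline int32_t example_fetch_or4( volatile int32_t* ptr, int32_t bits )
{
    int32_t snapshot = *ptr;
    for(;;) {
        int32_t seen = __TBB_machine_cmpswp4( ptr, snapshot|bits, snapshot );
        if( seen == snapshot ) return snapshot;   // CAS succeeded; return the prior value
        snapshot = seen;                          // lost a race: retry with the fresh value
    }
}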

/**
 * Atomic CAS for 64 bit values: if *ptr==comparand, then *ptr=value, returning the original *ptr
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
    int64_t oldval;
    int32_t res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
        "mov        %0, #0\n"              // assume the store will succeed
        "ldrexd     %1, %H1, [%3]\n"       // load-exclusive the 64-bit value into a register pair
        "cmp        %1, %4\n"              // compare the low words...
        "cmpeq      %H1, %H4\n"            // ...then the high words if the low words matched
        "strexdeq   %0, %5, %H5, [%3]"     // store-exclusive the pair only on full equality
        : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
        : "r" ((int64_t *)ptr), "r" (comparand), "r" (value)
        : "cc");
    } while (res);                         // retry if the exclusive monitor was broken

    __TBB_full_memory_fence();

    return oldval;
}
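
/*
 * Sketch (illustrative addition, hypothetical name): on a 32-bit core a plain
 * 64-bit read can tear, but a CAS whose value and comparand are equal reads
 * the location atomically without changing it; the generic double-word load
 * enabled at the bottom of this file relies on the same idea. Note the
 * location must be writable, since the CAS may store to it.
 */
static inline int64_t example_load8( volatile int64_t* ptr )
{
    // If *ptr==0 it is swapped with 0 (a no-op); either way the value
    // observed in memory is returned atomically.
    return __TBB_machine_cmpswp8( (volatile void*)ptr, 0, 0 );
}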

static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
    unsigned long tmp;
    int32_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrex   %0, [%4]\n"        // load-exclusive the current value
"       add     %3, %0, %5\n"      // compute the new value
"       strex   %1, %3, [%4]\n"    // try to store it back
"       cmp     %1, #0\n"          // did the store-exclusive succeed?
"       bne     1b\n"              // no: retry
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
    : "r" ((int32_t *)ptr), "Ir" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;                 // value observed before the addition
}
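
/*
 * Usage sketch (illustrative addition, hypothetical name): fetch-add returns
 * the value *before* the addition, so a decrementer that observes 1 knows it
 * has just released the last reference.
 */
static inline int example_release_reference( volatile int32_t* refcount )
{
    return __TBB_machine_fetchadd4( refcount, -1 ) == 1;
}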

static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
    unsigned long tmp;
    int64_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrexd  %0, %H0, [%4]\n"       // load-exclusive the 64-bit value
"       adds    %3, %0, %5\n"          // add the low words, setting carry
"       adc     %H3, %H0, %H5\n"       // add the high words plus carry
"       strexd  %1, %3, %H3, [%4]\n"   // try to store the pair back
"       cmp     %1, #0\n"              // did the store-exclusive succeed?
"       bne     1b"                    // no: retry
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
    : "r" ((int64_t *)ptr), "r" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;                     // value observed before the addition
}

inline void __TBB_machine_pause (int32_t delay)
{
    // Busy-wait for roughly `delay` iterations; the compiler fence keeps the
    // loop from being optimized away.
    while( delay > 0 )
    {
        __TBB_compiler_fence();
        delay--;
    }
}
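
/*
 * Usage sketch (illustrative addition, hypothetical name): a spin-wait with
 * bounded exponential backoff in the spirit of tbb::internal::atomic_backoff,
 * built on the pause loop above.
 */
static inline void example_spin_wait_while_nonzero( volatile int32_t* flag )
{
    int32_t backoff = 1;
    while( *flag != 0 ) {
        __TBB_machine_pause( backoff );
        if( backoff < 16 ) backoff *= 2;   // cap the pause length
    }
}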

namespace tbb {
namespace internal {
    template <typename T, size_t S>
    struct machine_load_store_relaxed {
        static inline T load ( const volatile T& location ) {
            const T value = location;

            /*
            * An extra memory barrier is required for errata #761319
            * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
            */
            __TBB_armv7_inner_shareable_barrier();

            return value;
        }

        static inline void store ( volatile T& location, T value ) {
            location = value;
        }
    };
}} // namespaces internal, tbb

// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)

// Use generics for some things
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1