/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/* Memory model documented at http://www-106.ibm.com/developerworks/     */
/* eserver/articles/archguide.html and (clearer)                          */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html.   */
/* There appears to be no implicit ordering between any kind of           */
/* independent memory references.                                         */
/* Architecture enforces some ordering based on control dependence.       */
/* I don't know if that could help.                                       */
/* Data-dependent loads are always ordered.                               */
/* Based on the above references, eieio is intended for use on            */
/* uncached memory, which we don't support.  It does not order loads      */
/* from cached memory.                                                    */

#include "../all_aligned_atomic_load_store.h"

#include "../test_and_set_t_is_ao_t.h"
        /* There seems to be no byte equivalent of lwarx, so this     */
        /* may really be what we want, at least in the 32-bit case.   */

AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}
#define AO_HAVE_nop_full

/* lwsync apparently works for everything but a StoreLoad barrier. */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read

/* We explicitly specify load_acquire, since it is important, and can  */
/* be implemented relatively cheaply.  It could be implemented         */
/* with an ordinary load followed by a lwsync.  But the general wisdom */
/* seems to be that a data dependent branch followed by an isync is    */
/* cheaper.  And the documentation is fairly explicit that this also   */
/* has acquire semantics.                                              */
/* ppc64 uses ld not lwz.                                              */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  __asm__ __volatile__ (
    "ld%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cr0");
#else
  /* FIXME: We should get gcc to allocate one of the condition   */
  /* registers.  I always got "impossible constraint" when I     */
  /* tried the "y" constraint.                                   */
  __asm__ __volatile__ (
    "lwz%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cc");
#endif
  return result;
}
#define AO_HAVE_load_acquire

/* We explicitly specify store_release, since it relies        */
/* on the fact that lwsync is also a LoadStore barrier.        */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  AO_lwsync();
  *addr = value;
}
#define AO_HAVE_store_release
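
/* Illustrative sketch only, not part of this header: the intended pairing
   of AO_store_release with AO_load_acquire in a simple message-passing
   pattern.  The names "payload", "ready", "producer" and "consumer" are
   hypothetical, and atomic_ops.h is assumed to have been included.

     static AO_t payload;
     static volatile AO_t ready = 0;

     void producer(AO_t v)
     {
       payload = v;                   // plain store of the data
       AO_store_release(&ready, 1);   // lwsync orders it before the flag
     }

     AO_t consumer(void)
     {
       while (!AO_load_acquire(&ready)) {}  // branch+isync orders later loads
       return payload;                // sees the value written by producer()
     }
*/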

#ifndef AO_PREFER_GENERALIZED
/* This is similar to the code in the garbage collector.  Deleting      */
/* this and having it synthesized from compare_and_swap would probably  */
/* only cost us a load immediate instruction.                           */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* Completely untested.  And we should be using smaller objects anyway. */
  unsigned long oldval;
  unsigned long temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:ldarx %0,0,%1\n"   /* load and reserve               */
               "cmpdi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stdcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
#else
  int oldval;
  int temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:lwarx %0,0,%1\n"   /* load and reserve               */
               "cmpwi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stwcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
#endif
  return (AO_TS_VAL_t)oldval;
}
#define AO_HAVE_test_and_set

AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_acquire

AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t *addr) {
  AO_lwsync();
  return AO_test_and_set(addr);
}
#define AO_HAVE_test_and_set_release

AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result;
  AO_lwsync();
  result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_full
#endif /* !AO_PREFER_GENERALIZED */
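
/* Illustrative sketch only, not part of this header: a minimal spin lock
   built on the test-and-set primitives above.  AO_TS_INITIALIZER, AO_TS_SET
   and AO_CLEAR come from atomic_ops.h; the two helper functions are
   hypothetical names used for the example.

     static AO_TS_t lock = AO_TS_INITIALIZER;

     void lock_acquire(void)
     {
       while (AO_test_and_set_acquire(&lock) == AO_TS_SET) {}  // spin until clear
     }

     void lock_release(void)
     {
       AO_CLEAR(&lock);   // release-store of the clear value
     }
*/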

#ifndef AO_GENERALIZE_ASM_BOOL_CAS
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
{
  AO_t oldval;
  int result = 0;
# if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve            */
               "cmpd %0, %4\n"       /* if load is not equal to     */
               "bne 2f\n"            /*   old, fail                 */
               "stdcx. %3,0,%2\n"    /* else store conditional      */
               "bne- 1b\n"           /* retry if lost reservation   */
               "li %1,1\n"           /* result = 1;                 */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");
# else
  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve            */
               "cmpw %0, %4\n"       /* if load is not equal to     */
               "bne 2f\n"            /*   old, fail                 */
               "stwcx. %3,0,%2\n"    /* else store conditional      */
               "bne- 1b\n"           /* retry if lost reservation   */
               "li %1,1\n"           /* result = 1;                 */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");
# endif
  return result;
}
# define AO_HAVE_compare_and_swap

AO_INLINE int
AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
{
  int result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}
# define AO_HAVE_compare_and_swap_acquire

AO_INLINE int
AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
{
  AO_lwsync();
  return AO_compare_and_swap(addr, old, new_val);
}
# define AO_HAVE_compare_and_swap_release

AO_INLINE int
AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
{
  int result;
  AO_lwsync();
  result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}
# define AO_HAVE_compare_and_swap_full
#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
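
/* Illustrative sketch only, not part of this header: a typical retry loop
   around the boolean compare-and-swap above.  The function name and the
   "limit" parameter are hypothetical.

     // Atomically increment *ctr, but never past limit.  Returns 0 if the
     // counter was already at the limit, 1 otherwise.
     int bounded_inc(volatile AO_t *ctr, AO_t limit)
     {
       AO_t cur;
       do {
         cur = AO_load_acquire(ctr);
         if (cur >= limit) return 0;
       } while (!AO_compare_and_swap_full(ctr, cur, cur + 1));
       return 1;
     }
*/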

AO_INLINE AO_t
AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
  AO_t fetched_val;
# if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  __asm__ __volatile__(
               "1:ldarx %0,0,%1\n"   /* load and reserve            */
               "cmpd %0, %3\n"       /* if load is not equal to     */
               "bne 2f\n"            /*   old_val, fail             */
               "stdcx. %2,0,%1\n"    /* else store conditional      */
               "bne- 1b\n"           /* retry if lost reservation   */
               "2:\n"
              : "=&r"(fetched_val)
              : "r"(addr), "r"(new_val), "r"(old_val)
              : "memory", "cr0");
# else
  __asm__ __volatile__(
               "1:lwarx %0,0,%1\n"   /* load and reserve            */
               "cmpw %0, %3\n"       /* if load is not equal to     */
               "bne 2f\n"            /*   old_val, fail             */
               "stwcx. %2,0,%1\n"    /* else store conditional      */
               "bne- 1b\n"           /* retry if lost reservation   */
               "2:\n"
              : "=&r"(fetched_val)
              : "r"(addr), "r"(new_val), "r"(old_val)
              : "memory", "cr0");
# endif
  return fetched_val;
}
#define AO_HAVE_fetch_compare_and_swap

AO_INLINE AO_t
AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_acquire

AO_INLINE AO_t
AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_lwsync();
  return AO_fetch_compare_and_swap(addr, old_val, new_val);
}
#define AO_HAVE_fetch_compare_and_swap_release

AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
                               AO_t new_val)
{
  AO_t result;
  AO_lwsync();
  result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_full
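
/* Illustrative sketch only, not part of this header: fetch_compare_and_swap
   is the usual building block for read-modify-write operations that have no
   dedicated primitive.  The "atomic_or" helper name is hypothetical.

     AO_t atomic_or(volatile AO_t *addr, AO_t bits)
     {
       AO_t old = *addr;             // plain read; the CAS validates it
       AO_t seen;
       while ((seen = AO_fetch_compare_and_swap_full(addr, old, old | bits))
              != old) {
         old = seen;                 // use the freshly fetched value and retry
       }
       return old;                   // value before the OR was applied
     }
*/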

#ifndef AO_PREFER_GENERALIZED
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve            */
               "add %1,%0,%3\n"      /* increment                   */
               "stdcx. %1,0,%2\n"    /* store conditional           */
               "bne- 1b\n"           /* retry if lost reservation   */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");
#else
  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve            */
               "add %1,%0,%3\n"      /* increment                   */
               "stwcx. %1,0,%2\n"    /* store conditional           */
               "bne- 1b\n"           /* retry if lost reservation   */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");
#endif
  return oldval;
}
#define AO_HAVE_fetch_and_add

AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
  AO_t result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_acquire

AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
  AO_lwsync();
  return AO_fetch_and_add(addr, incr);
}
#define AO_HAVE_fetch_and_add_release

AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
  AO_t result;
  AO_lwsync();
  result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_full
#endif /* !AO_PREFER_GENERALIZED */
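
/* Illustrative sketch only, not part of this header: a common use of the
   fetch-and-add primitives is reference counting, where the decrement is a
   release and the thread that drops the count to zero needs a barrier
   before tearing the object down.  The "refcnt_dec" helper name is
   hypothetical, and the full barrier is deliberately conservative.

     // Returns nonzero if this call released the last reference.
     int refcnt_dec(volatile AO_t *cnt)
     {
       if (AO_fetch_and_add_release(cnt, (AO_t)(-1)) == 1) {
         AO_nop_full();   // conservative barrier before destruction
         return 1;
       }
       return 0;
     }
*/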

#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* Empty */
#else
# define AO_T_IS_INT
#endif

/* TODO: Implement double-wide operations if available. */