lib1funcs.S 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
  3. *
  4. * Author: Nicolas Pitre <nico@fluxnic.net>
  5. * - contributed to gcc-3.4 on Sep 30, 2003
  6. * - adapted for the Linux kernel on Oct 2, 2003
  7. */
  8. /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  9. * SPDX-License-Identifier: GPL-2.0+
  10. */
  11. #include <linux/linkage.h>
  12. #include <asm/assembler.h>
  13. /*
  14. * U-Boot compatibility bit: define an empty UNWIND() macro, since we
  15. * do not support stack unwinding, and define CONFIG_AEABI to make all
  16. * of the functions available without diverging from Linux code.
  17. */
  18. #ifdef __UBOOT__
  /* Every UNWIND(...) annotation in this file then expands to nothing. */
  19. #define UNWIND(x...)
  /* Enables the #ifdef CONFIG_AEABI block (__aeabi_uidivmod, __aeabi_idivmod). */
  20. #define CONFIG_AEABI
  21. #endif
  /*
   * ARM_DIV_BODY dividend, divisor, result, curbit
   *
   * Unsigned shift-and-subtract division core. All four operands are
   * registers. The quotient is accumulated in \result; \dividend, \divisor
   * and \curbit are overwritten along the way and the flags are changed.
   *
   * The callers in this file expand it only after they have branched away
   * for divisor == 0, divisor == 1, dividend <= divisor and power-of-two
   * divisors.
   */
  22. .macro ARM_DIV_BODY dividend, divisor, result, curbit
  23. #if __LINUX_ARM_ARCH__ >= 5
  @ Use the difference of the leading-zero counts as the shift that lines
  @ the top set bit of the divisor up with the top set bit of the dividend.
  24. clz \curbit, \divisor
  25. clz \result, \dividend
  26. sub \result, \curbit, \result @ shift amount
  27. mov \curbit, #1
  28. mov \divisor, \divisor, lsl \result @ aligned divisor
  29. mov \curbit, \curbit, lsl \result @ quotient bit matching that alignment
  30. mov \result, #0 @ quotient accumulator starts empty
  31. #else
  32. @ Initially shift the divisor left 3 bits if possible,
  33. @ set curbit accordingly. This allows for curbit to be located
  34. @ at the left end of each 4 bit nibbles in the division loop
  35. @ to save one loop in most cases.
  36. tst \divisor, #0xe0000000
  37. moveq \divisor, \divisor, lsl #3
  38. moveq \curbit, #8
  39. movne \curbit, #1
  40. @ Unless the divisor is very big, shift it up in multiples of
  41. @ four bits, since this is the amount of unwinding in the main
  42. @ division loop. Continue shifting until the divisor is
  43. @ larger than the dividend.
  44. 1: cmp \divisor, #0x10000000
  45. cmplo \divisor, \dividend
  46. movlo \divisor, \divisor, lsl #4
  47. movlo \curbit, \curbit, lsl #4
  48. blo 1b
  49. @ For very big divisors, we must shift it a bit at a time, or
  50. @ we will be in danger of overflowing.
  51. 1: cmp \divisor, #0x80000000
  52. cmplo \divisor, \dividend
  53. movlo \divisor, \divisor, lsl #1
  54. movlo \curbit, \curbit, lsl #1
  55. blo 1b
  56. mov \result, #0 @ quotient accumulator starts empty
  57. #endif
  58. @ Division loop
  @ Each pass tries the shifted divisor at four successive bit positions
  @ (divisor, divisor >> 1, divisor >> 2, divisor >> 3). Whenever the
  @ subtraction fits, it is performed and the matching curbit position
  @ is ORed into the result.
  59. 1: cmp \dividend, \divisor
  60. subhs \dividend, \dividend, \divisor
  61. orrhs \result, \result, \curbit
  62. cmp \dividend, \divisor, lsr #1
  63. subhs \dividend, \dividend, \divisor, lsr #1
  64. orrhs \result, \result, \curbit, lsr #1
  65. cmp \dividend, \divisor, lsr #2
  66. subhs \dividend, \dividend, \divisor, lsr #2
  67. orrhs \result, \result, \curbit, lsr #2
  68. cmp \dividend, \divisor, lsr #3
  69. subhs \dividend, \dividend, \divisor, lsr #3
  70. orrhs \result, \result, \curbit, lsr #3
  71. cmp \dividend, #0 @ Early termination?
  @ The flag-setting shift below leaves Z set once curbit has been
  @ shifted out to zero, which also ends the loop.
  72. movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
  73. movne \divisor, \divisor, lsr #4
  74. bne 1b
  75. .endm
  /*
   * ARM_DIV2_ORDER divisor, order
   *
   * For a power-of-two \divisor, sets \order to log2(\divisor), i.e. the
   * index of its single set bit. The callers in this file expand it only
   * for power-of-two divisors and then shift the dividend right by \order.
   *
   * When __LINUX_ARM_ARCH__ < 5 the macro also overwrites \divisor and
   * changes the flags.
   */
  76. .macro ARM_DIV2_ORDER divisor, order
  77. #if __LINUX_ARM_ARCH__ >= 5
  78. clz \order, \divisor
  79. rsb \order, \order, #31 @ order = 31 - leading zero count
  80. #else
  @ Binary search for the set bit: narrow the divisor by 16, 8 and 4
  @ bits in turn, adding each shift that was taken to order.
  81. cmp \divisor, #(1 << 16)
  82. movhs \divisor, \divisor, lsr #16
  83. movhs \order, #16
  84. movlo \order, #0
  85. cmp \divisor, #(1 << 8)
  86. movhs \divisor, \divisor, lsr #8
  87. addhs \order, \order, #8
  88. cmp \divisor, #(1 << 4)
  89. movhs \divisor, \divisor, lsr #4
  90. addhs \order, \order, #4
  @ The divisor is now below 16. For a power of two that means 1, 2, 4
  @ or 8: a value of 8 adds 3, while 1, 2 and 4 add divisor >> 1, which
  @ is 0, 1 and 2 respectively.
  91. cmp \divisor, #(1 << 2)
  92. addhi \order, \order, #3
  93. addls \order, \order, \divisor, lsr #1
  94. #endif
  95. .endm
  /*
   * ARM_MOD_BODY dividend, divisor, order, spare
   *
   * Unsigned modulo core. All four operands are registers. Shifted copies
   * of \divisor are subtracted from \dividend until only the remainder is
   * left in \dividend. \divisor and \order are overwritten, the flags are
   * changed, and \spare is used as scratch when __LINUX_ARM_ARCH__ >= 5.
   *
   * The callers in this file expand it only when dividend > divisor > 1
   * and the divisor is not a power of two.
   */
  96. .macro ARM_MOD_BODY dividend, divisor, order, spare
  97. #if __LINUX_ARM_ARCH__ >= 5
  98. clz \order, \divisor
  99. clz \spare, \dividend
  100. sub \order, \order, \spare @ order = number of bit positions to shift the divisor left
  101. mov \divisor, \divisor, lsl \order
  102. #else
  103. mov \order, #0 @ order counts the bit positions the divisor gets shifted left
  104. @ Unless the divisor is very big, shift it up in multiples of
  105. @ four bits, since this is the amount of unwinding in the main
  106. @ division loop. Continue shifting until the divisor is
  107. @ larger than the dividend.
  108. 1: cmp \divisor, #0x10000000
  109. cmplo \divisor, \dividend
  110. movlo \divisor, \divisor, lsl #4
  111. addlo \order, \order, #4
  112. blo 1b
  113. @ For very big divisors, we must shift it a bit at a time, or
  114. @ we will be in danger of overflowing.
  115. 1: cmp \divisor, #0x80000000
  116. cmplo \divisor, \dividend
  117. movlo \divisor, \divisor, lsl #1
  118. addlo \order, \order, #1
  119. blo 1b
  120. #endif
  121. @ Perform all needed subtractions to keep only the remainder.
  122. @ Do comparisons in batch of 4 first.
  @ A divisor shifted left by order needs order + 1 compare/subtract
  @ steps. A full batch of four therefore needs order >= 3; with fewer
  @ steps to do, go straight to the tail at 2.
  123. subs \order, \order, #3 @ yes, 3 is intended here
  124. blt 2f
  125. 1: cmp \dividend, \divisor
  126. subhs \dividend, \dividend, \divisor
  127. cmp \dividend, \divisor, lsr #1
  128. subhs \dividend, \dividend, \divisor, lsr #1
  129. cmp \dividend, \divisor, lsr #2
  130. subhs \dividend, \dividend, \divisor, lsr #2
  131. cmp \dividend, \divisor, lsr #3
  132. subhs \dividend, \dividend, \divisor, lsr #3
  @ Loop again only while the dividend is still >= 1 and, after taking
  @ four more steps off order, order has not gone negative. When the
  @ dividend is 0 the subsge is skipped and the flags from the cmp end
  @ the loop.
  133. cmp \dividend, #1
  134. mov \divisor, \divisor, lsr #4
  135. subsge \order, \order, #4
  136. bge 1b
  @ Finished if the low two bits of order are clear (no partial batch
  @ left) or if the dividend has already reached 0.
  137. tst \order, #3
  138. teqne \dividend, #0
  139. beq 5f
  140. @ Either 1, 2 or 3 comparison/subtractions are left.
  @ order is -3, -2 or -1 here, meaning 1, 2 or 3 steps respectively.
  141. 2: cmn \order, #2
  142. blt 4f
  143. beq 3f
  144. cmp \dividend, \divisor
  145. subhs \dividend, \dividend, \divisor
  146. mov \divisor, \divisor, lsr #1
  147. 3: cmp \dividend, \divisor
  148. subhs \dividend, \dividend, \divisor
  149. mov \divisor, \divisor, lsr #1
  150. 4: cmp \dividend, \divisor
  151. subhs \dividend, \dividend, \divisor
  152. 5:
  153. .endm
  /*
   * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient
   *
   * r1, r2, r3 and the flags may be modified. A zero divisor branches to
   * Ldiv0. The ret/reteq forms are not defined in this file; they are
   * presumably provided by <asm/assembler.h>.
   */
  154. .pushsection .text.__udivsi3, "ax"
  155. ENTRY(__udivsi3)
  156. ENTRY(__aeabi_uidiv)
  157. UNWIND(.fnstart)
  158. subs r2, r1, #1 @ r2 = divisor - 1
  @ divisor == 1: the quotient is the dividend, which is already in r0
  159. reteq lr
  160. bcc Ldiv0 @ borrow from the subs: divisor was 0
  161. cmp r0, r1
  162. bls 11f @ dividend <= divisor: quotient is 1 or 0
  163. tst r1, r2 @ divisor & (divisor - 1)
  164. beq 12f @ zero: divisor is a power of two
  @ General case: the quotient is built in r2, with r3 as the current bit.
  165. ARM_DIV_BODY r0, r1, r2, r3
  166. mov r0, r2
  167. ret lr
  @ Flags here still come from the cmp r0, r1 above.
  168. 11: moveq r0, #1 @ dividend == divisor
  169. movne r0, #0 @ dividend < divisor
  170. ret lr
  @ Power-of-two divisor: r2 = log2(divisor), then divide by shifting.
  171. 12: ARM_DIV2_ORDER r1, r2
  172. mov r0, r0, lsr r2
  173. ret lr
  174. UNWIND(.fnend)
  175. ENDPROC(__udivsi3)
  176. ENDPROC(__aeabi_uidiv)
  177. .popsection
  /*
   * __umodsi3: unsigned 32-bit modulo.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = remainder
   *
   * r1, r2, r3 and the flags may be modified. A zero divisor branches to
   * Ldiv0. The conditional sequence below handles every easy case without
   * branching and returns through a single retls.
   */
  178. .pushsection .text.__umodsi3, "ax"
  179. ENTRY(__umodsi3)
  180. UNWIND(.fnstart)
  181. subs r2, r1, #1 @ compare divisor with 1
  182. bcc Ldiv0 @ borrow from the subs: divisor was 0
  183. cmpne r0, r1 @ compare dividend with divisor
  184. moveq r0, #0 @ divisor == 1 or dividend == divisor: remainder is 0
  185. tsthi r1, r2 @ see if divisor is power of 2
  186. andeq r0, r0, r2 @ power of two: remainder = dividend & (divisor - 1)
  @ Return if the dividend was <= the divisor (r0 already holds the
  @ remainder) or if one of the Z-setting cases above applied.
  187. retls lr
  @ General case: r0 is reduced to the remainder in place.
  188. ARM_MOD_BODY r0, r1, r2, r3
  189. ret lr
  190. UNWIND(.fnend)
  191. ENDPROC(__umodsi3)
  192. .popsection
  /*
   * __divsi3 / __aeabi_idiv: signed 32-bit division.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient
   *
   * The division itself is done on absolute values; ip holds
   * dividend ^ divisor, so its top bit says whether the quotient must be
   * negated. r1, r2, r3, ip and the flags may be modified. A zero divisor
   * branches to Ldiv0.
   */
  193. .pushsection .text.__divsi3, "ax"
  194. ENTRY(__divsi3)
  195. ENTRY(__aeabi_idiv)
  196. UNWIND(.fnstart)
  197. cmp r1, #0
  198. eor ip, r0, r1 @ save the sign of the result.
  199. beq Ldiv0
  200. rsbmi r1, r1, #0 @ loops below use unsigned.
  201. subs r2, r1, #1 @ division by 1 or -1 ?
  202. beq 10f
  203. movs r3, r0
  204. rsbmi r3, r0, #0 @ positive dividend value
  205. cmp r3, r1
  206. bls 11f @ |dividend| <= |divisor|: quotient is 0, 1 or -1
  207. tst r1, r2 @ divisor is power of 2 ?
  208. beq 12f
  @ General case: quotient of the absolute values is built in r0.
  209. ARM_DIV_BODY r3, r1, r0, r2
  210. cmp ip, #0
  211. rsbmi r0, r0, #0 @ operands had different signs: negate
  212. ret lr
  @ |divisor| == 1. ip ^ r0 equals the original divisor, so N is set
  @ exactly when the divisor was -1 and the dividend must be negated.
  213. 10: teq ip, r0 @ same sign ?
  214. rsbmi r0, r0, #0
  215. ret lr
  @ Flags here still come from the cmp r3, r1 above.
  216. 11: movlo r0, #0 @ |dividend| < |divisor|
  217. moveq r0, ip, asr #31 @ equal magnitudes: 0 or -1 from the result sign
  218. orreq r0, r0, #1 @ ... turned into +1 or -1
  219. ret lr
  @ Power-of-two divisor: shift |dividend| right by log2(|divisor|).
  220. 12: ARM_DIV2_ORDER r1, r2
  221. cmp ip, #0
  222. mov r0, r3, lsr r2 @ leaves the flags from the cmp intact
  223. rsbmi r0, r0, #0 @ operands had different signs: negate
  224. ret lr
  225. UNWIND(.fnend)
  226. ENDPROC(__divsi3)
  227. ENDPROC(__aeabi_idiv)
  228. .popsection
  /*
   * __modsi3: signed 32-bit modulo.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = remainder, carrying the sign of the dividend
   *
   * The remainder is computed on absolute values and negated at the end
   * if the original dividend (kept in ip) was negative. r1, r2, r3, ip and
   * the flags may be modified. A zero divisor branches to Ldiv0.
   */
  229. .pushsection .text.__modsi3, "ax"
  230. ENTRY(__modsi3)
  231. UNWIND(.fnstart)
  232. cmp r1, #0
  233. beq Ldiv0
  234. rsbmi r1, r1, #0 @ loops below use unsigned.
  235. movs ip, r0 @ preserve sign of dividend
  236. rsbmi r0, r0, #0 @ if negative make positive
  237. subs r2, r1, #1 @ compare divisor with 1
  238. cmpne r0, r1 @ compare dividend with divisor
  239. moveq r0, #0 @ |divisor| == 1 or equal magnitudes: remainder is 0
  240. tsthi r1, r2 @ see if divisor is power of 2
  241. andeq r0, r0, r2 @ power of two: remainder = |dividend| & (|divisor| - 1)
  242. bls 10f @ easy case handled above, only the sign fix-up is left
  @ General case: r0 is reduced to the remainder in place.
  243. ARM_MOD_BODY r0, r1, r2, r3
  244. 10: cmp ip, #0
  245. rsbmi r0, r0, #0 @ original dividend was negative: negate the remainder
  246. ret lr
  247. UNWIND(.fnend)
  248. ENDPROC(__modsi3)
  249. .popsection
  250. #ifdef CONFIG_AEABI
  /*
   * __aeabi_uidivmod: unsigned 32-bit division returning both results.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient (as returned by __aeabi_uidiv)
   *      r1 = remainder, computed as dividend - quotient * divisor
   *
   * r2 and r3 are used as scratch.
   * NOTE(review): ip is saved and restored as well, presumably to keep the
   * stack adjustment a multiple of 8 bytes - confirm.
   */
  251. .pushsection .text.__aeabi_uidivmod, "ax"
  252. ENTRY(__aeabi_uidivmod)
  253. UNWIND(.fnstart)
  254. UNWIND(.save {r0, r1, ip, lr} )
  255. stmfd sp!, {r0, r1, ip, lr} @ keep both operands and lr across the call
  256. bl __aeabi_uidiv
  257. ldmfd sp!, {r1, r2, ip, lr} @ r1 = original dividend, r2 = original divisor
  258. mul r3, r0, r2 @ r3 = quotient * divisor
  259. sub r1, r1, r3 @ r1 = dividend - quotient * divisor
  260. ret lr
  261. UNWIND(.fnend)
  262. ENDPROC(__aeabi_uidivmod)
  263. .popsection
  /*
   * __aeabi_idivmod: signed 32-bit division returning both results.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient (as returned by __aeabi_idiv)
   *      r1 = remainder, computed as dividend - quotient * divisor
   *
   * r2 and r3 are used as scratch.
   * NOTE(review): ip is saved and restored as well, presumably to keep the
   * stack adjustment a multiple of 8 bytes - confirm.
   *
   * The routine lives in its own .text.__aeabi_idivmod section, matching
   * the one-section-per-routine layout used throughout this file, so that
   * it can be kept or discarded independently of __aeabi_uidivmod.
   */
  264. .pushsection .text.__aeabi_idivmod, "ax"
  265. ENTRY(__aeabi_idivmod)
  266. UNWIND(.fnstart)
  267. UNWIND(.save {r0, r1, ip, lr} )
  268. stmfd sp!, {r0, r1, ip, lr} @ keep both operands and lr across the call
  269. bl __aeabi_idiv
  270. ldmfd sp!, {r1, r2, ip, lr} @ r1 = original dividend, r2 = original divisor
  271. mul r3, r0, r2 @ r3 = quotient * divisor
  272. sub r1, r1, r3 @ r1 = dividend - quotient * divisor
  273. ret lr
  274. UNWIND(.fnend)
  275. ENDPROC(__aeabi_idivmod)
  276. .popsection
  277. #endif
  /*
   * Ldiv0: common division-by-zero path.
   *
   * The division routines in this file branch here (without linking) when
   * the divisor is zero, so lr still holds the return address of their
   * caller. Calls __div0, which is defined outside this file, then returns
   * to that caller with r0 = 0.
   */
  278. .pushsection .text.Ldiv0, "ax"
  279. Ldiv0:
  280. UNWIND(.fnstart)
  281. UNWIND(.pad #4)
  282. UNWIND(.save {lr})
  @ Push lr, moving sp down by 8 bytes rather than 4.
  @ NOTE(review): presumably to keep sp 8-byte aligned for the call - confirm.
  283. str lr, [sp, #-8]!
  284. bl __div0
  285. mov r0, #0 @ About as wrong as it could be.
  286. ldr pc, [sp], #8 @ pop the saved lr straight into pc and release the 8 bytes
  287. UNWIND(.fnend)
  288. ENDPROC(Ldiv0)
  289. .popsection
  290. /* Thumb-1 specialities */
  291. #if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
  /*
   * __gnu_thumb1_case_sqi: table dispatch on signed byte entries.
   *
   * In:  r0 = index, lr = return address
   *
   * Reads the signed byte at (lr with bit 0 cleared) + r0, doubles it, adds
   * it to lr and returns with bx lr. r1 is preserved through the stack.
   * NOTE(review): presumably the libgcc-compatible helper the compiler
   * calls for Thumb-1 switch tables placed right after the call - confirm.
   */
  292. .pushsection .text.__gnu_thumb1_case_sqi, "ax"
  293. ENTRY(__gnu_thumb1_case_sqi)
  294. push {r1}
  295. mov r1, lr
  296. lsrs r1, r1, #1 @ shift right then left by one ...
  297. lsls r1, r1, #1 @ ... to clear bit 0 of the table address
  298. ldrsb r1, [r1, r0] @ sign-extended byte entry at index r0
  299. lsls r1, r1, #1 @ entry * 2
  300. add lr, lr, r1 @ adjust the return address by the doubled entry
  301. pop {r1}
  302. bx lr
  303. ENDPROC(__gnu_thumb1_case_sqi)
  304. .popsection
  /*
   * __gnu_thumb1_case_uqi: table dispatch on unsigned byte entries.
   *
   * In:  r0 = index, lr = return address
   *
   * Reads the unsigned byte at (lr with bit 0 cleared) + r0, doubles it,
   * adds it to lr and returns with bx lr. r1 is preserved through the
   * stack.
   * NOTE(review): presumably the libgcc-compatible helper the compiler
   * calls for Thumb-1 switch tables placed right after the call - confirm.
   */
  305. .pushsection .text.__gnu_thumb1_case_uqi, "ax"
  306. ENTRY(__gnu_thumb1_case_uqi)
  307. push {r1}
  308. mov r1, lr
  309. lsrs r1, r1, #1 @ shift right then left by one ...
  310. lsls r1, r1, #1 @ ... to clear bit 0 of the table address
  311. ldrb r1, [r1, r0] @ zero-extended byte entry at index r0
  312. lsls r1, r1, #1 @ entry * 2
  313. add lr, lr, r1 @ adjust the return address by the doubled entry
  314. pop {r1}
  315. bx lr
  316. ENDPROC(__gnu_thumb1_case_uqi)
  317. .popsection
  /*
   * __gnu_thumb1_case_shi: table dispatch on signed halfword entries.
   *
   * In:  r0 = index, lr = return address
   *
   * Reads the signed halfword at (lr with bit 0 cleared) + 2 * r0, doubles
   * it, adds it to lr and returns with bx lr. r0 and r1 are preserved
   * through the stack.
   * NOTE(review): presumably the libgcc-compatible helper the compiler
   * calls for Thumb-1 switch tables placed right after the call - confirm.
   */
  318. .pushsection .text.__gnu_thumb1_case_shi, "ax"
  319. ENTRY(__gnu_thumb1_case_shi)
  320. push {r0, r1}
  321. mov r1, lr
  322. lsrs r1, r1, #1 @ together with the lsls of r1 below: clear bit 0
  323. lsls r0, r0, #1 @ index * 2, the byte offset of a halfword entry
  324. lsls r1, r1, #1
  325. ldrsh r1, [r1, r0] @ sign-extended halfword entry
  326. lsls r1, r1, #1 @ entry * 2
  327. add lr, lr, r1 @ adjust the return address by the doubled entry
  328. pop {r0, r1}
  329. bx lr
  330. ENDPROC(__gnu_thumb1_case_shi)
  331. .popsection
  /*
   * __gnu_thumb1_case_uhi: table dispatch on unsigned halfword entries.
   *
   * In:  r0 = index, lr = return address
   *
   * Reads the unsigned halfword at (lr with bit 0 cleared) + 2 * r0,
   * doubles it, adds it to lr and returns with bx lr. r0 and r1 are
   * preserved through the stack.
   * NOTE(review): presumably the libgcc-compatible helper the compiler
   * calls for Thumb-1 switch tables placed right after the call - confirm.
   */
  332. .pushsection .text.__gnu_thumb1_case_uhi, "ax"
  333. ENTRY(__gnu_thumb1_case_uhi)
  334. push {r0, r1}
  335. mov r1, lr
  336. lsrs r1, r1, #1 @ together with the lsls of r1 below: clear bit 0
  337. lsls r0, r0, #1 @ index * 2, the byte offset of a halfword entry
  338. lsls r1, r1, #1
  339. ldrh r1, [r1, r0] @ zero-extended halfword entry
  340. lsls r1, r1, #1 @ entry * 2
  341. add lr, lr, r1 @ adjust the return address by the doubled entry
  342. pop {r0, r1}
  343. bx lr
  344. ENDPROC(__gnu_thumb1_case_uhi)
  345. .popsection
  346. #endif