uldivmod.S 4.5 KB


  1. /*
  2. * Copyright 2010, Google Inc.
  3. *
  4. * Brought in from coreboot uldivmod.S
  5. *
  6. * SPDX-License-Identifier: GPL-2.0
  7. */
  8. #include <linux/linkage.h>
  9. #include <asm/assembler.h>
  10. /*
  11. * A, Q = r0 + (r1 << 32)
  12. * B, R = r2 + (r3 << 32)
  13. * A / B = Q ... R
  14. */
  /* r1:r0 - dividend A on entry, quotient Q on return (low word in r0). */
A_0	.req	r0
Q_0	.req	r0
A_1	.req	r1
Q_1	.req	r1

/* r3:r2 - divisor B on entry, remainder R on return (low word in r2). */
B_0	.req	r2
R_0	.req	r2
B_1	.req	r3
R_1	.req	r3

/* r5:r4 and r7:r6 - 64-bit working pairs C and D (low word first). */
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

/* Extra scratch register, only defined when the THUMB() wrapper emits it. */
THUMB(	TMP	.req	r8	)
  .pushsection .text.__aeabi_uldivmod, "ax"

/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder.
 *
 * In:   A (r1:r0) = dividend, B (r3:r2) = divisor
 * Out:  Q (r1:r0) = A / B, R (r3:r2) = A % B
 *       When B == 0, __div0 is called; if it returns, Q = R = 0.
 * Regs: r4-r7 (plus TMP inside THUMB() builds) are saved on entry and
 *       restored on every exit path; ip is used as unsaved scratch.
 *
 * Paths, tried in this order:
 *   L_div_by_0   B == 0
 *   L_pow2       B is a power of two: R = A & (B - 1), Q = A >> log2(B)
 *   L_div_32_32  both high words are zero: defer to __aeabi_uidivmod
 *   L_div_64_64  shift-and-subtract: B and a one-bit marker C are shifted
 *                left together, then walked back down one bit at a time
 *                while the quotient is collected in D.
 *
 * NOTE(review): if THUMB() expands to nothing, the prologue pushes five
 * words, so sp is displaced by 20 bytes at the bl sites below - confirm
 * the callees do not depend on 8-byte stack alignment.
 */
ENTRY(__aeabi_uldivmod)
	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}

	@ Z set -> B == 0
	orrs	ip, B_0, B_1
	beq	L_div_by_0

	@ C = B - 1; B is a power of two iff (B & C) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2

	@ A_1 == B_1 == 0 -> both operands fit in 32 bits
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	@ C: current quotient bit, starts as 1
	mov	C_0, #1
	mov	C_1, #0

	@ D_0 = clz A (high word first, add the low word count if it is zero)
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip

	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip

	@ D_0 = clz B - clz A; nothing to shift unless it is > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift

	@ B <<= D_0
	@ mi: shift < 32, bits cross from the low into the high word
	@ pl: shift >= 32, the high word is the low word shifted by D_0 - 32
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
 ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
 THUMB(	lsrmi	TMP, B_0, ip		)
 THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0

	@ C = 1 << (clz B - clz A), same two cases as above
	movmi	C_1, C_1, lsl D_0
 ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
 THUMB(	lsrmi	TMP, C_0, ip		)
 THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0

L_done_shift:
	@ D: quotient accumulator, starts as 0
	mov	D_0, #0
	mov	D_1, #0
#else
	@ C: current quotient bit; D: quotient accumulator
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0

L_lsl_4:
	@ Stop once the top nibble of B is in use or B >= A
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4

L_lsl_1:
	@ Stop once the top bit of B is set or B >= A
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif

L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1

L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1 (carry out of the high word enters the low word via rrx)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract

L_exit:
	@ A and Q share r1:r0, B and R share r3:r2, so the remainder is
	@ copied out of A before the quotient overwrites it.
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ A_0 is already in r0; pass B_0 as the second argument in r1.
	mov	r1, B_0
	bl	__aeabi_uidivmod
	@ The value left in r1 becomes R_0, r0 is kept as Q_0,
	@ and both high words are cleared.
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ R = A & (B - 1); C still holds B - 1 from the power-of-two test.
	@ Kept in C because Q is written over A before R is written over B.
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1		@ D_1 = 32 - log2(B_0)
	rsbs	D_0, D_0, #31		@ D_0 = log2(B_0), negative if B_0 == 0
	bpl	L_1
	@ The set bit is in B_1: shift by 32 + log2(B_1)
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ Flags still come from the rsbs above: pl -> shift count below 32
	movpl	A_0, A_0, lsr D_0
 ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
 THUMB(	lslpl	TMP, A_1, D_1		)
 THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Move C back into R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ R = A & (B - 1); C still holds B - 1 from the power-of-two test.
	@ Kept in C because Q is written over A before R is written over B.
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ D_0 accumulates the number of trailing zero bits in B, i.e. log2(B).
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero, divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Binary search for the set bit in B_0: 16, 8, 4, 2, 1.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by D_0 bits.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
 ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
	@ Go through TMP: A_1 is still needed intact for Q_1 below.
 THUMB(	lsl	TMP, A_1, D_1		)
 THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
	.popsection