div64.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*
  2. * linux/arch/arm/lib/div64.S
  3. *
  4. * Optimized computation of 64-bit dividend / 32-bit divisor
  5. *
  6. * Author: Nicolas Pitre
  7. * Created: Oct 5, 2003
  8. * Copyright: Monta Vista Software, Inc.
  9. *
  10. * SPDX-License-Identifier: GPL-2.0
  11. */
  12. #include <linux/linkage.h>
  13. #include <asm/assembler.h>
  14. #if defined(__UBOOT__) /* only when __UBOOT__ is defined */
  15. #define UNWIND(x...) /* expands to nothing: every UNWIND(...) directive in this file is dropped */
  16. #endif /* __UBOOT__ */
  17. #ifndef __ARMEB__ /* __ARMEB__ not defined: low words (xl, yl) sit in the lower-numbered registers */
  18. #define xl r0
  19. #define xh r1
  20. #define yl r2
  21. #define yh r3
  22. #else /* __ARMEB__ defined: high words (xh, yh) sit in the lower-numbered registers */
  23. #define xh r0
  24. #define xl r1
  25. #define yh r2
  26. #define yl r3
  27. #endif /* !__ARMEB__ */
  28. /*
  29. * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  30. *
  31. * Note: Calling convention is totally non standard for optimal code.
  32. * This is meant to be used by do_div() from include/asm/div64.h only.
  33. *
  34. * Input parameters:
  35. * xh-xl = dividend (clobbered)
  36. * r4 = divisor (preserved)
  37. *
  38. * Output values:
  39. * yh-yl = result (the quotient; forced to 0 after a division by zero)
  40. * xh = remainder (forced to 0 after a division by zero)
  41. *
  42. * Clobbered regs: xl, ip (condition flags are modified too; lr is overwritten by bl __div0 on the division-by-zero path)
  43. */
  44. .pushsection .text.__do_div64, "ax"
  45. ENTRY(__do_div64)
  46. UNWIND(.fnstart)
  47. @ Test for easy paths first.
  48. subs ip, r4, #1 @ ip = divisor - 1, flags set for the branch below
  49. bls 9f @ divisor is 0 or 1
  50. tst ip, r4 @ (divisor - 1) & divisor is zero only for a power of 2
  51. beq 8f @ divisor is power of 2
  52. @ See if we need to handle upper 32-bit result.
  53. cmp xh, r4
  54. mov yh, #0 @ plain mov: the flags from the cmp above are kept
  55. blo 3f @ high word < divisor: upper result word stays 0
  56. @ Align divisor with upper part of dividend.
  57. @ The aligned divisor is stored in yl preserving the original.
  58. @ The bit position is stored in ip.
  59. #if __LINUX_ARM_ARCH__ >= 5
  60. clz yl, r4
  61. clz ip, xh
  62. sub yl, yl, ip @ yl = shift that lines up the top set bits (xh >= r4 here)
  63. mov ip, #1
  64. mov ip, ip, lsl yl @ ip = result bit matching that shift
  65. mov yl, r4, lsl yl @ yl = divisor shifted up; r4 itself is untouched
  66. #else
  67. mov yl, r4
  68. mov ip, #1
  69. 1: cmp yl, #0x80000000 @ stop once the top bit of yl is set
  70. cmpcc yl, xh @ ...or once the shifted divisor is no longer below xh
  71. movcc yl, yl, lsl #1
  72. movcc ip, ip, lsl #1
  73. bcc 1b
  74. #endif
  75. @ The division loop for needed upper bit positions.
  76. @ Break out early if dividend reaches 0.
  77. 2: cmp xh, yl
  78. orrcs yh, yh, ip @ xh >= yl: this result bit is 1
  79. subscs xh, xh, yl @ ...and subtract; Z set if xh became 0
  80. movsne ip, ip, lsr #1 @ next bit position; Z set once ip runs out
  81. mov yl, yl, lsr #1
  82. bne 2b @ loop while xh != 0 and ip != 0
  83. @ See if we need to handle lower 32-bit result.
  84. 3: cmp xh, #0
  85. mov yl, #0
  86. cmpeq xl, r4 @ only when xh == 0: is the low word already below the divisor?
  87. movlo xh, xl @ yes: the low word is the remainder, low result word stays 0
  88. retlo lr
  89. @ The division loop for lower bit positions.
  90. @ Here we shift remainder bits leftwards rather than moving the
  91. @ divisor for comparisons, considering the carry-out bit as well.
  92. mov ip, #0x80000000 @ ip = current bit of the low result word, top bit first
  93. 4: movs xl, xl, lsl #1 @ C = bit shifted out of xl
  94. adcs xh, xh, xh @ xh = 2 * xh + C; new C is the bit carried out of xh
  95. beq 6f
  96. cmpcc xh, r4 @ no carry-out: compare; with a carry-out C is already set
  97. 5: orrcs yl, yl, ip
  98. subcs xh, xh, r4
  99. movs ip, ip, lsr #1 @ Z set after the last bit position
  100. bne 4b
  101. ret lr
  102. @ The top part of remainder became zero. If carry is set
  103. @ (the 33rd bit) this is a false positive so resume the loop.
  104. @ Otherwise, if lower part is also null then we are done.
  105. 6: bcs 5b
  106. cmp xl, #0
  107. reteq lr @ xh is 0 here, so the remainder returned is 0
  108. @ We still have remainder bits in the low part. Bring them up.
  109. #if __LINUX_ARM_ARCH__ >= 5
  110. clz xh, xl @ we know xh is zero here so... (xh doubles as a scratch shift count)
  111. add xh, xh, #1 @ xh = shift count that pushes the top set bit of xl out
  112. mov xl, xl, lsl xh
  113. mov ip, ip, lsr xh @ skip the same number of result bit positions
  114. #else
  115. 7: movs xl, xl, lsl #1 @ C = bit shifted out of xl
  116. mov ip, ip, lsr #1
  117. bcc 7b @ repeat until a set bit has been shifted out
  118. #endif
  119. @ Current remainder is now 1. It is worthless to compare with
  120. @ divisor at this point since divisor can not be smaller than 3 here.
  121. @ If possible, branch for another shift in the division loop.
  122. @ If no bit position left then we are done.
  123. movs ip, ip, lsr #1 @ Z set if no bit position is left
  124. mov xh, #1
  125. bne 4b
  126. ret lr
  127. 8: @ Division by a power of 2: determine what that divisor order is
  128. @ then simply shift values around
  129. #if __LINUX_ARM_ARCH__ >= 5
  130. clz ip, r4
  131. rsb ip, ip, #31 @ ip = 31 - clz(divisor) = log2(divisor)
  132. #else
  133. mov yl, r4 @ binary search for the set bit: yl = working copy, ip = bit index so far
  134. cmp r4, #(1 << 16)
  135. mov ip, #0
  136. movhs yl, yl, lsr #16
  137. movhs ip, #16
  138. cmp yl, #(1 << 8)
  139. movhs yl, yl, lsr #8
  140. addhs ip, ip, #8
  141. cmp yl, #(1 << 4)
  142. movhs yl, yl, lsr #4
  143. addhs ip, ip, #4
  144. cmp yl, #(1 << 2) @ yl is 1, 2, 4 or 8 by now
  145. addhi ip, ip, #3 @ yl == 8
  146. addls ip, ip, yl, lsr #1 @ yl == 1, 2 or 4 adds 0, 1 or 2
  147. #endif
  148. mov yh, xh, lsr ip @ result = dividend >> log2(divisor), done one word at a time
  149. mov yl, xl, lsr ip
  150. rsb ip, ip, #32 @ ip = 32 - log2(divisor); the next step ORs xh << ip into yl
  151. ARM( orr yl, yl, xh, lsl ip )
  152. THUMB( lsl xh, xh, ip )
  153. THUMB( orr yl, yl, xh )
  154. mov xh, xl, lsl ip @ shift up then back down: keeps the low log2(divisor) bits of xl
  155. mov xh, xh, lsr ip @ xh = remainder
  156. ret lr
  157. @ eq -> division by 1: obvious enough...
  158. 9: moveq yl, xl
  159. moveq yh, xh
  160. moveq xh, #0
  161. reteq lr @ divisor == 0 (not eq) falls through to Ldiv0_64
  162. UNWIND(.fnend)
  163. UNWIND(.fnstart)
  164. UNWIND(.pad #4)
  165. UNWIND(.save {lr})
  166. Ldiv0_64:
  167. @ Division by 0: entered by fall-through; no branch to this label is visible in this file
  168. str lr, [sp, #-8]! @ keep lr across the call; the 8 bytes match .pad #4 plus the saved lr above
  169. bl __div0 @ external handler, not defined in this file
  170. @ as wrong as it could be...
  171. mov yl, #0
  172. mov yh, #0
  173. mov xh, #0
  174. ldr pc, [sp], #8 @ pop the saved lr straight into pc and release the 8 bytes
  175. UNWIND(.fnend)
  176. ENDPROC(__do_div64)
  177. .popsection