udivsi3_i4i-Os.S 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. /* Copyright (C) 2006 Free Software Foundation, Inc.
  2. * SPDX-License-Identifier: GPL-2.0+
  3. */
  4. /* Moderately Space-optimized libgcc routines for the Renesas SH /
  5. STMicroelectronics ST40 CPUs.
  6. Contributed by J"orn Rennecke joern.rennecke@st.com. */
  7. /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
  8. sh4-200 run times:
  9. udiv small divisor: 55 cycles
  10. udiv large divisor: 52 cycles
  11. sdiv small divisor, positive result: 59 cycles
  12. sdiv large divisor, positive result: 56 cycles
  13. sdiv small divisor, negative result: 65 cycles (*)
  14. sdiv large divisor, negative result: 62 cycles (*)
  15. (*): r2 is restored in the delay slot of the returning jump (jmp @r2)
  16. and has a lingering latency of two more cycles. */
  /*
   * __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
   *
   * As the code reads: the dividend is taken from r4, the divisor from r5,
   * and the quotient is assembled in r0.  r4 and r5 are pushed on the stack
   * and reloaded before returning.  r1 is overwritten with the return
   * address (copied from pr) and the routine leaves through "jmp @r1".
   * The T bit is modified.
   *
   * The code below handles divisors that fit in 16 bits; other divisors
   * branch to large_divisor.
   * NOTE(review): register roles presumably follow GCC's special SH
   * calling convention for division helpers - confirm against the compiler.
   */
  17. .balign 4
  18. .global __udivsi3_i4i
  19. .global __udivsi3_i4
  20. .set __udivsi3_i4, __udivsi3_i4i                /* second name for the same entry point */
  21. .type __udivsi3_i4i, @function
  22. .type __sdivsi3_i4i, @function
  23. __udivsi3_i4i:
  24. sts pr,r1                /* r1 = return address; the exit is "jmp @r1" */
  25. mov.l r4,@-r15                /* save r4, reloaded before returning */
  26. extu.w r5,r0                /* r0 = low 16 bits of the divisor, zero-extended */
  27. cmp/eq r5,r0                /* T = 1 when the divisor fits in 16 bits */
  28. swap.w r4,r0                /* r0 = dividend with its 16-bit halves swapped */
  29. shlr16 r4                /* r4 = upper 16 bits of the dividend */
  30. bf/s large_divisor                /* T clear: divisor does not fit in 16 bits */
  31. div0u                /* delay slot, runs on both paths: set up unsigned div1 stepping */
  32. mov.l r5,@-r15                /* save r5, reloaded before returning */
  33. shll16 r5                /* move the 16-bit divisor into the upper half of r5 */
  34. sdiv_small_divisor:                /* also reached from __sdivsi3_i4i via sdiv_check_divisor */
  /* First group: 2 + 6 = 8 div1 steps (this div1, the delay slot, div6). */
  35. div1 r5,r4
  36. bsr div6
  37. div1 r5,r4                /* delay slot */
  /* Second group: another 8 div1 steps, 16 so far. */
  38. div1 r5,r4
  39. bsr div6
  40. div1 r5,r4                /* delay slot */
  41. xtrct r4,r0                /* r0 = (r4 << 16) | (r0 >> 16) */
  42. xtrct r0,r4                /* r4 = (r0 << 16) | (r4 >> 16) */
  43. bsr div7                /* 7 div1 steps */
  44. swap.w r4,r4                /* delay slot: swap the 16-bit halves of r4 before stepping */
  /* 2 + 7 = 9 more div1 steps, giving 16 for this second half. */
  45. div1 r5,r4
  46. bsr div7
  47. div1 r5,r4                /* delay slot */
  48. xtrct r4,r0                /* r0 = (r4 << 16) | (r0 >> 16) */
  49. mov.l @r15+,r5                /* restore r5 */
  50. swap.w r0,r0                /* swap the 16-bit halves of r0 */
  51. mov.l @r15+,r4                /* restore r4 */
  52. jmp @r1                /* leave: r1 is the return address, or negate_result when set up by neg_result */
  53. rotcl r0                /* delay slot: rotate the final T bit into bit 0 of r0 */
  /*
   * div7 / div6: local subroutines called with bsr from the small-divisor
   * path.  div6 performs six "div1 r5,r4" steps (five inline plus one in the
   * rts delay slot); div7 performs one extra step and falls through into
   * div6, for seven in total.  They return with rts, so pr is overwritten by
   * the calling bsr; the entry points copy pr out before reaching here.
   */
  54. div7:
  55. div1 r5,r4                /* extra step, then fall through into div6 */
  56. div6:
  57. div1 r5,r4; div1 r5,r4; div1 r5,r4
  58. div1 r5,r4; div1 r5,r4; rts; div1 r5,r4                /* last div1 sits in the rts delay slot */
  /*
   * divx3: local subroutine called with bsr from the large-divisor path.
   * It interleaves three "rotcl r0" with three "div1 r5,r4" (the last div1
   * in the rts delay slot).  Each rotcl rotates r0 left through T, taking in
   * the T bit left by the preceding div1.
   */
  59. divx3:
  60. rotcl r0
  61. div1 r5,r4
  62. rotcl r0
  63. div1 r5,r4
  64. rotcl r0
  65. rts
  66. div1 r5,r4                /* delay slot: executed before control returns to the caller */
  /*
   * large_divisor: unsigned path for divisors that do not fit in 16 bits.
   * On entry r4 holds the upper 16 bits of the dividend and r0 the dividend
   * with its halves swapped (both set up by the entry code); r4 has already
   * been pushed.  sdiv_large_divisor is the entry used by __sdivsi3_i4i,
   * which has pushed r5 itself.
   */
  67. large_divisor:
  68. mov.l r5,@-r15                /* save r5; skipped when entering at sdiv_large_divisor */
  69. sdiv_large_divisor:
  70. xor r4,r0                /* clears the low half of r0, leaving the dividend's low 16 bits in the upper half */
  /* Each repetition is rotcl + div1 here plus 3 pairs in divx3: 4 x 4 = 16 pairs. */
  71. .rept 4
  72. rotcl r0
  73. bsr divx3
  74. div1 r5,r4                /* delay slot: executes before divx3 starts */
  75. .endr
  76. mov.l @r15+,r5                /* restore r5 */
  77. mov.l @r15+,r4                /* restore r4 */
  78. jmp @r1                /* leave: r1 is the return address, or negate_result when set up by neg_result */
  79. rotcl r0                /* delay slot: rotate the final T bit into bit 0 of r0 */
  /*
   * __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit
   * division built on the unsigned code.
   *
   * r4 (dividend) and r5 (divisor) are pushed unmodified, so the shared exit
   * code restores the caller's original values.  Negative operands are
   * negated in the registers, then control joins sdiv_small_divisor or
   * sdiv_large_divisor.
   *
   * Same-sign operands (pos_result): r1 = caller's return address, so the
   * shared "jmp @r1" returns directly.
   * Opposite-sign operands (neg_result): r1 = address of negate_result, the
   * caller's r2 is parked in macl, and r2 takes the return address; the
   * shared "jmp @r1" then lands in negate_result.
   */
  80. .global __sdivsi3_i4i
  81. .global __sdivsi3_i4
  82. .global __sdivsi3
  83. .set __sdivsi3_i4, __sdivsi3_i4i
  84. .set __sdivsi3, __sdivsi3_i4i
  85. __sdivsi3_i4i:
  86. mov.l r4,@-r15                /* save original dividend */
  87. cmp/pz r5                /* T = 1 when divisor >= 0 */
  88. mov.l r5,@-r15                /* save original divisor */
  89. bt/s pos_divisor                /* decided by the cmp/pz r5 above */
  90. cmp/pz r4                /* delay slot: T = 1 when dividend >= 0 */
  91. neg r5,r5                /* divisor was negative: use its negation */
  92. extu.w r5,r0                /* r0 = low 16 bits of the (negated) divisor */
  93. bt/s neg_result                /* dividend >= 0 with divisor < 0: quotient must be negated */
  94. cmp/eq r5,r0                /* delay slot: T = 1 when the divisor fits in 16 bits */
  95. neg r4,r4                /* both operands negative: negate dividend, quotient stays as is */
  96. pos_result:
  97. swap.w r4,r0                /* r0 = dividend with its 16-bit halves swapped */
  98. bra sdiv_check_divisor
  99. sts pr,r1                /* delay slot: r1 = caller's return address */
  100. pos_divisor:
  101. extu.w r5,r0                /* r0 = low 16 bits of the divisor */
  102. bt/s pos_result                /* dividend >= 0 as well: no sign fix-up needed */
  103. cmp/eq r5,r0                /* delay slot: T = 1 when the divisor fits in 16 bits */
  104. neg r4,r4                /* dividend negative, divisor non-negative: negate dividend */
  105. neg_result:
  106. mova negate_result,r0                /* r0 = address of negate_result */
  107. ;                /* lone statement separator; contains no instruction */
  108. mov r0,r1                /* the shared "jmp @r1" exit will continue at negate_result */
  109. swap.w r4,r0                /* r0 = dividend with its 16-bit halves swapped */
  110. lds r2,macl                /* park the caller's r2 in macl (macl is overwritten) */
  111. sts pr,r2                /* r2 = caller's return address, used by negate_result */
  112. sdiv_check_divisor:
  113. shlr16 r4                /* r4 = upper 16 bits of the dividend */
  114. bf/s sdiv_large_divisor                /* T (from cmp/eq) clear: divisor does not fit in 16 bits */
  115. div0u                /* delay slot, runs on both paths: set up unsigned div1 stepping */
  116. bra sdiv_small_divisor
  117. shll16 r5                /* delay slot: move the 16-bit divisor into the upper half of r5 */
  /*
   * negate_result: exit stub for signed divisions whose quotient is
   * negative.  neg_result loads this label's address into r1, so the shared
   * "jmp @r1" exit arrives here with the unsigned quotient in r0, the return
   * address in r2 and the caller's r2 parked in macl.
   * The .balign 4 keeps the label 4-byte aligned, which the mova in
   * neg_result needs in order to address it.
   */
  118. .balign 4
  119. negate_result:
  120. neg r0,r0                /* quotient = -quotient */
  121. jmp @r2                /* return to the caller */
  122. sts macl,r2                /* delay slot: restore the caller's r2 from macl */