memcpy.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /* Copyright (C) 1999-2019 Free Software Foundation, Inc.
  2. This file is part of the GNU C Library.
  3. Contributed by Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
  4. Optimized by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, see
  15. <http://www.gnu.org/licenses/>. */
  16. #include <sysdep.h>
  17. /* void *memcpy(void *dst, const void *src, size_t n);
  Copy N bytes from SRC to DST and return DST.

  Registers, as used by the code below:
    r4  destination pointer, advanced as bytes are stored
    r5  source pointer (r0 stands in for it while the source is
        being aligned, until L_copy)
    r6  bytes remaining; from the braf dispatch on, the 0-7 tail bytes
    r7  number of 8-byte iterations for the unrolled copy loops
    r3  copy of the original destination, moved to r0 on return
    r0, r1, r2  scratch

  Outline: counts below 11 go straight to the byte loop.  Otherwise
  the source is advanced to a 4-byte boundary, 8 bytes per iteration
  are copied by a loop selected from (dst & 3), and the byte loop
  finishes the remaining (n & 7) bytes.

  The instruction written after braf, bf/s, bra and rts sits in the
  branch delay slot and is executed before the branch takes effect.

  18. No overlap between the memory of DST and of SRC is assumed. */
  19. ENTRY(memcpy)
  20. mov r4,r3 /* Save destination; it is returned in r0 at L_exit. */
  21. /* If less than 11 bytes, just do a byte copy.
  NOTE(review): cmp/gt is a signed compare (T = r0 > r6), so a count
  with bit 31 set also takes this branch -- confirm that is acceptable. */
  22. mov #11,r0
  23. cmp/gt r6,r0
  24. bt L_byteloop_init
  25. /* Check if we need to word-align source.  r0 serves as the source
  pointer from here until it is copied back to r5 at L_copy. */
  26. mov r5,r0
  27. tst #1,r0 /* T = 1 when the source address is even. */
  28. bt L_wordalign
  29. mov.b @r0+,r1 /* Copy one byte. */
  30. add #-1,r6
  31. mov.b r1,@r4
  32. add #1,r4
  33. .balignw 4,0x0009 /* Padding lies on the fall-through path; 0x0009 is the nop opcode. */
  34. L_wordalign:
  35. /* Check if we need to longword-align source. */
  36. tst #2,r0 /* T = 1 when bit 1 of the source address is clear. */
  37. bt L_copy
  38. mov.w @r0+,r1 /* Copy one word.  The destination alignment has not
  been examined yet, so the word is stored as two single bytes,
  in memory order for the selected endianness. */
  39. add #-2,r6
  40. #ifdef __BIG_ENDIAN__
  41. add #1,r4
  42. mov.b r1,@r4 /* Low byte of r1 is the second byte in memory. */
  43. shlr8 r1
  44. mov.b r1,@-r4 /* Pre-decrement: the first byte lands at the original r4. */
  45. add #2,r4
  46. #else
  47. mov.b r1,@r4 /* Low byte of r1 is the first byte in memory. */
  48. add #1,r4
  49. shlr8 r1
  50. mov.b r1,@r4
  51. add #1,r4
  52. #endif
  53. L_copy:
  54. mov r0,r5 /* r5 = source pointer, now on a 4-byte boundary. */
  55. /* Calculate the correct routine to handle the destination
  56. alignment and simultaneously calculate the loop counts for
  57. both the 2 word copy loop and byte copy loop.
  Results: r2 = 16-bit jump table entry selected by (r4 & 3),
  r7 = r6 >> 3 (8-byte iterations), r6 = r6 & 7 (tail bytes).
  This path is reached only with at least 11 bytes and the alignment
  steps above consume at most 3, so r7 is at least 1 here. */
  58. mova L_jumptable,r0
  59. mov r0,r1 /* r1 = table address; r0 is reused for the index. */
  60. mov r4,r0
  61. mov r6,r7
  62. and #3,r0
  63. shlr2 r7
  64. shll r0 /* Scale the index: table entries are 2 bytes each. */
  65. shlr r7
  66. mov.w @(r0,r1),r2
  67. mov #7,r0
  68. braf r2 /* Branch to L_base + r2. */
  69. and r0,r6 /* Delay slot: r6 = bytes left for the byte loop. */
  70. L_base:
  71. .balign 4
  72. L_jumptable:
  73. .word L_copydest0 - L_base /* (dest mod 4) == 0 */
  74. .word L_copydest1_or_3 - L_base /* (dest mod 4) == 1 */
  75. .word L_copydest2 - L_base /* (dest mod 4) == 2 */
  76. .word L_copydest1_or_3 - L_base /* (dest mod 4) == 3 */
  77. .balign 4
  78. /* Copy routine for (dest mod 4) == 1 or == 3.
  r4 is odd on entry, so after the -1 bias below the word stores at
  displacements 2 and 6 fall on even addresses. */
  79. L_copydest1_or_3:
  80. add #-1,r4 /* Bias r4 so the 8 output bytes sit at displacements 1 to 8. */
  81. .balignw 4,0x0009 /* nop padding, executed on the way into the loop. */
  82. L_copydest1_or_3_loop:
  83. mov.l @r5+,r0 /* Read first longword. */
  84. dt r7 /* Decrement the iteration count; T = 1 when it reaches zero. */
  85. mov.l @r5+,r1 /* Read second longword. */
  86. #ifdef __BIG_ENDIAN__
  87. /* Write first longword as byte, word, byte. */
  88. mov.b r0,@(4,r4) /* Lowest byte is the last of the four in memory. */
  89. shlr8 r0
  90. mov.w r0,@(2,r4) /* Middle 16 bits. */
  91. shlr16 r0
  92. mov.b r0,@(1,r4) /* Highest byte is the first in memory. */
  93. mov r1,r0 /* The displacement store forms used here take r0 as source. */
  94. /* Write second longword as byte, word, byte. */
  95. mov.b r0,@(8,r4)
  96. shlr8 r0
  97. mov.w r0,@(6,r4)
  98. shlr16 r0
  99. mov.b r0,@(5,r4)
  100. #else
  101. /* Write first longword as byte, word, byte. */
  102. mov.b r0,@(1,r4) /* Lowest byte is the first of the four in memory. */
  103. shlr8 r0
  104. mov.w r0,@(2,r4) /* Middle 16 bits. */
  105. shlr16 r0
  106. mov.b r0,@(4,r4) /* Highest byte is the last in memory. */
  107. mov r1,r0 /* The displacement store forms used here take r0 as source. */
  108. /* Write second longword as byte, word, byte. */
  109. mov.b r0,@(5,r4)
  110. shlr8 r0
  111. mov.w r0,@(6,r4)
  112. shlr16 r0
  113. mov.b r0,@(8,r4)
  114. #endif
  115. bf/s L_copydest1_or_3_loop /* Loop while the T set by dt r7 is 0. */
  116. add #8,r4 /* Delay slot: advance the destination by 8. */
  117. bra L_byteloop_init
  118. add #1,r4 /* Delay slot: undo the -1 bias applied at entry. */
  119. .balign 4
  120. /* Copy routine for (dest mod 4) == 2.
  Each longword is stored as two 16-bit halves. */
  121. L_copydest2:
  122. L_copydest2_loop:
  123. mov.l @r5+,r0 /* Read first longword. */
  124. dt r7 /* Decrement the iteration count; T = 1 when it reaches zero. */
  125. mov.l @r5+,r1 /* Read second longword. */
  126. #ifdef __BIG_ENDIAN__
  127. mov.w r0,@(2,r4) /* Low half goes second in memory. */
  128. shlr16 r0
  129. mov.w r0,@r4 /* High half goes first. */
  130. mov r1,r0
  131. mov.w r0,@(6,r4)
  132. shlr16 r0
  133. mov.w r0,@(4,r4)
  134. #else
  135. mov.w r0,@r4 /* Low half goes first in memory. */
  136. shlr16 r0
  137. mov.w r0,@(2,r4) /* High half goes second. */
  138. mov r1,r0
  139. mov.w r0,@(4,r4)
  140. shlr16 r0
  141. mov.w r0,@(6,r4)
  142. #endif
  143. bf/s L_copydest2_loop /* Loop while the T set by dt r7 is 0. */
  144. add #8,r4 /* Delay slot: advance the destination by 8. */
  145. bra L_byteloop_init
  146. nop /* Delay slot: nothing left to do. */
  147. .balign 4
  148. /* Copy routine for (dest mod 4) == 0.
  Source and destination are both 4-byte aligned, so whole longwords
  are loaded and stored. */
  149. L_copydest0:
  150. add #-8,r4 /* Pre-bias: the loop adds 8 to r4 before each pair of stores. */
  151. .balignw 4,0x0009 /* nop padding, executed on the way into the loop. */
  152. L_copydest0_loop:
  153. mov.l @r5+,r0 /* Read first longword. */
  154. dt r7 /* Decrement the iteration count; T = 1 when it reaches zero. */
  155. mov.l @r5+,r1 /* Read second longword. */
  156. add #8,r4
  157. mov.l r0,@r4
  158. bf/s L_copydest0_loop /* Loop while the T set by dt r7 is 0. */
  159. mov.l r1,@(4,r4) /* Delay slot: store the second longword. */
  160. add #8,r4 /* Step past the last 8 bytes stored.  Fall through. */
  161. L_byteloop_init:
  162. tst r6,r6 /* T = 1 when no bytes remain. */
  163. bt L_exit
  164. .balignw 4,0x0009 /* nop padding, executed on the way into the loop. */
  165. /* Copy remaining bytes.  r6 is nonzero on entry to the loop. */
  166. L_byteloop:
  167. mov.b @r5+,r0
  168. dt r6 /* Decrement the byte count; T = 1 when it reaches zero. */
  169. mov.b r0,@r4
  170. bf/s L_byteloop /* Loop while the T set by dt r6 is 0. */
  171. add #1,r4 /* Delay slot: advance the destination. */
  172. L_exit:
  173. rts
  174. mov r3,r0 /* Delay slot: return destination. */
  175. END(memcpy)
  176. libc_hidden_builtin_def (memcpy)