string.c 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /*
  2. * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
  3. * This file is part of the GNU C Library.
  4. * Copyright (c) 2011 The Chromium OS Authors.
  5. *
  6. * SPDX-License-Identifier: GPL-2.0+
  7. */
  8. /* From glibc-2.14, sysdeps/i386/memset.c */
  9. #include <linux/types.h>
  10. #include <linux/compiler.h>
  11. #include <asm/string.h>
  12. typedef uint32_t op_t;
  13. void *memset(void *dstpp, int c, size_t len)
  14. {
  15. int d0;
  16. unsigned long int dstp = (unsigned long int) dstpp;
  17. /* This explicit register allocation improves code very much indeed. */
  18. register op_t x asm("ax");
  19. x = (unsigned char) c;
  20. /* Clear the direction flag, so filling will move forward. */
  21. asm volatile("cld");
  22. /* This threshold value is optimal. */
  23. if (len >= 12) {
  24. /* Fill X with four copies of the char we want to fill with. */
  25. x |= (x << 8);
  26. x |= (x << 16);
  27. /* Adjust LEN for the bytes handled in the first loop. */
  28. len -= (-dstp) % sizeof(op_t);
  29. /*
  30. * There are at least some bytes to set. No need to test for
  31. * LEN == 0 in this alignment loop.
  32. */
  33. /* Fill bytes until DSTP is aligned on a longword boundary. */
  34. asm volatile(
  35. "rep\n"
  36. "stosb" /* %0, %2, %3 */ :
  37. "=D" (dstp), "=c" (d0) :
  38. "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
  39. "memory");
  40. /* Fill longwords. */
  41. asm volatile(
  42. "rep\n"
  43. "stosl" /* %0, %2, %3 */ :
  44. "=D" (dstp), "=c" (d0) :
  45. "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
  46. "memory");
  47. len %= sizeof(op_t);
  48. }
  49. /* Write the last few bytes. */
  50. asm volatile(
  51. "rep\n"
  52. "stosb" /* %0, %2, %3 */ :
  53. "=D" (dstp), "=c" (d0) :
  54. "0" (dstp), "1" (len), "a" (x) :
  55. "memory");
  56. return dstpp;
  57. }
  58. #define OP_T_THRES 8
  59. #define OPSIZ (sizeof(op_t))
  60. #define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
  61. do { \
  62. int __d0; \
  63. asm volatile( \
  64. /* Clear the direction flag, so copying goes forward. */ \
  65. "cld\n" \
  66. /* Copy bytes. */ \
  67. "rep\n" \
  68. "movsb" : \
  69. "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
  70. "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
  71. "memory"); \
  72. } while (0)
  73. #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
  74. do { \
  75. int __d0; \
  76. asm volatile( \
  77. /* Clear the direction flag, so copying goes forward. */ \
  78. "cld\n" \
  79. /* Copy longwords. */ \
  80. "rep\n" \
  81. "movsl" : \
  82. "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
  83. "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \
  84. "memory"); \
  85. (nbytes_left) = (nbytes) % 4; \
  86. } while (0)
  87. void *memcpy(void *dstpp, const void *srcpp, size_t len)
  88. {
  89. unsigned long int dstp = (long int)dstpp;
  90. unsigned long int srcp = (long int)srcpp;
  91. /* Copy from the beginning to the end. */
  92. /* If there not too few bytes to copy, use word copy. */
  93. if (len >= OP_T_THRES) {
  94. /* Copy just a few bytes to make DSTP aligned. */
  95. len -= (-dstp) % OPSIZ;
  96. BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
  97. /* Copy from SRCP to DSTP taking advantage of the known
  98. * alignment of DSTP. Number of bytes remaining is put
  99. * in the third argument, i.e. in LEN. This number may
  100. * vary from machine to machine.
  101. */
  102. WORD_COPY_FWD(dstp, srcp, len, len);
  103. /* Fall out and copy the tail. */
  104. }
  105. /* There are just a few bytes to copy. Use byte memory operations. */
  106. BYTE_COPY_FWD(dstp, srcp, len);
  107. return dstpp;
  108. }
  109. void *memmove(void *dest, const void *src, size_t n)
  110. {
  111. int d0, d1, d2, d3, d4, d5;
  112. char *ret = dest;
  113. __asm__ __volatile__(
  114. /* Handle more 16 bytes in loop */
  115. "cmp $0x10, %0\n\t"
  116. "jb 1f\n\t"
  117. /* Decide forward/backward copy mode */
  118. "cmp %2, %1\n\t"
  119. "jb 2f\n\t"
  120. /*
  121. * movs instruction have many startup latency
  122. * so we handle small size by general register.
  123. */
  124. "cmp $680, %0\n\t"
  125. "jb 3f\n\t"
  126. /* movs instruction is only good for aligned case */
  127. "mov %1, %3\n\t"
  128. "xor %2, %3\n\t"
  129. "and $0xff, %3\n\t"
  130. "jz 4f\n\t"
  131. "3:\n\t"
  132. "sub $0x10, %0\n\t"
  133. /* We gobble 16 bytes forward in each loop */
  134. "3:\n\t"
  135. "sub $0x10, %0\n\t"
  136. "mov 0*4(%1), %3\n\t"
  137. "mov 1*4(%1), %4\n\t"
  138. "mov %3, 0*4(%2)\n\t"
  139. "mov %4, 1*4(%2)\n\t"
  140. "mov 2*4(%1), %3\n\t"
  141. "mov 3*4(%1), %4\n\t"
  142. "mov %3, 2*4(%2)\n\t"
  143. "mov %4, 3*4(%2)\n\t"
  144. "lea 0x10(%1), %1\n\t"
  145. "lea 0x10(%2), %2\n\t"
  146. "jae 3b\n\t"
  147. "add $0x10, %0\n\t"
  148. "jmp 1f\n\t"
  149. /* Handle data forward by movs */
  150. ".p2align 4\n\t"
  151. "4:\n\t"
  152. "mov -4(%1, %0), %3\n\t"
  153. "lea -4(%2, %0), %4\n\t"
  154. "shr $2, %0\n\t"
  155. "rep movsl\n\t"
  156. "mov %3, (%4)\n\t"
  157. "jmp 11f\n\t"
  158. /* Handle data backward by movs */
  159. ".p2align 4\n\t"
  160. "6:\n\t"
  161. "mov (%1), %3\n\t"
  162. "mov %2, %4\n\t"
  163. "lea -4(%1, %0), %1\n\t"
  164. "lea -4(%2, %0), %2\n\t"
  165. "shr $2, %0\n\t"
  166. "std\n\t"
  167. "rep movsl\n\t"
  168. "mov %3,(%4)\n\t"
  169. "cld\n\t"
  170. "jmp 11f\n\t"
  171. /* Start to prepare for backward copy */
  172. ".p2align 4\n\t"
  173. "2:\n\t"
  174. "cmp $680, %0\n\t"
  175. "jb 5f\n\t"
  176. "mov %1, %3\n\t"
  177. "xor %2, %3\n\t"
  178. "and $0xff, %3\n\t"
  179. "jz 6b\n\t"
  180. /* Calculate copy position to tail */
  181. "5:\n\t"
  182. "add %0, %1\n\t"
  183. "add %0, %2\n\t"
  184. "sub $0x10, %0\n\t"
  185. /* We gobble 16 bytes backward in each loop */
  186. "7:\n\t"
  187. "sub $0x10, %0\n\t"
  188. "mov -1*4(%1), %3\n\t"
  189. "mov -2*4(%1), %4\n\t"
  190. "mov %3, -1*4(%2)\n\t"
  191. "mov %4, -2*4(%2)\n\t"
  192. "mov -3*4(%1), %3\n\t"
  193. "mov -4*4(%1), %4\n\t"
  194. "mov %3, -3*4(%2)\n\t"
  195. "mov %4, -4*4(%2)\n\t"
  196. "lea -0x10(%1), %1\n\t"
  197. "lea -0x10(%2), %2\n\t"
  198. "jae 7b\n\t"
  199. /* Calculate copy position to head */
  200. "add $0x10, %0\n\t"
  201. "sub %0, %1\n\t"
  202. "sub %0, %2\n\t"
  203. /* Move data from 8 bytes to 15 bytes */
  204. ".p2align 4\n\t"
  205. "1:\n\t"
  206. "cmp $8, %0\n\t"
  207. "jb 8f\n\t"
  208. "mov 0*4(%1), %3\n\t"
  209. "mov 1*4(%1), %4\n\t"
  210. "mov -2*4(%1, %0), %5\n\t"
  211. "mov -1*4(%1, %0), %1\n\t"
  212. "mov %3, 0*4(%2)\n\t"
  213. "mov %4, 1*4(%2)\n\t"
  214. "mov %5, -2*4(%2, %0)\n\t"
  215. "mov %1, -1*4(%2, %0)\n\t"
  216. "jmp 11f\n\t"
  217. /* Move data from 4 bytes to 7 bytes */
  218. ".p2align 4\n\t"
  219. "8:\n\t"
  220. "cmp $4, %0\n\t"
  221. "jb 9f\n\t"
  222. "mov 0*4(%1), %3\n\t"
  223. "mov -1*4(%1, %0), %4\n\t"
  224. "mov %3, 0*4(%2)\n\t"
  225. "mov %4, -1*4(%2, %0)\n\t"
  226. "jmp 11f\n\t"
  227. /* Move data from 2 bytes to 3 bytes */
  228. ".p2align 4\n\t"
  229. "9:\n\t"
  230. "cmp $2, %0\n\t"
  231. "jb 10f\n\t"
  232. "movw 0*2(%1), %%dx\n\t"
  233. "movw -1*2(%1, %0), %%bx\n\t"
  234. "movw %%dx, 0*2(%2)\n\t"
  235. "movw %%bx, -1*2(%2, %0)\n\t"
  236. "jmp 11f\n\t"
  237. /* Move data for 1 byte */
  238. ".p2align 4\n\t"
  239. "10:\n\t"
  240. "cmp $1, %0\n\t"
  241. "jb 11f\n\t"
  242. "movb (%1), %%cl\n\t"
  243. "movb %%cl, (%2)\n\t"
  244. ".p2align 4\n\t"
  245. "11:"
  246. : "=&c" (d0), "=&S" (d1), "=&D" (d2),
  247. "=r" (d3), "=r" (d4), "=r"(d5)
  248. : "0" (n),
  249. "1" (src),
  250. "2" (dest)
  251. : "memory");
  252. return ret;
  253. }