/* strcpy-700.S */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */
/*
 * If dst and src are 4 byte aligned, copy 8 bytes at a time.
 * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
 * it 8 byte aligned. Thus, we can do a little read-ahead, without
 * dereferencing a cache line that we should not touch.
 * Note that short and long instructions have been scheduled to avoid
 * branch stalls.
 * The beq_s to r3z could be made unaligned & long to avoid a stall
 * there, but it is not likely to be taken often, and it would also be likely
 * to cost an unaligned mispredict at the next call.
 */
  17. .global strcpy
  18. .align 4
  19. strcpy:
  20. or %r2, %r0, %r1
  21. bmsk_s %r2, %r2, 1
  22. brne.d %r2, 0, charloop
  23. mov_s %r10, %r0
  24. ld_s %r3, [%r1, 0]
  25. mov %r8, 0x01010101
  26. bbit0.d %r1, 2, loop_start
  27. ror %r12, %r8
  28. sub %r2, %r3, %r8
  29. bic_s %r2, %r2, %r3
  30. tst_s %r2,%r12
  31. bne r3z
  32. mov_s %r4,%r3
  33. .balign 4
  34. loop:
  35. ld.a %r3, [%r1, 4]
  36. st.ab %r4, [%r10, 4]
  37. loop_start:
  38. ld.a %r4, [%r1, 4]
  39. sub %r2, %r3, %r8
  40. bic_s %r2, %r2, %r3
  41. tst_s %r2, %r12
  42. bne_s r3z
  43. st.ab %r3, [%r10, 4]
  44. sub %r2, %r4, %r8
  45. bic %r2, %r2, %r4
  46. tst %r2, %r12
  47. beq loop
  48. mov_s %r3, %r4
  49. #ifdef __LITTLE_ENDIAN__
  50. r3z: bmsk.f %r1, %r3, 7
  51. lsr_s %r3, %r3, 8
  52. #else /* __BIG_ENDIAN__ */
  53. r3z: lsr.f %r1, %r3, 24
  54. asl_s %r3, %r3, 8
  55. #endif /* _ENDIAN__ */
  56. bne.d r3z
  57. stb.ab %r1, [%r10, 1]
  58. j_s [%blink]
  59. .balign 4
  60. charloop:
  61. ldb.ab %r3, [%r1, 1]
  62. brne.d %r3, 0, charloop
  63. stb.ab %r3, [%r10, 1]
  64. j [%blink]