12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- /*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
- /*
- * If dst and src are 4 byte aligned, copy 8 bytes at a time.
- * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
- * it 8 byte aligned. Thus, we can do a little read-ahead, without
- * dereferencing a cache line that we should not touch.
- * Note that short and long instructions have been scheduled to avoid
- * branch stalls.
- * The bne_s to r3z could be made unaligned & long to avoid a stall
- * there, but it is not likely to be taken often, and it would also be likely
- * to cost an unaligned mispredict at the next call.
- */
/*
 * strcpy: copy the NUL-terminated string at src to dst, terminator included.
 *
 * In:   r0 = dst, r1 = src (all stores go through r10, a copy of r0;
 *       all loads go through r1).
 * Out:  r0 is never written, so it still holds dst at return.
 * Writes: r1, r2, r3, r4, r8, r10, r12 and the status flags.
 *
 * Two paths:
 *   - word path: taken when the low two bits of both pointers are zero;
 *     copies 32-bit words, two per loop iteration, then finishes the last
 *     word byte by byte at r3z.
 *   - charloop: plain byte-at-a-time copy for every other alignment.
 *
 * Zero-byte test used on each word x:
 *     (x - 0x01010101) & ~x & 0x80808080
 * is non-zero exactly when at least one byte of x is zero.
 *
 * A ".d" suffix on a branch means the instruction on the next line sits in
 * its delay slot and executes whether or not the branch is taken.
 */
- .global strcpy
- .align 4
- strcpy:
- or %r2, %r0, %r1 /* r2 = dst | src: merge the alignment bits of both pointers */
- bmsk_s %r2, %r2, 1 /* keep bits 1:0 only */
- brne.d %r2, 0, charloop /* either pointer not 4-byte aligned -> byte loop */
- mov_s %r10, %r0 /* delay slot: r10 = running dst pointer (needed on both paths) */
- ld_s %r3, [%r1, 0] /* r3 = first source word */
- mov %r8, 0x01010101 /* r8 = 0x01 in every byte lane */
- bbit0.d %r1, 2, loop_start /* src bit 2 clear (8-byte aligned) -> enter the loop directly */
- ror %r12, %r8 /* delay slot: rotate right by one -> r12 = 0x80808080 */
/* Fall-through: src is 4- but not 8-byte aligned; test the first word on its own. */
- sub %r2, %r3, %r8 /* r2 = r3 - 0x01010101 */
- bic_s %r2, %r2, %r3 /* r2 &= ~r3 */
- tst_s %r2,%r12 /* flags from r2 & 0x80808080 */
- bne r3z /* terminator is inside the first word */
- mov_s %r4,%r3 /* hand the word to r4 so the store at the top of the loop writes it */
- .balign 4
/* Loop invariant at "loop": r4 holds a word already known to contain no zero byte. */
- loop:
- ld.a %r3, [%r1, 4] /* r1 += 4, then r3 = [r1] */
- st.ab %r4, [%r10, 4] /* [r10] = r4, then r10 += 4 */
- loop_start:
- ld.a %r4, [%r1, 4] /* read-ahead: fetch the following word before r3 has been checked */
- sub %r2, %r3, %r8 /* zero-byte test on r3 ... */
- bic_s %r2, %r2, %r3
- tst_s %r2, %r12
- bne_s r3z /* ... r3 holds the terminator; r4 is discarded unused */
- st.ab %r3, [%r10, 4] /* r3 is clean: store it and advance dst */
- sub %r2, %r4, %r8 /* zero-byte test on r4 ... */
- bic %r2, %r2, %r4
- tst %r2, %r12
- beq loop /* ... r4 is clean too: go round again (it is stored at "loop") */
- mov_s %r3, %r4 /* r4 holds the terminator: finish it via the common r3 tail */
/*
 * r3z: r3 is the final word. Emit its bytes in memory order up to and
 * including the first zero byte. r1 is reused here as the byte scratch
 * register; the source pointer is no longer needed.
 */
- #ifdef __LITTLE_ENDIAN__
- r3z: bmsk.f %r1, %r3, 7 /* r1 = low byte of r3 (first in memory); .f sets Z if it is 0 */
- lsr_s %r3, %r3, 8 /* move the next byte down; no .f, so the flags above survive */
- #else /* __BIG_ENDIAN__ */
- r3z: lsr.f %r1, %r3, 24 /* r1 = top byte of r3 (first in memory); .f sets Z if it is 0 */
- asl_s %r3, %r3, 8 /* move the next byte up; no .f, so the flags above survive */
- #endif /* _ENDIAN__ */
- bne.d r3z /* byte was non-zero -> keep going */
- stb.ab %r1, [%r10, 1] /* delay slot: always store the byte, so the NUL is written too */
- j_s [%blink] /* return to the address held in blink */
- .balign 4
/* Byte-at-a-time fallback for pointers that are not both 4-byte aligned. */
- charloop:
- ldb.ab %r3, [%r1, 1] /* r3 = *src, then src += 1 */
- brne.d %r3, 0, charloop /* loop until the byte just loaded is the terminator */
- stb.ab %r3, [%r10, 1] /* delay slot: always store the byte, then dst += 1 */
- j [%blink] /* return to the address held in blink */
|