/* strcmp.S */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */
/*
 * This is optimized primarily for the ARC700.
 * It would be possible to speed up the loops by one cycle / word
 * respectively one cycle / byte by forcing double source 1 alignment, unrolling
 * by a factor of two, and speculatively loading the second word / byte of
 * source 1; however, that would increase the overhead for loop setup / finish,
 * and strcmp might often terminate early.
 */
  14. .global strcmp
  15. .align 4
  16. strcmp:
  17. or %r2, %r0, %r1
  18. bmsk_s %r2, %r2, 1
  19. brne %r2, 0, .Lcharloop
  20. mov_s %r12, 0x01010101
  21. ror %r5, %r12
  22. .Lwordloop:
  23. ld.ab %r2, [%r0, 4]
  24. ld.ab %r3, [%r1, 4]
  25. nop_s
  26. sub %r4, %r2, %r12
  27. bic %r4, %r4, %r2
  28. and %r4, %r4, %r5
  29. brne %r4, 0, .Lfound0
  30. breq %r2 ,%r3, .Lwordloop
  31. #ifdef __LITTLE_ENDIAN__
  32. xor %r0, %r2, %r3 /* mask for difference */
  33. sub_s %r1, %r0, 1
  34. bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
  35. sub %r1, %r5, %r0
  36. xor %r0, %r5, %r1 /* mask for least significant difference byte */
  37. and_s %r2, %r2, %r0
  38. and_s %r3, %r3, %r0
  39. #endif /* _ENDIAN__ */
  40. cmp_s %r2, %r3
  41. mov_s %r0, 1
  42. j_s.d [%blink]
  43. bset.lo %r0, %r0, 31
  44. .balign 4
  45. #ifdef __LITTLE_ENDIAN__
  46. .Lfound0:
  47. xor %r0, %r2, %r3 /* mask for difference */
  48. or %r0, %r0, %r4 /* or in zero indicator */
  49. sub_s %r1, %r0, 1
  50. bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
  51. sub %r1, %r5, %r0
  52. xor %r0, %r5, %r1 /* mask for least significant difference byte */
  53. and_s %r2, %r2, %r0
  54. and_s %r3, %r3, %r0
  55. sub.f %r0, %r2, %r3
  56. mov.hi %r0, 1
  57. j_s.d [%blink]
  58. bset.lo %r0, %r0, 31
  59. #else /* __BIG_ENDIAN__ */
  60. /*
  61. * The zero-detection above can mis-detect 0x01 bytes as zeroes
  62. * because of carry-propagateion from a lower significant zero byte.
  63. * We can compensate for this by checking that bit0 is zero.
  64. * This compensation is not necessary in the step where we
  65. * get a low estimate for r2, because in any affected bytes
  66. * we already have 0x00 or 0x01, which will remain unchanged
  67. * when bit 7 is cleared.
  68. */
  69. .balign 4
  70. .Lfound0:
  71. lsr %r0, %r4, 8
  72. lsr_s %r1, %r2
  73. bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
  74. bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
  75. or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
  76. cmp_s %r3, %r2 /* ... be independent of trailing garbage */
  77. or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
  78. bic_s %r3, %r3, %r0
  79. rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
  80. cmp_s %r2, %r3
  81. j_s.d [%blink]
  82. bset.lo %r0, %r0, 31
  83. #endif /* _ENDIAN__ */
  84. .balign 4
  85. .Lcharloop:
  86. ldb.ab %r2,[%r0,1]
  87. ldb.ab %r3,[%r1,1]
  88. nop_s
  89. breq %r2, 0, .Lcmpend
  90. breq %r2, %r3, .Lcharloop
  91. .Lcmpend:
  92. j_s.d [%blink]
  93. sub %r0, %r2, %r3