strchr-700.S

/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict. On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */
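/*
 * Exposition (an added sketch, not part of the original source): the
 * core of this routine is the classic word-at-a-time byte test.  With
 * %r3 = 0x01010101, %r4 = 0x80808080 and %r5 holding the searched-for
 * character replicated into all four bytes, each loaded word is tested
 * for both NUL and the character roughly like this C fragment (names
 * are illustrative):
 *
 *	#include <stdint.h>
 *
 *	// 0x80 survives in each byte of w that is 0x00 (exactly so in
 *	// the least significant such byte; higher bytes may pick up
 *	// spurious bits from borrow propagation).
 *	static inline uint32_t haszero(uint32_t w)
 *	{
 *		return (w - 0x01010101u) & ~w & 0x80808080u;
 *	}
 *
 *	// 0x80 survives in each byte of w that equals the character
 *	// replicated across the word (cccc).
 *	static inline uint32_t hasvalue(uint32_t w, uint32_t cccc)
 *	{
 *		return haszero(w ^ cccc);
 *	}
 *
 * Only the first hit in memory order is ever used, where the
 * indication is exact (see the comment before .Lfound_char), so the
 * borrow effects are harmless.
 */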
.global strchr
.align 4
strchr:
	extb_s	%r1, %r1		/* zero-extend the character to search for */
	asl	%r5, %r1, 8
	bmsk	%r2, %r0, 1		/* %r2 = byte offset within the word (%r0 & 3) */
	or	%r5, %r5, %r1		/* %r5 = c | c << 8 */
	mov_s	%r3, 0x01010101
	breq.d	%r2, %r0, .Laligned
	asl	%r4, %r5, 16
	/* Unaligned head: round %r0 down to a word boundary and build a
	   0x01010101 constant that skips the out-of-range low bytes. */
	sub_s	%r0, %r0, %r2
	asl	%r7, %r2, 3		/* byte offset -> bit offset */
	ld_s	%r2, [%r0]
#ifdef __LITTLE_ENDIAN__
	asl	%r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
	lsr	%r7, %r3, %r7
#endif /* ENDIAN */
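/*
 * Added sketch of the trick above: rather than masking the word loaded
 * from the rounded-down address, the 0x01010101 constant itself is
 * shifted by 8 * offset (left on little-endian, right on big-endian),
 * roughly:
 *
 *	uint32_t k = 0x01010101u << (offset * 8);	// little-endian
 *	uint32_t hit = (w - k) & ~w & 0x80808080u;
 *
 * A byte can only produce a hit once 1 (or a borrow) is subtracted
 * from it, and the out-of-range bytes below the start of the string
 * now have a zero subtrahend and no lower byte to borrow from, so
 * they can never fake a NUL or a match.
 */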
	or	%r5, %r5, %r4		/* %r5 = c replicated in all four bytes */
	ror	%r4, %r3		/* %r4 = 0x80808080 */
	sub	%r12, %r2, %r7		/* NUL test on the first (partial) word */
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0_ua
	xor	%r6, %r2, %r5		/* bytes matching c become 0x00 */
	ld.a	%r2, [%r0, 4]		/* fetch next word, advancing %r0 */
	sub	%r12, %r6, %r7		/* match test on the XORed word */
	bic	%r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
	and	%r7, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r7, 0, .Loop
	/* Likewise this one. */
	b	.Lfound_char
#else /* __BIG_ENDIAN__ */
	and	%r12, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r12, 0, .Loop
	lsr_s	%r12, %r12, 7
	bic	%r2, %r7, %r6
	b.d	.Lfound_char_b
	and_s	%r2, %r2, %r12
#endif /* ENDIAN */
/* We require this code address to be unaligned for speed... */
.Laligned:
	ld_s	%r2, [%r0]
	or	%r5, %r5, %r4		/* %r5 = c replicated in all four bytes */
	ror	%r4, %r3		/* %r4 = 0x80808080 */
/* ... so that this code address is aligned, for itself and ... */
.Loop:
	sub	%r12, %r2, %r3		/* NUL test: (w - 0x01010101) & ~w & 0x80808080 */
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0
	xor	%r6, %r2, %r5		/* bytes matching c become 0x00 */
	ld.a	%r2, [%r0, 4]		/* fetch next word, advancing %r0 */
	sub	%r12, %r6, %r3		/* match test on the XORed word */
	bic	%r12, %r12, %r6
	and	%r7, %r12, %r4
	breq	%r7, 0, .Loop
/*
 * ... so that this branch is unaligned.
 * Found the searched-for character; %r0 has already advanced to the
 * next word.
 */
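/*
 * Added note on the extraction below: ARC700 has norm (a leading-bit
 * count) but no trailing-zero count, so on little-endian the first hit
 * is located by turning everything below the lowest set bit into ones,
 * (hits - 1) & ~hits, and applying norm to that.  In portable C (using
 * the GCC builtin) the little-endian tail is roughly:
 *
 *	// hits has 0x80 set in the byte positions that matched;
 *	// the lowest set bit is the first match in memory.
 *	size_t idx = (size_t)__builtin_ctz(hits) >> 3;
 *	return (char *)base + idx;	// base: address of this word
 *
 * The code below folds the "+ idx" into %r0, which has already moved
 * one word past base.
 */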
#ifdef __LITTLE_ENDIAN__
/*
 * We only need the information about the first matching byte
 * (i.e. the least significant matching byte) to be exact,
 * hence there is no problem with carry effects.
 */
.Lfound_char:
	sub	%r3, %r7, 1
	bic	%r3, %r3, %r7		/* ones below the lowest match bit */
	norm	%r2, %r3		/* leading-bit count -> byte index */
	sub_s	%r0, %r0, 1
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	sub_s	%r0, %r0, %r2

	.balign 4
.Lfound0_ua:
	mov	%r3, %r7		/* use the head-adjusted 0x01010101 constant */
.Lfound0:
	sub	%r3, %r6, %r3		/* match test on the XORed word */
	bic	%r3, %r3, %r6
	and	%r2, %r3, %r4		/* %r2 = match bits */
	or_s	%r12, %r12, %r2		/* %r12 = NUL bits | match bits */
	sub_s	%r3, %r12, 1
	bic_s	%r3, %r3, %r12		/* ones below the first NUL-or-match bit */
	norm	%r3, %r3
	add_s	%r0, %r0, 3
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3		/* was the first hit a match or the NUL? */
	sub_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.pl	%r0, 0			/* no match before the terminator: return NULL */
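/*
 * Added sketch of the block above, reusing haszero() from the sketch
 * at the top of the file (names illustrative):
 *
 *	uint32_t zero  = haszero(w);		// NUL bytes
 *	uint32_t match = haszero(w ^ cccc);	// bytes equal to c
 *	uint32_t first = zero | match;
 *	size_t   idx   = (size_t)__builtin_ctz(first) >> 3;
 *	if (!(match & (0x80u << (idx * 8))))
 *		return 0;	// terminator before any match
 *	return (char *)base + idx;
 *
 * The asl.f/mov.pl pair does the final test branch-free: the match
 * bits are shifted so that a match in the first hit byte lands in the
 * sign bit, and %r0 is zeroed when it does not.  Note that for
 * strchr(s, '\0') the terminator itself matches, so a pointer to it is
 * returned, as required.
 */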
#else /* __BIG_ENDIAN__ */
.Lfound_char:
	lsr	%r7, %r7, 7
	bic	%r2, %r7, %r6
.Lfound_char_b:
	norm	%r2, %r2		/* leading-bit count -> index of the first match */
	sub_s	%r0, %r0, 4
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	add_s	%r0, %r0, %r2
.Lfound0_ua:
	mov_s	%r3, %r7		/* use the head-adjusted 0x01010101 constant */
.Lfound0:
	asl_s	%r2, %r2, 7
	or	%r7, %r6, %r4
	bic_s	%r12, %r12, %r2
	sub	%r2, %r7, %r3
	or	%r2, %r2, %r6
	bic	%r12, %r2, %r12
	bic.f	%r3, %r4, %r12
	norm	%r3, %r3
	add.pl	%r3, %r3, 1
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3
	add_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.mi	%r0, 0			/* no match before the terminator: return NULL */
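/*
 * Added note: the big-endian tails mirror the little-endian logic, but
 * the first byte in memory is now the most significant one, so once
 * the hit word has been scrubbed of borrow artifacts (the extra
 * lsr/bic steps above), norm can locate the first hit directly,
 * roughly:
 *
 *	size_t idx = (size_t)__builtin_clz(hits) >> 3;
 *	return (char *)base + idx;
 */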
#endif /* ENDIAN */