strcmp.S 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
/* Copyright (C) 2014-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */
  /* Select the assembler support headers for the current build
     environment (Android, glibc, newlib, or a plain toolchain).  */
#if defined (ANDROID_CHANGES)
# include "machine/asm.h"
# include "machine/regdef.h"
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif defined (_COMPILING_NEWLIB)
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Strictly speaking strcmp must not read beyond the end of either
   string.  The word loop always loads a complete word, so it may pick
   up bytes that follow the NUL terminator inside that word, but by
   default it never loads the word after the one holding the terminator:
   DELAY_READ expands to a nop that is placed after each end-of-string
   branch.  Defining ENABLE_READAHEAD makes DELAY_READ empty, which is
   faster but lets the next load run before the end-of-string test is
   resolved; that is technically not allowed by the definition of
   strcmp.  */
#if defined (ENABLE_READAHEAD)
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* The CLZ based tail is opt-in: measurements on a little endian machine
   showed it to be slower, so it is only used when ENABLE_CLZ is defined
   and the ISA revision is at least 2.  */
#if defined (ENABLE_CLZ) && __mips_isa_rev >= 2
# define USE_CLZ
#endif

/* Fallback for asm.h variants that do not provide the L macro.  */
#if !defined (L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif

/* Fallback for asm.h variants that do not provide PTR_ADDIU.  */
#if !defined (PTR_ADDIU)
# ifndef USE_DOUBLE
#  define PTR_ADDIU addiu
# else
#  define PTR_ADDIU daddiu
# endif
#endif

/* The routine can be built under another name by predefining
   STRCMP_NAME.  */
#if !defined (STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
  /* int STRCMP_NAME (const char *s1, const char *s2)

     In:    a0 = s1, a1 = s2
     Out:   v0 = 0 when the strings are equal, otherwise the byte of s1
            minus the byte of s2 (both zero-extended) at the first
            position where they differ or where s1 ends.
     Uses:  v0, v1, t0, t1, t8, t9.  Leaf routine: no stack, no calls.

     The whole body is assembled under ".set noreorder", so the
     instruction written after every branch or jump is its delay slot
     and executes whether or not the branch is taken.  Instruction
     order below is therefore significant.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
        .set    nomips16
        .set    noreorder

        /* Use the byte loop unless both pointers are 4 byte aligned.  */
        or      t0, a0, a1
        andi    t0,0x3
        bne     t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  (The first lui is the
   delay slot of the branch above; the byte loop reloads t8 before it
   reads it, so the extra write is harmless.)  */

        lui     t8, 0x0101
        ori     t8, t8, 0x0101          /* t8 = 0x01010101 */
        lui     t9, 0x7f7f
        ori     t9, 0x7f7f              /* t9 = 0x7f7f7f7f */

/* Compare one aligned word from each string.
   - Words differ: go to L(worddiff) with the words in v0/v1.
   - Words equal:  t0 = (v0 - t8) & ~(v0 | t9) is non-zero exactly when
     v0 contains a zero byte, i.e. the equal strings end in this word,
     so go to L(returnzero).
   The macro ends on a branch; whatever follows the invocation is that
   branch's delay slot.  */
#define STRCMP32(OFFSET) \
        lw      v0, OFFSET(a0); \
        lw      v1, OFFSET(a1); \
        subu    t0, v0, t8; \
        bne     v0, v1, L(worddiff); \
        nor     t1, v0, t9; \
        and     t0, t0, t1; \
        bne     t0, zero, L(returnzero)

/* Eight words per iteration.  DELAY_READ (a nop unless
   ENABLE_READAHEAD is defined) occupies the delay slot of each
   end-of-string branch.  */
L(wordloop):
        STRCMP32(0)
        DELAY_READ
        STRCMP32(4)
        DELAY_READ
        STRCMP32(8)
        DELAY_READ
        STRCMP32(12)
        DELAY_READ
        STRCMP32(16)
        DELAY_READ
        STRCMP32(20)
        DELAY_READ
        STRCMP32(24)
        DELAY_READ
        STRCMP32(28)
        PTR_ADDIU a0, a0, 32            /* delay slot of the last STRCMP32 */
        b       L(wordloop)
        PTR_ADDIU a1, a1, 32            /* delay slot */

/* Equal words that contain the terminator: the strings are equal.  */
L(returnzero):
        j       ra
        move    v0, zero

/* v0 and v1 hold words that differ.  Find the first byte, in string
   order, that either differs or is the terminator of s1, and return the
   difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
        /* t1 = mask with the top bit set in bytes of v0 that are zero.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        /* On big endian the first string byte is the most significant
           one.  The subtract based test used below for little endian
           lets a borrow from a zero byte spill into the byte above it,
           which also marks a 0x01 byte that directly precedes the
           terminator; clz would then select that byte and, for inputs
           such as "\001" against "\001\001", return 0.  Use the carry
           free form, which marks exactly the zero bytes:
           ~(((v0 & t9) + t9) | v0 | t9).  */
        and     t0, v0, t9
        addu    t0, t0, t9
        or      t0, t0, v0
        nor     t1, t0, t9
# else
        /* Spurious marks from borrow propagation can only appear in
           more significant bytes, i.e. after the real terminator on
           little endian, so the cheaper test is sufficient here.  */
        subu    t0, v0, t8
        nor     t1, v0, t9
        and     t1, t0, t1
# endif
        /* t0 = differing bits combined with the zero-byte marks.  */
        xor     t0, v0, v1
        or      t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        /* Byte swap so the first string byte becomes most significant.  */
        wsbh    t0, t0
        rotr    t0, t0, 16
# endif
        /* t1 = 8 * index (in string order) of the first marked byte.  */
        clz     t1, t0
        and     t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        /* Convert to the right-rotate count that brings that byte down
           to bits 7..0: 24 - t1.  */
        neg     t1
        addu    t1, 24
# endif
        rotrv   v0, v0, t1
        rotrv   v1, v1, t1
        and     v0, v0, 0xff
        and     v1, v1, 0xff
        j       ra
        subu    v0, v0, v1
#else /* USE_CLZ */
        /* Test the four byte positions in string order, alternating
           between the t0/t1 and t8/t9 register pairs.  Each exit label
           subtracts the pair that is current when it is taken; the last
           position falls through into L(wexit89).  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        andi    t0, v0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, v1, 0xff
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 8
        srl     t9, v1, 8
        andi    t8, t8, 0xff
        beq     t8, zero, L(wexit89)
        andi    t9, t9, 0xff
        bne     t8, t9, L(wexit89)

        srl     t0, v0, 16
        srl     t1, v1, 16
        andi    t0, t0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, t1, 0xff
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 24
        srl     t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
        srl     t0, v0, 24
        beq     t0, zero, L(wexit01)
        srl     t1, v1, 24
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 16
        srl     t9, v1, 16
        andi    t8, t8, 0xff
        beq     t8, zero, L(wexit89)
        andi    t9, t9, 0xff
        bne     t8, t9, L(wexit89)

        srl     t0, v0, 8
        srl     t1, v1, 8
        andi    t0, t0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, t1, 0xff
        bne     t0, t1, L(wexit01)

        andi    t8, v0, 0xff
        andi    t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
        j       ra
        subu    v0, t8, t9
L(wexit01):
        j       ra
        subu    v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */

/* Compare the bytes at OFFSET using v0/v1 (BYTECMP01) or t8/t9
   (BYTECMP89); leave through the matching exit label when the s1 byte
   is the terminator or the bytes differ.  Each macro ends on a branch
   whose delay slot is the first instruction after the invocation.  */
#define BYTECMP01(OFFSET) \
        lbu     v0, OFFSET(a0); \
        lbu     v1, OFFSET(a1); \
        beq     v0, zero, L(bexit01); \
        nop; \
        bne     v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
        lbu     t8, OFFSET(a0); \
        lbu     t9, OFFSET(a1); \
        beq     t8, zero, L(bexit89); \
        nop; \
        bne     t8, t9, L(bexit89)

/* Unaligned path: eight bytes per iteration.  Alternating the register
   pairs keeps the load that lands in a delay slot from overwriting the
   pair the taken exit is about to subtract.  */
L(byteloop):
        BYTECMP01(0)
        BYTECMP89(1)
        BYTECMP01(2)
        BYTECMP89(3)
        BYTECMP01(4)
        BYTECMP89(5)
        BYTECMP01(6)
        BYTECMP89(7)
        PTR_ADDIU a0, a0, 8             /* delay slot of the last BYTECMP89 */
        b       L(byteloop)
        PTR_ADDIU a1, a1, 8             /* delay slot */

L(bexit01):
        j       ra
        subu    v0, v0, v1
L(bexit89):
        j       ra
        subu    v0, t8, t9

        .set    at
        .set    reorder

END(STRCMP_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif