/* memcmp.S — optimized memcmp for ARC cores */
  1. /*
  2. * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: GPL-2.0+
  5. */
/*
 * Endian-dependent scratch-register naming for the word-compare path:
 * WORD2 holds the second word of each 8-byte chunk, SHIFT is the
 * register whose residual byte count is turned into a bit shift for
 * the tail compare.  The assignment swaps with endianness so the code
 * below can use the same symbolic names in both configurations.
 */
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* __BIG_ENDIAN__ */
#define WORD2 r3
#define SHIFT r2
#endif /* _ENDIAN__ */
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:   r0 = s1, r1 = s2, r2 = n
 * Out:  r0 = 0 if the buffers are equal over n bytes; otherwise a
 *       nonzero value whose sign reflects the first differing byte
 *       (negative when *s1 < *s2, positive when *s1 > *s2).
 *
 * Strategy: when both pointers are 4-byte aligned and n is nonzero,
 * compare 8 bytes (two words) per iteration of an ARC zero-overhead
 * loop, then resolve the tail with shift/mask tricks; otherwise fall
 * back to a 2-bytes-per-iteration byte loop.  Heavy use of delay
 * slots (.d) and of flags that stay live across compare-and-branch
 * instructions — instruction order is load-bearing throughout.
 */
.global memcmp
.align 4
memcmp:
	or	%r12, %r0, %r1			/* merge low bits of both pointers */
	asl_s	%r12, %r12, 30			/* r12 huge iff either pointer misaligned, else 0 */
	sub	%r3, %r2, 1			/* r3 = n - 1 (scratch for loop counts) */
	brls	%r2, %r12, .Lbytewise		/* misaligned, or n == 0 -> byte loop */
	ld	%r4, [%r0, 0]			/* preload first word from each buffer */
	ld	%r5, [%r1, 0]
	lsr.f	%lp_count, %r3, 3		/* (n-1)/8 full 8-byte iterations; flags used below */
	lpne	.Loop_end			/* zero-overhead loop; body skipped if count == 0 */
	ld_s	WORD2, [%r0, 4]			/* second word of the current pair */
	ld_s	%r12, [%r1, 4]
	brne	%r4, %r5, .Leven		/* first words differ */
	ld.a	%r4, [%r0, 8]			/* advance pointers, preload next pair */
	ld.a	%r5, [%r1, 8]
	brne	WORD2, %r12, .Lodd		/* second words differ */
	nop
.Loop_end:
	asl_s	SHIFT, SHIFT, 3			/* residual byte count -> bit count */
	bhs_s	.Last_cmp			/* flags from lsr.f above: short tail -> single-word compare */
	brne	%r4, %r5, .Leven		/* tail spans two words: check the first */
	ld	%r4, [%r0, 4]			/* then load and compare the final word */
	ld	%r5, [%r1, 4]
#ifdef __LITTLE_ENDIAN__
	nop_s
	/* one more load latency cycle */
.Last_cmp:
	xor	%r0, %r4, %r5			/* nonzero bytes mark differing positions */
	bset	%r0, %r0, SHIFT			/* plant a sentinel bit so bytes beyond n are ignored */
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0			/* (x-1) & ~x: mask of bits below the lowest set bit */
	norm	%r1, %r1			/* locate that bit -> position of first differing byte */
	b.d	.Leven_cmp
	and	%r1, %r1, 24			/* round down to a byte-boundary shift (delay slot) */
.Leven:
	xor	%r0, %r4, %r5			/* same lowest-differing-byte search, no sentinel needed */
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0
	norm	%r1, %r1
	/* slow track insn */
	and	%r1, %r1, 24
.Leven_cmp:
	asl	%r2, %r4, %r1			/* shift differing byte of each word to the top */
	asl	%r12, %r5, %r1
	lsr_s	%r2, %r2, 1			/* drop a bit so the byte difference can't overflow sign */
	lsr_s	%r12, %r12, 1
	j_s.d	[%blink]
	sub	%r0, %r2, %r12			/* return difference (delay slot) */
	.balign 4
.Lodd:
	xor	%r0, WORD2, %r12		/* same scheme for a difference in the second word */
	sub_s	%r1, %r0, 1
	bic_s	%r1, %r1, %r0
	norm	%r1, %r1
	/* slow track insn */
	and	%r1, %r1, 24
	asl_s	%r2, %r2, %r1
	asl_s	%r12, %r12, %r1
	lsr_s	%r2, %r2, 1
	lsr_s	%r12, %r12, 1
	j_s.d	[%blink]
	sub	%r0, %r2, %r12			/* return difference (delay slot) */
#else /* __BIG_ENDIAN__ */
.Last_cmp:
	neg_s	SHIFT, SHIFT			/* big-endian: shift out the bytes beyond n */
	lsr	%r4, %r4, SHIFT
	lsr	%r5, %r5, SHIFT
	/* slow track insn */
.Leven:
	sub.f	%r0, %r4, %r5			/* BE words compare like unsigned integers */
	mov.ne	%r0, 1				/* any difference -> provisionally positive */
	j_s.d	[%blink]
	bset.cs	%r0, %r0, 31			/* borrow (s1 < s2) -> force sign bit (delay slot) */
.Lodd:
	cmp_s	WORD2, %r12			/* reached only when the second words differ */
	mov_s	%r0, 1
	j_s.d	[%blink]
	bset.cs	%r0, %r0, 31			/* carry -> negative result (delay slot) */
#endif /* _ENDIAN__ */
	.balign 4
.Lbytewise:
	breq	%r2, 0, .Lnil			/* n == 0 -> buffers trivially equal */
	ldb	%r4, [%r0, 0]			/* preload first byte from each buffer */
	ldb	%r5, [%r1, 0]
	lsr.f	%lp_count, %r3			/* (n-1)/2 two-byte iterations; carry marks parity */
	lpne	.Lbyte_end			/* zero-overhead loop; skipped if count == 0 */
	ldb_s	%r3, [%r0, 1]			/* odd-position byte of the current pair */
	ldb	%r12, [%r1, 1]
	brne	%r4, %r5, .Lbyte_even		/* even-position bytes differ */
	ldb.a	%r4, [%r0, 2]			/* advance pointers, preload next pair */
	ldb.a	%r5, [%r1, 2]
	brne	%r3, %r12, .Lbyte_odd		/* odd-position bytes differ */
	nop
.Lbyte_end:
	bcc	.Lbyte_even			/* carry from lsr.f: selects which tail byte is valid */
	brne	%r4, %r5, .Lbyte_even
	ldb_s	%r3, [%r0, 1]			/* reload final odd-position pair */
	ldb_s	%r12, [%r1, 1]
.Lbyte_odd:
	j_s.d	[%blink]
	sub	%r0, %r3, %r12			/* return odd-byte difference (delay slot) */
.Lbyte_even:
	j_s.d	[%blink]
	sub	%r0, %r4, %r5			/* return even-byte difference (delay slot) */
.Lnil:
	j_s.d	[%blink]
	mov	%r0, 0				/* equal (delay slot) */