#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by vendor
# compiler, and 75% better than GCC [3.4], and in absolute terms is
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.

  13. @X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
  14. "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
  15. $ctx="a0"; # $16
  16. $inp="a1";
  17. $num="a2";
  18. $A="a3";
  19. $B="a4"; # 20
  20. $C="a5";
  21. $D="t8";
  22. $E="t9"; @V=($A,$B,$C,$D,$E);
  23. $t0="t10"; # 24
  24. $t1="t11";
  25. $t2="ra";
  26. $t3="t12";
  27. $K="AT"; # 28
  28. sub BODY_00_19 {
  29. my ($i,$a,$b,$c,$d,$e)=@_;
  30. my $j=$i+1;
  31. $code.=<<___ if ($i==0);
  32. ldq_u @X[0],0+0($inp)
  33. ldq_u @X[1],0+7($inp)
  34. ___
  35. $code.=<<___ if (!($i&1) && $i<14);
  36. ldq_u @X[$i+2],($i+2)*4+0($inp)
  37. ldq_u @X[$i+3],($i+2)*4+7($inp)
  38. ___
  39. $code.=<<___ if (!($i&1) && $i<15);
  40. extql @X[$i],$inp,@X[$i]
  41. extqh @X[$i+1],$inp,@X[$i+1]
  42. or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
  43. srl @X[$i],24,$t0 # vectorized byte swap
  44. srl @X[$i],8,$t2
  45. sll @X[$i],8,$t3
  46. sll @X[$i],24,@X[$i]
  47. zapnot $t0,0x11,$t0
  48. zapnot $t2,0x22,$t2
  49. zapnot @X[$i],0x88,@X[$i]
  50. or $t0,$t2,$t0
  51. zapnot $t3,0x44,$t3
  52. sll $a,5,$t1
  53. or @X[$i],$t0,@X[$i]
  54. addl $K,$e,$e
  55. and $b,$c,$t2
  56. zapnot $a,0xf,$a
  57. or @X[$i],$t3,@X[$i]
  58. srl $a,27,$t0
  59. bic $d,$b,$t3
  60. sll $b,30,$b
  61. extll @X[$i],4,@X[$i+1] # extract upper half
  62. or $t2,$t3,$t2
  63. addl @X[$i],$e,$e
  64. addl $t1,$e,$e
  65. srl $b,32,$t3
  66. zapnot @X[$i],0xf,@X[$i]
  67. addl $t0,$e,$e
  68. addl $t2,$e,$e
  69. or $t3,$b,$b
  70. ___
  71. $code.=<<___ if (($i&1) && $i<15);
  72. sll $a,5,$t1
  73. addl $K,$e,$e
  74. and $b,$c,$t2
  75. zapnot $a,0xf,$a
  76. srl $a,27,$t0
  77. addl @X[$i%16],$e,$e
  78. bic $d,$b,$t3
  79. sll $b,30,$b
  80. or $t2,$t3,$t2
  81. addl $t1,$e,$e
  82. srl $b,32,$t3
  83. zapnot @X[$i],0xf,@X[$i]
  84. addl $t0,$e,$e
  85. addl $t2,$e,$e
  86. or $t3,$b,$b
  87. ___
  88. $code.=<<___ if ($i>=15); # with forward Xupdate
  89. sll $a,5,$t1
  90. addl $K,$e,$e
  91. and $b,$c,$t2
  92. xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
  93. zapnot $a,0xf,$a
  94. addl @X[$i%16],$e,$e
  95. bic $d,$b,$t3
  96. xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
  97. srl $a,27,$t0
  98. addl $t1,$e,$e
  99. or $t2,$t3,$t2
  100. xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
  101. sll $b,30,$b
  102. addl $t0,$e,$e
  103. srl @X[$j%16],31,$t1
  104. addl $t2,$e,$e
  105. srl $b,32,$t3
  106. addl @X[$j%16],@X[$j%16],@X[$j%16]
  107. or $t3,$b,$b
  108. zapnot @X[$i%16],0xf,@X[$i%16]
  109. or $t1,@X[$j%16],@X[$j%16]
  110. ___
  111. }
  112. sub BODY_20_39 {
  113. my ($i,$a,$b,$c,$d,$e)=@_;
  114. my $j=$i+1;
  115. $code.=<<___ if ($i<79); # with forward Xupdate
  116. sll $a,5,$t1
  117. addl $K,$e,$e
  118. zapnot $a,0xf,$a
  119. xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
  120. sll $b,30,$t3
  121. addl $t1,$e,$e
  122. xor $b,$c,$t2
  123. xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
  124. srl $b,2,$b
  125. addl @X[$i%16],$e,$e
  126. xor $d,$t2,$t2
  127. xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
  128. srl @X[$j%16],31,$t1
  129. addl $t2,$e,$e
  130. srl $a,27,$t0
  131. addl @X[$j%16],@X[$j%16],@X[$j%16]
  132. or $t3,$b,$b
  133. addl $t0,$e,$e
  134. or $t1,@X[$j%16],@X[$j%16]
  135. ___
  136. $code.=<<___ if ($i<77);
  137. zapnot @X[$i%16],0xf,@X[$i%16]
  138. ___
  139. $code.=<<___ if ($i==79); # with context fetch
  140. sll $a,5,$t1
  141. addl $K,$e,$e
  142. zapnot $a,0xf,$a
  143. ldl @X[0],0($ctx)
  144. sll $b,30,$t3
  145. addl $t1,$e,$e
  146. xor $b,$c,$t2
  147. ldl @X[1],4($ctx)
  148. srl $b,2,$b
  149. addl @X[$i%16],$e,$e
  150. xor $d,$t2,$t2
  151. ldl @X[2],8($ctx)
  152. srl $a,27,$t0
  153. addl $t2,$e,$e
  154. ldl @X[3],12($ctx)
  155. or $t3,$b,$b
  156. addl $t0,$e,$e
  157. ldl @X[4],16($ctx)
  158. ___
  159. }
  160. sub BODY_40_59 {
  161. my ($i,$a,$b,$c,$d,$e)=@_;
  162. my $j=$i+1;
  163. $code.=<<___; # with forward Xupdate
  164. sll $a,5,$t1
  165. addl $K,$e,$e
  166. zapnot $a,0xf,$a
  167. xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
  168. srl $a,27,$t0
  169. and $b,$c,$t2
  170. and $b,$d,$t3
  171. xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
  172. sll $b,30,$b
  173. addl $t1,$e,$e
  174. xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
  175. srl @X[$j%16],31,$t1
  176. addl $t0,$e,$e
  177. or $t2,$t3,$t2
  178. and $c,$d,$t3
  179. or $t2,$t3,$t2
  180. srl $b,32,$t3
  181. addl @X[$i%16],$e,$e
  182. addl @X[$j%16],@X[$j%16],@X[$j%16]
  183. or $t3,$b,$b
  184. addl $t2,$e,$e
  185. or $t1,@X[$j%16],@X[$j%16]
  186. zapnot @X[$i%16],0xf,@X[$i%16]
  187. ___
  188. }
  189. $code=<<___;
  190. #ifdef __linux__
  191. #include <asm/regdef.h>
  192. #else
  193. #include <asm.h>
  194. #include <regdef.h>
  195. #endif
  196. .text
  197. .set noat
  198. .set noreorder
  199. .globl sha1_block_data_order
  200. .align 5
  201. .ent sha1_block_data_order
  202. sha1_block_data_order:
  203. lda sp,-64(sp)
  204. stq ra,0(sp)
  205. stq s0,8(sp)
  206. stq s1,16(sp)
  207. stq s2,24(sp)
  208. stq s3,32(sp)
  209. stq s4,40(sp)
  210. stq s5,48(sp)
  211. stq fp,56(sp)
  212. .mask 0x0400fe00,-64
  213. .frame sp,64,ra
  214. .prologue 0
  215. ldl $A,0($ctx)
  216. ldl $B,4($ctx)
  217. sll $num,6,$num
  218. ldl $C,8($ctx)
  219. ldl $D,12($ctx)
  220. ldl $E,16($ctx)
  221. addq $inp,$num,$num
  222. .Lloop:
  223. .set noreorder
  224. ldah $K,23170(zero)
  225. zapnot $B,0xf,$B
  226. lda $K,31129($K) # K_00_19
  227. ___
  228. for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
  229. $code.=<<___;
  230. ldah $K,28378(zero)
  231. lda $K,-5215($K) # K_20_39
  232. ___
  233. for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
  234. $code.=<<___;
  235. ldah $K,-28900(zero)
  236. lda $K,-17188($K) # K_40_59
  237. ___
  238. for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
  239. $code.=<<___;
  240. ldah $K,-13725(zero)
  241. lda $K,-15914($K) # K_60_79
  242. ___
  243. for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
  244. $code.=<<___;
  245. addl @X[0],$A,$A
  246. addl @X[1],$B,$B
  247. addl @X[2],$C,$C
  248. addl @X[3],$D,$D
  249. addl @X[4],$E,$E
  250. stl $A,0($ctx)
  251. stl $B,4($ctx)
  252. addq $inp,64,$inp
  253. stl $C,8($ctx)
  254. stl $D,12($ctx)
  255. stl $E,16($ctx)
  256. cmpult $inp,$num,$t1
  257. bne $t1,.Lloop
  258. .set noreorder
  259. ldq ra,0(sp)
  260. ldq s0,8(sp)
  261. ldq s1,16(sp)
  262. ldq s2,24(sp)
  263. ldq s3,32(sp)
  264. ldq s4,40(sp)
  265. ldq s5,48(sp)
  266. ldq fp,56(sp)
  267. lda sp,64(sp)
  268. ret (ra)
  269. .end sha1_block_data_order
  270. .ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
  271. .align 2
  272. ___
  273. $output=shift and open STDOUT,">$output";
  274. print $code;
  275. close STDOUT;