sha1-mips.pl 10 KB


  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  4. # project. The module is, however, dual licensed under OpenSSL and
  5. # CRYPTOGAMS licenses depending on where you obtain it. For further
  6. # details see http://www.openssl.org/~appro/cryptogams/.
  7. # ====================================================================
  8. # SHA1 block procedure for MIPS.
  9. # Performance improvement is 30% on unaligned input. The "secret" is
  10. # to deploy lwl/lwr pair to load unaligned input. One could have
  11. # vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
  12. # compatible subroutine. There is room for minor optimization on
  13. # little-endian platforms...
  14. # September 2012.
  15. #
  16. # Add MIPS32r2 code (>25% less instructions).
  17. ######################################################################
  # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
  # widely used. Then there is a new contender: NUBI. It appears that if
  # one picks the latter, it's possible to arrange code in an ABI-neutral
  # manner. Therefore let's stick to the NUBI register layout:
  22. #
  {
      # NUBI register layout: every symbolic name is bound to the string
      # "$<n>", where <n> is the hardware register number it occupies.
      my @r = map("\$$_", (0..31));

      ($zero, $at, $t0, $t1, $t2)                = @r[0..2, 24, 25];
      ($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7)   = @r[4..11];
      ($s0, $s1, $s2, $s3, $s4, $s5,
       $s6, $s7, $s8, $s9, $s10, $s11)           = @r[12..23];
      ($gp, $tp, $sp, $fp, $ra)                  = @r[3, 28..31];
  }
  27. #
  # The return value is placed in $a0. The following coding rules facilitate
  # interoperability:
  30. #
  31. # - never ever touch $tp, "thread pointer", former $gp;
  32. # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
  33. # old code];
  34. # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
  35. #
  36. # For reference here is register layout for N32/64 MIPS ABIs:
  37. #
  38. # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
  39. # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  40. # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
  41. # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
  42. # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
  43. #
  # First command-line argument selects the ABI flavour; default is o32.
  # Supported flavours are o32,n32,64,nubi32,nubi64.
  $flavour = shift || "o32";

  # Pointer arithmetic mnemonics, register save/load mnemonics and the
  # register slot size in bytes. Any flavour containing "64" or "n32" gets
  # the 64-bit set (the d* pointer forms incidentally work even on n32);
  # everything else gets the 32-bit set.
  ($PTR_ADD, $PTR_SUB, $PTR_SLL, $REG_S, $REG_L, $SZREG) =
      ($flavour =~ /64|n32/i)
          ? ("dadd", "dsub", "dsll", "sd", "ld", 8)
          : ("add",  "sub",  "sll",  "sw", "lw", 4);
  60. #
  61. # <appro@openssl.org>
  62. #
  63. ######################################################################
  # Endianness probe: when CC is set, feed the token MIPSEL through the C
  # preprocessor. The target is treated as big-endian only if the token
  # comes back unchanged (presumably little-endian toolchains predefine
  # MIPSEL as a macro, so it gets replaced there -- verify for your CC).
  $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});

  # The last argument that looks like "name.ext" is the output file.
  for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }

  # Redirect STDOUT only when an output file was actually named, and fail
  # loudly if it cannot be created instead of carrying on without the file.
  if (defined($output)) {
      open(STDOUT, ">", $output) or die "can't open $output: $!";
  }

  # Without CC fall back to the byte order of the machine running perl.
  if (!defined($big_endian)) {
      $big_endian=(unpack('L',pack('N',1))==1);
  }

  # offsets of the Most and Least Significant Bytes
  $MSB=$big_endian?0:3;
  $LSB=3&~$MSB;

  @X=map("\$$_",(8..23));	# a4-a7,s0-s11: 16-word message schedule

  $ctx=$a0;
  $inp=$a1;
  $num=$a2;
  $A="\$1";
  $B="\$2";
  $C="\$3";
  $D="\$7";
  $E="\$24";	@V=($A,$B,$C,$D,$E);	# working variables, rotated per round
  $t0="\$25";
  $t1=$num;	# $num is offloaded to stack
  $t2="\$30";	# fp
  $K="\$31";	# ra
  sub BODY_00_14 {
      # Append the code for round $i (the driver uses it for rounds 0..14)
      # to $code.
      #   $i           round number; also the index of the message word used
      #   $va .. $ve   register names currently holding a,b,c,d,e
      # The round adds K, rol(a,5), ((c^d)&b)^d and X[i] into e and rotates
      # b right by 2. Interleaved with it, word $i+1 is fetched from the
      # input with an lwl/lwr pair. On little-endian targets word $i is
      # byte-swapped first.
      my ($i, $va, $vb, $vc, $vd, $ve) = @_;
      my $n  = $i + 1;		# index of the word being prefetched
      my $xi = $X[$i];		# register holding word $i
      my $xn = $X[$n];		# register receiving word $i+1

      if (!$big_endian) {
          $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh $xi,$xi # byte swap($i)
rotr $xi,$xi,16
#else
srl $t0,$xi,24 # byte swap($i)
srl $t1,$xi,8
andi $t2,$xi,0xFF00
sll $xi,$xi,24
andi $t1,0xFF00
sll $t2,$t2,8
or $xi,$t0
or $t1,$t2
or $xi,$t1
#endif
___
      }

      $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $ve,$K # $i
xor $t0,$vc,$vd
rotr $t1,$va,27
lwl $xn,$n*4+$MSB($inp)
and $t0,$vb
addu $ve,$t1
lwr $xn,$n*4+$LSB($inp)
xor $t0,$vd
addu $ve,$xi
rotr $vb,$vb,2
addu $ve,$t0
#else
lwl $xn,$n*4+$MSB($inp)
sll $t0,$va,5 # $i
addu $ve,$K
lwr $xn,$n*4+$LSB($inp)
srl $t1,$va,27
addu $ve,$t0
xor $t0,$vc,$vd
addu $ve,$t1
sll $t2,$vb,30
and $t0,$vb
srl $vb,$vb,2
xor $t0,$vd
addu $ve,$xi
or $vb,$t2
addu $ve,$t0
#endif
___
  }
  sub BODY_15_19 {
      # Append the code for round $i (the driver uses it for rounds 15..19)
      # to $code.
      #   $i           round number
      #   $va .. $ve   register names currently holding a,b,c,d,e
      # Same round function as rounds 0..14, ((c^d)&b)^d, but instead of
      # loading input it computes the next schedule word in place:
      #   X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), j = $i+1,
      # with all indices taken modulo 16. Word 15 is the last one loaded
      # from the input, so the little-endian byte swap is emitted only
      # when $i is 15.
      my ($i, $va, $vb, $vc, $vd, $ve) = @_;
      my $n  = $i + 1;
      my $xi = $X[$i % 16];	# word consumed by this round
      my ($xn, $x2, $x8, $x13) = map { $X[($n + $_) % 16] } (0, 2, 8, 13);

      if (!$big_endian && $i == 15) {
          $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh $xi,$xi # byte swap($i)
rotr $xi,$xi,16
#else
srl $t0,$xi,24 # byte swap($i)
srl $t1,$xi,8
andi $t2,$xi,0xFF00
sll $xi,$xi,24
andi $t1,0xFF00
sll $t2,$t2,8
or $xi,$t0
or $xi,$t1
or $xi,$t2
#endif
___
      }

      $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $ve,$K # $i
xor $xn,$x2
xor $t0,$vc,$vd
rotr $t1,$va,27
xor $xn,$x8
and $t0,$vb
addu $ve,$t1
xor $xn,$x13
xor $t0,$vd
addu $ve,$xi
rotr $xn,$xn,31
rotr $vb,$vb,2
addu $ve,$t0
#else
xor $xn,$x2
sll $t0,$va,5 # $i
addu $ve,$K
srl $t1,$va,27
addu $ve,$t0
xor $xn,$x8
xor $t0,$vc,$vd
addu $ve,$t1
xor $xn,$x13
sll $t2,$vb,30
and $t0,$vb
srl $t1,$xn,31
addu $xn,$xn
srl $vb,$vb,2
xor $t0,$vd
or $xn,$t1
addu $ve,$xi
or $vb,$t2
addu $ve,$t0
#endif
___
  }
  sub BODY_20_39 {
      # Append the code for one parity round (f = b^c^d) to $code. The
      # driver uses it for rounds 20..39 and again for rounds 60..79.
      #   $i           round number
      #   $va .. $ve   register names currently holding a,b,c,d,e
      # Rounds before 79 also compute the next schedule word
      #   X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), j = $i+1 (mod 16).
      # Round 79 needs no further schedule word; it instead loads the five
      # words at offsets 0..16 of the context into X[0..4].
      my ($i, $va, $vb, $vc, $vd, $ve) = @_;
      my $n  = $i + 1;
      my $xi = $X[$i % 16];	# word consumed by this round
      my ($xn, $x2, $x8, $x13) = map { $X[($n + $_) % 16] } (0, 2, 8, 13);

      if ($i < 79) {
          $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor $xn,$x2
addu $ve,$K # $i
rotr $t1,$va,27
xor $xn,$x8
xor $t0,$vc,$vd
addu $ve,$t1
xor $xn,$x13
xor $t0,$vb
addu $ve,$xi
rotr $xn,$xn,31
rotr $vb,$vb,2
addu $ve,$t0
#else
xor $xn,$x2
sll $t0,$va,5 # $i
addu $ve,$K
srl $t1,$va,27
addu $ve,$t0
xor $xn,$x8
xor $t0,$vc,$vd
addu $ve,$t1
xor $xn,$x13
sll $t2,$vb,30
xor $t0,$vb
srl $t1,$xn,31
addu $xn,$xn
srl $vb,$vb,2
addu $ve,$xi
or $xn,$t1
or $vb,$t2
addu $ve,$t0
#endif
___
      }
      elsif ($i == 79) {
          $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $X[0],0($ctx)
addu $ve,$K # $i
lw $X[1],4($ctx)
rotr $t1,$va,27
lw $X[2],8($ctx)
xor $t0,$vc,$vd
addu $ve,$t1
lw $X[3],12($ctx)
xor $t0,$vb
addu $ve,$xi
lw $X[4],16($ctx)
rotr $vb,$vb,2
addu $ve,$t0
#else
lw $X[0],0($ctx)
sll $t0,$va,5 # $i
addu $ve,$K
lw $X[1],4($ctx)
srl $t1,$va,27
addu $ve,$t0
lw $X[2],8($ctx)
xor $t0,$vc,$vd
addu $ve,$t1
lw $X[3],12($ctx)
sll $t2,$vb,30
xor $t0,$vb
lw $X[4],16($ctx)
srl $vb,$vb,2
addu $ve,$xi
or $vb,$t2
addu $ve,$t0
#endif
___
      }
  }
  sub BODY_40_59 {
      # Append the code for round $i (the driver uses it for rounds 40..59)
      # to $code.
      #   $i           round number
      #   $va .. $ve   register names currently holding a,b,c,d,e
      # The majority function is accumulated into e in two pieces,
      # (c&d) and ((c^d)&b), alongside K, rol(a,5) and X[i]. The next
      # schedule word is computed in place as
      #   X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), j = $i+1 (mod 16).
      # Nothing is emitted for $i >= 79.
      my ($i, $va, $vb, $vc, $vd, $ve) = @_;
      my $n  = $i + 1;
      my $xi = $X[$i % 16];	# word consumed by this round
      my ($xn, $x2, $x8, $x13) = map { $X[($n + $_) % 16] } (0, 2, 8, 13);

      if ($i < 79) {
          $code .= <<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $ve,$K # $i
and $t0,$vc,$vd
xor $xn,$x2
rotr $t1,$va,27
addu $ve,$t0
xor $xn,$x8
xor $t0,$vc,$vd
addu $ve,$t1
xor $xn,$x13
and $t0,$vb
addu $ve,$xi
rotr $xn,$xn,31
rotr $vb,$vb,2
addu $ve,$t0
#else
xor $xn,$x2
sll $t0,$va,5 # $i
addu $ve,$K
srl $t1,$va,27
addu $ve,$t0
xor $xn,$x8
and $t0,$vc,$vd
addu $ve,$t1
xor $xn,$x13
sll $t2,$vb,30
addu $ve,$t0
srl $t1,$xn,31
xor $t0,$vc,$vd
addu $xn,$xn
and $t0,$vb
srl $vb,$vb,2
or $xn,$t1
addu $ve,$xi
or $vb,$t2
addu $ve,$t0
#endif
___
      }
  }
  # Stack frame size in register-sized slots: large enough to accommodate
  # the NUBI saved registers (15 of them) plus slot 0.
  $FRAMESIZE = 16;

  # Value for the .mask directive: bit <n> set means register $<n> is saved.
  # NUBI flavours additionally save \$12-\$15 and \$3.
  $SAVED_REGS_MASK = 0xc0ff0000;
  $SAVED_REGS_MASK = 0xc0fff008 if ($flavour =~ /nubi/i);

  # Preamble, symbol declaration and frame allocation.
  $code = <<___;
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif
.text
.set noat
.set noreorder
.align 5
.globl sha1_block_data_order
.ent sha1_block_data_order
sha1_block_data_order:
.frame $sp,$FRAMESIZE*$SZREG,$ra
.mask $SAVED_REGS_MASK,-$SZREG
.set noreorder
$PTR_SUB $sp,$FRAMESIZE*$SZREG
___

  {
      # Registers are stored from the top slot of the frame downwards.
      # The second group is only saved for NUBI flavours, which keeps the
      # non-NUBI prologue short.
      my @to_save = ($ra, $fp, $s11, $s10, $s9, $s8, $s7, $s6, $s5, $s4);
      push(@to_save, $s3, $s2, $s1, $s0, $gp) if ($flavour =~ /nubi/i);

      my $slot = 0;
      foreach my $reg (@to_save) {
          $slot++;
          $code .= "$REG_S $reg,($FRAMESIZE-$slot)*$SZREG($sp)\n";
      }
  }
  # Loop set-up: turn the block count in $num into the end-of-input pointer
  # (inp + num*64) and park it in stack slot 0, because the $num register
  # doubles as $t1 inside the rounds. Then load the five state words; the
  # load of E follows the branch to .Loop under .set noreorder.
  # At the top of the loop, word 0 of the block is fetched with lwl/lwr
  # while the first round constant is built in $K.
  $code .= <<___;
$PTR_SLL $num,6
$PTR_ADD $num,$inp
$REG_S $num,0($sp)
lw $A,0($ctx)
lw $B,4($ctx)
lw $C,8($ctx)
lw $D,12($ctx)
b .Loop
lw $E,16($ctx)
.align 4
.Loop:
.set reorder
lwl @X[0],$MSB($inp)
lui $K,0x5a82
lwr @X[0],$LSB($inp)
ori $K,0x7999 # K_00_19
___

  # The 80 rounds, stage by stage. Each entry is:
  #   [ first round NOT in the stage, generator, K high half, K low half, tag ]
  # Stages without a constant keep using the value already in $K.
  # After every round @V is rotated right by one, so that the register
  # that held e becomes a, a becomes b, and so on.
  $i = 0;
  foreach my $stage (
      [ 15, \&BODY_00_14 ],
      [ 20, \&BODY_15_19 ],
      [ 40, \&BODY_20_39, "0x6ed9", "0xeba1", "K_20_39" ],
      [ 60, \&BODY_40_59, "0x8f1b", "0xbcdc", "K_40_59" ],
      [ 80, \&BODY_20_39, "0xca62", "0xc1d6", "K_60_79" ],
  ) {
      my ($stop, $emit, $hi, $lo, $tag) = @$stage;

      $code .= "lui $K,$hi\nori $K,$lo # $tag\n" if (defined($hi));

      while ($i < $stop) {
          $emit->($i, @V);
          unshift(@V, pop(@V));
          $i++;
      }
  }
  # Loop tail: advance the input pointer by one 64-byte block, reload the
  # end-of-input pointer from stack slot 0, add X[0..4] into A..E, store
  # the result back to the context and loop until the input pointer
  # reaches the end pointer.
  $code .= <<___;
$PTR_ADD $inp,64
$REG_L $num,0($sp)
addu $A,$X[0]
addu $B,$X[1]
sw $A,0($ctx)
addu $C,$X[2]
addu $D,$X[3]
sw $B,4($ctx)
addu $E,$X[4]
sw $C,8($ctx)
sw $D,12($ctx)
sw $E,16($ctx)
.set noreorder
bne $inp,$num,.Loop
nop
.set noreorder
___

  {
      # Reload the saved registers from the same slots the prologue used:
      # the top slot of the frame downwards, second group only for NUBI.
      my @to_restore = ($ra, $fp, $s11, $s10, $s9, $s8, $s7, $s6, $s5, $s4);
      push(@to_restore, $s3, $s2, $s1, $s0, $gp) if ($flavour =~ /nubi/i);

      my $slot = 0;
      foreach my $reg (@to_restore) {
          $slot++;
          $code .= "$REG_L $reg,($FRAMESIZE-$slot)*$SZREG($sp)\n";
      }
  }

  # Return; the frame is released by the instruction following jr.
  $code .= <<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end sha1_block_data_order
.rdata
.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
  # Write out the generated assembly. Both calls are checked: output is
  # buffered, so a failed write (e.g. a full disk) may only be reported by
  # close, and an unchecked failure would leave a truncated file behind
  # while the script still exits successfully.
  print STDOUT $code or die "error writing output: $!";
  close STDOUT or die "error closing STDOUT: $!";