aes-mips.pl 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122
  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  4. # project. The module is, however, dual licensed under OpenSSL and
  5. # CRYPTOGAMS licenses depending on where you obtain it. For further
  6. # details see http://www.openssl.org/~appro/cryptogams/.
  7. # ====================================================================
  8. # AES for MIPS
  9. # October 2010
  10. #
  11. # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
  12. # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
  13. # faster than gcc-generated code, which is not very impressive. But
  14. # recall that compressed S-box requires extra processing, namely
  15. # additional rotations. Rotations are implemented with lwl/lwr pairs,
  16. # which are normally used for loading unaligned data. Another cool
  17. # thing about this module is its endian neutrality, which means that
  18. # it processes data without ever changing byte order...
  19. # September 2012
  20. #
  21. # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
  22. # ~25% fewer instructions) code. Note that there is no run-time switch;
  23. # instead, the code path is chosen at preprocessing time: pass -mips32r2
  24. # and/or -msmartmips.
  25. ######################################################################
  26. # There are a number of MIPS ABIs in use; O32 and N32/64 are most
  27. # widely used. Then there is a new contender: NUBI. It appears that if
  28. # one picks the latter, it's possible to arrange code in ABI neutral
  29. # manner. Therefore let's stick to NUBI register layout:
  30. #
  # Register aliases; every variable holds the string "$<number>" so it can be
  # interpolated straight into the generated assembly.
  # $zero=$0, $at=$1, $t0=$2, $t1=$24, $t2=$25
  31. ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
  # $a0-$a7 are $4-$11
  32. ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  # $s0-$s11 are $12-$23
  33. ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
  # $gp=$3, $tp=$28, $sp=$29, $fp=$30, $ra=$31
  34. ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
  35. #
  36. # The return value is placed in $a0. The following coding rules facilitate
  37. # interoperability:
  38. #
  39. # - never ever touch $tp, "thread pointer", former $gp;
  40. # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
  41. # old code];
  42. # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
  43. #
  44. # For reference here is register layout for N32/64 MIPS ABIs:
  45. #
  46. # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
  47. # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
  48. # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
  49. # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
  50. # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
  51. #
  # Select the target ABI. The first command-line argument names the flavour
  # and defaults to "o32" when it is absent.
  52. $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
  # Flavours whose name contains "64" or "n32" get the doubleword ("d"-prefixed)
  # pointer instructions, sd/ld for register save/restore and 8-byte save slots.
  53. if ($flavour =~ /64|n32/i) {
  54. $PTR_ADD="dadd"; # incidentally works even on n32
  55. $PTR_SUB="dsub"; # incidentally works even on n32
  56. $PTR_INS="dins";
  57. $REG_S="sd";
  58. $REG_L="ld";
  59. $PTR_SLL="dsll"; # incidentally works even on n32
  60. $SZREG=8;
  61. } else {
  # Every other flavour gets the 32-bit instructions and 4-byte save slots.
  62. $PTR_ADD="add";
  63. $PTR_SUB="sub";
  64. $PTR_INS="ins";
  65. $REG_S="sw";
  66. $REG_L="lw";
  67. $PTR_SLL="sll";
  68. $SZREG=4;
  69. }
  # Register passed to the .cpload/.cpsetup directives emitted further down:
  # $t0 ($2) for NUBI flavours, $t2 ($25) for all others.
  70. $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
  71. #
  72. # <appro@openssl.org>
  73. #
  74. ######################################################################
  # When $CC is set, ask that compiler about the target byte order: feed the
  # token MIPSEL through its preprocessor and treat the target as big-endian
  # if the token comes back unexpanded.
  # NOTE(review): presumably little-endian MIPS toolchains predefine MIPSEL so
  # the token gets replaced -- confirm for the compilers in use.
  75. $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
  # The output file is the last remaining argument that looks like "name.ext".
  76. for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
  # NOTE(review): the result of open is not checked, and $output may be
  # undefined when no argument matched.
  77. open STDOUT,">$output";
  # Without $CC, fall back to the byte order of the machine running the script:
  # pack 'N' is big-endian, unpack 'L' is native, so they agree on big-endian hosts.
  78. if (!defined($big_endian))
  79. { $big_endian=(unpack('L',pack('N',1))==1); }
  # NOTE(review): the output argument is searched for a second time here (this
  # loop consumes @ARGV up to the first match) and STDOUT is reopened. This
  # looks redundant with the open above -- confirm before removing either.
  80. while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
  81. open STDOUT,">$output";
  # Byte offsets used with the lwl/lwr and swl/swr pairs on the input and
  # output blocks in the generated code.
  82. my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
  # Start the generated assembly: switch to .text, include openssl/fipssyms.h
  # when OPENSSL_FIPSCANISTER is defined, define _MIPS_ARCH_MIPS32R2 whenever
  # __mips_smartmips is defined, request ".option pic2" (except for __mips_eabi
  # and non-PIC __vxworks builds) and set "noat".
  83. $code.=<<___;
  84. .text
  85. #ifdef OPENSSL_FIPSCANISTER
  86. # include <openssl/fipssyms.h>
  87. #endif
  88. #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
  89. #define _MIPS_ARCH_MIPS32R2
  90. #endif
  91. #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
  92. .option pic2
  93. #endif
  94. .set noat
  95. ___
  # Lexical scope holding the register assignment shared by the AES routines
  # emitted below.
  96. {{{
  # Stack frame of the public wrappers: 16 register-sized slots.
  97. my $FRAMESIZE=16*$SZREG;
  # Value for the .mask directive: bits 16-23, 30 and 31 always; NUBI flavours
  # add bits 12-15 and bit 3, matching the extra registers they save.
  98. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
  # Arguments live in $a0-$a3 (input, output, key, table); the four state
  # words live in $a4-$a7. These $s0-$s3 shadow the script-level ones.
  99. my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
  # Index/address temporaries: $1, $2, $24, $25. This line still reads the
  # script-level $t0-$t2; they are redefined on the next line.
  100. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
  # Twelve data temporaries in $12-$23, shadowing the script-level $t0-$t2.
  101. my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
  # Running round-key pointer in $gp ($3) and round counter in $fp ($30).
  102. my ($key0,$cnt)=($gp,$fp);
  103. # instruction ordering is "stolen" from output from MIPSpro assembler
  104. # invoked with -mips3 -O3 arguments...
  # Emit _mips_AES_encrypt, an internal routine (no .globl) that expects the
  # block in $s0-$s3, the key pointer in $key and the table pointer in $Tbl,
  # and leaves its result in $s0-$s3.
  # It XORs in the first four key words, then repeats .Loop_enc until the
  # counter (the word at offset 240 of the key, less one) reaches zero; the
  # code after the loop does the last round with byte loads (lbu) from the
  # same table.
  # The C preprocessor picks one of three loop bodies: SmartMIPS (ext/lwxs),
  # MIPS32R2/MIPS64R2 (lw plus rotr) or the generic one (lwl/lwr pairs).
  # NOTE(review): _xtr, _ins and _ins2 do not look like real MIPS mnemonics;
  # presumably they are rewritten by post-processing of $code further down the
  # script, which is not visible here -- confirm.
  # The tail of this here-document also opens the public AES_encrypt wrapper.
  105. $code.=<<___;
  106. .align 5
  107. .ent _mips_AES_encrypt
  108. _mips_AES_encrypt:
  109. .frame $sp,0,$ra
  110. .set reorder
  111. lw $t0,0($key)
  112. lw $t1,4($key)
  113. lw $t2,8($key)
  114. lw $t3,12($key)
  115. lw $cnt,240($key)
  116. $PTR_ADD $key0,$key,16
  117. xor $s0,$t0
  118. xor $s1,$t1
  119. xor $s2,$t2
  120. xor $s3,$t3
  121. sub $cnt,1
  122. #if defined(__mips_smartmips)
  123. ext $i0,$s1,16,8
  124. .Loop_enc:
  125. ext $i1,$s2,16,8
  126. ext $i2,$s3,16,8
  127. ext $i3,$s0,16,8
  128. lwxs $t0,$i0($Tbl) # Te1[s1>>16]
  129. ext $i0,$s2,8,8
  130. lwxs $t1,$i1($Tbl) # Te1[s2>>16]
  131. ext $i1,$s3,8,8
  132. lwxs $t2,$i2($Tbl) # Te1[s3>>16]
  133. ext $i2,$s0,8,8
  134. lwxs $t3,$i3($Tbl) # Te1[s0>>16]
  135. ext $i3,$s1,8,8
  136. lwxs $t4,$i0($Tbl) # Te2[s2>>8]
  137. ext $i0,$s3,0,8
  138. lwxs $t5,$i1($Tbl) # Te2[s3>>8]
  139. ext $i1,$s0,0,8
  140. lwxs $t6,$i2($Tbl) # Te2[s0>>8]
  141. ext $i2,$s1,0,8
  142. lwxs $t7,$i3($Tbl) # Te2[s1>>8]
  143. ext $i3,$s2,0,8
  144. lwxs $t8,$i0($Tbl) # Te3[s3]
  145. ext $i0,$s0,24,8
  146. lwxs $t9,$i1($Tbl) # Te3[s0]
  147. ext $i1,$s1,24,8
  148. lwxs $t10,$i2($Tbl) # Te3[s1]
  149. ext $i2,$s2,24,8
  150. lwxs $t11,$i3($Tbl) # Te3[s2]
  151. ext $i3,$s3,24,8
  152. rotr $t0,$t0,8
  153. rotr $t1,$t1,8
  154. rotr $t2,$t2,8
  155. rotr $t3,$t3,8
  156. rotr $t4,$t4,16
  157. rotr $t5,$t5,16
  158. rotr $t6,$t6,16
  159. rotr $t7,$t7,16
  160. xor $t0,$t4
  161. lwxs $t4,$i0($Tbl) # Te0[s0>>24]
  162. xor $t1,$t5
  163. lwxs $t5,$i1($Tbl) # Te0[s1>>24]
  164. xor $t2,$t6
  165. lwxs $t6,$i2($Tbl) # Te0[s2>>24]
  166. xor $t3,$t7
  167. lwxs $t7,$i3($Tbl) # Te0[s3>>24]
  168. rotr $t8,$t8,24
  169. lw $s0,0($key0)
  170. rotr $t9,$t9,24
  171. lw $s1,4($key0)
  172. rotr $t10,$t10,24
  173. lw $s2,8($key0)
  174. rotr $t11,$t11,24
  175. lw $s3,12($key0)
  176. xor $t0,$t8
  177. xor $t1,$t9
  178. xor $t2,$t10
  179. xor $t3,$t11
  180. xor $t0,$t4
  181. xor $t1,$t5
  182. xor $t2,$t6
  183. xor $t3,$t7
  184. sub $cnt,1
  185. $PTR_ADD $key0,16
  186. xor $s0,$t0
  187. xor $s1,$t1
  188. xor $s2,$t2
  189. xor $s3,$t3
  190. .set noreorder
  191. bnez $cnt,.Loop_enc
  192. ext $i0,$s1,16,8
  193. _xtr $i0,$s1,16-2
  194. #else
  195. _xtr $i0,$s1,16-2
  196. .Loop_enc:
  197. _xtr $i1,$s2,16-2
  198. _xtr $i2,$s3,16-2
  199. _xtr $i3,$s0,16-2
  200. and $i0,0x3fc
  201. and $i1,0x3fc
  202. and $i2,0x3fc
  203. and $i3,0x3fc
  204. $PTR_ADD $i0,$Tbl
  205. $PTR_ADD $i1,$Tbl
  206. $PTR_ADD $i2,$Tbl
  207. $PTR_ADD $i3,$Tbl
  208. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  209. lw $t0,0($i0) # Te1[s1>>16]
  210. _xtr $i0,$s2,8-2
  211. lw $t1,0($i1) # Te1[s2>>16]
  212. _xtr $i1,$s3,8-2
  213. lw $t2,0($i2) # Te1[s3>>16]
  214. _xtr $i2,$s0,8-2
  215. lw $t3,0($i3) # Te1[s0>>16]
  216. _xtr $i3,$s1,8-2
  217. #else
  218. lwl $t0,3($i0) # Te1[s1>>16]
  219. lwl $t1,3($i1) # Te1[s2>>16]
  220. lwl $t2,3($i2) # Te1[s3>>16]
  221. lwl $t3,3($i3) # Te1[s0>>16]
  222. lwr $t0,2($i0) # Te1[s1>>16]
  223. _xtr $i0,$s2,8-2
  224. lwr $t1,2($i1) # Te1[s2>>16]
  225. _xtr $i1,$s3,8-2
  226. lwr $t2,2($i2) # Te1[s3>>16]
  227. _xtr $i2,$s0,8-2
  228. lwr $t3,2($i3) # Te1[s0>>16]
  229. _xtr $i3,$s1,8-2
  230. #endif
  231. and $i0,0x3fc
  232. and $i1,0x3fc
  233. and $i2,0x3fc
  234. and $i3,0x3fc
  235. $PTR_ADD $i0,$Tbl
  236. $PTR_ADD $i1,$Tbl
  237. $PTR_ADD $i2,$Tbl
  238. $PTR_ADD $i3,$Tbl
  239. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  240. rotr $t0,$t0,8
  241. rotr $t1,$t1,8
  242. rotr $t2,$t2,8
  243. rotr $t3,$t3,8
  244. # if defined(_MIPSEL)
  245. lw $t4,0($i0) # Te2[s2>>8]
  246. _xtr $i0,$s3,0-2
  247. lw $t5,0($i1) # Te2[s3>>8]
  248. _xtr $i1,$s0,0-2
  249. lw $t6,0($i2) # Te2[s0>>8]
  250. _xtr $i2,$s1,0-2
  251. lw $t7,0($i3) # Te2[s1>>8]
  252. _xtr $i3,$s2,0-2
  253. and $i0,0x3fc
  254. and $i1,0x3fc
  255. and $i2,0x3fc
  256. and $i3,0x3fc
  257. $PTR_ADD $i0,$Tbl
  258. $PTR_ADD $i1,$Tbl
  259. $PTR_ADD $i2,$Tbl
  260. $PTR_ADD $i3,$Tbl
  261. lw $t8,0($i0) # Te3[s3]
  262. $PTR_INS $i0,$s0,2,8
  263. lw $t9,0($i1) # Te3[s0]
  264. $PTR_INS $i1,$s1,2,8
  265. lw $t10,0($i2) # Te3[s1]
  266. $PTR_INS $i2,$s2,2,8
  267. lw $t11,0($i3) # Te3[s2]
  268. $PTR_INS $i3,$s3,2,8
  269. # else
  270. lw $t4,0($i0) # Te2[s2>>8]
  271. $PTR_INS $i0,$s3,2,8
  272. lw $t5,0($i1) # Te2[s3>>8]
  273. $PTR_INS $i1,$s0,2,8
  274. lw $t6,0($i2) # Te2[s0>>8]
  275. $PTR_INS $i2,$s1,2,8
  276. lw $t7,0($i3) # Te2[s1>>8]
  277. $PTR_INS $i3,$s2,2,8
  278. lw $t8,0($i0) # Te3[s3]
  279. _xtr $i0,$s0,24-2
  280. lw $t9,0($i1) # Te3[s0]
  281. _xtr $i1,$s1,24-2
  282. lw $t10,0($i2) # Te3[s1]
  283. _xtr $i2,$s2,24-2
  284. lw $t11,0($i3) # Te3[s2]
  285. _xtr $i3,$s3,24-2
  286. and $i0,0x3fc
  287. and $i1,0x3fc
  288. and $i2,0x3fc
  289. and $i3,0x3fc
  290. $PTR_ADD $i0,$Tbl
  291. $PTR_ADD $i1,$Tbl
  292. $PTR_ADD $i2,$Tbl
  293. $PTR_ADD $i3,$Tbl
  294. # endif
  295. rotr $t4,$t4,16
  296. rotr $t5,$t5,16
  297. rotr $t6,$t6,16
  298. rotr $t7,$t7,16
  299. rotr $t8,$t8,24
  300. rotr $t9,$t9,24
  301. rotr $t10,$t10,24
  302. rotr $t11,$t11,24
  303. #else
  304. lwl $t4,2($i0) # Te2[s2>>8]
  305. lwl $t5,2($i1) # Te2[s3>>8]
  306. lwl $t6,2($i2) # Te2[s0>>8]
  307. lwl $t7,2($i3) # Te2[s1>>8]
  308. lwr $t4,1($i0) # Te2[s2>>8]
  309. _xtr $i0,$s3,0-2
  310. lwr $t5,1($i1) # Te2[s3>>8]
  311. _xtr $i1,$s0,0-2
  312. lwr $t6,1($i2) # Te2[s0>>8]
  313. _xtr $i2,$s1,0-2
  314. lwr $t7,1($i3) # Te2[s1>>8]
  315. _xtr $i3,$s2,0-2
  316. and $i0,0x3fc
  317. and $i1,0x3fc
  318. and $i2,0x3fc
  319. and $i3,0x3fc
  320. $PTR_ADD $i0,$Tbl
  321. $PTR_ADD $i1,$Tbl
  322. $PTR_ADD $i2,$Tbl
  323. $PTR_ADD $i3,$Tbl
  324. lwl $t8,1($i0) # Te3[s3]
  325. lwl $t9,1($i1) # Te3[s0]
  326. lwl $t10,1($i2) # Te3[s1]
  327. lwl $t11,1($i3) # Te3[s2]
  328. lwr $t8,0($i0) # Te3[s3]
  329. _xtr $i0,$s0,24-2
  330. lwr $t9,0($i1) # Te3[s0]
  331. _xtr $i1,$s1,24-2
  332. lwr $t10,0($i2) # Te3[s1]
  333. _xtr $i2,$s2,24-2
  334. lwr $t11,0($i3) # Te3[s2]
  335. _xtr $i3,$s3,24-2
  336. and $i0,0x3fc
  337. and $i1,0x3fc
  338. and $i2,0x3fc
  339. and $i3,0x3fc
  340. $PTR_ADD $i0,$Tbl
  341. $PTR_ADD $i1,$Tbl
  342. $PTR_ADD $i2,$Tbl
  343. $PTR_ADD $i3,$Tbl
  344. #endif
  345. xor $t0,$t4
  346. lw $t4,0($i0) # Te0[s0>>24]
  347. xor $t1,$t5
  348. lw $t5,0($i1) # Te0[s1>>24]
  349. xor $t2,$t6
  350. lw $t6,0($i2) # Te0[s2>>24]
  351. xor $t3,$t7
  352. lw $t7,0($i3) # Te0[s3>>24]
  353. xor $t0,$t8
  354. lw $s0,0($key0)
  355. xor $t1,$t9
  356. lw $s1,4($key0)
  357. xor $t2,$t10
  358. lw $s2,8($key0)
  359. xor $t3,$t11
  360. lw $s3,12($key0)
  361. xor $t0,$t4
  362. xor $t1,$t5
  363. xor $t2,$t6
  364. xor $t3,$t7
  365. sub $cnt,1
  366. $PTR_ADD $key0,16
  367. xor $s0,$t0
  368. xor $s1,$t1
  369. xor $s2,$t2
  370. xor $s3,$t3
  371. .set noreorder
  372. bnez $cnt,.Loop_enc
  373. _xtr $i0,$s1,16-2
  374. #endif
  375. .set reorder
  376. _xtr $i1,$s2,16-2
  377. _xtr $i2,$s3,16-2
  378. _xtr $i3,$s0,16-2
  379. and $i0,0x3fc
  380. and $i1,0x3fc
  381. and $i2,0x3fc
  382. and $i3,0x3fc
  383. $PTR_ADD $i0,$Tbl
  384. $PTR_ADD $i1,$Tbl
  385. $PTR_ADD $i2,$Tbl
  386. $PTR_ADD $i3,$Tbl
  387. lbu $t0,2($i0) # Te4[s1>>16]
  388. _xtr $i0,$s2,8-2
  389. lbu $t1,2($i1) # Te4[s2>>16]
  390. _xtr $i1,$s3,8-2
  391. lbu $t2,2($i2) # Te4[s3>>16]
  392. _xtr $i2,$s0,8-2
  393. lbu $t3,2($i3) # Te4[s0>>16]
  394. _xtr $i3,$s1,8-2
  395. and $i0,0x3fc
  396. and $i1,0x3fc
  397. and $i2,0x3fc
  398. and $i3,0x3fc
  399. $PTR_ADD $i0,$Tbl
  400. $PTR_ADD $i1,$Tbl
  401. $PTR_ADD $i2,$Tbl
  402. $PTR_ADD $i3,$Tbl
  403. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  404. # if defined(_MIPSEL)
  405. lbu $t4,2($i0) # Te4[s2>>8]
  406. $PTR_INS $i0,$s0,2,8
  407. lbu $t5,2($i1) # Te4[s3>>8]
  408. $PTR_INS $i1,$s1,2,8
  409. lbu $t6,2($i2) # Te4[s0>>8]
  410. $PTR_INS $i2,$s2,2,8
  411. lbu $t7,2($i3) # Te4[s1>>8]
  412. $PTR_INS $i3,$s3,2,8
  413. lbu $t8,2($i0) # Te4[s0>>24]
  414. _xtr $i0,$s3,0-2
  415. lbu $t9,2($i1) # Te4[s1>>24]
  416. _xtr $i1,$s0,0-2
  417. lbu $t10,2($i2) # Te4[s2>>24]
  418. _xtr $i2,$s1,0-2
  419. lbu $t11,2($i3) # Te4[s3>>24]
  420. _xtr $i3,$s2,0-2
  421. and $i0,0x3fc
  422. and $i1,0x3fc
  423. and $i2,0x3fc
  424. and $i3,0x3fc
  425. $PTR_ADD $i0,$Tbl
  426. $PTR_ADD $i1,$Tbl
  427. $PTR_ADD $i2,$Tbl
  428. $PTR_ADD $i3,$Tbl
  429. # else
  430. lbu $t4,2($i0) # Te4[s2>>8]
  431. _xtr $i0,$s0,24-2
  432. lbu $t5,2($i1) # Te4[s3>>8]
  433. _xtr $i1,$s1,24-2
  434. lbu $t6,2($i2) # Te4[s0>>8]
  435. _xtr $i2,$s2,24-2
  436. lbu $t7,2($i3) # Te4[s1>>8]
  437. _xtr $i3,$s3,24-2
  438. and $i0,0x3fc
  439. and $i1,0x3fc
  440. and $i2,0x3fc
  441. and $i3,0x3fc
  442. $PTR_ADD $i0,$Tbl
  443. $PTR_ADD $i1,$Tbl
  444. $PTR_ADD $i2,$Tbl
  445. $PTR_ADD $i3,$Tbl
  446. lbu $t8,2($i0) # Te4[s0>>24]
  447. $PTR_INS $i0,$s3,2,8
  448. lbu $t9,2($i1) # Te4[s1>>24]
  449. $PTR_INS $i1,$s0,2,8
  450. lbu $t10,2($i2) # Te4[s2>>24]
  451. $PTR_INS $i2,$s1,2,8
  452. lbu $t11,2($i3) # Te4[s3>>24]
  453. $PTR_INS $i3,$s2,2,8
  454. # endif
  455. _ins $t0,16
  456. _ins $t1,16
  457. _ins $t2,16
  458. _ins $t3,16
  459. _ins2 $t0,$t4,8
  460. lbu $t4,2($i0) # Te4[s3]
  461. _ins2 $t1,$t5,8
  462. lbu $t5,2($i1) # Te4[s0]
  463. _ins2 $t2,$t6,8
  464. lbu $t6,2($i2) # Te4[s1]
  465. _ins2 $t3,$t7,8
  466. lbu $t7,2($i3) # Te4[s2]
  467. _ins2 $t0,$t8,24
  468. lw $s0,0($key0)
  469. _ins2 $t1,$t9,24
  470. lw $s1,4($key0)
  471. _ins2 $t2,$t10,24
  472. lw $s2,8($key0)
  473. _ins2 $t3,$t11,24
  474. lw $s3,12($key0)
  475. _ins2 $t0,$t4,0
  476. _ins2 $t1,$t5,0
  477. _ins2 $t2,$t6,0
  478. _ins2 $t3,$t7,0
  479. #else
  480. lbu $t4,2($i0) # Te4[s2>>8]
  481. _xtr $i0,$s0,24-2
  482. lbu $t5,2($i1) # Te4[s3>>8]
  483. _xtr $i1,$s1,24-2
  484. lbu $t6,2($i2) # Te4[s0>>8]
  485. _xtr $i2,$s2,24-2
  486. lbu $t7,2($i3) # Te4[s1>>8]
  487. _xtr $i3,$s3,24-2
  488. and $i0,0x3fc
  489. and $i1,0x3fc
  490. and $i2,0x3fc
  491. and $i3,0x3fc
  492. $PTR_ADD $i0,$Tbl
  493. $PTR_ADD $i1,$Tbl
  494. $PTR_ADD $i2,$Tbl
  495. $PTR_ADD $i3,$Tbl
  496. lbu $t8,2($i0) # Te4[s0>>24]
  497. _xtr $i0,$s3,0-2
  498. lbu $t9,2($i1) # Te4[s1>>24]
  499. _xtr $i1,$s0,0-2
  500. lbu $t10,2($i2) # Te4[s2>>24]
  501. _xtr $i2,$s1,0-2
  502. lbu $t11,2($i3) # Te4[s3>>24]
  503. _xtr $i3,$s2,0-2
  504. and $i0,0x3fc
  505. and $i1,0x3fc
  506. and $i2,0x3fc
  507. and $i3,0x3fc
  508. $PTR_ADD $i0,$Tbl
  509. $PTR_ADD $i1,$Tbl
  510. $PTR_ADD $i2,$Tbl
  511. $PTR_ADD $i3,$Tbl
  512. _ins $t0,16
  513. _ins $t1,16
  514. _ins $t2,16
  515. _ins $t3,16
  516. _ins $t4,8
  517. _ins $t5,8
  518. _ins $t6,8
  519. _ins $t7,8
  520. xor $t0,$t4
  521. lbu $t4,2($i0) # Te4[s3]
  522. xor $t1,$t5
  523. lbu $t5,2($i1) # Te4[s0]
  524. xor $t2,$t6
  525. lbu $t6,2($i2) # Te4[s1]
  526. xor $t3,$t7
  527. lbu $t7,2($i3) # Te4[s2]
  528. _ins $t8,24
  529. lw $s0,0($key0)
  530. _ins $t9,24
  531. lw $s1,4($key0)
  532. _ins $t10,24
  533. lw $s2,8($key0)
  534. _ins $t11,24
  535. lw $s3,12($key0)
  536. xor $t0,$t8
  537. xor $t1,$t9
  538. xor $t2,$t10
  539. xor $t3,$t11
  540. _ins $t4,0
  541. _ins $t5,0
  542. _ins $t6,0
  543. _ins $t7,0
  544. xor $t0,$t4
  545. xor $t1,$t5
  546. xor $t2,$t6
  547. xor $t3,$t7
  548. #endif
  549. xor $s0,$t0
  550. xor $s1,$t1
  551. xor $s2,$t2
  552. xor $s3,$t3
  553. jr $ra
  554. .end _mips_AES_encrypt
  555. .align 5
  556. .globl AES_encrypt
  557. .ent AES_encrypt
  558. AES_encrypt:
  559. .frame $sp,$FRAMESIZE,$ra
  560. .mask $SAVED_REGS_MASK,-$SZREG
  561. .set noreorder
  562. ___
  # Remaining pieces of the public AES_encrypt wrapper; which pieces are
  # emitted depends on $flavour.
  # o32 flavours only: .cpload with the register chosen in $pf.
  563. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  564. .cpload $pf
  565. ___
  # Prologue: allocate the frame and save $ra, $fp and $23 down to $16 (the
  # script-level $s11-$s4) in the top ten slots.
  566. $code.=<<___;
  567. $PTR_SUB $sp,$FRAMESIZE
  568. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
  569. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  570. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
  571. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  572. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  573. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  574. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  575. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  576. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  577. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  578. ___
  # NUBI flavours additionally save $15-$12 and $gp in the next five slots.
  579. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  580. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  581. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  582. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  583. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  584. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  585. ___
  # Every flavour whose name does not contain "o32": .cplocal/.cpsetup instead
  # of .cpload.
  586. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  587. .cplocal $Tbl
  588. .cpsetup $pf,$zero,AES_encrypt
  589. ___
  # Body: load the address of AES_Te into $Tbl, read the 16-byte block at $inp
  # into $s0-$s3 with lwl/lwr pairs, call _mips_AES_encrypt, write $s0-$s3 to
  # $out with swr/swl pairs, then reload the registers saved in the prologue.
  590. $code.=<<___;
  591. .set reorder
  592. la $Tbl,AES_Te # PIC-ified 'load address'
  593. lwl $s0,0+$MSB($inp)
  594. lwl $s1,4+$MSB($inp)
  595. lwl $s2,8+$MSB($inp)
  596. lwl $s3,12+$MSB($inp)
  597. lwr $s0,0+$LSB($inp)
  598. lwr $s1,4+$LSB($inp)
  599. lwr $s2,8+$LSB($inp)
  600. lwr $s3,12+$LSB($inp)
  601. bal _mips_AES_encrypt
  602. swr $s0,0+$LSB($out)
  603. swr $s1,4+$LSB($out)
  604. swr $s2,8+$LSB($out)
  605. swr $s3,12+$LSB($out)
  606. swl $s0,0+$MSB($out)
  607. swl $s1,4+$MSB($out)
  608. swl $s2,8+$MSB($out)
  609. swl $s3,12+$MSB($out)
  610. .set noreorder
  611. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  612. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  613. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  614. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  615. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  616. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  617. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  618. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  619. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  620. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  621. ___
  # NUBI flavours reload the five extra registers saved in their prologue.
  622. $code.=<<___ if ($flavour =~ /nubi/i);
  623. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  624. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  625. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  626. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  627. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  628. ___
  # Return. The frame is released by the instruction placed right after jr,
  # which is still under ".set noreorder".
  629. $code.=<<___;
  630. jr $ra
  631. $PTR_ADD $sp,$FRAMESIZE
  632. .end AES_encrypt
  633. ___
  634. $code.=<<___;
  635. .align 5
  636. .ent _mips_AES_decrypt
  637. _mips_AES_decrypt:
  638. .frame $sp,0,$ra
  639. .set reorder
  640. lw $t0,0($key)
  641. lw $t1,4($key)
  642. lw $t2,8($key)
  643. lw $t3,12($key)
  644. lw $cnt,240($key)
  645. $PTR_ADD $key0,$key,16
  646. xor $s0,$t0
  647. xor $s1,$t1
  648. xor $s2,$t2
  649. xor $s3,$t3
  650. sub $cnt,1
  651. #if defined(__mips_smartmips)
  652. ext $i0,$s3,16,8
  653. .Loop_dec:
  654. ext $i1,$s0,16,8
  655. ext $i2,$s1,16,8
  656. ext $i3,$s2,16,8
  657. lwxs $t0,$i0($Tbl) # Td1[s3>>16]
  658. ext $i0,$s2,8,8
  659. lwxs $t1,$i1($Tbl) # Td1[s0>>16]
  660. ext $i1,$s3,8,8
  661. lwxs $t2,$i2($Tbl) # Td1[s1>>16]
  662. ext $i2,$s0,8,8
  663. lwxs $t3,$i3($Tbl) # Td1[s2>>16]
  664. ext $i3,$s1,8,8
  665. lwxs $t4,$i0($Tbl) # Td2[s2>>8]
  666. ext $i0,$s1,0,8
  667. lwxs $t5,$i1($Tbl) # Td2[s3>>8]
  668. ext $i1,$s2,0,8
  669. lwxs $t6,$i2($Tbl) # Td2[s0>>8]
  670. ext $i2,$s3,0,8
  671. lwxs $t7,$i3($Tbl) # Td2[s1>>8]
  672. ext $i3,$s0,0,8
  673. lwxs $t8,$i0($Tbl) # Td3[s1]
  674. ext $i0,$s0,24,8
  675. lwxs $t9,$i1($Tbl) # Td3[s2]
  676. ext $i1,$s1,24,8
  677. lwxs $t10,$i2($Tbl) # Td3[s3]
  678. ext $i2,$s2,24,8
  679. lwxs $t11,$i3($Tbl) # Td3[s0]
  680. ext $i3,$s3,24,8
  681. rotr $t0,$t0,8
  682. rotr $t1,$t1,8
  683. rotr $t2,$t2,8
  684. rotr $t3,$t3,8
  685. rotr $t4,$t4,16
  686. rotr $t5,$t5,16
  687. rotr $t6,$t6,16
  688. rotr $t7,$t7,16
  689. xor $t0,$t4
  690. lwxs $t4,$i0($Tbl) # Td0[s0>>24]
  691. xor $t1,$t5
  692. lwxs $t5,$i1($Tbl) # Td0[s1>>24]
  693. xor $t2,$t6
  694. lwxs $t6,$i2($Tbl) # Td0[s2>>24]
  695. xor $t3,$t7
  696. lwxs $t7,$i3($Tbl) # Td0[s3>>24]
  697. rotr $t8,$t8,24
  698. lw $s0,0($key0)
  699. rotr $t9,$t9,24
  700. lw $s1,4($key0)
  701. rotr $t10,$t10,24
  702. lw $s2,8($key0)
  703. rotr $t11,$t11,24
  704. lw $s3,12($key0)
  705. xor $t0,$t8
  706. xor $t1,$t9
  707. xor $t2,$t10
  708. xor $t3,$t11
  709. xor $t0,$t4
  710. xor $t1,$t5
  711. xor $t2,$t6
  712. xor $t3,$t7
  713. sub $cnt,1
  714. $PTR_ADD $key0,16
  715. xor $s0,$t0
  716. xor $s1,$t1
  717. xor $s2,$t2
  718. xor $s3,$t3
  719. .set noreorder
  720. bnez $cnt,.Loop_dec
  721. ext $i0,$s3,16,8
  722. _xtr $i0,$s3,16-2
  723. #else
  724. _xtr $i0,$s3,16-2
  725. .Loop_dec:
  726. _xtr $i1,$s0,16-2
  727. _xtr $i2,$s1,16-2
  728. _xtr $i3,$s2,16-2
  729. and $i0,0x3fc
  730. and $i1,0x3fc
  731. and $i2,0x3fc
  732. and $i3,0x3fc
  733. $PTR_ADD $i0,$Tbl
  734. $PTR_ADD $i1,$Tbl
  735. $PTR_ADD $i2,$Tbl
  736. $PTR_ADD $i3,$Tbl
  737. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  738. lw $t0,0($i0) # Td1[s3>>16]
  739. _xtr $i0,$s2,8-2
  740. lw $t1,0($i1) # Td1[s0>>16]
  741. _xtr $i1,$s3,8-2
  742. lw $t2,0($i2) # Td1[s1>>16]
  743. _xtr $i2,$s0,8-2
  744. lw $t3,0($i3) # Td1[s2>>16]
  745. _xtr $i3,$s1,8-2
  746. #else
  747. lwl $t0,3($i0) # Td1[s3>>16]
  748. lwl $t1,3($i1) # Td1[s0>>16]
  749. lwl $t2,3($i2) # Td1[s1>>16]
  750. lwl $t3,3($i3) # Td1[s2>>16]
  751. lwr $t0,2($i0) # Td1[s3>>16]
  752. _xtr $i0,$s2,8-2
  753. lwr $t1,2($i1) # Td1[s0>>16]
  754. _xtr $i1,$s3,8-2
  755. lwr $t2,2($i2) # Td1[s1>>16]
  756. _xtr $i2,$s0,8-2
  757. lwr $t3,2($i3) # Td1[s2>>16]
  758. _xtr $i3,$s1,8-2
  759. #endif
  760. and $i0,0x3fc
  761. and $i1,0x3fc
  762. and $i2,0x3fc
  763. and $i3,0x3fc
  764. $PTR_ADD $i0,$Tbl
  765. $PTR_ADD $i1,$Tbl
  766. $PTR_ADD $i2,$Tbl
  767. $PTR_ADD $i3,$Tbl
  768. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  769. rotr $t0,$t0,8
  770. rotr $t1,$t1,8
  771. rotr $t2,$t2,8
  772. rotr $t3,$t3,8
  773. # if defined(_MIPSEL)
  774. lw $t4,0($i0) # Td2[s2>>8]
  775. _xtr $i0,$s1,0-2
  776. lw $t5,0($i1) # Td2[s3>>8]
  777. _xtr $i1,$s2,0-2
  778. lw $t6,0($i2) # Td2[s0>>8]
  779. _xtr $i2,$s3,0-2
  780. lw $t7,0($i3) # Td2[s1>>8]
  781. _xtr $i3,$s0,0-2
  782. and $i0,0x3fc
  783. and $i1,0x3fc
  784. and $i2,0x3fc
  785. and $i3,0x3fc
  786. $PTR_ADD $i0,$Tbl
  787. $PTR_ADD $i1,$Tbl
  788. $PTR_ADD $i2,$Tbl
  789. $PTR_ADD $i3,$Tbl
  790. lw $t8,0($i0) # Td3[s1]
  791. $PTR_INS $i0,$s0,2,8
  792. lw $t9,0($i1) # Td3[s2]
  793. $PTR_INS $i1,$s1,2,8
  794. lw $t10,0($i2) # Td3[s3]
  795. $PTR_INS $i2,$s2,2,8
  796. lw $t11,0($i3) # Td3[s0]
  797. $PTR_INS $i3,$s3,2,8
  798. #else
  799. lw $t4,0($i0) # Td2[s2>>8]
  800. $PTR_INS $i0,$s1,2,8
  801. lw $t5,0($i1) # Td2[s3>>8]
  802. $PTR_INS $i1,$s2,2,8
  803. lw $t6,0($i2) # Td2[s0>>8]
  804. $PTR_INS $i2,$s3,2,8
  805. lw $t7,0($i3) # Td2[s1>>8]
  806. $PTR_INS $i3,$s0,2,8
  807. lw $t8,0($i0) # Td3[s1]
  808. _xtr $i0,$s0,24-2
  809. lw $t9,0($i1) # Td3[s2]
  810. _xtr $i1,$s1,24-2
  811. lw $t10,0($i2) # Td3[s3]
  812. _xtr $i2,$s2,24-2
  813. lw $t11,0($i3) # Td3[s0]
  814. _xtr $i3,$s3,24-2
  815. and $i0,0x3fc
  816. and $i1,0x3fc
  817. and $i2,0x3fc
  818. and $i3,0x3fc
  819. $PTR_ADD $i0,$Tbl
  820. $PTR_ADD $i1,$Tbl
  821. $PTR_ADD $i2,$Tbl
  822. $PTR_ADD $i3,$Tbl
  823. #endif
  824. rotr $t4,$t4,16
  825. rotr $t5,$t5,16
  826. rotr $t6,$t6,16
  827. rotr $t7,$t7,16
  828. rotr $t8,$t8,24
  829. rotr $t9,$t9,24
  830. rotr $t10,$t10,24
  831. rotr $t11,$t11,24
  832. #else
  833. lwl $t4,2($i0) # Td2[s2>>8]
  834. lwl $t5,2($i1) # Td2[s3>>8]
  835. lwl $t6,2($i2) # Td2[s0>>8]
  836. lwl $t7,2($i3) # Td2[s1>>8]
  837. lwr $t4,1($i0) # Td2[s2>>8]
  838. _xtr $i0,$s1,0-2
  839. lwr $t5,1($i1) # Td2[s3>>8]
  840. _xtr $i1,$s2,0-2
  841. lwr $t6,1($i2) # Td2[s0>>8]
  842. _xtr $i2,$s3,0-2
  843. lwr $t7,1($i3) # Td2[s1>>8]
  844. _xtr $i3,$s0,0-2
  845. and $i0,0x3fc
  846. and $i1,0x3fc
  847. and $i2,0x3fc
  848. and $i3,0x3fc
  849. $PTR_ADD $i0,$Tbl
  850. $PTR_ADD $i1,$Tbl
  851. $PTR_ADD $i2,$Tbl
  852. $PTR_ADD $i3,$Tbl
  853. lwl $t8,1($i0) # Td3[s1]
  854. lwl $t9,1($i1) # Td3[s2]
  855. lwl $t10,1($i2) # Td3[s3]
  856. lwl $t11,1($i3) # Td3[s0]
  857. lwr $t8,0($i0) # Td3[s1]
  858. _xtr $i0,$s0,24-2
  859. lwr $t9,0($i1) # Td3[s2]
  860. _xtr $i1,$s1,24-2
  861. lwr $t10,0($i2) # Td3[s3]
  862. _xtr $i2,$s2,24-2
  863. lwr $t11,0($i3) # Td3[s0]
  864. _xtr $i3,$s3,24-2
  865. and $i0,0x3fc
  866. and $i1,0x3fc
  867. and $i2,0x3fc
  868. and $i3,0x3fc
  869. $PTR_ADD $i0,$Tbl
  870. $PTR_ADD $i1,$Tbl
  871. $PTR_ADD $i2,$Tbl
  872. $PTR_ADD $i3,$Tbl
  873. #endif
  874. xor $t0,$t4
  875. lw $t4,0($i0) # Td0[s0>>24]
  876. xor $t1,$t5
  877. lw $t5,0($i1) # Td0[s1>>24]
  878. xor $t2,$t6
  879. lw $t6,0($i2) # Td0[s2>>24]
  880. xor $t3,$t7
  881. lw $t7,0($i3) # Td0[s3>>24]
  882. xor $t0,$t8
  883. lw $s0,0($key0)
  884. xor $t1,$t9
  885. lw $s1,4($key0)
  886. xor $t2,$t10
  887. lw $s2,8($key0)
  888. xor $t3,$t11
  889. lw $s3,12($key0)
  890. xor $t0,$t4
  891. xor $t1,$t5
  892. xor $t2,$t6
  893. xor $t3,$t7
  894. sub $cnt,1
  895. $PTR_ADD $key0,16
  896. xor $s0,$t0
  897. xor $s1,$t1
  898. xor $s2,$t2
  899. xor $s3,$t3
  900. .set noreorder
  901. bnez $cnt,.Loop_dec
  902. _xtr $i0,$s3,16-2
  903. #endif
  904. .set reorder
  905. lw $t4,1024($Tbl) # prefetch Td4
  906. _xtr $i0,$s3,16
  907. lw $t5,1024+32($Tbl)
  908. _xtr $i1,$s0,16
  909. lw $t6,1024+64($Tbl)
  910. _xtr $i2,$s1,16
  911. lw $t7,1024+96($Tbl)
  912. _xtr $i3,$s2,16
  913. lw $t8,1024+128($Tbl)
  914. and $i0,0xff
  915. lw $t9,1024+160($Tbl)
  916. and $i1,0xff
  917. lw $t10,1024+192($Tbl)
  918. and $i2,0xff
  919. lw $t11,1024+224($Tbl)
  920. and $i3,0xff
  921. $PTR_ADD $i0,$Tbl
  922. $PTR_ADD $i1,$Tbl
  923. $PTR_ADD $i2,$Tbl
  924. $PTR_ADD $i3,$Tbl
  925. lbu $t0,1024($i0) # Td4[s3>>16]
  926. _xtr $i0,$s2,8
  927. lbu $t1,1024($i1) # Td4[s0>>16]
  928. _xtr $i1,$s3,8
  929. lbu $t2,1024($i2) # Td4[s1>>16]
  930. _xtr $i2,$s0,8
  931. lbu $t3,1024($i3) # Td4[s2>>16]
  932. _xtr $i3,$s1,8
  933. and $i0,0xff
  934. and $i1,0xff
  935. and $i2,0xff
  936. and $i3,0xff
  937. $PTR_ADD $i0,$Tbl
  938. $PTR_ADD $i1,$Tbl
  939. $PTR_ADD $i2,$Tbl
  940. $PTR_ADD $i3,$Tbl
  941. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  942. # if defined(_MIPSEL)
  943. lbu $t4,1024($i0) # Td4[s2>>8]
  944. $PTR_INS $i0,$s0,0,8
  945. lbu $t5,1024($i1) # Td4[s3>>8]
  946. $PTR_INS $i1,$s1,0,8
  947. lbu $t6,1024($i2) # Td4[s0>>8]
  948. $PTR_INS $i2,$s2,0,8
  949. lbu $t7,1024($i3) # Td4[s1>>8]
  950. $PTR_INS $i3,$s3,0,8
  951. lbu $t8,1024($i0) # Td4[s0>>24]
  952. _xtr $i0,$s1,0
  953. lbu $t9,1024($i1) # Td4[s1>>24]
  954. _xtr $i1,$s2,0
  955. lbu $t10,1024($i2) # Td4[s2>>24]
  956. _xtr $i2,$s3,0
  957. lbu $t11,1024($i3) # Td4[s3>>24]
  958. _xtr $i3,$s0,0
  959. $PTR_ADD $i0,$Tbl
  960. $PTR_ADD $i1,$Tbl
  961. $PTR_ADD $i2,$Tbl
  962. $PTR_ADD $i3,$Tbl
  963. # else
  964. lbu $t4,1024($i0) # Td4[s2>>8]
  965. _xtr $i0,$s0,24
  966. lbu $t5,1024($i1) # Td4[s3>>8]
  967. _xtr $i1,$s1,24
  968. lbu $t6,1024($i2) # Td4[s0>>8]
  969. _xtr $i2,$s2,24
  970. lbu $t7,1024($i3) # Td4[s1>>8]
  971. _xtr $i3,$s3,24
  972. $PTR_ADD $i0,$Tbl
  973. $PTR_ADD $i1,$Tbl
  974. $PTR_ADD $i2,$Tbl
  975. $PTR_ADD $i3,$Tbl
  976. lbu $t8,1024($i0) # Td4[s0>>24]
  977. $PTR_INS $i0,$s1,0,8
  978. lbu $t9,1024($i1) # Td4[s1>>24]
  979. $PTR_INS $i1,$s2,0,8
  980. lbu $t10,1024($i2) # Td4[s2>>24]
  981. $PTR_INS $i2,$s3,0,8
  982. lbu $t11,1024($i3) # Td4[s3>>24]
  983. $PTR_INS $i3,$s0,0,8
  984. # endif
  985. _ins $t0,16
  986. _ins $t1,16
  987. _ins $t2,16
  988. _ins $t3,16
  989. _ins2 $t0,$t4,8
  990. lbu $t4,1024($i0) # Td4[s1]
  991. _ins2 $t1,$t5,8
  992. lbu $t5,1024($i1) # Td4[s2]
  993. _ins2 $t2,$t6,8
  994. lbu $t6,1024($i2) # Td4[s3]
  995. _ins2 $t3,$t7,8
  996. lbu $t7,1024($i3) # Td4[s0]
  997. _ins2 $t0,$t8,24
  998. lw $s0,0($key0)
  999. _ins2 $t1,$t9,24
  1000. lw $s1,4($key0)
  1001. _ins2 $t2,$t10,24
  1002. lw $s2,8($key0)
  1003. _ins2 $t3,$t11,24
  1004. lw $s3,12($key0)
  1005. _ins2 $t0,$t4,0
  1006. _ins2 $t1,$t5,0
  1007. _ins2 $t2,$t6,0
  1008. _ins2 $t3,$t7,0
  1009. #else
  1010. lbu $t4,1024($i0) # Td4[s2>>8]
  1011. _xtr $i0,$s0,24
  1012. lbu $t5,1024($i1) # Td4[s3>>8]
  1013. _xtr $i1,$s1,24
  1014. lbu $t6,1024($i2) # Td4[s0>>8]
  1015. _xtr $i2,$s2,24
  1016. lbu $t7,1024($i3) # Td4[s1>>8]
  1017. _xtr $i3,$s3,24
  1018. $PTR_ADD $i0,$Tbl
  1019. $PTR_ADD $i1,$Tbl
  1020. $PTR_ADD $i2,$Tbl
  1021. $PTR_ADD $i3,$Tbl
  1022. lbu $t8,1024($i0) # Td4[s0>>24]
  1023. _xtr $i0,$s1,0
  1024. lbu $t9,1024($i1) # Td4[s1>>24]
  1025. _xtr $i1,$s2,0
  1026. lbu $t10,1024($i2) # Td4[s2>>24]
  1027. _xtr $i2,$s3,0
  1028. lbu $t11,1024($i3) # Td4[s3>>24]
  1029. _xtr $i3,$s0,0
  1030. $PTR_ADD $i0,$Tbl
  1031. $PTR_ADD $i1,$Tbl
  1032. $PTR_ADD $i2,$Tbl
  1033. $PTR_ADD $i3,$Tbl
  1034. _ins $t0,16
  1035. _ins $t1,16
  1036. _ins $t2,16
  1037. _ins $t3,16
  1038. _ins $t4,8
  1039. _ins $t5,8
  1040. _ins $t6,8
  1041. _ins $t7,8
  1042. xor $t0,$t4
  1043. lbu $t4,1024($i0) # Td4[s1]
  1044. xor $t1,$t5
  1045. lbu $t5,1024($i1) # Td4[s2]
  1046. xor $t2,$t6
  1047. lbu $t6,1024($i2) # Td4[s3]
  1048. xor $t3,$t7
  1049. lbu $t7,1024($i3) # Td4[s0]
  1050. _ins $t8,24
  1051. lw $s0,0($key0)
  1052. _ins $t9,24
  1053. lw $s1,4($key0)
  1054. _ins $t10,24
  1055. lw $s2,8($key0)
  1056. _ins $t11,24
  1057. lw $s3,12($key0)
  1058. xor $t0,$t8
  1059. xor $t1,$t9
  1060. xor $t2,$t10
  1061. xor $t3,$t11
  1062. _ins $t4,0
  1063. _ins $t5,0
  1064. _ins $t6,0
  1065. _ins $t7,0
  1066. xor $t0,$t4
  1067. xor $t1,$t5
  1068. xor $t2,$t6
  1069. xor $t3,$t7
  1070. #endif
  1071. xor $s0,$t0
  1072. xor $s1,$t1
  1073. xor $s2,$t2
  1074. xor $s3,$t3
  1075. jr $ra
  1076. .end _mips_AES_decrypt
  1077. .align 5
  1078. .globl AES_decrypt # public entry: loads one 16-byte block from inp, calls _mips_AES_decrypt, stores s0-s3 to out
  1079. .ent AES_decrypt
  1080. AES_decrypt:
  1081. .frame $sp,$FRAMESIZE,$ra
  1082. .mask $SAVED_REGS_MASK,-$SZREG
  1083. .set noreorder # prologue instructions are kept in the order written
  1084. ___
  1085. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1086. .cpload $pf
  1087. ___
  1088. $code.=<<___; # prologue shared by every flavour
  1089. $PTR_SUB $sp,$FRAMESIZE # allocate the stack frame
  1090. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) # save the return address, the bal below overwrites ra
  1091. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
  1092. $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) # slots 3-10 hold s11 down to s4
  1093. $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
  1094. $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
  1095. $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
  1096. $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
  1097. $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
  1098. $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
  1099. $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
  1100. ___
  1101. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1102. $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
  1103. $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
  1104. $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
  1105. $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
  1106. $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
  1107. ___
  1108. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1109. .cplocal $Tbl
  1110. .cpsetup $pf,$zero,AES_decrypt
  1111. ___
  1112. $code.=<<___; # body: load the block, decrypt it, store it, then the common part of the epilogue
  1113. .set reorder
  1114. la $Tbl,AES_Td # PIC-ified 'load address'
  1115. lwl $s0,0+$MSB($inp) # lwl/lwr pairs fetch each input word without requiring an aligned inp
  1116. lwl $s1,4+$MSB($inp)
  1117. lwl $s2,8+$MSB($inp)
  1118. lwl $s3,12+$MSB($inp)
  1119. lwr $s0,0+$LSB($inp)
  1120. lwr $s1,4+$LSB($inp)
  1121. lwr $s2,8+$LSB($inp)
  1122. lwr $s3,12+$LSB($inp)
  1123. bal _mips_AES_decrypt # transforms s0-s3 in place; reorder mode leaves the delay slot to the assembler
  1124. swr $s0,0+$LSB($out) # swr/swl pairs write each result word without requiring an aligned out
  1125. swr $s1,4+$LSB($out)
  1126. swr $s2,8+$LSB($out)
  1127. swr $s3,12+$LSB($out)
  1128. swl $s0,0+$MSB($out)
  1129. swl $s1,4+$MSB($out)
  1130. swl $s2,8+$MSB($out)
  1131. swl $s3,12+$MSB($out)
  1132. .set noreorder # epilogue is kept in the order written so the final add stays in the jr delay slot
  1133. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) # reload from the same slots the prologue filled
  1134. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1135. $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
  1136. $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
  1137. $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
  1138. $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
  1139. $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
  1140. $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
  1141. $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
  1142. $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
  1143. ___
  1144. $code.=<<___ if ($flavour =~ /nubi/i); # nubi-only restores, mirroring slots 11-15 of the prologue
  1145. $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
  1146. $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
  1147. $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
  1148. $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
  1149. $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
  1150. ___
  1151. $code.=<<___; # return sequence
  1152. jr $ra
  1153. $PTR_ADD $sp,$FRAMESIZE # delay slot: release the stack frame
  1154. .end AES_decrypt
  1155. ___
  1156. }}}
  1157. {{{ # scope for the key-schedule routines and their register assignments
  1158. my $FRAMESIZE=8*$SZREG; # eight slots: ra, fp and the nubi-only saves of s0-s3 and gp fit in seven
  1159. my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; # value handed to the .mask directives of the wrappers
  1160. my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3); # user key, key length in bits, output schedule, lookup table base
  1161. my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); # up to eight live key words
  1162. my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); # scratch; $i1 shares $t0 with the status value, which is rewritten before return
  1163. my ($rcon,$cnt)=($gp,$fp); # round-constant cursor and loop counter
  1164. $code.=<<___; # whole body of the internal expansion routine
  1165. .align 5
  1166. .ent _mips_AES_set_encrypt_key # internal key expansion; status is left in t0: 0 ok, -1 null inp or key, -2 bad bits
  1167. _mips_AES_set_encrypt_key:
  1168. .frame $sp,0,$ra
  1169. .set noreorder
  1170. beqz $inp,.Lekey_done
  1171. li $t0,-1 # delay slot: preset the status for both null-pointer exits
  1172. beqz $key,.Lekey_done
  1173. $PTR_ADD $rcon,$Tbl,256 # delay slot: round constants are read from 256 bytes past Tbl
  1174. .set reorder
  1175. lwl $rk0,0+$MSB($inp) # load 128 bits
  1176. lwl $rk1,4+$MSB($inp)
  1177. lwl $rk2,8+$MSB($inp)
  1178. lwl $rk3,12+$MSB($inp)
  1179. li $at,128
  1180. lwr $rk0,0+$LSB($inp)
  1181. lwr $rk1,4+$LSB($inp)
  1182. lwr $rk2,8+$LSB($inp)
  1183. lwr $rk3,12+$LSB($inp)
  1184. .set noreorder
  1185. beq $bits,$at,.L128bits
  1186. li $cnt,10 # delay slot: ten expansion iterations for a 128-bit key
  1187. .set reorder
  1188. lwl $rk4,16+$MSB($inp) # load 192 bits
  1189. lwl $rk5,20+$MSB($inp)
  1190. li $at,192
  1191. lwr $rk4,16+$LSB($inp)
  1192. lwr $rk5,20+$LSB($inp)
  1193. .set noreorder
  1194. beq $bits,$at,.L192bits
  1195. li $cnt,8 # delay slot: eight iterations of six words each for a 192-bit key
  1196. .set reorder
  1197. lwl $rk6,24+$MSB($inp) # load 256 bits
  1198. lwl $rk7,28+$MSB($inp)
  1199. li $at,256
  1200. lwr $rk6,24+$LSB($inp)
  1201. lwr $rk7,28+$LSB($inp)
  1202. .set noreorder
  1203. beq $bits,$at,.L256bits
  1204. li $cnt,7 # delay slot: seven passes through the 256-bit loop
  1205. b .Lekey_done
  1206. li $t0,-2 # delay slot: bits was none of 128, 192, 256
  1207. .align 4
  1208. .L128bits:
  1209. .set reorder
  1210. srl $i0,$rk3,16 # split rk3 into four byte indices
  1211. srl $i1,$rk3,8
  1212. and $i0,0xff
  1213. and $i1,0xff
  1214. and $i2,$rk3,0xff
  1215. srl $i3,$rk3,24
  1216. $PTR_ADD $i0,$Tbl # turn the indices into addresses inside the byte table
  1217. $PTR_ADD $i1,$Tbl
  1218. $PTR_ADD $i2,$Tbl
  1219. $PTR_ADD $i3,$Tbl
  1220. lbu $i0,0($i0) # substitute each byte through the table
  1221. lbu $i1,0($i1)
  1222. lbu $i2,0($i2)
  1223. lbu $i3,0($i3)
  1224. sw $rk0,0($key) # emit the current four key words
  1225. sw $rk1,4($key)
  1226. sw $rk2,8($key)
  1227. sw $rk3,12($key)
  1228. sub $cnt,1
  1229. $PTR_ADD $key,16 # advance to the next 16-byte round key
  1230. _bias $i0,24
  1231. _bias $i1,16
  1232. _bias $i2,8
  1233. _bias $i3,0
  1234. xor $rk0,$i0
  1235. lw $i0,0($rcon) # fetch the round constant for this iteration
  1236. xor $rk0,$i1
  1237. xor $rk0,$i2
  1238. xor $rk0,$i3
  1239. xor $rk0,$i0
  1240. xor $rk1,$rk0 # each remaining word is chained onto its predecessor
  1241. xor $rk2,$rk1
  1242. xor $rk3,$rk2
  1243. .set noreorder
  1244. bnez $cnt,.L128bits
  1245. $PTR_ADD $rcon,4 # delay slot: step to the next round constant
  1246. sw $rk0,0($key) # store the last round key
  1247. sw $rk1,4($key)
  1248. sw $rk2,8($key)
  1249. li $cnt,10 # value recorded after the schedule, and left in cnt for the caller
  1250. sw $rk3,12($key)
  1251. li $t0,0 # success status
  1252. sw $cnt,80($key) # key has advanced 160 bytes, so this lands at byte 240 of the schedule
  1253. b .Lekey_done
  1254. $PTR_SUB $key,10*16 # delay slot: rewind key to the start of the schedule
  1255. .align 4
  1256. .L192bits:
  1257. .set reorder
  1258. srl $i0,$rk5,16 # split rk5 into four byte indices
  1259. srl $i1,$rk5,8
  1260. and $i0,0xff
  1261. and $i1,0xff
  1262. and $i2,$rk5,0xff
  1263. srl $i3,$rk5,24
  1264. $PTR_ADD $i0,$Tbl
  1265. $PTR_ADD $i1,$Tbl
  1266. $PTR_ADD $i2,$Tbl
  1267. $PTR_ADD $i3,$Tbl
  1268. lbu $i0,0($i0) # substitute each byte through the table
  1269. lbu $i1,0($i1)
  1270. lbu $i2,0($i2)
  1271. lbu $i3,0($i3)
  1272. sw $rk0,0($key) # emit the current six key words
  1273. sw $rk1,4($key)
  1274. sw $rk2,8($key)
  1275. sw $rk3,12($key)
  1276. sw $rk4,16($key)
  1277. sw $rk5,20($key)
  1278. sub $cnt,1
  1279. $PTR_ADD $key,24 # six words are produced per iteration
  1280. _bias $i0,24
  1281. _bias $i1,16
  1282. _bias $i2,8
  1283. _bias $i3,0
  1284. xor $rk0,$i0
  1285. lw $i0,0($rcon) # fetch the round constant for this iteration
  1286. xor $rk0,$i1
  1287. xor $rk0,$i2
  1288. xor $rk0,$i3
  1289. xor $rk0,$i0
  1290. xor $rk1,$rk0 # each remaining word is chained onto its predecessor
  1291. xor $rk2,$rk1
  1292. xor $rk3,$rk2
  1293. xor $rk4,$rk3
  1294. xor $rk5,$rk4
  1295. .set noreorder
  1296. bnez $cnt,.L192bits
  1297. $PTR_ADD $rcon,4 # delay slot: step to the next round constant
  1298. sw $rk0,0($key) # only four more words are stored after the loop
  1299. sw $rk1,4($key)
  1300. sw $rk2,8($key)
  1301. li $cnt,12 # value recorded after the schedule, and left in cnt for the caller
  1302. sw $rk3,12($key)
  1303. li $t0,0 # success status
  1304. sw $cnt,48($key) # key has advanced 192 bytes, so this lands at byte 240 of the schedule
  1305. b .Lekey_done
  1306. $PTR_SUB $key,12*16 # delay slot: rewind key by the 192 bytes advanced
  1307. .align 4
  1308. .L256bits:
  1309. .set reorder
  1310. srl $i0,$rk7,16 # first half: split rk7 into four byte indices
  1311. srl $i1,$rk7,8
  1312. and $i0,0xff
  1313. and $i1,0xff
  1314. and $i2,$rk7,0xff
  1315. srl $i3,$rk7,24
  1316. $PTR_ADD $i0,$Tbl
  1317. $PTR_ADD $i1,$Tbl
  1318. $PTR_ADD $i2,$Tbl
  1319. $PTR_ADD $i3,$Tbl
  1320. lbu $i0,0($i0) # substitute each byte through the table
  1321. lbu $i1,0($i1)
  1322. lbu $i2,0($i2)
  1323. lbu $i3,0($i3)
  1324. sw $rk0,0($key) # emit the current eight key words
  1325. sw $rk1,4($key)
  1326. sw $rk2,8($key)
  1327. sw $rk3,12($key)
  1328. sw $rk4,16($key)
  1329. sw $rk5,20($key)
  1330. sw $rk6,24($key)
  1331. sw $rk7,28($key)
  1332. sub $cnt,1
  1333. _bias $i0,24
  1334. _bias $i1,16
  1335. _bias $i2,8
  1336. _bias $i3,0
  1337. xor $rk0,$i0
  1338. lw $i0,0($rcon) # fetch the round constant for this iteration
  1339. xor $rk0,$i1
  1340. xor $rk0,$i2
  1341. xor $rk0,$i3
  1342. xor $rk0,$i0
  1343. xor $rk1,$rk0 # chain the first four words
  1344. xor $rk2,$rk1
  1345. xor $rk3,$rk2
  1346. beqz $cnt,.L256bits_done # the last pass needs only rk0-rk3
  1347. srl $i0,$rk3,24 # second half: byte indices of rk3 taken in place, no round constant
  1348. srl $i1,$rk3,16
  1349. srl $i2,$rk3,8
  1350. and $i3,$rk3,0xff
  1351. and $i1,0xff
  1352. and $i2,0xff
  1353. $PTR_ADD $i0,$Tbl
  1354. $PTR_ADD $i1,$Tbl
  1355. $PTR_ADD $i2,$Tbl
  1356. $PTR_ADD $i3,$Tbl
  1357. lbu $i0,0($i0) # substitute each byte through the table
  1358. lbu $i1,0($i1)
  1359. lbu $i2,0($i2)
  1360. lbu $i3,0($i3)
  1361. sll $i0,24 # move each substituted byte back to the position it came from
  1362. sll $i1,16
  1363. sll $i2,8
  1364. xor $rk4,$i0
  1365. xor $rk4,$i1
  1366. xor $rk4,$i2
  1367. xor $rk4,$i3
  1368. xor $rk5,$rk4 # chain the upper four words
  1369. xor $rk6,$rk5
  1370. xor $rk7,$rk6
  1371. $PTR_ADD $key,32 # eight words are produced per full pass
  1372. .set noreorder
  1373. b .L256bits
  1374. $PTR_ADD $rcon,4 # delay slot: step to the next round constant
  1375. .L256bits_done:
  1376. sw $rk0,32($key) # key still points at the previous eight words, so the tail goes 32 bytes further
  1377. sw $rk1,36($key)
  1378. sw $rk2,40($key)
  1379. li $cnt,14 # value recorded after the schedule, and left in cnt for the caller
  1380. sw $rk3,44($key)
  1381. li $t0,0 # success status
  1382. sw $cnt,48($key) # key has advanced 192 bytes, so this lands at byte 240 of the schedule
  1383. $PTR_SUB $key,12*16 # rewind key by the 192 bytes advanced, then fall through to the return
  1384. .Lekey_done:
  1385. jr $ra
  1386. nop
  1387. .end _mips_AES_set_encrypt_key
  1388. .globl private_AES_set_encrypt_key # public wrapper: sets up the frame and Tbl, runs _mips_AES_set_encrypt_key, copies its status into a0
  1389. .ent private_AES_set_encrypt_key
  1390. private_AES_set_encrypt_key:
  1391. .frame $sp,$FRAMESIZE,$ra
  1392. .mask $SAVED_REGS_MASK,-$SZREG
  1393. .set noreorder # prologue instructions are kept in the order written
  1394. ___
  1395. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1396. .cpload $pf
  1397. ___
  1398. $code.=<<___; # prologue shared by every flavour
  1399. $PTR_SUB $sp,$FRAMESIZE # allocate the eight-slot stack frame
  1400. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) # save the return address, the bal below overwrites ra
  1401. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) # fp is used as the loop counter by the expansion routine
  1402. ___
  1403. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1404. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) # s0-s3 carry key words rk4-rk7, gp carries the round-constant cursor
  1405. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  1406. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  1407. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  1408. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  1409. ___
  1410. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1411. .cplocal $Tbl
  1412. .cpsetup $pf,$zero,private_AES_set_encrypt_key
  1413. ___
  1414. $code.=<<___; # body: point Tbl at the byte table and run the expansion
  1415. .set reorder
  1416. la $Tbl,AES_Te4 # PIC-ified 'load address'
  1417. bal _mips_AES_set_encrypt_key # reorder mode leaves the delay slot to the assembler
  1418. .set noreorder
  1419. move $a0,$t0 # copy the status left in t0 into a0
  1420. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1421. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1422. ___
  1423. $code.=<<___ if ($flavour =~ /nubi/i); # restore from slots 3-7, the ones the prologue filled (slots 11-15 lie below sp in this frame)
  1424. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
  1425. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
  1426. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
  1427. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
  1428. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  1429. ___
  1430. $code.=<<___; # return sequence
  1431. jr $ra
  1432. $PTR_ADD $sp,$FRAMESIZE # delay slot: release the stack frame
  1433. .end private_AES_set_encrypt_key
  1434. ___
  1435. my ($head,$tail)=($inp,$bits); # the inp and bits registers are reused as the two swap cursors
  1436. my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); # byte-wise GF(2^8) multiples of the input word: 1,2,4,8,9,0xb,0xd,0xe
  1437. my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); # scratch mask plus the three constants of the packed doubling step
  1438. $code.=<<___; # start of the routine
  1439. .align 5
  1440. .globl private_AES_set_decrypt_key # public entry: expands the key, reverses the round-key order, then transforms the inner round keys
  1441. .ent private_AES_set_decrypt_key
  1442. private_AES_set_decrypt_key:
  1443. .frame $sp,$FRAMESIZE,$ra
  1444. .mask $SAVED_REGS_MASK,-$SZREG
  1445. .set noreorder # prologue instructions are kept in the order written
  1446. ___
  1447. $code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
  1448. .cpload $pf
  1449. ___
  1450. $code.=<<___; # prologue shared by every flavour
  1451. $PTR_SUB $sp,$FRAMESIZE # allocate the eight-slot stack frame
  1452. $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) # save the return address, the bal below overwrites ra
  1453. $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) # fp is used as the loop counter
  1454. ___
  1455. $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
  1456. $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) # s0-s3 and gp are overwritten below, so nubi keeps copies in slots 3-7
  1457. $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
  1458. $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
  1459. $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
  1460. $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
  1461. ___
  1462. $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
  1463. .cplocal $Tbl
  1464. .cpsetup $pf,$zero,private_AES_set_decrypt_key
  1465. ___
  1466. $code.=<<___; # body: expand, swap, mix, then the common part of the epilogue
  1467. .set reorder
  1468. la $Tbl,AES_Te4 # PIC-ified 'load address'
  1469. bal _mips_AES_set_encrypt_key # leaves the status in t0 and the recorded count in cnt
  1470. bltz $t0,.Ldkey_done # negative status: skip the post-processing and return it
  1471. sll $at,$cnt,4 # byte offset of the last round key: cnt times 16
  1472. $PTR_ADD $head,$key,0 # head starts at the first round key
  1473. $PTR_ADD $tail,$key,$at # tail starts at the last round key
  1474. .align 4
  1475. .Lswap:
  1476. lw $rk0,0($head) # read the 16 bytes at head
  1477. lw $rk1,4($head)
  1478. lw $rk2,8($head)
  1479. lw $rk3,12($head)
  1480. lw $rk4,0($tail) # read the 16 bytes at tail
  1481. lw $rk5,4($tail)
  1482. lw $rk6,8($tail)
  1483. lw $rk7,12($tail)
  1484. sw $rk0,0($tail) # write the head words to tail
  1485. sw $rk1,4($tail)
  1486. sw $rk2,8($tail)
  1487. sw $rk3,12($tail)
  1488. $PTR_ADD $head,16 # move the cursors toward each other
  1489. $PTR_SUB $tail,16
  1490. sw $rk4,-16($head) # write the tail words to the old head position
  1491. sw $rk5,-12($head)
  1492. sw $rk6,-8($head)
  1493. sw $rk7,-4($head)
  1494. bne $head,$tail,.Lswap # stop once the cursors meet on the middle round key
  1495. lw $tp1,16($key) # modulo-scheduled
  1496. lui $x80808080,0x8080 # build the 0x80808080 mask
  1497. sub $cnt,1
  1498. or $x80808080,0x8080
  1499. sll $cnt,2 # words to transform: four for each of the cnt-1 inner round keys
  1500. $PTR_ADD $key,16 # skip the first round key, it is left untransformed
  1501. lui $x1b1b1b1b,0x1b1b # build the 0x1b1b1b1b reduction constant
  1502. nor $x7f7f7f7f,$zero,$x80808080 # complement of the high-bit mask
  1503. or $x1b1b1b1b,0x1b1b
  1504. .align 4
  1505. .Lmix:
  1506. and $m,$tp1,$x80808080 # isolate the top bit of every byte
  1507. and $tp2,$tp1,$x7f7f7f7f
  1508. srl $tp4,$m,7
  1509. addu $tp2,$tp2 # tp2<<1
  1510. subu $m,$tp4 # bytes whose top bit was set become 0x7f
  1511. and $m,$x1b1b1b1b # and select 0x1b for exactly those bytes
  1512. xor $tp2,$m # tp2 is now the packed double of tp1
  1513. and $m,$tp2,$x80808080 # same doubling step, tp2 to tp4
  1514. and $tp4,$tp2,$x7f7f7f7f
  1515. srl $tp8,$m,7
  1516. addu $tp4,$tp4 # tp4<<1
  1517. subu $m,$tp8
  1518. and $m,$x1b1b1b1b
  1519. xor $tp4,$m
  1520. and $m,$tp4,$x80808080 # same doubling step, tp4 to tp8
  1521. and $tp8,$tp4,$x7f7f7f7f
  1522. srl $tp9,$m,7
  1523. addu $tp8,$tp8 # tp8<<1
  1524. subu $m,$tp9
  1525. and $m,$x1b1b1b1b
  1526. xor $tp8,$m
  1527. xor $tp9,$tp8,$tp1 # 9 = 8 + 1
  1528. xor $tpe,$tp8,$tp4 # 0xe = 8 + 4, the 2 term is folded in below
  1529. xor $tpb,$tp9,$tp2 # 0xb = 9 + 2
  1530. xor $tpd,$tp9,$tp4 # 0xd = 9 + 4
  1531. #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
  1532. rotr $tp1,$tpd,16 # tp1, tp2 and tp4 are free again and hold the rotated terms
  1533. xor $tpe,$tp2
  1534. rotr $tp2,$tp9,8
  1535. xor $tpe,$tp1
  1536. rotr $tp4,$tpb,24
  1537. xor $tpe,$tp2
  1538. lw $tp1,4($key) # modulo-scheduled
  1539. xor $tpe,$tp4
  1540. #else
  1541. _ror $tp1,$tpd,16
  1542. xor $tpe,$tp2
  1543. _ror $tp2,$tpd,-16
  1544. xor $tpe,$tp1
  1545. _ror $tp1,$tp9,8
  1546. xor $tpe,$tp2
  1547. _ror $tp2,$tp9,-24
  1548. xor $tpe,$tp1
  1549. _ror $tp1,$tpb,24
  1550. xor $tpe,$tp2
  1551. _ror $tp2,$tpb,-8
  1552. xor $tpe,$tp1
  1553. lw $tp1,4($key) # modulo-scheduled
  1554. xor $tpe,$tp2
  1555. #endif
  1556. sub $cnt,1
  1557. sw $tpe,0($key) # replace the key word with its transformed value
  1558. $PTR_ADD $key,4
  1559. bnez $cnt,.Lmix
  1560. li $t0,0 # success status, the mask that lived in t0 is no longer needed
  1561. .Ldkey_done:
  1562. .set noreorder
  1563. move $a0,$t0 # copy the status left in t0 into a0
  1564. $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
  1565. $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
  1566. ___
  1567. $code.=<<___ if ($flavour =~ /nubi/i); # restore from slots 3-7, the ones the prologue filled (slots 11-15 lie below sp in this frame)
  1568. $REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
  1569. $REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
  1570. $REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
  1571. $REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
  1572. $REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
  1573. ___
  1574. $code.=<<___; # return sequence
  1575. jr $ra
  1576. $PTR_ADD $sp,$FRAMESIZE # delay slot: release the stack frame
  1577. .end private_AES_set_decrypt_key
  1578. ___
  1579. }}}
  1580. ######################################################################
  1581. # Tables are kept in endian-neutral manner
  1582. $code.=<<___;
  1583. .rdata
  1584. .align 10
  1585. AES_Te:
  1586. .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
  1587. .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
  1588. .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
  1589. .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
  1590. .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
  1591. .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
  1592. .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
  1593. .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
  1594. .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
  1595. .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
  1596. .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
  1597. .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
  1598. .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
  1599. .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
  1600. .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
  1601. .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
  1602. .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
  1603. .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
  1604. .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
  1605. .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
  1606. .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
  1607. .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
  1608. .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
  1609. .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
  1610. .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
  1611. .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
  1612. .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
  1613. .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
  1614. .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
  1615. .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
  1616. .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
  1617. .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
  1618. .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
  1619. .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
  1620. .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
  1621. .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
  1622. .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
  1623. .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
  1624. .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
  1625. .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
  1626. .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
  1627. .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
  1628. .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
  1629. .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
  1630. .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
  1631. .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
  1632. .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
  1633. .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
  1634. .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
  1635. .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
  1636. .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
  1637. .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
  1638. .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
  1639. .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
  1640. .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
  1641. .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
  1642. .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
  1643. .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
  1644. .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
  1645. .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
  1646. .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
  1647. .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
  1648. .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
  1649. .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
  1650. .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
  1651. .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
  1652. .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
  1653. .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
  1654. .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
  1655. .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
  1656. .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
  1657. .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
  1658. .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
  1659. .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
  1660. .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
  1661. .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
  1662. .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
  1663. .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
  1664. .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
  1665. .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
  1666. .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
  1667. .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
  1668. .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
  1669. .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
  1670. .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
  1671. .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
  1672. .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
  1673. .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
  1674. .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
  1675. .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
  1676. .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
  1677. .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
  1678. .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
  1679. .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
  1680. .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
  1681. .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
  1682. .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
  1683. .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
  1684. .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
  1685. .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
  1686. .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
  1687. .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
  1688. .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
  1689. .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
  1690. .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
  1691. .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
  1692. .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
  1693. .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
  1694. .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
  1695. .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
  1696. .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
  1697. .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
  1698. .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
  1699. .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
  1700. .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
  1701. .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
  1702. .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
  1703. .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
  1704. .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
  1705. .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
  1706. .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
  1707. .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
  1708. .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
  1709. .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
  1710. .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
  1711. .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
  1712. .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
  1713. .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
  1714. AES_Td:
  1715. .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
  1716. .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
  1717. .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
  1718. .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
  1719. .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
  1720. .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
  1721. .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
  1722. .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
  1723. .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
  1724. .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
  1725. .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
  1726. .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
  1727. .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
  1728. .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
  1729. .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
  1730. .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
  1731. .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
  1732. .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
  1733. .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
  1734. .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
  1735. .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
  1736. .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
  1737. .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
  1738. .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
  1739. .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
  1740. .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
  1741. .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
  1742. .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
  1743. .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
  1744. .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
  1745. .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
  1746. .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
  1747. .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
  1748. .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
  1749. .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
  1750. .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
  1751. .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
  1752. .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
  1753. .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
  1754. .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
  1755. .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
  1756. .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
  1757. .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
  1758. .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
  1759. .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
  1760. .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
  1761. .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
  1762. .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
  1763. .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
  1764. .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
  1765. .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
  1766. .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
  1767. .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
  1768. .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
  1769. .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
  1770. .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
  1771. .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
  1772. .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
  1773. .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
  1774. .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
  1775. .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
  1776. .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
  1777. .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
  1778. .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
  1779. .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
  1780. .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
  1781. .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
  1782. .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
  1783. .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
  1784. .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
  1785. .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
  1786. .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
  1787. .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
  1788. .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
  1789. .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
  1790. .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
  1791. .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
  1792. .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
  1793. .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
  1794. .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
  1795. .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
  1796. .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
  1797. .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
  1798. .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
  1799. .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
  1800. .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
  1801. .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
  1802. .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
  1803. .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
  1804. .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
  1805. .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
  1806. .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
  1807. .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
  1808. .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
  1809. .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
  1810. .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
  1811. .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
  1812. .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
  1813. .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
  1814. .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
  1815. .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
  1816. .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
  1817. .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
  1818. .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
  1819. .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
  1820. .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
  1821. .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
  1822. .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
  1823. .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
  1824. .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
  1825. .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
  1826. .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
  1827. .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
  1828. .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
  1829. .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
  1830. .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
  1831. .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
  1832. .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
  1833. .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
  1834. .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
  1835. .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
  1836. .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
  1837. .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
  1838. .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
  1839. .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
  1840. .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
  1841. .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
  1842. .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
  1843. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
  1844. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  1845. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  1846. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  1847. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  1848. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  1849. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  1850. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  1851. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  1852. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  1853. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  1854. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  1855. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  1856. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  1857. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  1858. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  1859. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  1860. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  1861. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  1862. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  1863. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  1864. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  1865. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  1866. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  1867. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  1868. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  1869. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  1870. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  1871. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  1872. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  1873. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  1874. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  1875. AES_Te4:
  1876. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
  1877. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  1878. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  1879. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  1880. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  1881. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  1882. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  1883. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  1884. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  1885. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  1886. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  1887. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  1888. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  1889. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  1890. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  1891. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  1892. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  1893. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  1894. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  1895. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  1896. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  1897. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  1898. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  1899. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  1900. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  1901. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  1902. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  1903. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  1904. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  1905. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  1906. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  1907. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  1908. .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
  1909. .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
  1910. .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
  1911. .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
  1912. .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
  1913. ___
  # Post-process the generated assembly held in $code one line at a time and
  # print each resulting line: expand `...` expressions, lower the made-up
  # "_" pseudo-instructions to real instructions, and adjust byte-order
  # dependent offsets and shift amounts when $big_endian is false.
  1914. foreach (split("\n",$code)) {
  # replace every `expr` span with the value of evaluating expr as Perl
  1915. s/\`([^\`]*)\`/eval $1/ge;
  1916. # made-up _instructions, _xtr, _ins, _ins2, _ror and _bias, cope
  1917. # with byte order dependencies...
  1918. if (/^\s+_/) {
  # two-operand shorthand "_op $reg,arg" becomes "_op $reg,$reg,arg"; the
  # mnemonic must consist of lowercase letters only, so _ins2 is not
  # expanded by this step
  1919. s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
  # lower the first pseudo-instruction that matches (the "or" chain stops at
  # the first successful substitution); n is the last operand, and the first
  # amount is used when $big_endian is true, the second otherwise:
  #   _xtr  -> srl  ...,n      /  srl  ...,24-n
  #   _ins  -> sll  ...,n      /  sll  ...,24-n
  #   _ins2 -> ins  ...,n,8    /  ins  ...,24-n,8
  #   _ror  -> srl  ...,n      /  srl  ...,-n  (negative amounts fixed below)
  #   _bias -> sll  ...,n      /  sll  ...,(n-16)&31
  1920. s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
  1921. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  1922. : eval("24-$3"))/e or
  1923. s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  1924. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  1925. : eval("24-$3"))/e or
  1926. s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  1927. sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
  1928. : eval("24-$3"))/e or
  1929. s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
  1930. sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
  1931. : eval("$3*-1"))/e or
  1932. s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
  1933. sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
  1934. : eval("($3-16)&31"))/e;
  # tidy up the resulting shift: an srl by a negative amount becomes an sll
  # by the same amount without the sign; otherwise "srl ...,0" becomes
  # "and ...,0xff"; otherwise "sll ...,0" is commented out with a leading '#'
  1935. s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
  1936. sprintf("sll\t$1,$2,$3")/e or
  1937. s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
  1938. sprintf("and\t$1,$2,0xff")/e or
  1939. s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
  1940. }
  1941. # convert lwl/lwr and swr/swl to little-endian order
  # NOTE(review): the greedy ".*" leaves only the last digit before "(" in $2,
  # and "&" binds looser than "+"/"-" in the eval'ed string, so the whole sum
  # is masked with 3: that digit d is rewritten to (d-1)&3 for [sl]wl and to
  # (d+1)&3 for [sl]wr. Presumably offsets are always written so that the
  # final digit is the 0..3 byte index -- confirm against the code above.
  1942. if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
  1943. s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
  1944. sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
  1945. s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
  1946. sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
  1947. }
  # little-endian only: mirror the last operand of rotr (n -> 32-n) and the
  # third operand of "ext ...,n,8" (n -> 24-n)
  1948. if (!$big_endian) {
  1949. s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
  1950. s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
  1951. }
  # emit the (possibly rewritten) line
  1952. print $_,"\n";
  1953. }
  # close flushes any buffered output; check its result so that a failed
  # write (e.g. full disk or closed pipe) aborts with a non-zero exit status
  # instead of silently leaving a truncated assembly file behind.
  1954. close STDOUT or die "error closing STDOUT: $!";