12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131 |
# Command-line handling, standard for perlasm drivers: first argument is
# the "flavour" (elf, macosx, mingw64, nasm, ...), second is the output
# file.  A single argument containing a dot is taken as the output file
# alone.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Windows targets are recognized either by flavour or by .asm extension.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the x86_64-xlate.pl translator next to this script or in the
# sibling perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# All generated code is piped through the translator.  Check the pipe
# open explicitly: the original silently wrote nowhere if $^X/$xlate
# could not be spawned.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
# Probe the toolchain for ADCX/ADOX (ADX) support; $addx gates emission
# of the radix-2^64 fe64 code path below.  $addx stays false when every
# probe fails (or when $ENV{CC}/$ENV{ASM} are unset and the backtick
# commands simply produce no matching output).
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
	$addx = ($1>=2.23);	# GNU as 2.23+ understands adcx/adox
}

# Windows/NASM: version 2.10+ required.
if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
	`nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
	$addx = ($1>=2.10);
}

# Windows/MASM: ml64 version 12+.
if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
	`ml64 2>&1` =~ /Version ([0-9]+)\./) {
	$addx = ($1>=12);
}

# clang/LLVM reports versions differently; minor is folded into the
# fraction so that 3.3 compares as 3.03.
if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) {
	my $ver = $2 + $3/100.0;	# 3.1 -> 3.01, 3.10 -> 3.10
	$addx = ($ver>=3.03);
}
$code.=<<___;
.text

# x25519_fe51_mul(h, f, g): school-book multiplication of two field
# elements in radix 2^51 (five 51-bit limbs), with the wrap-around
# limbs pre-multiplied by 19 (2^255 == 19 mod 2^255-19).  Partial
# products accumulate in five 128-bit register pairs:
# h0=%rbx:%rcx, h1=%r8:%r9, h2=%r10:%r11, h3=%r12:%r13, h4=%r14:%r15,
# then fall through to .Lreduce51.
.globl	x25519_fe51_mul
.type	x25519_fe51_mul,\@function,3
.align	32
x25519_fe51_mul:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp		# 5 scratch slots
.cfi_adjust_cfa_offset	40
.Lfe51_mul_body:

	mov	8*0(%rsi),%rax		# f[0]
	mov	8*0(%rdx),%r11		# load g[0..4]
	mov	8*1(%rdx),%r12
	mov	8*2(%rdx),%r13
	mov	8*3(%rdx),%rbp
	mov	8*4(%rdx),%r14

	mov	%rdi,8*4(%rsp)		# offload 1st argument (dst)
	mov	%rax,%rdi
	mulq	%r11			# f[0]*g[0]
	mov	%r11,8*0(%rsp)		# offload g[0]
	mov	%rax,%rbx		# %rbx:%rcx = h0
	mov	%rdi,%rax
	mov	%rdx,%rcx
	mulq	%r12			# f[0]*g[1]
	mov	%r12,8*1(%rsp)		# offload g[1]
	mov	%rax,%r8		# %r8:%r9 = h1
	mov	%rdi,%rax
	lea	(%r14,%r14,8),%r15
	mov	%rdx,%r9
	mulq	%r13			# f[0]*g[2]
	mov	%r13,8*2(%rsp)		# offload g[2]
	mov	%rax,%r10		# %r10:%r11 = h2
	mov	%rdi,%rax
	lea	(%r14,%r15,2),%rdi	# g[4]*19
	mov	%rdx,%r11
	mulq	%rbp			# f[0]*g[3]
	mov	%rax,%r12		# %r12:%r13 = h3
	mov	8*0(%rsi),%rax		# f[0]
	mov	%rdx,%r13
	mulq	%r14			# f[0]*g[4]
	mov	%rax,%r14		# %r14:%r15 = h4
	mov	8*1(%rsi),%rax		# f[1]
	mov	%rdx,%r15

	mulq	%rdi			# f[1]*g[4]*19
	add	%rax,%rbx		# h0
	mov	8*2(%rsi),%rax		# f[2]
	adc	%rdx,%rcx
	mulq	%rdi			# f[2]*g[4]*19
	add	%rax,%r8		# h1
	mov	8*3(%rsi),%rax		# f[3]
	adc	%rdx,%r9
	mulq	%rdi			# f[3]*g[4]*19
	add	%rax,%r10		# h2
	mov	8*4(%rsi),%rax		# f[4]
	adc	%rdx,%r11
	mulq	%rdi			# f[4]*g[4]*19
	imulq	\$19,%rbp,%rdi		# g[3]*19
	add	%rax,%r12		# h3
	mov	8*1(%rsi),%rax		# f[1]
	adc	%rdx,%r13
	mulq	%rbp			# f[1]*g[3]
	mov	8*2(%rsp),%rbp		# reload g[2]
	add	%rax,%r14		# h4
	mov	8*2(%rsi),%rax		# f[2]
	adc	%rdx,%r15

	mulq	%rdi			# f[2]*g[3]*19
	add	%rax,%rbx		# h0
	mov	8*3(%rsi),%rax		# f[3]
	adc	%rdx,%rcx
	mulq	%rdi			# f[3]*g[3]*19
	add	%rax,%r8		# h1
	mov	8*4(%rsi),%rax		# f[4]
	adc	%rdx,%r9
	mulq	%rdi			# f[4]*g[3]*19
	imulq	\$19,%rbp,%rdi		# g[2]*19
	add	%rax,%r10		# h2
	mov	8*1(%rsi),%rax		# f[1]
	adc	%rdx,%r11
	mulq	%rbp			# f[1]*g[2]
	add	%rax,%r12		# h3
	mov	8*2(%rsi),%rax		# f[2]
	adc	%rdx,%r13
	mulq	%rbp			# f[2]*g[2]
	mov	8*1(%rsp),%rbp		# reload g[1]
	add	%rax,%r14		# h4
	mov	8*3(%rsi),%rax		# f[3]
	adc	%rdx,%r15

	mulq	%rdi			# f[3]*g[2]*19
	add	%rax,%rbx		# h0
	mov	8*4(%rsi),%rax		# f[4]
	adc	%rdx,%rcx
	mulq	%rdi			# f[4]*g[2]*19
	add	%rax,%r8		# h1
	mov	8*1(%rsi),%rax		# f[1]
	adc	%rdx,%r9
	mulq	%rbp			# f[1]*g[1]
	imulq	\$19,%rbp,%rdi		# g[1]*19
	add	%rax,%r10		# h2
	mov	8*2(%rsi),%rax		# f[2]
	adc	%rdx,%r11
	mulq	%rbp			# f[2]*g[1]
	add	%rax,%r12		# h3
	mov	8*3(%rsi),%rax		# f[3]
	adc	%rdx,%r13
	mulq	%rbp			# f[3]*g[1]
	mov	8*0(%rsp),%rbp		# reload g[0]
	add	%rax,%r14		# h4
	mov	8*4(%rsi),%rax		# f[4]
	adc	%rdx,%r15

	mulq	%rdi			# f[4]*g[1]*19
	add	%rax,%rbx		# h0
	mov	8*1(%rsi),%rax		# f[1]
	adc	%rdx,%rcx
	mul	%rbp			# f[1]*g[0] (mul == mulq, size implied by %rbp)
	add	%rax,%r8		# h1
	mov	8*2(%rsi),%rax		# f[2]
	adc	%rdx,%r9
	mul	%rbp			# f[2]*g[0]
	add	%rax,%r10		# h2
	mov	8*3(%rsi),%rax		# f[3]
	adc	%rdx,%r11
	mul	%rbp			# f[3]*g[0]
	add	%rax,%r12		# h3
	mov	8*4(%rsi),%rax		# f[4]
	adc	%rdx,%r13
	mulq	%rbp			# f[4]*g[0]
	add	%rax,%r14		# h4
	adc	%rdx,%r15

	mov	8*4(%rsp),%rdi		# restore 1st argument (dst)
	jmp	.Lreduce51
.Lfe51_mul_epilogue:
.cfi_endproc
.size	x25519_fe51_mul,.-x25519_fe51_mul
# x25519_fe51_sqr(h, f): squaring in radix 2^51.  Off-diagonal products
# are formed against doubled limbs (2*f[i]) and top limbs enter
# pre-multiplied by 19.  Same h0..h4 accumulator layout as fe51_mul;
# falls through to .Lreduce51.
.globl	x25519_fe51_sqr
.type	x25519_fe51_sqr,\@function,2
.align	32
x25519_fe51_sqr:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp		# 5 scratch slots
.cfi_adjust_cfa_offset	40
.Lfe51_sqr_body:

	mov	8*0(%rsi),%rax		# f[0]
	mov	8*2(%rsi),%r15		# f[2]
	mov	8*4(%rsi),%rbp		# f[4]

	mov	%rdi,8*4(%rsp)		# offload 1st argument (dst)
	lea	(%rax,%rax),%r14	# f[0]*2
	mulq	%rax			# f[0]^2
	mov	%rax,%rbx		# h0
	mov	8*1(%rsi),%rax		# f[1]
	mov	%rdx,%rcx
	mulq	%r14			# 2*f[0]*f[1]
	mov	%rax,%r8		# h1
	mov	%r15,%rax		# f[2]
	mov	%r15,8*0(%rsp)		# offload f[2]
	mov	%rdx,%r9
	mulq	%r14			# 2*f[0]*f[2]
	mov	%rax,%r10		# h2
	mov	8*3(%rsi),%rax		# f[3]
	mov	%rdx,%r11
	imulq	\$19,%rbp,%rdi		# f[4]*19
	mulq	%r14			# 2*f[0]*f[3]
	mov	%rax,%r12		# h3
	mov	%rbp,%rax		# f[4]
	mov	%rdx,%r13
	mulq	%r14			# 2*f[0]*f[4]
	mov	%rax,%r14		# h4
	mov	%rbp,%rax		# f[4]
	mov	%rdx,%r15

	mulq	%rdi			# f[4]^2*19
	add	%rax,%r12		# h3
	mov	8*1(%rsi),%rax		# f[1]
	adc	%rdx,%r13

	mov	8*3(%rsi),%rsi		# f[3] (input pointer no longer needed)
	lea	(%rax,%rax),%rbp	# f[1]*2
	mulq	%rax			# f[1]^2
	add	%rax,%r10		# h2
	mov	8*0(%rsp),%rax		# f[2]
	adc	%rdx,%r11
	mulq	%rbp			# 2*f[1]*f[2]
	add	%rax,%r12		# h3
	mov	%rbp,%rax
	adc	%rdx,%r13
	mulq	%rsi			# 2*f[1]*f[3]
	add	%rax,%r14		# h4
	mov	%rbp,%rax
	adc	%rdx,%r15
	imulq	\$19,%rsi,%rbp		# f[3]*19
	mulq	%rdi			# 2*f[1]*f[4]*19
	add	%rax,%rbx		# h0
	lea	(%rsi,%rsi),%rax	# f[3]*2
	adc	%rdx,%rcx
	mulq	%rdi			# 2*f[3]*f[4]*19
	add	%rax,%r10		# h2
	mov	%rsi,%rax		# f[3]
	adc	%rdx,%r11
	mulq	%rbp			# f[3]^2*19
	add	%rax,%r8		# h1
	mov	8*0(%rsp),%rax		# f[2]
	adc	%rdx,%r9

	lea	(%rax,%rax),%rsi	# f[2]*2
	mulq	%rax			# f[2]^2
	add	%rax,%r14		# h4
	mov	%rbp,%rax		# f[3]*19
	adc	%rdx,%r15
	mulq	%rsi			# 2*f[2]*f[3]*19
	add	%rax,%rbx		# h0
	mov	%rsi,%rax		# f[2]*2
	adc	%rdx,%rcx
	mulq	%rdi			# 2*f[2]*f[4]*19
	add	%rax,%r8		# h1
	adc	%rdx,%r9

	mov	8*4(%rsp),%rdi		# restore 1st argument (dst)
	jmp	.Lreduce51
# Common tail for the fe51 routines: carry-propagate the five 128-bit
# accumulators (h0=%rbx:%rcx, h1=%r8:%r9, h2=%r10:%r11, h3=%r12:%r13,
# h4=%r14:%r15) back to radix 2^51, fold the top carry times 19 into
# h0, store h[0..4] at %rdi, and restore the caller's registers.
.align	32
.Lreduce51:
	mov	\$0x7ffffffffffff,%rbp	# 2^51-1, limb mask

	mov	%r10,%rdx		# split h2: low 51 bits ...
	shr	\$51,%r10
	shl	\$13,%r11
	and	%rbp,%rdx		# h2 & mask -> %rdx
	or	%r10,%r11		# ... and 64-bit carry h2>>51
	add	%r11,%r12		# h3 += h2>>51
	adc	\$0,%r13

	mov	%rbx,%rax		# split h0
	shr	\$51,%rbx
	shl	\$13,%rcx
	and	%rbp,%rax		# h0 & mask -> %rax
	or	%rbx,%rcx
	add	%rcx,%r8		# h1 += h0>>51
	adc	\$0,%r9

	mov	%r12,%rbx		# split h3
	shr	\$51,%r12
	shl	\$13,%r13
	and	%rbp,%rbx		# h3 & mask -> %rbx
	or	%r12,%r13
	add	%r13,%r14		# h4 += h3>>51
	adc	\$0,%r15

	mov	%r8,%rcx		# split h1
	shr	\$51,%r8
	shl	\$13,%r9
	and	%rbp,%rcx		# h1 & mask -> %rcx
	or	%r8,%r9
	add	%r9,%rdx		# h2 += h1>>51

	mov	%r14,%r10		# split h4
	shr	\$51,%r14
	shl	\$13,%r15
	and	%rbp,%r10		# h4 & mask -> %r10
	or	%r14,%r15		# h4>>51
	lea	(%r15,%r15,8),%r14	# (h4>>51)*9
	lea	(%r15,%r14,2),%r15	# (h4>>51)*19
	add	%r15,%rax		# h0 += (h4>>51)*19

	mov	%rdx,%r8		# final carry h2 -> h3
	and	%rbp,%rdx
	shr	\$51,%r8
	add	%r8,%rbx

	mov	%rax,%r9		# final carry h0 -> h1
	and	%rbp,%rax
	shr	\$51,%r9
	add	%r9,%rcx

	mov	%rax,8*0(%rdi)		# store h[0..4]
	mov	%rcx,8*1(%rdi)
	mov	%rdx,8*2(%rdi)
	mov	%rbx,8*3(%rdi)
	mov	%r10,8*4(%rdi)

	mov	8*5(%rsp),%r15
.cfi_restore	%r15
	mov	8*6(%rsp),%r14
.cfi_restore	%r14
	mov	8*7(%rsp),%r13
.cfi_restore	%r13
	mov	8*8(%rsp),%r12
.cfi_restore	%r12
	mov	8*9(%rsp),%rbx
.cfi_restore	%rbx
	mov	8*10(%rsp),%rbp
.cfi_restore	%rbp
	lea	8*11(%rsp),%rsp		# release 40-byte scratch + 6 saved regs
.cfi_adjust_cfa_offset	-88		# stack SHRINKS by 88: adjustment must be negative
.Lfe51_sqr_epilogue:
	ret
.cfi_endproc
.size	x25519_fe51_sqr,.-x25519_fe51_sqr
# x25519_fe51_mul121666(h, f): multiply a field element by the curve
# constant 121666, one 64x64->128 multiply per limb, then fall through
# to the shared .Lreduce51 carry chain.
.globl	x25519_fe51_mul121666
.type	x25519_fe51_mul121666,\@function,2
.align	32
x25519_fe51_mul121666:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp		# match the frame .Lreduce51 unwinds
.cfi_adjust_cfa_offset	40
.Lfe51_mul121666_body:

	mov	\$121666,%eax
	mulq	8*0(%rsi)		# f[0]*121666
	mov	%rax,%rbx		# %rbx:%rcx = h0
	mov	\$121666,%eax
	mov	%rdx,%rcx
	mulq	8*1(%rsi)		# f[1]*121666
	mov	%rax,%r8		# %r8:%r9 = h1
	mov	\$121666,%eax
	mov	%rdx,%r9
	mulq	8*2(%rsi)		# f[2]*121666
	mov	%rax,%r10		# %r10:%r11 = h2
	mov	\$121666,%eax
	mov	%rdx,%r11
	mulq	8*3(%rsi)		# f[3]*121666
	mov	%rax,%r12		# %r12:%r13 = h3
	mov	\$121666,%eax
	mov	%rdx,%r13
	mulq	8*4(%rsi)		# f[4]*121666
	mov	%rax,%r14		# %r14:%r15 = h4
	mov	%rdx,%r15

	jmp	.Lreduce51
.Lfe51_mul121666_epilogue:
.cfi_endproc
.size	x25519_fe51_mul121666,.-x25519_fe51_mul121666
___
if ($addx) {
# Radix-2^64 code path for processors with mulx (BMI2) and adcx/adox
# (ADX).  Accumulators map onto %r8..%r15.
my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7) = map("%r$_",(8..15));

$code.=<<___;
.extern	OPENSSL_ia32cap_P
# x25519_fe64_eligible(): returns non-zero iff both capability bits in
# \$0x80100 are set in OPENSSL_ia32cap_P word 2 (presumably BMI2|ADX;
# confirm against the ia32cap bit layout).
.globl	x25519_fe64_eligible
.type	x25519_fe64_eligible,\@abi-omnipotent
.align	32
x25519_fe64_eligible:
.cfi_startproc
	mov	OPENSSL_ia32cap_P+8(%rip),%ecx
	xor	%eax,%eax
	and	\$0x80100,%ecx
	cmp	\$0x80100,%ecx
	cmove	%ecx,%eax		# %eax stays 0 unless both bits present
	ret
.cfi_endproc
.size	x25519_fe64_eligible,.-x25519_fe64_eligible
# x25519_fe64_mul(h, f, g): 4x4-limb school-book multiplication using
# mulx with the two independent adcx/adox carry chains, producing an
# 8-limb product in $acc0..$acc7; %rdi is kept zero as a carry sink.
# Ends with %edx=38 and falls through to .Lreduce64.
.globl	x25519_fe64_mul
.type	x25519_fe64_mul,\@function,3
.align	32
x25519_fe64_mul:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdi			# offload dst
.cfi_push	%rdi
	lea	-8*2(%rsp),%rsp		# scratch
.cfi_adjust_cfa_offset	16
.Lfe64_mul_body:

	mov	%rdx,%rax
	mov	8*0(%rdx),%rbp		# b[0]
	mov	8*0(%rsi),%rdx		# a[0]
	mov	8*1(%rax),%rcx		# b[1]
	mov	8*2(%rax),$acc6		# b[2]
	mov	8*3(%rax),$acc7		# b[3]

	mulx	%rbp,$acc0,%rax		# a[0]*b[0]
	xor	%edi,%edi		# cf=0, of=0, and zero carry sink
	mulx	%rcx,$acc1,%rbx		# a[0]*b[1]
	adcx	%rax,$acc1
	mulx	$acc6,$acc2,%rax	# a[0]*b[2]
	adcx	%rbx,$acc2
	mulx	$acc7,$acc3,$acc4	# a[0]*b[3]
	mov	8*1(%rsi),%rdx		# a[1]
	adcx	%rax,$acc3
	mov	$acc6,(%rsp)		# offload b[2] ($acc6 is recycled below)
	adcx	%rdi,$acc4		# cf=0

	mulx	%rbp,%rax,%rbx		# a[1]*b[0]
	adox	%rax,$acc1
	adcx	%rbx,$acc2
	mulx	%rcx,%rax,%rbx		# a[1]*b[1]
	adox	%rax,$acc2
	adcx	%rbx,$acc3
	mulx	$acc6,%rax,%rbx		# a[1]*b[2]
	adox	%rax,$acc3
	adcx	%rbx,$acc4
	mulx	$acc7,%rax,$acc5	# a[1]*b[3]
	mov	8*2(%rsi),%rdx		# a[2]
	adox	%rax,$acc4
	adcx	%rdi,$acc5		# cf=0
	adox	%rdi,$acc5		# of=0

	mulx	%rbp,%rax,%rbx		# a[2]*b[0]
	adcx	%rax,$acc2
	adox	%rbx,$acc3
	mulx	%rcx,%rax,%rbx		# a[2]*b[1]
	adcx	%rax,$acc3
	adox	%rbx,$acc4
	mulx	$acc6,%rax,%rbx		# a[2]*b[2]
	adcx	%rax,$acc4
	adox	%rbx,$acc5
	mulx	$acc7,%rax,$acc6	# a[2]*b[3]
	mov	8*3(%rsi),%rdx		# a[3]
	adcx	%rax,$acc5
	adox	%rdi,$acc6		# of=0
	adcx	%rdi,$acc6		# cf=0

	mulx	%rbp,%rax,%rbx		# a[3]*b[0]
	adox	%rax,$acc3
	adcx	%rbx,$acc4
	mulx	%rcx,%rax,%rbx		# a[3]*b[1]
	adox	%rax,$acc4
	adcx	%rbx,$acc5
	mulx	(%rsp),%rax,%rbx	# a[3]*b[2] (offloaded above)
	adox	%rax,$acc5
	adcx	%rbx,$acc6
	mulx	$acc7,%rax,$acc7	# a[3]*b[3]
	mov	\$38,%edx		# 2^256 == 38 (mod 2^255-19), for .Lreduce64
	adox	%rax,$acc6
	adcx	%rdi,$acc7		# cf=0
	adox	%rdi,$acc7		# of=0

	jmp	.Lreduce64
.Lfe64_mul_epilogue:
.cfi_endproc
.size	x25519_fe64_mul,.-x25519_fe64_mul
# x25519_fe64_sqr(h, f): 4-limb squaring.  First compute the
# off-diagonal products a[i]*a[j] (i<j) into $acc1..$acc6, then double
# them while interleaving the diagonal squares a[i]^2 via the second
# carry chain.  Ends with %edx=38 and falls through to .Lreduce64.
.globl	x25519_fe64_sqr
.type	x25519_fe64_sqr,\@function,2
.align	32
x25519_fe64_sqr:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdi			# offload dst
.cfi_push	%rdi
	lea	-8*2(%rsp),%rsp		# scratch (keeps frame shape of fe64_mul)
.cfi_adjust_cfa_offset	16
.Lfe64_sqr_body:

	mov	8*0(%rsi),%rdx		# a[0]
	mov	8*1(%rsi),%rcx		# a[1]
	mov	8*2(%rsi),%rbp		# a[2]
	mov	8*3(%rsi),%rsi		# a[3]

	mulx	%rdx,$acc0,$acc7	# a[0]*a[0]
	mulx	%rcx,$acc1,%rax		# a[0]*a[1]
	xor	%edi,%edi		# cf=0, of=0, zero carry sink
	mulx	%rbp,$acc2,%rbx		# a[0]*a[2]
	adcx	%rax,$acc2
	mulx	%rsi,$acc3,$acc4	# a[0]*a[3]
	mov	%rcx,%rdx		# a[1]
	adcx	%rbx,$acc3
	adcx	%rdi,$acc4		# cf=0

	mulx	%rbp,%rax,%rbx		# a[1]*a[2]
	adox	%rax,$acc3
	adcx	%rbx,$acc4
	mulx	%rsi,%rax,$acc5		# a[1]*a[3]
	mov	%rbp,%rdx		# a[2]
	adox	%rax,$acc4
	adcx	%rdi,$acc5

	mulx	%rsi,%rax,$acc6		# a[2]*a[3]
	mov	%rcx,%rdx		# a[1]
	adox	%rax,$acc5
	adcx	%rdi,$acc6		# cf=0
	adox	%rdi,$acc6		# of=0

	adcx	$acc1,$acc1		# double the off-diagonal terms ...
	adox	$acc7,$acc1		# ... while folding in a[0]^2 high half
	adcx	$acc2,$acc2
	mulx	%rdx,%rax,%rbx		# a[1]*a[1]
	mov	%rbp,%rdx		# a[2]
	adcx	$acc3,$acc3
	adox	%rax,$acc2
	adcx	$acc4,$acc4
	adox	%rbx,$acc3
	mulx	%rdx,%rax,%rbx		# a[2]*a[2]
	mov	%rsi,%rdx		# a[3]
	adcx	$acc5,$acc5
	adox	%rax,$acc4
	adcx	$acc6,$acc6
	adox	%rbx,$acc5
	mulx	%rdx,%rax,$acc7		# a[3]*a[3]
	mov	\$38,%edx		# fold constant for .Lreduce64
	adox	%rax,$acc6
	adcx	%rdi,$acc7		# cf=0
	adox	%rdi,$acc7		# of=0

	jmp	.Lreduce64
# Common tail for fe64_mul/fe64_sqr: fold the high 256 bits of the
# product into the low ones using 2^256 == 38 (mod 2^255-19).
# On entry: $acc0-3 = low half, $acc4-7 = high half, %edx = 38,
# %rdi = 0 with CF and OF clear, dst pointer parked on the stack.
# Result is only weakly reduced (one fold), i.e. < 2^256.
.align	32
.Lreduce64:
	mulx	$acc4,%rax,%rbx		# acc4*38
	adcx	%rax,$acc0
	adox	%rbx,$acc1
	mulx	$acc5,%rax,%rbx		# acc5*38
	adcx	%rax,$acc1
	adox	%rbx,$acc2
	mulx	$acc6,%rax,%rbx		# acc6*38
	adcx	%rax,$acc2
	adox	%rbx,$acc3
	mulx	$acc7,%rax,$acc4	# acc7*38
	adcx	%rax,$acc3
	adox	%rdi,$acc4		# collect both carry chains ...
	adcx	%rdi,$acc4		# ... into $acc4

	mov	8*2(%rsp),%rdi		# restore dst (pushed in the prologue)
	imulq	%rdx,$acc4		# carry*38

	add	$acc4,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax

	add	%rax,$acc0
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

	mov	8*3(%rsp),%r15
.cfi_restore	%r15
	mov	8*4(%rsp),%r14
.cfi_restore	%r14
	mov	8*5(%rsp),%r13
.cfi_restore	%r13
	mov	8*6(%rsp),%r12
.cfi_restore	%r12
	mov	8*7(%rsp),%rbx
.cfi_restore	%rbx
	mov	8*8(%rsp),%rbp
.cfi_restore	%rbp
	lea	8*9(%rsp),%rsp		# release 16-byte scratch + %rdi slot + 6 saved regs
.cfi_adjust_cfa_offset	-72		# frame is 72 bytes (lea releases 8*9), and the
					# stack SHRINKS, so the adjustment is -72, not +88
.Lfe64_sqr_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_sqr,.-x25519_fe64_sqr
# x25519_fe64_mul121666(h, f): multiply by the curve constant with four
# mulx operations, fold the top carry via *38, then a second single-bit
# fold.  Leaf function: no frame, no saved registers.
.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666,\@function,2
.align	32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:		# NOTE(review): body label precedes .cfi_startproc here,
.cfi_startproc			# unlike the fe51 functions - confirm ordering is intentional
	mov	\$121666,%edx
	mulx	8*0(%rsi),$acc0,%rcx	# f[0]*121666
	mulx	8*1(%rsi),$acc1,%rax	# f[1]*121666
	add	%rcx,$acc1
	mulx	8*2(%rsi),$acc2,%rcx	# f[2]*121666
	adc	%rax,$acc2
	mulx	8*3(%rsi),$acc3,%rax	# f[3]*121666
	adc	%rcx,$acc3
	adc	\$0,%rax
	imulq	\$38,%rax,%rax		# fold carry: 2^256 == 38 (mod 2^255-19)
	add	%rax,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,$acc3
	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax
	add	%rax,$acc0
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)
.Lfe64_mul121666_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666
# x25519_fe64_add(h, f, g): 256-bit addition with double carry fold
# (the second sbb/and/add catches the carry the first fold can itself
# produce).  Leaf function.
.globl	x25519_fe64_add
.type	x25519_fe64_add,\@function,3
.align	32
x25519_fe64_add:
.Lfe64_add_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0		# load f[0..3]
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	add	8*0(%rdx),$acc0		# accumulate g[0..3]
	adc	8*1(%rdx),$acc1
	adc	8*2(%rdx),$acc2
	adc	8*3(%rdx),$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax		# 2^256 == 38 (mod 2^255-19)
	add	%rax,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	adc	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	sbb	%rax,%rax		# second fold, cf -> mask
	mov	$acc3,8*3(%rdi)
	and	\$38,%rax
	add	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_add_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_add,.-x25519_fe64_add
# x25519_fe64_sub(h, f, g): 256-bit subtraction, mirror image of
# fe64_add with borrows folded via 38.  Leaf function.
.globl	x25519_fe64_sub
.type	x25519_fe64_sub,\@function,3
.align	32
x25519_fe64_sub:
.Lfe64_sub_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0		# load f[0..3]
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	sub	8*0(%rdx),$acc0		# subtract g[0..3]
	sbb	8*1(%rdx),$acc1
	sbb	8*2(%rdx),$acc2
	sbb	8*3(%rdx),$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax		# borrow of 2^256 == subtracting 38
	sub	%rax,$acc0
	sbb	\$0,$acc1
	sbb	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	sbb	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	sbb	%rax,%rax		# second fold, cf -> mask
	mov	$acc3,8*3(%rdi)
	and	\$38,%rax
	sub	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_sub_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_sub,.-x25519_fe64_sub
# x25519_fe64_tobytes(out, f): fully reduce modulo 2^255-19 and store.
# First pass adds 19 (or 38 if the top bit was set) to push any value
# >= modulus over 2^255; second pass subtracts the 19 back when the
# first addition did not overflow, i.e. the value was already reduced.
# Store is plain little-endian, which IS the byte encoding on x86_64.
.globl	x25519_fe64_tobytes
.type	x25519_fe64_tobytes,\@function,2
.align	32
x25519_fe64_tobytes:
.Lfe64_to_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	################################# reduction modulo 2^255-19
	lea	($acc3,$acc3),%rax
	sar	\$63,$acc3		# most significant bit -> mask
	shr	\$1,%rax		# most significant bit cleared
	and	\$19,$acc3
	add	\$19,$acc3		# compare to modulus in the same go
	add	$acc3,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,%rax

	lea	(%rax,%rax),$acc3
	sar	\$63,%rax		# most significant bit -> mask
	shr	\$1,$acc3		# most significant bit cleared
	not	%rax
	and	\$19,%rax
	sub	%rax,$acc0		# undo the +19 when value was < modulus
	sbb	\$0,$acc1
	sbb	\$0,$acc2
	sbb	\$0,$acc3

	mov	$acc0,8*0(%rdi)
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)

.Lfe64_to_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
} else {
# No ADX in the toolchain: x25519_fe64_eligible() returns 0 so the C
# side never takes the fe64 path; every other fe64 entry point becomes
# a trap stub (0x0f,0x0b is ud2) so an erroneous call faults loudly
# instead of computing garbage.
$code.=<<___;
.globl	x25519_fe64_eligible
.type	x25519_fe64_eligible,\@abi-omnipotent
.align	32
x25519_fe64_eligible:
.cfi_startproc
	xor	%eax,%eax		# never eligible
	ret
.cfi_endproc
.size	x25519_fe64_eligible,.-x25519_fe64_eligible

.globl	x25519_fe64_mul
.type	x25519_fe64_mul,\@abi-omnipotent
.globl	x25519_fe64_sqr
.globl	x25519_fe64_mul121666
.globl	x25519_fe64_add
.globl	x25519_fe64_sub
.globl	x25519_fe64_tobytes
x25519_fe64_mul:
x25519_fe64_sqr:
x25519_fe64_mul121666:
x25519_fe64_add:
x25519_fe64_sub:
x25519_fe64_tobytes:
.cfi_startproc
	.byte	0x0f,0x0b	# ud2
	ret
.cfi_endproc
.size	x25519_fe64_mul,.-x25519_fe64_mul
___
}

$code.=<<___;
.asciz	"X25519 primitives for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
___
if ($win64) {
# Win64 structured-exception-handling (SEH) unwind support.
$rec="%rcx";		# EXCEPTION_RECORD*
$frame="%rdx";		# establisher frame
$context="%r8";		# CONTEXT*
$disp="%r9";		# DISPATCHER_CONTEXT*

$code.=<<___;
.extern	__imp_RtlVirtualUnwind

# Handler for the frameless fe64 leaf routines: nothing to restore,
# only decide whether Rip is still inside the prologue.
.type	short_handler,\@abi-omnipotent
.align	16
short_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp
	jmp	.Lcommon_seh_tail
.size	short_handler,.-short_handler
# Handler for the routines that save six non-volatile registers:
# HandlerData = [end-of-prologue, epilogue, frame-size] labels/values;
# when Rip is inside the body, recover the saved registers from the
# frame and fix up the CONTEXT before resuming the unwind.
.type	full_handler,\@abi-omnipotent
.align	16
full_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	8(%r11),%r10d		# HandlerData[2], frame size
	lea	(%rax,%r10),%rax	# saved registers sit just below frame top
	mov	-8(%rax),%rbp
	mov	-16(%rax),%rbx
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT) in quadwords
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	full_handler,.-full_handler
# .pdata: one (begin, end, unwind-info) RUNTIME_FUNCTION triple per
# global function emitted above.
.section	.pdata
.align	4
	.rva	.LSEH_begin_x25519_fe51_mul
	.rva	.LSEH_end_x25519_fe51_mul
	.rva	.LSEH_info_x25519_fe51_mul

	.rva	.LSEH_begin_x25519_fe51_sqr
	.rva	.LSEH_end_x25519_fe51_sqr
	.rva	.LSEH_info_x25519_fe51_sqr

	.rva	.LSEH_begin_x25519_fe51_mul121666
	.rva	.LSEH_end_x25519_fe51_mul121666
	.rva	.LSEH_info_x25519_fe51_mul121666
___
# fe64 entries exist only when the ADX path was emitted.
$code.=<<___	if ($addx);
	.rva	.LSEH_begin_x25519_fe64_mul
	.rva	.LSEH_end_x25519_fe64_mul
	.rva	.LSEH_info_x25519_fe64_mul

	.rva	.LSEH_begin_x25519_fe64_sqr
	.rva	.LSEH_end_x25519_fe64_sqr
	.rva	.LSEH_info_x25519_fe64_sqr

	.rva	.LSEH_begin_x25519_fe64_mul121666
	.rva	.LSEH_end_x25519_fe64_mul121666
	.rva	.LSEH_info_x25519_fe64_mul121666

	.rva	.LSEH_begin_x25519_fe64_add
	.rva	.LSEH_end_x25519_fe64_add
	.rva	.LSEH_info_x25519_fe64_add

	.rva	.LSEH_begin_x25519_fe64_sub
	.rva	.LSEH_end_x25519_fe64_sub
	.rva	.LSEH_info_x25519_fe64_sub

	.rva	.LSEH_begin_x25519_fe64_tobytes
	.rva	.LSEH_end_x25519_fe64_tobytes
	.rva	.LSEH_info_x25519_fe64_tobytes
___
# .xdata: unwind info blocks.  Leading byte 9 selects the
# handler-provided format; then the handler RVA, followed by the
# HandlerData the handlers read (body/epilogue labels and, for
# full_handler, the frame size).
$code.=<<___;
.section	.xdata
.align	8
.LSEH_info_x25519_fe51_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul_body,.Lfe51_mul_epilogue	# HandlerData[]
	.long	88,0					# frame size
.LSEH_info_x25519_fe51_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_sqr_body,.Lfe51_sqr_epilogue	# HandlerData[]
	.long	88,0
.LSEH_info_x25519_fe51_mul121666:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul121666_body,.Lfe51_mul121666_epilogue	# HandlerData[]
	.long	88,0
___
$code.=<<___	if ($addx);
.LSEH_info_x25519_fe64_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_mul_body,.Lfe64_mul_epilogue	# HandlerData[]
	.long	72,0					# frame size
.LSEH_info_x25519_fe64_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_sqr_body,.Lfe64_sqr_epilogue	# HandlerData[]
	.long	72,0
.LSEH_info_x25519_fe64_mul121666:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_mul121666_body,.Lfe64_mul121666_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_add:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_add_body,.Lfe64_add_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_sub:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_sub_body,.Lfe64_sub_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_tobytes:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_to_body,.Lfe64_to_epilogue	# HandlerData[]
___
}

# Standard perlasm post-processing: expand any `...` expressions (none
# are emitted above, but kept for uniformity), flush everything through
# the xlate pipe, and propagate a translator failure via close's status.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";
|