-rw-r--r-- 4193 lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maax-g4/poly1305_keypowers_asm.h raw
#ifndef __POLY1305KEYPOWERSASM__
#define __POLY1305KEYPOWERSASM__
/* field arithmetic used for computing the key powers */
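
/*
 * conventions, as far as they can be read off the code below: %rdi points to
 * a table of field elements mod 2^130-5, one element per 24 bytes (three
 * 64-bit limbs, low limb first); tau (presumably the clamped key r) occupies
 * the first entry and fits in two limbs.  mask2 and mask2c are defined
 * elsewhere; their use here implies that mask2 keeps the two bits of a top
 * limb that lie below 2^130 and mask2c keeps the remaining bits.
 */

/*
 * fe1305_tau_square(): tau^2 mod 2^130-5, with tau taken from %r14 (low
 * limb) and %r15 (high limb).  the three result limbs are written to the
 * second table entry, 24(%rdi)/32(%rdi)/40(%rdi), and are also left in
 * %r8:%r9:%r10; %r14 and %r15 are not modified.
 */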
#define fe1305_tau_square() \
\
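/* cross term: 2*r14*r15 into %r9:%r10:%r11 */ \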
movq %r14,%rdx; \
mulx %r15,%r9,%r10; \
movq $0,%r11; \
shld $1,%r10,%r11; \
shld $1,%r9,%r10; \
shlq $1,%r9; \
\
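/* diagonal terms r14^2 and r15^2: full 256-bit square in %r8:%r9:%r10:%r11; %r12 is zeroed for later use */ \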
xorq %r12,%r12; \
mulx %rdx,%r8,%rbx; \
adcx %rbx,%r9; \
\
movq %r15,%rdx; \
mulx %rdx,%rax,%rbx; \
adcx %rax,%r10; \
adcx %rbx,%r11; \
\
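/* reduce mod p = 2^130-5: keep the low 130 bits, add 4*q where q = bits 130 and up (2^130 = 5 mod p, 5*q = 4*q + q) */ \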
movq %r10,%r13; \
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r13; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
\
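/* shift right by 2 to recover q itself and add it in */ \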
shrd $2,%r11,%r13; \
shrq $2,%r11; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
\
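/* final fold: any overflow above 2^130, times 5, goes back into the low limb */ \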
movq %r10,%r13; \
andq mask2(%rip),%r10; \
shrq $2,%r13; \
\
imul $5,%r13,%r13; \
addq %r13,%r8; \
adcq $0,%r9; \
adcq $0,%r10; \
\
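/* store tau^2 as the second 24-byte table entry */ \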
movq %r8,24(%rdi); \
movq %r9,32(%rdi); \
movq %r10,40(%rdi); \

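/*
 * fe1305_tau_squaren(x): squares the three-limb element currently in
 * %r8:%r9:%r10 mod 2^130-5, writes the result to table entry x
 * (24*x(%rdi)), and leaves it in %r8:%r9:%r10, so repeated invocations
 * give successive squarings.
 */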
#define fe1305_tau_squaren(x) \
\
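/* cross products of a = (%r8,%r9,%r10): a0*a1, a0*a2, a1*a2 accumulated into %r12:%r13:%r14:%r15 */ \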
xorq %r15,%r15; \
movq %r8,%rdx; \
\
mulx %r9,%r12,%r13; \
\
mulx %r10,%rbx,%r14; \
adcx %rbx,%r13; \
adcx %r15,%r14; \
\
xorq %rax,%rax; \
movq %r9,%rdx; \
\
mulx %r10,%rbx,%rdx; \
adcx %rbx,%r14; \
adox %rdx,%r15; \
adcx %rax,%r15; \
\
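/* double the cross terms */ \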
shld $1,%r14,%r15; \
shld $1,%r13,%r14; \
shld $1,%r12,%r13; \
shlq $1,%r12; \
\
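/* add the diagonal terms a0^2, a1^2, a2^2: the square now sits in %r11:%r12:%r8:%r9:%r15 (a2 is only a few bits, so the high half of a2^2 is dropped) */ \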
xorq %rdx,%rdx; \
movq %r8,%rdx; \
mulx %rdx,%r11,%rbx; \
adcx %rbx,%r12; \
\
movq %r9,%rdx; \
mulx %rdx,%r8,%r9; \
adcx %r13,%r8; \
adcx %r14,%r9; \
\
movq %r10,%rdx; \
mulx %rdx,%rax,%rbx; \
adcx %rax,%r15; \
\
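/* reduce mod p = 2^130-5: keep the low 130 bits, add 4*q where q = bits 130 and up */ \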
movq %r8,%r10; \
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r8; \
\
addq %r8,%r11; \
adcq %r9,%r12; \
adcq %r15,%r10; \
\
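/* shift right by 2 to recover q itself and add it in */ \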
shrd $2,%r9,%r8; \
shrd $2,%r15,%r9; \
shrq $2,%r15; \
\
addq %r11,%r8; \
adcq %r12,%r9; \
adcq %r15,%r10; \
\
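/* final fold: overflow above 2^130, times 5, back into the low limb */ \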
movq %r10,%r15; \
andq mask2(%rip),%r10; \
shrq $2,%r15; \
\
imul $5,%r15,%r15; \
addq %r15,%r8; \
adcq $0,%r9; \
adcq $0,%r10; \
\
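/* store at table entry x; the result also stays in %r8:%r9:%r10 */ \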
movq %r8,24*x+0(%rdi); \
movq %r9,24*x+8(%rdi); \
movq %r10,24*x+16(%rdi); \

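/*
 * fe1305_mul_tau_taun(x,y): multiplies the element at table entry x by tau
 * (read as two limbs from 0(%rdi) and 8(%rdi)) mod 2^130-5 and writes the
 * product to table entry y; the result limbs end up in %r13, %r11, %rcx.
 */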
#define fe1305_mul_tau_taun(x,y) \
\
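/* b0*tau into %r8:%r9:%r10, with b = element at entry x and tau = two limbs at 0(%rdi),8(%rdi) */ \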
xorq %r11,%r11; \
movq 24*x+0(%rdi),%rdx; \
\
mulx 0(%rdi),%r8,%r9; \
mulx 8(%rdi),%rbx,%r10; \
adcx %rbx,%r9; \
adcx %r11,%r10; \
\
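/* add b1*tau one limb up; accumulator is %r8:%r9:%r10:%r11 */ \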
xorq %r12,%r12; \
movq 24*x+8(%rdi),%rdx; \
\
mulx 0(%rdi),%rbx,%rbp; \
adcx %rbx,%r9; \
adox %rbp,%r10; \
\
mulx 8(%rdi),%rbx,%rbp; \
adcx %rbx,%r10; \
adox %rbp,%r11; \
adcx %r12,%r11; \
\
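/* add b2*tau two limbs up; full product in %r8:%r9:%r10:%r11:%r12 */ \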
xorq %rax,%rax; \
movq 24*x+16(%rdi),%rdx; \
\
mulx 0(%rdi),%rbx,%rbp; \
adcx %rbx,%r10; \
adox %rbp,%r11; \
\
mulx 8(%rdi),%rbx,%rbp; \
adcx %rbx,%r11; \
adox %rbp,%r12; \
adcx %rax,%r12; \
\
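/* reduce mod p = 2^130-5: keep the low 130 bits, add 4*q where q = bits 130 and up */ \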
movq %r10,%r13; \
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r13; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
\
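/* shift right by 2 to recover q and add it in; the sum sits in %r13:%r11:%r12 */ \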
shrd $2,%r11,%r13; \
shrd $2,%r12,%r11; \
shrq $2,%r12; \
\
addq %r8,%r13; \
adcq %r9,%r11; \
adcq %r10,%r12; \
\
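/* final fold: overflow above 2^130, times 5, back into the low limb */ \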
movq %r12,%rcx; \
andq mask2(%rip),%rcx; \
shrq $2,%r12; \
\
imul $5,%r12,%r12; \
addq %r12,%r13; \
adcq $0,%r11; \
adcq $0,%rcx; \
\
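/* store tau times the entry-x element at table entry y */ \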
movq %r13,24*y+0(%rdi); \
movq %r11,24*y+8(%rdi); \
movq %rcx,24*y+16(%rdi); \

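/*
 * illustrative only (the actual schedule lives in the code that includes
 * this header): with tau stored at 0(%rdi)/8(%rdi) and loaded into
 * %r14/%r15, one possible way to extend the table is
 *
 *   fe1305_tau_square()        entry 1 = tau^2, left in %r8:%r9:%r10
 *   fe1305_tau_squaren(2)      entry 2 = tau^4, left in %r8:%r9:%r10
 *   fe1305_mul_tau_taun(2,3)   entry 3 = tau^5
 */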
#endif