/* lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maax-g4/poly1305_asm.h */
#ifndef __POLY1305ASM__
#define __POLY1305ASM__
/* field arithmetic used for computing poly1305 */
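
/* register and symbol conventions, as used by the code below:
   %rsi points at the current message block m;
   %rdi points at the multiplier t (little-endian 64-bit limbs at
   t+0, t+8, t+16); per the taun/taur naming, t holds a power tau^i
   of the key r;
   zero, mask2, mask2c are quadword constants supplied by the
   including .S file: from their use, mask2 keeps the low 2 bits of
   limb 2 (the 2^130 boundary) and mask2c the rest */

/* fe1305_mul_taun(m,t): multiply the 16-byte block at m(%rsi), with
   its implicit 2^128 padding bit, by the 3-limb multiplier at
   t(%rdi); 5-limb product left in r13,r14,r15,rax,rcx */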
#define fe1305_mul_taun(m,t) \
\
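/* row 1: m0 times (t0,t1,t2); the xor also clears CF and OF */ \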
xorq %rcx,%rcx; \
movq m+0(%rsi),%rdx; \
\
mulx t+0(%rdi),%r13,%r14; \
mulx t+8(%rdi),%rbx,%r15; \
adcx %rbx,%r14; \
\
mulx t+16(%rdi),%rbx,%rax; \
adcx %rbx,%r15; \
adcx %rcx,%rax; \
\
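/* row 2: m1 times t, one limb up; adcx/adox run two carry chains */ \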
xorq %rdx,%rdx; \
movq m+8(%rsi),%rdx; \
\
mulx t+0(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r15; \
adox %rbp,%rax; \
\
mulx t+16(%rdi),%rbx,%rbp; \
adcx %rbx,%rax; \
adox %rbp,%rcx; \
adcx zero(%rip),%rcx; \
\
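/* row 3: the block's implicit 2^128 bit contributes t two limbs up */ \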
xorq %rdx,%rdx; \
\
adcx t+0(%rdi),%r15; \
adox %rdx,%rax; \
adcx t+8(%rdi),%rax; \
adox %rdx,%rcx; \
adcx t+16(%rdi),%rcx; \
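
/* fe1305_mul_taunr(t): multiply the 3-limb accumulator (r8,r9,r10)
   by the 3-limb multiplier at t(%rdi); 5-limb product left in
   r8,r9,r10,r11,r12 */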
#define fe1305_mul_taunr(t) \
\
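/* row 1: h0 times (t0,t1,t2) */ \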
xorq %r15,%r15; \
movq %r8,%rdx; \
\
mulx t+0(%rdi),%r8,%r12; \
mulx t+8(%rdi),%rbx,%r13; \
adcx %rbx,%r12; \
adcx %r15,%r13; \
\
mulx t+16(%rdi),%rbx,%r14; \
adcx %rbx,%r13; \
adcx %r15,%r14; \
\
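/* row 2: h1 times t, one limb up */ \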
xorq %rax,%rax; \
movq %r9,%rdx; \
\
mulx t+0(%rdi),%r9,%rbp; \
adcx %r12,%r9; \
adox %rbp,%r13; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r13; \
adox %rbp,%r14; \
\
mulx t+16(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
adcx %rax,%r15; \
\
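/* row 3: h2 times t */ \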
xorq %rax,%rax; \
movq %r10,%rdx; \
\
mulx t+0(%rdi),%r10,%rbp; \
adcx %r13,%r10; \
adox %rbp,%r14; \
\
mulx t+8(%rdi),%r11,%rbp; \
adcx %r14,%r11; \
adox %rbp,%r15; \
\
mulx t+16(%rdi),%r12,%rbp; \
adcx %r15,%r12; \
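
/* fe1305_mul_tau(m,t): multiply the 16-byte block at m(%rsi), with
   its implicit 2^128 padding bit, by the 2-limb multiplier at
   t(%rdi); product left in r13,r14,r15,rax,rcx */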
#define fe1305_mul_tau(m,t) \
\
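/* row 1: m0 times (t0,t1) */ \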
xorq %rax,%rax; \
movq m+0(%rsi),%rdx; \
\
mulx t+0(%rdi),%r13,%r14; \
mulx t+8(%rdi),%rbx,%r15; \
adcx %rbx,%r14; \
adcx %rax,%r15; \
\
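/* row 2: m1 times t, one limb up */ \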
xorq %rcx,%rcx; \
movq m+8(%rsi),%rdx; \
\
mulx t+0(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r15; \
adox %rbp,%rax; \
adcx %rcx,%rax; \
\
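/* row 3: the implicit 2^128 bit contributes t two limbs up */ \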
xorq %rdx,%rdx; \
\
adcx t+0(%rdi),%r15; \
adox %rdx,%rax; \
adcx t+8(%rdi),%rax; \
adox %rdx,%rcx; \
adcx %rdx,%rcx; \
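
/* fe1305_mul_taur(t): multiply the 3-limb accumulator (r8,r9,r10)
   by the 2-limb multiplier at t(%rdi); product left in
   r8,r9,r10,r11,r12 */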
#define fe1305_mul_taur(t) \
\
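/* row 1: h0 times (t0,t1) */ \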
xorq %r14,%r14; \
movq %r8,%rdx; \
\
mulx t+0(%rdi),%r8,%r12; \
mulx t+8(%rdi),%rbx,%r13; \
adcx %rbx,%r12; \
adcx %r14,%r13; \
\
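/* row 2: h1 times t, one limb up */ \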
xorq %rax,%rax; \
movq %r9,%rdx; \
\
mulx t+0(%rdi),%r9,%rbp; \
adcx %r12,%r9; \
adox %rbp,%r13; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r13; \
adox %rbp,%r14; \
adcx %rax,%r14; \
\
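/* row 3: h2 times t */ \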
xorq %r12,%r12; \
movq %r10,%rdx; \
\
mulx t+0(%rdi),%r10,%rbp; \
adcx %r13,%r10; \
adox %rbp,%r14; \
\
mulx t+8(%rdi),%r11,%rbp; \
adcx %r14,%r11; \
adox %rbp,%r12; \
adcx %rax,%r12; \
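
/* fe1305_add_product(): add the 5-limb product (r13,r14,r15,rax,rcx)
   into the 5-limb accumulator (r8,r9,r10,r11,r12) */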
#define fe1305_add_product() \
\
xorq %rdx,%rdx; \
\
adcx %r13,%r8; \
adcx %r14,%r9; \
adcx %r15,%r10; \
adcx %rax,%r11; \
adcx %rcx,%r12; \
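
/* fe1305_add_msg_block(m): add the 16-byte block at m(%rsi) to the
   3-limb accumulator; adcq $1 adds the 2^128 padding bit */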
#define fe1305_add_msg_block(m) \
\
addq m+0(%rsi),%r8; \
adcq m+8(%rsi),%r9; \
adcq $1,%r10; \
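
/* fe1305_reduce_5l(): reduce the 5-limb accumulator r8..r12 modulo
   p = 2^130 - 5 to 3 limbs r8,r9,r10; the part h above 2^130 is
   folded in as 5h = 4h + h */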
#define fe1305_reduce_5l() \
\
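/* r10 keeps the low 2 bits of limb 2; (r13,r11,r12) is 4h in place */ \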
movq %r10,%r13; \
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r13; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
\
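/* shift (r13,r11,r12) right by 2 to form h, then add h as well */ \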
shrd $2,%r11,%r13; \
shrd $2,%r12,%r11; \
shrq $2,%r12; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
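
/* fe1305_reduce_3l(): partial reduction of the 3-limb accumulator:
   fold the bits above 2^130 back in as 5h via the multiply by 5 */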
#define fe1305_reduce_3l() \
\
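/* h = bits above 2^130; keep the low 2 bits of limb 2 in r10 */ \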
movq %r10,%r11; \
andq mask2(%rip),%r10; \
shrq $2,%r11; \
\
imulq $5,%r11,%r11; \
addq %r11,%r8; \
adcq $0,%r9; \
adcq $0,%r10; \
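
/* end of poly1305 field-arithmetic macros */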
#endif