/* lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maa44-g4/poly1305_asm.h */
#ifndef __POLY1305ASM__
#define __POLY1305ASM__
/* field arithmetic used for computing poly1305 */
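/* the macros below use an unsaturated radix-2^44 representation:
   h = h0 + 2^44*h1 + 2^88*h2 with 44-, 44- and 42-bit limbs;
   since 2^130 = 5 (mod 2^130 - 5), a cross term landing at 2^132
   folds back in with factor 4*5 = 20, which is where the imul $20
   scalings come from; pmask1-pmask4, mask44, mask42 and twoexp41
   are assumed to hold the matching bit masks and the 2^128
   padding bit */

/* fe1305_mul_taun(m,t): load the 16-byte message block at byte
   offset m from %rsi, split it into three limbs with the padding
   bit set, and multiply it by the 3-limb power of tau at byte
   offset t from %rdi; the three 128-bit products are returned in
   r14:r15, rbx:rbp and rcx:rdx */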
#define fe1305_mul_taun(m,t) \
\
movq m+0(%rsi),%r14; \
movq m+8(%rsi),%r15; \
\
movq %r14,%rbp; \
andq pmask1(%rip),%r14; /* limb 0: bits 0-43 */ \
movq %r15,%rbx; \
andq pmask2(%rip),%rbp; \
shrq $44,%rbp; /* bits 44-63 of the low qword */ \
andq pmask3(%rip),%r15; \
shlq $20,%r15; \
orq %rbp,%r15; /* limb 1: bits 44-87 */ \
andq pmask4(%rip),%rbx; \
shrq $24,%rbx; /* limb 2: bits 88-127 */ \
orq twoexp41(%rip),%rbx; /* set the 2^128 padding bit */ \
\
movq %r14,120(%rsp); \
movq %r15,128(%rsp); \
movq %rbx,136(%rsp); \
\
movq t+0(%rdi),%rax; \
mulq 120(%rsp); \
movq %rax,%r14; \
movq %rdx,%r15; \
\
movq t+8(%rdi),%rax; \
imul $20,%rax,%rax; \
mulq 136(%rsp); \
addq %rax,%r14; \
adcq %rdx,%r15; \
\
movq t+16(%rdi),%rax; \
imul $20,%rax,%rax; \
movq %rax,144(%rsp); \
mulq 128(%rsp); \
addq %rax,%r14; \
adcq %rdx,%r15; \
\
movq t+0(%rdi),%rax; \
mulq 128(%rsp); \
movq %rax,%rbx; \
movq %rdx,%rbp; \
\
movq t+8(%rdi),%rax; \
mulq 120(%rsp); \
addq %rax,%rbx; \
adcq %rdx,%rbp; \
\
movq 144(%rsp),%rax; \
mulq 136(%rsp); \
addq %rax,%rbx; \
adcq %rdx,%rbp; \
\
movq t+0(%rdi),%rax; \
mulq 136(%rsp); \
movq %rax,%rcx; \
movq %rdx,144(%rsp); \
\
movq t+8(%rdi),%rax; \
mulq 128(%rsp); \
addq %rax,%rcx; \
adcq 144(%rsp),%rdx; \
movq %rdx,144(%rsp); \
\
movq t+16(%rdi),%rax; \
mulq 120(%rsp); \
addq %rax,%rcx; \
adcq 144(%rsp),%rdx;
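/* fe1305_mul_taunr(t): multiply the accumulated limbs in r8, r10,
   r12 by the 3-limb power of tau at byte offset t from %rdi,
   leaving the three 128-bit products in r8:r9, r10:r11, r12:r13 */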
#define fe1305_mul_taunr(t) \
\
movq %r8,%r14; \
movq %r10,%r15; \
movq %r12,%rbx; \
\
movq t+0(%rdi),%rax; \
mulq %r14; \
movq %rax,%r8; \
movq %rdx,%r9; \
\
movq t+8(%rdi),%rax; \
imul $20,%rax,%rax; \
mulq %rbx; \
addq %rax,%r8; \
adcq %rdx,%r9; \
\
movq t+16(%rdi),%rax; \
imul $20,%rax,%rax; \
movq %rax,%rbp; \
mulq %r15; \
addq %rax,%r8; \
adcq %rdx,%r9; \
\
movq t+0(%rdi),%rax; \
mulq %r15; \
movq %rax,%r10; \
movq %rdx,%r11; \
\
movq t+8(%rdi),%rax; \
mulq %r14; \
addq %rax,%r10; \
adcq %rdx,%r11; \
\
movq %rbp,%rax; \
mulq %rbx; \
addq %rax,%r10; \
adcq %rdx,%r11; \
\
movq t+0(%rdi),%rax; \
mulq %rbx; \
movq %rax,%r12; \
movq %rdx,%r13; \
\
movq t+8(%rdi),%rax; \
mulq %r15; \
addq %rax,%r12; \
adcq %rdx,%r13; \
\
movq t+16(%rdi),%rax; \
mulq %r14; \
addq %rax,%r12; \
adcq %rdx,%r13;
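/* fe1305_add_product(): add the 3x128-bit product computed by
   fe1305_mul_taun into the 3x128-bit accumulator r8:r9, r10:r11,
   r12:r13 */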
#define fe1305_add_product() \
\
addq %r14,%r8; \
adcq %r15,%r9; \
\
addq %rbx,%r10; \
adcq %rbp,%r11; \
\
addq %rcx,%r12; \
adcq %rdx,%r13;
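/* fe1305_add_msg_block(m): load the 16-byte block at byte offset m
   from %rsi, split it into limbs with the 2^128 padding bit set,
   and add the limbs into r8, r10, r12; the limbs leave enough
   headroom that no carry propagation is needed here */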
#define fe1305_add_msg_block(m) \
\
movq m+0(%rsi),%r14; \
movq m+8(%rsi),%r15; \
\
movq %r14,%rbp; \
andq pmask1(%rip),%r14; \
movq %r15,%rbx; \
andq pmask2(%rip),%rbp; \
shrq $44,%rbp; \
andq pmask3(%rip),%r15; \
shlq $20,%r15; \
orq %rbp,%r15; \
andq pmask4(%rip),%rbx; \
shrq $24,%rbx; \
movq $1,%rbp; \
shlq $40,%rbp; \
orq %rbp,%rbx; /* set the 2^128 padding bit */ \
\
addq %r14,%r8; \
addq %r15,%r10; \
addq %rbx,%r12;
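/* fe1305_reduce_3l_128bits(): fold the 3x128-bit accumulator back
   to three limbs: shld pulls out the bits above 44/44/42 of each
   double-word, the partial carries ripple upward, and the bits at
   2^130 and above wrap around multiplied by 5 */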
#define fe1305_reduce_3l_128bits() \
\
shld $20,%r8,%r9; \
shld $20,%r10,%r11; \
shld $22,%r12,%r13; \
\
movq mask44(%rip),%rbp; \
\
andq %rbp,%r8; \
\
andq %rbp,%r10; \
addq %r9,%r10; \
\
andq mask42(%rip),%r12; \
addq %r11,%r12; \
\
imul $5,%r13,%r13; \
addq %r13,%r8;
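/* fe1305_reduce_3l_64bits(): one carry chain across the three
   64-bit limbs, bringing h back to 44/44/42 bits plus a small
   excess in the bottom limb from the final 5*carry */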
#define fe1305_reduce_3l_64bits() \
\
movq mask44(%rip),%rbp; \
\
movq %r8,%rdx; \
shrq $44,%rdx; \
addq %r10,%rdx; \
andq %rbp,%r8; \
\
movq %rdx,%r10; \
shrq $44,%rdx; \
addq %r12,%rdx; \
andq %rbp,%r10; \
\
movq %rdx,%r12; \
shrq $42,%rdx; \
imul $5,%rdx,%rdx; \
addq %rdx,%r8; \
andq mask42(%rip),%r12;
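/* illustrative only -- one way a caller might chain these macros
   for a 4-block Horner step, assuming block i sits at 16*(i-1)(%rsi)
   and the powers tau^4, tau^3, tau^2, tau are stored 24 bytes apart
   starting at 0(%rdi); the real schedule in the accompanying .S
   file may differ:

   fe1305_add_msg_block(0)        h += c1
   fe1305_mul_taunr(0)            h *= tau^4
   fe1305_mul_taun(16,24)         c2 * tau^3
   fe1305_add_product()
   fe1305_mul_taun(32,48)         c3 * tau^2
   fe1305_add_product()
   fe1305_mul_taun(48,72)         c4 * tau
   fe1305_add_product()
   fe1305_reduce_3l_128bits()
*/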
#endif