#ifndef __POLY1305ASM__
#define __POLY1305ASM__
/* field arithmetic used for computing poly1305 */
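/*
 * Conventions apparent from the code below (register and stack set-up is
 * assumed to come from the accompanying .S file):
 *
 *   - all values are radix-2^64 limb vectors, low limb first, and all
 *     arithmetic is modulo p = 2^130 - 5;
 *   - m is a byte offset into the message buffer addressed by %rsi; the
 *     16-byte block m+0(%rsi), m+8(%rsi) is used as the padded value
 *     m0 + m1*2^64 + 2^128;
 *   - t is a byte offset into the table of key powers addressed by %rdi;
 *     t+0, t+8, t+16 are the limbs of a power of tau (presumably the
 *     clamped Poly1305 key r): tau itself fits in two limbs, higher
 *     powers tau^n in three;
 *   - the accumulator h lives in %r8,%r9,%r10 (plus %r11 for a fourth,
 *     unreduced limb); 120(%rsp), 128(%rsp), 136(%rsp) are scratch.
 *
 * Partial products landing at 2^192 and 2^256 are folded down on the fly
 * using 2^130 == 5 (mod p), i.e. x*2^192 == x*2^64 + x*2^62 and
 * x*2^256 == x*2^128 + x*2^126; the shld/shlq $62 pairs below implement
 * this 5 = 4 + 1 split.
 */

/*
 * fe1305_mul_taun(m,t): multiply the padded message block at m(%rsi) by
 * the three-limb power of tau at t(%rdi). The result, congruent to the
 * product mod p, is left as four limbs in %r13,%r14,%r15,%rcx (ready for
 * fe1305_add_product). Clobbers %rax,%rdx,%rbx,%rbp and 120/128(%rsp);
 * the accumulator in %r8-%r11 is not touched.
 */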
#define fe1305_mul_taun(m,t) \
\
movq m+0(%rsi),%rax; \
mulq t+16(%rdi); \
movq %rax,120(%rsp); \
movq %rdx,%rbp; \
xorq %r15,%r15; \
\
movq m+8(%rsi),%rax; \
mulq t+16(%rdi); \
addq %rax,%rbp; \
adcq $0,%r15; \
\
movq %rdx,%rbx; \
xorq %rcx,%rcx; \
addq t+16(%rdi),%rbx; \
adcq $0,%rcx; \
\
movq m+8(%rsi),%rax; \
mulq t+8(%rdi); \
movq %rax,128(%rsp); \
addq %rdx,%rbp; \
adcq $0,%r15; \
addq t+8(%rdi),%rbp; \
adcq $0,%r15; \
\
movq %rbp,%r13; \
movq %r15,%r14; \
shld $62,%r13,%r14; \
shlq $62,%r13; \
\
movq %rbx,%rax; \
movq %rcx,%rdx; \
shld $62,%rax,%rdx; \
shlq $62,%rax; \
addq %rax,%rbp; \
adcq %rdx,%r15; \
\
movq m+0(%rsi),%rax; \
mulq t+0(%rdi); \
addq %rax,%r13; \
adcq $0,%r14; \
addq %rdx,%rbp; \
adcq $0,%r15; \
\
addq t+0(%rdi),%rbx; \
adcq $0,%rcx; \
addq 120(%rsp),%rbx; \
adcq $0,%rcx; \
addq 128(%rsp),%rbx; \
adcq $0,%rcx; \
\
movq m+0(%rsi),%rax; \
mulq t+8(%rdi); \
addq %rax,%rbp; \
adcq $0,%r15; \
addq %rdx,%rbx; \
adcq $0,%rcx; \
\
movq m+8(%rsi),%rax; \
mulq t+0(%rdi); \
addq %rax,%rbp; \
adcq $0,%r15; \
addq %rdx,%rbx; \
adcq $0,%rcx; \
\
addq %rbp,%r14; \
adcq $0,%r15; \
\
addq %rbx,%r15; \
adcq $0,%rcx;
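
/*
 * fe1305_mul_taunr(t): multiply the accumulator h = %r8 + %r9*2^64 +
 * %r10*2^128 by the three-limb power of tau at t(%rdi), leaving the
 * four-limb result (congruent to h*tau^n mod p) in %r8,%r9,%r10,%r11.
 * Clobbers %rax,%rdx,%rbx,%rbp,%rcx,%r12,%r13 and 120/128/136(%rsp).
 */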
#define fe1305_mul_taunr(t) \
\
movq %r8,%rbx; \
movq %r9,%rbp; \
movq %r10,%rcx; \
\
movq %rbp,%rax; \
mulq t+16(%rdi); \
movq %rax,%r12; \
xorq %r13,%r13; \
movq %rdx,%r10; \
xorq %r11,%r11; \
\
movq %rcx,%rax; \
mulq t+8(%rdi); \
addq %rax,%r12; \
adcq %r13,%r13; \
addq %rdx,%r10; \
adcq %r11,%r11; \
\
movq %rcx,%rax; \
mulq t+16(%rdi); \
addq %rax,%r10; \
adcq %r11,%r11; \
\
movq %rbx,%rax; \
mulq t+16(%rdi); \
movq %rax,120(%rsp); \
addq %rdx,%r12; \
adcq $0,%r13; \
movq %rbp,%rax; \
mulq t+8(%rdi); \
movq %rax,128(%rsp); \
addq %rdx,%r12; \
adcq $0,%r13; \
movq %rcx,%rax; \
mulq t+0(%rdi); \
movq %rax,136(%rsp); \
addq %rdx,%r12; \
adcq $0,%r13; \
\
movq %r12,%r8; \
movq %r13,%r9; \
shld $62,%r8,%r9; \
shlq $62,%r8; \
\
movq %r10,%rax; \
movq %r11,%rdx; \
shld $62,%rax,%rdx; \
shlq $62,%rax; \
addq %rax,%r12; \
adcq %rdx,%r13; \
\
movq %rbx,%rax; \
mulq t+0(%rdi); \
addq %rax,%r8; \
adcq $0,%r9; \
addq %rdx,%r12; \
adcq $0,%r13; \
\
movq %rbx,%rax; \
mulq t+8(%rdi); \
addq %rax,%r12; \
adcq $0,%r13; \
addq %rdx,%r10; \
adcq $0,%r11; \
\
movq %rbp,%rax; \
mulq t+0(%rdi); \
addq %rax,%r12; \
adcq $0,%r13; \
addq %rdx,%r10; \
adcq $0,%r11; \
\
addq 120(%rsp),%r10; \
adcq $0,%r11; \
addq 128(%rsp),%r10; \
adcq $0,%r11; \
addq 136(%rsp),%r10; \
adcq $0,%r11; \
\
addq %r12,%r9; \
adcq $0,%r13; \
\
addq %r13,%r10; \
adcq $0,%r11;
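
/*
 * fe1305_mul_tau(m,t): like fe1305_mul_taun, but for a two-limb
 * multiplier (t+16 is not used), i.e. tau itself. Multiplies the padded
 * message block at m(%rsi) by t+0(%rdi), t+8(%rdi) and leaves the
 * four-limb result in %r13,%r14,%r15,%rcx. Clobbers %rax,%rdx,%rbx,%rbp;
 * %r8-%r11 are not touched.
 */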
#define fe1305_mul_tau(m,t) \
\
movq t+8(%rdi),%r13; \
xorq %r14,%r14; \
movq t+8(%rdi),%rbp; \
xorq %r15,%r15; \
xorq %rbx,%rbx; \
xorq %rcx,%rcx; \
\
movq m+8(%rsi),%rax; \
mulq t+8(%rdi); \
addq %rax,%rbx; \
adcq $0,%rcx; \
addq %rdx,%r13; \
adcq $0,%r14; \
addq %rdx,%rbp; \
adcq $0,%r15; \
\
addq t+0(%rdi),%rbx; \
adcq $0,%rcx; \
\
shld $62,%r13,%r14; \
shlq $62,%r13; \
\
movq m+0(%rsi),%rax; \
mulq t+0(%rdi); \
addq %rax,%r13; \
adcq $0,%r14; \
addq %rdx,%rbp; \
adcq $0,%r15; \
\
movq m+0(%rsi),%rax; \
mulq t+8(%rdi); \
addq %rax,%rbp; \
adcq $0,%r15; \
addq %rdx,%rbx; \
adcq $0,%rcx; \
\
movq m+8(%rsi),%rax; \
mulq t+0(%rdi); \
addq %rax,%rbp; \
adcq $0,%r15; \
addq %rdx,%rbx; \
adcq $0,%rcx; \
\
addq %rbp,%r14; \
adcq $0,%r15; \
\
addq %rbx,%r15; \
adcq $0,%rcx;
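
/*
 * fe1305_mul_taur(t): multiply the accumulator h = %r8 + %r9*2^64 +
 * %r10*2^128 by the two-limb tau at t(%rdi), leaving the four-limb
 * result in %r8,%r9,%r10,%r11. Clobbers %rax,%rdx,%rbx,%rbp,%rcx,
 * %r12,%r13.
 */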
#define fe1305_mul_taur(t) \
\
movq %r8,%rbx; \
movq %r9,%rbp; \
movq %r10,%rcx; \
\
movq %rcx,%rax; \
mulq t+8(%rdi); \
movq %rax,%r8; \
xorq %r9,%r9; \
movq %rax,%r12; \
xorq %r13,%r13; \
movq %rdx,%r10; \
xorq %r11,%r11; \
xorq %rax,%rax; \
shld $62,%rdx,%rax; \
shlq $62,%rdx; \
addq %rdx,%r12; \
adcq %rax,%r13; \
\
movq %rbp,%rax; \
mulq t+8(%rdi); \
addq %rax,%r10; \
adcq $0,%r11; \
addq %rdx,%r8; \
adcq $0,%r9; \
addq %rdx,%r12; \
adcq $0,%r13; \
\
movq %rcx,%rax; \
mulq t+0(%rdi); \
addq %rax,%r10; \
adcq $0,%r11; \
addq %rdx,%r8; \
adcq $0,%r9; \
addq %rdx,%r12; \
adcq $0,%r13; \
\
shld $62,%r8,%r9; \
shlq $62,%r8; \
\
movq %rbx,%rax; \
mulq t+0(%rdi); \
addq %rax,%r8; \
adcq $0,%r9; \
addq %rdx,%r12; \
adcq $0,%r13; \
\
movq %rbx,%rax; \
mulq t+8(%rdi); \
addq %rax,%r12; \
adcq $0,%r13; \
addq %rdx,%r10; \
adcq $0,%r11; \
\
movq %rbp,%rax; \
mulq t+0(%rdi); \
addq %rax,%r12; \
adcq $0,%r13; \
addq %rdx,%r10; \
adcq $0,%r11; \
\
addq %r12,%r9; \
adcq $0,%r13; \
\
addq %r13,%r10; \
adcq $0,%r11;
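
/*
 * fe1305_add_product(): add the four-limb product produced by
 * fe1305_mul_taun/fe1305_mul_tau (%r13,%r14,%r15,%rcx) into the
 * four-limb accumulator %r8,%r9,%r10,%r11.
 */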
#define fe1305_add_product() \
\
addq %r13,%r8; \
adcq %r14,%r9; \
adcq %r15,%r10; \
adcq %rcx,%r11;
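
/*
 * fe1305_add_msg_block(m): add the 16-byte message block at m(%rsi),
 * plus the 2^128 padding bit, into the accumulator:
 * (%r8,%r9,%r10) += m0 + m1*2^64 + 2^128.
 */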
#define fe1305_add_msg_block(m) \
\
addq m+0(%rsi),%r8; \
adcq m+8(%rsi),%r9; \
adcq $1,%r10;
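
/*
 * fe1305_reduce_4l(): partially reduce the four-limb accumulator
 * h = %r8 + %r9*2^64 + %r10*2^128 + %r11*2^192 modulo p = 2^130 - 5,
 * replacing it by (h mod 2^130) + 5*(h >> 130); the multiple of 5 is
 * added as 4*c + c. The result fits back in %r8,%r9,%r10 with %r10
 * small. mask2 and mask2c are constants defined outside this header,
 * presumably 0x3 and its complement.
 */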
#define fe1305_reduce_4l() \
\
movq %r10,%r12; \
\
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r12; \
\
addq %r12,%r8; \
adcq %r11,%r9; \
adcq $0,%r10; \
\
shrd $2,%r11,%r12; \
shrq $2,%r11; \
\
addq %r12,%r8; \
adcq %r11,%r9; \
adcq $0,%r10;
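
/*
 * A plausible composition of these macros for one 4-block group of the
 * main loop (a sketch, not taken from the accompanying .S file): with
 * precomputed tau^4, tau^3, tau^2 (three limbs each) and tau (two
 * limbs), blocks m1..m4 are absorbed as
 *
 *   h = (h + m1 + 2^128)*tau^4 + (m2 + 2^128)*tau^3
 *     + (m3 + 2^128)*tau^2 + (m4 + 2^128)*tau        (mod 2^130 - 5)
 *
 * i.e. fe1305_add_msg_block(m1); fe1305_mul_taunr(tau^4); then, for each
 * remaining block, fe1305_mul_taun or fe1305_mul_tau followed by
 * fe1305_add_product(); and finally fe1305_reduce_4l().
 */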
#endif