-rw-r--r-- 6793 lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maa64-g4/poly1305_asm.h
#ifndef __POLY1305ASM__
#define __POLY1305ASM__

/* field arithmetic used for computing poly1305 */

/* multiply the 16-byte message block at m(%rsi) by the 3-limb precomputed
   multiplier at t(%rdi); the product is left in %r13,%r14,%r15,%rcx */
#define fe1305_mul_taun(m,t) \
\
        movq m+0(%rsi),%rax; \
        mulq t+16(%rdi); \
        movq %rax,120(%rsp); \
        movq %rdx,%rbp; \
        xorq %r15,%r15; \
\
        movq m+8(%rsi),%rax; \
        mulq t+16(%rdi); \
        addq %rax,%rbp; \
        adcq $0,%r15; \
\
        movq %rdx,%rbx; \
        xorq %rcx,%rcx; \
        addq t+16(%rdi),%rbx; \
        adcq $0,%rcx; \
\
        movq m+8(%rsi),%rax; \
        mulq t+8(%rdi); \
        movq %rax,128(%rsp); \
        addq %rdx,%rbp; \
        adcq $0,%r15; \
        addq t+8(%rdi),%rbp; \
        adcq $0,%r15; \
\
        movq %rbp,%r13; \
        movq %r15,%r14; \
        shld $62,%r13,%r14; \
        shlq $62,%r13; \
\
        movq %rbx,%rax; \
        movq %rcx,%rdx; \
        shld $62,%rax,%rdx; \
        shlq $62,%rax; \
        addq %rax,%rbp; \
        adcq %rdx,%r15; \
\
        movq m+0(%rsi),%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r13; \
        adcq $0,%r14; \
        addq %rdx,%rbp; \
        adcq $0,%r15; \
\
        addq t+0(%rdi),%rbx; \
        adcq $0,%rcx; \
        addq 120(%rsp),%rbx; \
        adcq $0,%rcx; \
        addq 128(%rsp),%rbx; \
        adcq $0,%rcx; \
\
        movq m+0(%rsi),%rax; \
        mulq t+8(%rdi); \
        addq %rax,%rbp; \
        adcq $0,%r15; \
        addq %rdx,%rbx; \
        adcq $0,%rcx; \
\
        movq m+8(%rsi),%rax; \
        mulq t+0(%rdi); \
        addq %rax,%rbp; \
        adcq $0,%r15; \
        addq %rdx,%rbx; \
        adcq $0,%rcx; \
\
        addq %rbp,%r14; \
        adcq $0,%r15; \
\
        addq %rbx,%r15; \
        adcq $0,%rcx; \

/* multiply the accumulator in %r8,%r9,%r10 by the 3-limb precomputed
   multiplier at t(%rdi); the result is left in %r8,%r9,%r10,%r11 */
#define fe1305_mul_taunr(t) \
\
        movq %r8,%rbx; \
        movq %r9,%rbp; \
        movq %r10,%rcx; \
\
        movq %rbp,%rax; \
        mulq t+16(%rdi); \
        movq %rax,%r12; \
        xorq %r13,%r13; \
        movq %rdx,%r10; \
        xorq %r11,%r11; \
\
        movq %rcx,%rax; \
        mulq t+8(%rdi); \
        addq %rax,%r12; \
        adcq %r13,%r13; \
        addq %rdx,%r10; \
        adcq %r11,%r11; \
\
        movq %rcx,%rax; \
        mulq t+16(%rdi); \
        addq %rax,%r10; \
        adcq %r11,%r11; \
\
        movq %rbx,%rax; \
        mulq t+16(%rdi); \
        movq %rax,120(%rsp); \
        addq %rdx,%r12; \
        adcq $0,%r13; \
        movq %rbp,%rax; \
        mulq t+8(%rdi); \
        movq %rax,128(%rsp); \
        addq %rdx,%r12; \
        adcq $0,%r13; \
        movq %rcx,%rax; \
        mulq t+0(%rdi); \
        movq %rax,136(%rsp); \
        addq %rdx,%r12; \
        adcq $0,%r13; \
\
        movq %r12,%r8; \
        movq %r13,%r9; \
        shld $62,%r8,%r9; \
        shlq $62,%r8; \
\
        movq %r10,%rax; \
        movq %r11,%rdx; \
        shld $62,%rax,%rdx; \
        shlq $62,%rax; \
        addq %rax,%r12; \
        adcq %rdx,%r13; \
\
        movq %rbx,%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r8; \
        adcq $0,%r9; \
        addq %rdx,%r12; \
        adcq $0,%r13; \
\
        movq %rbx,%rax; \
        mulq t+8(%rdi); \
        addq %rax,%r12; \
        adcq $0,%r13; \
        addq %rdx,%r10; \
        adcq $0,%r11; \
\
        movq %rbp,%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r12; \
        adcq $0,%r13; \
        addq %rdx,%r10; \
        adcq $0,%r11; \
\
        addq 120(%rsp),%r10; \
        adcq $0,%r11; \
        addq 128(%rsp),%r10; \
        adcq $0,%r11; \
        addq 136(%rsp),%r10; \
        adcq $0,%r11; \
\
        addq %r12,%r9; \
        adcq $0,%r13; \
\
        addq %r13,%r10; \
        adcq $0,%r11; \

/* multiply the 16-byte message block at m(%rsi) by the 2-limb multiplier
   at t(%rdi); the product is left in %r13,%r14,%r15,%rcx */
#define fe1305_mul_tau(m,t) \
\
        movq t+8(%rdi),%r13; \
        xorq %r14,%r14; \
        movq t+8(%rdi),%rbp; \
        xorq %r15,%r15; \
        xorq %rbx,%rbx; \
        xorq %rcx,%rcx; \
\
        movq m+8(%rsi),%rax; \
        mulq t+8(%rdi); \
        addq %rax,%rbx; \
        adcq $0,%rcx; \
        addq %rdx,%r13; \
        adcq $0,%r14; \
        addq %rdx,%rbp; \
        adcq $0,%r15; \
\
        addq t+0(%rdi),%rbx; \
        adcq $0,%rcx; \
\
        shld $62,%r13,%r14; \
        shlq $62,%r13; \
\
        movq m+0(%rsi),%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r13; \
        adcq $0,%r14; \
        addq %rdx,%rbp; \
        adcq $0,%r15; \
\
        movq m+0(%rsi),%rax; \
        mulq t+8(%rdi); \
        addq %rax,%rbp; \
        adcq $0,%r15; \
        addq %rdx,%rbx; \
        adcq $0,%rcx; \
\
        movq m+8(%rsi),%rax; \
        mulq t+0(%rdi); \
        addq %rax,%rbp; \
        adcq $0,%r15; \
        addq %rdx,%rbx; \
        adcq $0,%rcx; \
\
        addq %rbp,%r14; \
        adcq $0,%r15; \
\
        addq %rbx,%r15; \
        adcq $0,%rcx; \

/* multiply the accumulator in %r8,%r9,%r10 by the 2-limb multiplier
   at t(%rdi); the result is left in %r8,%r9,%r10,%r11 */
#define fe1305_mul_taur(t) \
\
        movq %r8,%rbx; \
        movq %r9,%rbp; \
        movq %r10,%rcx; \
\
        movq %rcx,%rax; \
        mulq t+8(%rdi); \
        movq %rax,%r8; \
        xorq %r9,%r9; \
        movq %rax,%r12; \
        xorq %r13,%r13; \
        movq %rdx,%r10; \
        xorq %r11,%r11; \
        xorq %rax,%rax; \
        shld $62,%rdx,%rax; \
        shlq $62,%rdx; \
        addq %rdx,%r12; \
        adcq %rax,%r13; \
\
        movq %rbp,%rax; \
        mulq t+8(%rdi); \
        addq %rax,%r10; \
        adcq $0,%r11; \
        addq %rdx,%r8; \
        adcq $0,%r9; \
        addq %rdx,%r12; \
        adcq $0,%r13; \
\
        movq %rcx,%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r10; \
        adcq $0,%r11; \
        addq %rdx,%r8; \
        adcq $0,%r9; \
        addq %rdx,%r12; \
        adcq $0,%r13; \
\
        shld $62,%r8,%r9; \
        shlq $62,%r8; \
\
        movq %rbx,%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r8; \
        adcq $0,%r9; \
        addq %rdx,%r12; \
        adcq $0,%r13; \
\
        movq %rbx,%rax; \
        mulq t+8(%rdi); \
        addq %rax,%r12; \
        adcq $0,%r13; \
        addq %rdx,%r10; \
        adcq $0,%r11; \
\
        movq %rbp,%rax; \
        mulq t+0(%rdi); \
        addq %rax,%r12; \
        adcq $0,%r13; \
        addq %rdx,%r10; \
        adcq $0,%r11; \
\
        addq %r12,%r9; \
        adcq $0,%r13; \
\
        addq %r13,%r10; \
        adcq $0,%r11; \

/* add the product limbs %r13,%r14,%r15,%rcx into the accumulator
   %r8,%r9,%r10,%r11 */
#define fe1305_add_product() \
\
        addq %r13,%r8; \
        adcq %r14,%r9; \
        adcq %r15,%r10; \
        adcq %rcx,%r11; \

/* add the 16-byte message block at m(%rsi), together with its 2^128
   padding bit, into the accumulator %r8,%r9,%r10 */
#define fe1305_add_msg_block(m) \
\
        addq m+0(%rsi),%r8; \
        adcq m+8(%rsi),%r9; \
        adcq $1,%r10; \

/* partially reduce the 4-limb value in %r8,%r9,%r10,%r11 modulo 2^130-5,
   folding the bits at and above 2^130 back into the low limbs; the result
   is left in %r8,%r9,%r10 (mask2 and mask2c are defined in the including
   file) */
#define fe1305_reduce_4l() \
\
        movq %r10,%r12; \
\
        andq mask2(%rip),%r10; \
        andq mask2c(%rip),%r12; \
\
        addq %r12,%r8; \
        adcq %r11,%r9; \
        adcq $0,%r10; \
\
        shrd $2,%r11,%r12; \
        shrq $2,%r11; \
\
        addq %r12,%r8; \
        adcq %r11,%r9; \
        adcq $0,%r10; \

#endif
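For reference, here is a minimal plain-C sketch, not part of lib1305, of the two simplest macros above: fe1305_add_msg_block adds a 16-byte block plus its 2^128 padding bit to the radix-2^64 accumulator, and fe1305_reduce_4l folds everything at or above 2^130 back into the low limbs using 2^130 = 5 (mod 2^130 - 5). The helper names, the u128 typedef, and the assumption that mask2 = 3 and mask2c = ~3 are mine; the sketch relies on the GCC/Clang unsigned __int128 extension and assumes little-endian loads, matching the movq loads in the assembly.

#include <stdint.h>
#include <string.h>

/* Illustrative only: h[0..3] plays the role of %r8,%r9,%r10,%r11. */
typedef unsigned __int128 u128;

/* h += (m || 1): 16-byte block plus the 2^128 padding bit (the "adcq $1"). */
static void add_msg_block(uint64_t h[4], const uint8_t m[16])
{
    uint64_t m0, m1;
    memcpy(&m0, m, 8);          /* little-endian load, like movq m+0(%rsi) */
    memcpy(&m1, m + 8, 8);

    u128 t = (u128)h[0] + m0;
    h[0] = (uint64_t)t;
    t = (u128)h[1] + m1 + (uint64_t)(t >> 64);
    h[1] = (uint64_t)t;
    h[2] += 1 + (uint64_t)(t >> 64);
}

/* Partial reduction mod p = 2^130 - 5: write h = lo + 2^130*hi and add
   5*hi = 4*hi + hi back into the low limbs (assumes mask2 = 3, mask2c = ~3). */
static void reduce_4l(uint64_t h[4])
{
    uint64_t four_hi_lo = h[2] & ~(uint64_t)3;          /* %r12 after mask2c */
    uint64_t hi_lo = (four_hi_lo >> 2) | (h[3] << 62);  /* shrd $2,%r11,%r12 */
    uint64_t hi_hi = h[3] >> 2;                         /* shrq $2,%r11      */
    uint64_t h2 = h[2] & 3;                             /* low 2 bits kept   */

    u128 t = (u128)h[0] + four_hi_lo;                   /* + 4*hi (low limb)  */
    h[0] = (uint64_t)t;
    t = (u128)h[1] + h[3] + (uint64_t)(t >> 64);        /* + 4*hi (high limb) */
    h[1] = (uint64_t)t;
    h2 += (uint64_t)(t >> 64);

    t = (u128)h[0] + hi_lo;                             /* + hi (low limb)  */
    h[0] = (uint64_t)t;
    t = (u128)h[1] + hi_hi + (uint64_t)(t >> 64);       /* + hi (high limb) */
    h[1] = (uint64_t)t;
    h[2] = h2 + (uint64_t)(t >> 64);
    h[3] = 0;   /* the assembly simply stops using %r11 at this point */
}

Note the design choice mirrored from the assembly: instead of multiplying the high part by 5, fe1305_reduce_4l adds 4*hi (the masked limb plus %r11) and hi (the same pair shifted right by 2 via shrd/shrq) in two separate add/adc chains.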