lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maa44-g4/poly1305_asm.h
#ifndef __POLY1305ASM__
#define __POLY1305ASM__

/* field arithmetic used for computing poly1305 */

/* load the 16-byte message block at m(%rsi) into three radix-2^44 limbs
   (padding constant ORed into the top limb), stash the limbs at
   120/128/136(%rsp), and multiply by the key power at t(%rdi);
   128-bit product limbs: d0 = %r15:%r14, d1 = %rbp:%rbx, d2 = %rdx:%rcx */

#define fe1305_mul_taun(m,t) \
\
  movq   m+0(%rsi),%r14; \
  movq   m+8(%rsi),%r15; \
\
  movq   %r14,%rbp; \
  andq   pmask1(%rip),%r14; \
  movq   %r15,%rbx; \
  andq   pmask2(%rip),%rbp; \
  shrq   $44,%rbp; \
  andq   pmask3(%rip),%r15; \
  shlq   $20,%r15; \
  orq    %rbp,%r15; \
  andq   pmask4(%rip),%rbx; \
  shrq   $24,%rbx; \
  orq    twoexp41(%rip),%rbx; \
\
  movq   %r14,120(%rsp); \
  movq   %r15,128(%rsp); \
  movq   %rbx,136(%rsp); \
\
  movq   t+0(%rdi),%rax; \
  mulq   120(%rsp); \
  movq   %rax,%r14; \
  movq   %rdx,%r15; \
\
  movq   t+8(%rdi),%rax; \
  imul   $20,%rax,%rax; \
  mulq   136(%rsp); \
  addq   %rax,%r14; \
  adcq   %rdx,%r15; \
\
  movq   t+16(%rdi),%rax; \
  imul   $20,%rax,%rax; \
  movq   %rax,144(%rsp); \
  mulq   128(%rsp); \
  addq   %rax,%r14; \
  adcq   %rdx,%r15; \
\
  movq   t+0(%rdi),%rax; \
  mulq   128(%rsp); \
  movq   %rax,%rbx; \
  movq   %rdx,%rbp; \
\
  movq   t+8(%rdi),%rax; \
  mulq   120(%rsp); \
  addq   %rax,%rbx; \
  adcq   %rdx,%rbp; \
\
  movq   144(%rsp),%rax; \
  mulq   136(%rsp); \
  addq   %rax,%rbx; \
  adcq   %rdx,%rbp; \
\
  movq   t+0(%rdi),%rax; \
  mulq   136(%rsp); \
  movq   %rax,%rcx; \
  movq   %rdx,144(%rsp); \
\
  movq   t+8(%rdi),%rax; \
  mulq   128(%rsp); \
  addq   %rax,%rcx; \
  adcq   144(%rsp),%rdx; \
  movq   %rdx,144(%rsp); \
\
  movq   t+16(%rdi),%rax; \
  mulq   120(%rsp); \
  addq   %rax,%rcx; \
  adcq   144(%rsp),%rdx; \

/* multiply the accumulator h = (%r8,%r10,%r12), already in radix-2^44
   limbs, by the key power at t(%rdi); 128-bit product limbs:
   d0 = %r9:%r8, d1 = %r11:%r10, d2 = %r13:%r12 */

#define fe1305_mul_taunr(t) \
\
  movq   %r8,%r14; \
  movq   %r10,%r15; \
  movq   %r12,%rbx; \
\
  movq   t+0(%rdi),%rax; \
  mulq   %r14; \
  movq   %rax,%r8; \
  movq   %rdx,%r9; \
\
  movq   t+8(%rdi),%rax; \
  imul   $20,%rax,%rax; \
  mulq   %rbx; \
  addq   %rax,%r8; \
  adcq   %rdx,%r9; \
\
  movq   t+16(%rdi),%rax; \
  imul   $20,%rax,%rax; \
  movq   %rax,%rbp; \
  mulq   %r15; \
  addq   %rax,%r8; \
  adcq   %rdx,%r9; \
\
  movq   t+0(%rdi),%rax; \
  mulq   %r15; \
  movq   %rax,%r10; \
  movq   %rdx,%r11; \
\
  movq   t+8(%rdi),%rax; \
  mulq   %r14; \
  addq   %rax,%r10; \
  adcq   %rdx,%r11; \
\
  movq   %rbp,%rax; \
  mulq   %rbx; \
  addq   %rax,%r10; \
  adcq   %rdx,%r11; \
\
  movq   t+0(%rdi),%rax; \
  mulq   %rbx; \
  movq   %rax,%r12; \
  movq   %rdx,%r13; \
\
  movq   t+8(%rdi),%rax; \
  mulq   %r15; \
  addq   %rax,%r12; \
  adcq   %rdx,%r13; \
\
  movq   t+16(%rdi),%rax; \
  mulq   %r14; \
  addq   %rax,%r12; \
  adcq   %rdx,%r13; \

/* add the product limbs left by fe1305_mul_taun to the product limbs
   left by fe1305_mul_taunr */

#define fe1305_add_product() \
\
  addq   %r14,%r8; \
  adcq   %r15,%r9; \
\
  addq   %rbx,%r10; \
  adcq   %rbp,%r11; \
\
  addq   %rcx,%r12; \
  adcq   %rdx,%r13; \

/* load the 16-byte message block at m(%rsi) into radix-2^44 limbs, OR in
   the padding bit (bit 40 of the top limb, i.e. 2^128), and add to the
   accumulator (%r8,%r10,%r12) */

#define fe1305_add_msg_block(m) \
\
  movq   m+0(%rsi),%r14; \
  movq   m+8(%rsi),%r15; \
\
  movq   %r14,%rbp; \
  andq   pmask1(%rip),%r14; \
  movq   %r15,%rbx; \
  andq   pmask2(%rip),%rbp; \
  shrq   $44,%rbp; \
  andq   pmask3(%rip),%r15; \
  shlq   $20,%r15; \
  orq    %rbp,%r15; \
  andq   pmask4(%rip),%rbx; \
  shrq   $24,%rbx; \
  movq   $1,%rbp; \
  shlq   $40,%rbp; \
  orq    %rbp,%rbx; \
\
  addq   %r14,%r8; \
  addq   %r15,%r10; \
  addq   %rbx,%r12; \

/* carry the three 128-bit product limbs (%r9:%r8,%r11:%r10,%r13:%r12)
   back to 44/44/42-bit limbs in (%r8,%r10,%r12); the carry above 2^130
   is folded into the bottom limb via *5, since 2^130 = 5 mod 2^130-5 */

#define fe1305_reduce_3l_128bits() \
\
  shld   $20,%r8,%r9; \
  shld   $20,%r10,%r11; \
  shld   $22,%r12,%r13; \
\
  movq   mask44(%rip),%rbp; \
\
  andq   %rbp,%r8; \
\
  andq   %rbp,%r10; \
  addq   %r9,%r10; \
\
  andq   mask42(%rip),%r12; \
  addq   %r11,%r12; \
\
  imul   $5,%r13,%r13; \
  addq   %r13,%r8; \

/* sequential carry chain for accumulator limbs already below 64 bits,
   again folding the top carry back via *5 */

#define fe1305_reduce_3l_64bits() \
\
  movq   mask44(%rip),%rbp; \
\
  movq   %r8,%rdx; \
  shrq   $44,%rdx; \
  addq   %r10,%rdx; \
  andq   %rbp,%r8; \
\
  movq   %rdx,%r10; \
  shrq   $44,%rdx; \
  addq   %r12,%rdx; \
  andq   %rbp,%r10; \
\
  movq   %rdx,%r12; \
  shrq   $42,%rdx; \
  imul   $5,%rdx,%rdx; \
  addq   %rdx,%r8; \
  andq   mask42(%rip),%r12; \

#endif
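/* ---------------------------------------------------------------------
   Illustrative reference model -- NOT part of lib1305.  A plain-C sketch
   of what the macros above compute, assuming a little-endian target and
   compiler support for unsigned __int128; the function names are ad hoc.
   The accumulator h and each key power are kept in radix 2^44 as
   44/44/42-bit limbs, h = h[0] + 2^44*h[1] + 2^88*h[2], modulo
   p = 2^130 - 5.  Since 2^130 = 5 mod p and hence 2^132 = 20 mod p, the
   wrapped cross terms are scaled by 20 and the top carry by 5, which is
   where the imul $20 and imul $5 constants come from.  The usual
   poly1305 clamping of r keeps all intermediates within these types. */

#include <stdint.h>
#include <string.h>

typedef unsigned __int128 u128;

#define M44 ((1ULL << 44) - 1)
#define M42 ((1ULL << 42) - 1)

/* model of fe1305_add_msg_block: split a 16-byte block into 44/44/40-bit
   pieces, set the 2^128 padding bit, and add into the accumulator */
static void model_add_block(uint64_t h[3], const unsigned char m[16])
{
  uint64_t w0, w1;
  memcpy(&w0, m, 8);                  /* little-endian loads, as on amd64 */
  memcpy(&w1, m + 8, 8);
  h[0] += w0 & M44;
  h[1] += (w0 >> 44) | ((w1 & 0xffffff) << 20);
  h[2] += (w1 >> 24) | (1ULL << 40);  /* padding bit 2^128 */
}

/* model of fe1305_mul_taunr followed by fe1305_reduce_3l_128bits:
   schoolbook 3x3 product with the wrapped cross terms pre-scaled by 20,
   then one parallel carry pass back toward 44/44/42-bit limbs */
static void model_mul(uint64_t h[3], const uint64_t r[3])
{
  u128 d0 = (u128)h[0]*r[0] + (u128)h[2]*(20*r[1]) + (u128)h[1]*(20*r[2]);
  u128 d1 = (u128)h[1]*r[0] + (u128)h[0]*r[1]      + (u128)h[2]*(20*r[2]);
  u128 d2 = (u128)h[2]*r[0] + (u128)h[1]*r[1]      + (u128)h[0]*r[2];

  h[0] = ((uint64_t)d0 & M44) + 5*(uint64_t)(d2 >> 42);
  h[1] = ((uint64_t)d1 & M44) + (uint64_t)(d0 >> 44);
  h[2] = ((uint64_t)d2 & M42) + (uint64_t)(d1 >> 44);
}

/* model of fe1305_reduce_3l_64bits: sequential carry chain once every
   limb fits in 64 bits, folding the top carry back via *5 */
static void model_reduce(uint64_t h[3])
{
  uint64_t c = h[0] >> 44;  h[0] &= M44;
  c += h[1];  h[1] = c & M44;  c >>= 44;
  c += h[2];  h[2] = c & M42;  c >>= 42;
  h[0] += 5*c;                        /* 2^130 = 5 mod p */
}

/* One Horner step h = (h + block) * r is model_add_block followed by
   model_mul.  The macros above additionally parameterize the multiplier
   (t selects among precomputed key powers), which is how this 4-way
   implementation combines several message blocks per reduction. */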