lib1305-20250415/crypto_onetimeauth/poly1305/amd64-maax-g4/poly1305_asm.h
#ifndef __POLY1305ASM__
#define __POLY1305ASM__

/* field arithmetic used for computing poly1305 */

/*
 * conventions used by the macros below:
 *   %rsi -> message block m (two 64-bit words per 16-byte block)
 *   %rdi -> precomputed multiplier tau (2 or 3 little-endian 64-bit limbs)
 *   accumulator h in r8,r9,r10 (radix-2^64; r10 holds the bits >= 2^128)
 * the constants zero, mask2 and mask2c referenced via %rip must be defined
 * by the including .S file; given the mod 2^130-5 reduction, mask2 is
 * presumably 3 (keep the low 2 bits of r10) and mask2c its complement.
 */

/* (m + 2^128) * t for a 3-limb multiplier t;
   5-limb result in r13,r14,r15,rax,rcx (low to high) */
#define fe1305_mul_taun(m,t) \
\
xorq %rcx,%rcx; \
movq m+0(%rsi),%rdx; \
\
mulx t+0(%rdi),%r13,%r14; \
mulx t+8(%rdi),%rbx,%r15; \
adcx %rbx,%r14; \
\
mulx t+16(%rdi),%rbx,%rax; \
adcx %rbx,%r15; \
adcx %rcx,%rax; \
\
xorq %rdx,%rdx; \
movq m+8(%rsi),%rdx; \
\
mulx t+0(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r15; \
adox %rbp,%rax; \
\
mulx t+16(%rdi),%rbx,%rbp; \
adcx %rbx,%rax; \
adox %rbp,%rcx; \
adcx zero(%rip),%rcx; \
\
xorq %rdx,%rdx; \
\
adcx t+0(%rdi),%r15; \
adox %rdx,%rax; \
adcx t+8(%rdi),%rax; \
adox %rdx,%rcx; \
adcx t+16(%rdi),%rcx; \

/* h * t: accumulator (r8,r9,r10) times a 3-limb multiplier t;
   5-limb result in r8,r9,r10,r11,r12 (low to high) */
#define fe1305_mul_taunr(t) \
\
xorq %r15,%r15; \
movq %r8,%rdx; \
\
mulx t+0(%rdi),%r8,%r12; \
mulx t+8(%rdi),%rbx,%r13; \
adcx %rbx,%r12; \
adcx %r15,%r13; \
\
mulx t+16(%rdi),%rbx,%r14; \
adcx %rbx,%r13; \
adcx %r15,%r14; \
\
xorq %rax,%rax; \
movq %r9,%rdx; \
\
mulx t+0(%rdi),%r9,%rbp; \
adcx %r12,%r9; \
adox %rbp,%r13; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r13; \
adox %rbp,%r14; \
\
mulx t+16(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
adcx %rax,%r15; \
\
xorq %rax,%rax; \
movq %r10,%rdx; \
\
mulx t+0(%rdi),%r10,%rbp; \
adcx %r13,%r10; \
adox %rbp,%r14; \
\
mulx t+8(%rdi),%r11,%rbp; \
adcx %r14,%r11; \
adox %rbp,%r15; \
\
mulx t+16(%rdi),%r12,%rbp; \
adcx %r15,%r12; \

/* (m + 2^128) * t for a 2-limb multiplier t;
   result in r13,r14,r15,rax,rcx (top limb holds carry bits only) */
#define fe1305_mul_tau(m,t) \
\
xorq %rax,%rax; \
movq m+0(%rsi),%rdx; \
\
mulx t+0(%rdi),%r13,%r14; \
mulx t+8(%rdi),%rbx,%r15; \
adcx %rbx,%r14; \
adcx %rax,%r15; \
\
xorq %rcx,%rcx; \
movq m+8(%rsi),%rdx; \
\
mulx t+0(%rdi),%rbx,%rbp; \
adcx %rbx,%r14; \
adox %rbp,%r15; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r15; \
adox %rbp,%rax; \
adcx %rcx,%rax; \
\
xorq %rdx,%rdx; \
\
adcx t+0(%rdi),%r15; \
adox %rdx,%rax; \
adcx t+8(%rdi),%rax; \
adox %rdx,%rcx; \
adcx %rdx,%rcx; \

/* h * t: accumulator (r8,r9,r10) times a 2-limb multiplier t;
   5-limb result in r8,r9,r10,r11,r12 (low to high) */
#define fe1305_mul_taur(t) \
\
xorq %r14,%r14; \
movq %r8,%rdx; \
\
mulx t+0(%rdi),%r8,%r12; \
mulx t+8(%rdi),%rbx,%r13; \
adcx %rbx,%r12; \
adcx %r14,%r13; \
\
xorq %rax,%rax; \
movq %r9,%rdx; \
\
mulx t+0(%rdi),%r9,%rbp; \
adcx %r12,%r9; \
adox %rbp,%r13; \
\
mulx t+8(%rdi),%rbx,%rbp; \
adcx %rbx,%r13; \
adox %rbp,%r14; \
adcx %rax,%r14; \
\
xorq %r12,%r12; \
movq %r10,%rdx; \
\
mulx t+0(%rdi),%r10,%rbp; \
adcx %r13,%r10; \
adox %rbp,%r14; \
\
mulx t+8(%rdi),%r11,%rbp; \
adcx %r14,%r11; \
adox %rbp,%r12; \
adcx %rax,%r12; \

/* accumulate the 5-limb product r13,r14,r15,rax,rcx into r8,...,r12 */
#define fe1305_add_product() \
\
xorq %rdx,%rdx; \
\
adcx %r13,%r8; \
adcx %r14,%r9; \
adcx %r15,%r10; \
adcx %rax,%r11; \
adcx %rcx,%r12; \

/* h += m + 2^128: add a 16-byte block together with its padding bit */
#define fe1305_add_msg_block(m) \
\
addq m+0(%rsi),%r8; \
adcq m+8(%rsi),%r9; \
adcq $1,%r10; \

/* reduce the 5-limb value in r8,...,r12 to 3 limbs mod 2^130-5:
   with c = value >> 130, add 4*c (in place after masking) and then c,
   i.e. 5*c in total, since 2^130 mod (2^130-5) = 5 */
#define fe1305_reduce_5l() \
\
movq %r10,%r13; \
andq mask2(%rip),%r10; \
andq mask2c(%rip),%r13; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \
\
shrd $2,%r11,%r13; \
shrd $2,%r12,%r11; \
shrq $2,%r12; \
\
addq %r13,%r8; \
adcq %r11,%r9; \
adcq %r12,%r10; \

/* partial reduction of a 3-limb value:
   h = (h mod 2^130) + 5*(h >> 130) */
#define fe1305_reduce_3l() \
\
movq %r10,%r11; \
andq mask2(%rip),%r10; \
shrq $2,%r11; \
\
imul $5,%r11,%r11; \
addq %r11,%r8; \
adcq $0,%r9; \
adcq $0,%r10; \

#endif
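For reference, the reduction step maps to portable arithmetic as follows. The sketch below is an illustration written for this note, not lib1305 code; it mirrors fe1305_reduce_3l() in C, assuming the 3-limb radix-2^64 layout visible in the macros and GCC/Clang's unsigned __int128 for carry propagation (the names fe1305 and fe1305_reduce_3l_ref are this sketch's own).

#include <stdint.h>
#include <stdio.h>

/* h = l[0] + l[1]*2^64 + l[2]*2^128, matching the r8,r9,r10 layout */
typedef struct { uint64_t l[3]; } fe1305;

/* C analogue of fe1305_reduce_3l(): since 2^130 mod (2^130-5) = 5,
   fold c = h >> 130 back into the low end as 5*c */
static void fe1305_reduce_3l_ref(fe1305 *h)
{
    uint64_t c = h->l[2] >> 2;            /* c = h div 2^130 */
    h->l[2] &= 3;                         /* h mod 2^130 (mask2 = 3) */

    unsigned __int128 t = (unsigned __int128)h->l[0]
                        + (unsigned __int128)c * 5;  /* imul $5 + addq */
    h->l[0] = (uint64_t)t;
    t = (t >> 64) + h->l[1];              /* adcq $0,%r9 */
    h->l[1] = (uint64_t)t;
    h->l[2] += (uint64_t)(t >> 64);       /* adcq $0,%r10 */
}

int main(void)
{
    /* 2^130 + 7 has limbs {7, 0, 4}; since 2^130 mod p = 5,
       the reduction should leave 12 */
    fe1305 h = {{7, 0, 4}};
    fe1305_reduce_3l_ref(&h);
    printf("%llu %llu %llu\n", (unsigned long long)h.l[0],
           (unsigned long long)h.l[1], (unsigned long long)h.l[2]);
    /* prints: 12 0 0 */
    return 0;
}

fe1305_reduce_5l() applies the same identity to a 5-limb product, but avoids the multiplication by 5: after masking with mask2c, the high part sits in place as 4*c, which is added once as-is and once after the 2-bit right shift (shrd/shrq) that turns it into c.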