-rw-r--r-- 26333 lib1305-20250415/crypto_onetimeauth/poly1305/amd64-maax-g24/poly1305_maax_g24.S raw
/* assembly to compute poly1305 using precomputed key powers and
   applying lazy reduction over a group of 24 field elements */

/*
 * poly1305_maax_g24 (x86-64, AT&T syntax, uses mulx/adcx/adox)
 *
 * Arguments, described by how the code below uses them.  The meaning of
 * each argument is inferred from that use; confirm against the C caller.
 *   %rdi  address at which the two result quadwords are stored
 *   %rsi  address of the message; advanced as blocks are consumed
 *   %rdx  address of the key table; it is addressed in steps of 24 bytes
 *         (offsets 0..552), and the 16 bytes at offsets 576 and 584 are
 *         added to the result at the very end
 *   %rcx  number of message blocks, counting the last one
 *   %r8   compared with 0 and 64 and used to derive shift counts;
 *         apparently the bit length of the last block, 0 meaning full
 *   %r9   compared with 8 in the single-block path; apparently the byte
 *         length of the last block
 *
 * Stack frame: 128 bytes, aligned down to a multiple of 32.
 *     0  %rsp at entry        64  %r8 at entry
 *     8  %r12                 72  %r9 at entry
 *    16  %r13                 80  block counter (from %rcx)
 *    24  %r14                 88  key table bytes 576..583
 *    32  %r15                 96  key table bytes 584..591
 *    40  %rbx                104  zeroed quadword, buffer for a partial block
 *    48  %rbp                112  key table address (from %rdx)
 *    56  %rdi at entry
 *
 * The fe1305_* macros are not defined in this file (presumably they come
 * from poly1305_asm.h).  Their register usage is not visible here, so the
 * instruction order around them must not be changed.  As invoked below,
 * fe1305_mul_taun(m,k) and fe1305_mul_tau(m,k) take a message byte offset
 * m (step 16) and a key table byte offset k (step 24); fe1305_mul_taunr(k)
 * and fe1305_mul_taur(k) take only a key table byte offset.
 */

#include "crypto_asm_hidden.h"
// linker define poly1305_maax_g24
// linker use mask2
// linker use mask2c
// linker use zero
// linker use p0
// linker use p1
// linker use p2

#define mask2 CRYPTO_SHARED_NAMESPACE(mask2)
#define mask2c CRYPTO_SHARED_NAMESPACE(mask2c)
#define zero CRYPTO_SHARED_NAMESPACE(zero)
#define p0 CRYPTO_SHARED_NAMESPACE(p0)
#define p1 CRYPTO_SHARED_NAMESPACE(p1)
#define p2 CRYPTO_SHARED_NAMESPACE(p2)

#include "poly1305_asm.h"

    .p2align 5
    ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24)
    ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24)
    .global _CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24)
    .global CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24)
_CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24):
CRYPTO_SHARED_NAMESPACE(poly1305_maax_g24):

    /* build the aligned frame; the entry value of rsp is kept in r11
       and saved in slot 0 so that the epilogue can restore it */
    movq %rsp,%r11
    andq $-32,%rsp
    subq $128,%rsp

    /* save the registers restored by the epilogue, then the arguments
       that are needed again later */
    movq %r11,0(%rsp)
    movq %r12,8(%rsp)
    movq %r13,16(%rsp)
    movq %r14,24(%rsp)
    movq %r15,32(%rsp)
    movq %rbx,40(%rsp)
    movq %rbp,48(%rsp)
    movq %rdi,56(%rsp)
    movq %r8,64(%rsp)
    movq %r9,72(%rsp)

    /* store last 16 bytes of the key */
    movq 576(%rdx),%r14
    movq 584(%rdx),%r15
    movq %r14,88(%rsp)
    movq %r15,96(%rsp)

    /* key = (r15 : r14), the first 16 bytes of the key table */
    movq 0(%rdx),%r14
    movq 8(%rdx),%r15

    /* initialize a quad-word on the stack with 0; a partial last block
       is later copied into it, so the unused high bytes stay zero */
    movq $0,104(%rsp)

    /* if the message has a single block */
    cmpq $1,%rcx
    je .L5

    /* more than one block: keep the block counter and the key table
       address on the stack; rdi now holds the key table address */
    movq %rcx,80(%rsp)
    movq %rdx,%rdi
    movq %rdx,112(%rsp)

    /* clear r8..r12; (r10 : r9 : r8) receives the last message block
       at .L4 below */
    movq $0,%r8
    movq $0,%r9
    movq $0,%r10
    movq $0,%r11
    movq $0,%r12

    /* dispatch on the block count: 2..23 go to the matching tail
       routine, anything larger falls through to the main loop .LB24 */
    cmpq $2,%rcx
    je .LB2
    cmpq $3,%rcx
    je .LB3
    cmpq $4,%rcx
    je .LB4
    cmpq $5,%rcx
    je .LB5
    cmpq $6,%rcx
    je .LB6
    cmpq $7,%rcx
    je .LB7
    cmpq $8,%rcx
    je .LB8
    cmpq $9,%rcx
    je .LB9
    cmpq $10,%rcx
    je .LB10
    cmpq $11,%rcx
    je .LB11
    cmpq $12,%rcx
    je .LB12
    cmpq $13,%rcx
    je .LB13
    cmpq $14,%rcx
    je .LB14
    cmpq $15,%rcx
    je .LB15
    cmpq $16,%rcx
    je .LB16
    cmpq $17,%rcx
    je .LB17
    cmpq $18,%rcx
    je .LB18
    cmpq $19,%rcx
    je .LB19
    cmpq $20,%rcx
    je .LB20
    cmpq $21,%rcx
    je .LB21
    cmpq $22,%rcx
    je .LB22
    cmpq $23,%rcx
    je .LB23

/* main loop: 23 blocks at 0..352(%rsi) are paired with key table
   offsets 528 down to 0, and the reduction macros are invoked once
   after the whole group */
.LB24:
    fe1305_mul_taun(0,528)
    fe1305_add_product()
    fe1305_mul_taun(16,504)
    fe1305_add_product()
    fe1305_mul_taun(32,480)
    fe1305_add_product()
    fe1305_mul_taun(48,456)
    fe1305_add_product()
    fe1305_mul_taun(64,432)
    fe1305_add_product()
    fe1305_mul_taun(80,408)
    fe1305_add_product()
    fe1305_mul_taun(96,384)
    fe1305_add_product()
    fe1305_mul_taun(112,360)
    fe1305_add_product()
    fe1305_mul_taun(128,336)
    fe1305_add_product()
    fe1305_mul_taun(144,312)
    fe1305_add_product()
    fe1305_mul_taun(160,288)
    fe1305_add_product()
    fe1305_mul_taun(176,264)
    fe1305_add_product()
    fe1305_mul_taun(192,240)
    fe1305_add_product()
    fe1305_mul_taun(208,216)
    fe1305_add_product()
    fe1305_mul_taun(224,192)
    fe1305_add_product()
    fe1305_mul_taun(240,168)
    fe1305_add_product()
    fe1305_mul_taun(256,144)
    fe1305_add_product()
    fe1305_mul_taun(272,120)
    fe1305_add_product()
    fe1305_mul_taun(288,96)
    fe1305_add_product()
    fe1305_mul_taun(304,72)
    fe1305_add_product()
    fe1305_mul_taun(320,48)
    fe1305_add_product()
    fe1305_mul_taun(336,24)
    fe1305_add_product()
    fe1305_mul_tau(352,0)
    fe1305_add_product()
    fe1305_reduce_5l()
    fe1305_reduce_3l()

    /* step over the 23 blocks just used (23 * 16 = 368 bytes) and
       take 24 off the stored block counter */
    addq $368,%rsi
    movq 80(%rsp),%rcx
    subq $24,%rcx
    movq %rcx,80(%rsp)

    /* if there are no blocks left before processing the last block */
    cmpq $0,%rcx
    je .LB0

    /* at least one more block precedes the last block: apply
       fe1305_add_msg_block to the block at 0(%rsi) and step over it */
    fe1305_add_msg_block(0)
    addq $16,%rsi

    /* if there is exactly one more block before processing the last block */
    cmpq $1,%rcx
    je .LB1

/* .LT2 .. .LT24: pick the key table offset 24 * (rcx - 1), capped at
   552, for fe1305_mul_taunr and continue with the matching .LBn */
.LT2:
    cmpq $2,%rcx
    jg .LT3

    /* if there are two more blocks before processing the last block */
    fe1305_mul_taunr(24)
    jmp .LB2

.LT3:
    cmpq $3,%rcx
    jg .LT4

    /* if there are three more blocks before processing the last block */
    fe1305_mul_taunr(48)
    jmp .LB3

.LT4:
    cmpq $4,%rcx
    jg .LT5

    /* if there are four more blocks before processing the last block */
    fe1305_mul_taunr(72)
    jmp .LB4

.LT5:
    cmpq $5,%rcx
    jg .LT6

    /* if there are five more blocks before processing the last block */
    fe1305_mul_taunr(96)
    jmp .LB5

.LT6:
    cmpq $6,%rcx
    jg .LT7

    /* if there are six more blocks before processing the last block */
    fe1305_mul_taunr(120)
    jmp .LB6

.LT7:
    cmpq $7,%rcx
    jg .LT8

    /* if there are seven more blocks before processing the last block */
    fe1305_mul_taunr(144)
    jmp .LB7

.LT8:
    cmpq $8,%rcx
    jg .LT9

    /* if there are eight more blocks before processing the last block */
    fe1305_mul_taunr(168)
    jmp .LB8

.LT9:
    cmpq $9,%rcx
    jg .LT10

    /* if there are nine more blocks before processing the last block */
    fe1305_mul_taunr(192)
    jmp .LB9

.LT10:
    cmpq $10,%rcx
    jg .LT11

    /* if there are ten more blocks before processing the last block */
    fe1305_mul_taunr(216)
    jmp .LB10

.LT11:
    cmpq $11,%rcx
    jg .LT12

    /* if there are eleven more blocks before processing the last block */
    fe1305_mul_taunr(240)
    jmp .LB11

.LT12:
    cmpq $12,%rcx
    jg .LT13

    /* if there are twelve more blocks before processing the last block */
    fe1305_mul_taunr(264)
    jmp .LB12

.LT13:
    cmpq $13,%rcx
    jg .LT14

    /* if there are thirteen more blocks before processing the last block */
    fe1305_mul_taunr(288)
    jmp .LB13

.LT14:
    cmpq $14,%rcx
    jg .LT15

    /* if there are fourteen more blocks before processing the last block */
    fe1305_mul_taunr(312)
    jmp .LB14

.LT15:
    cmpq $15,%rcx
    jg .LT16

    /* if there are fifteen more blocks before processing the last block */
    fe1305_mul_taunr(336)
    jmp .LB15

.LT16:
    cmpq $16,%rcx
    jg .LT17

    /* if there are sixteen more blocks before processing the last block */
    fe1305_mul_taunr(360)
    jmp .LB16

.LT17:
    cmpq $17,%rcx
    jg .LT18

    /* if there are seventeen more blocks before processing the last block */
    fe1305_mul_taunr(384)
    jmp .LB17

.LT18:
    cmpq $18,%rcx
    jg .LT19

    /* if there are eighteen more blocks before processing the last block */
    fe1305_mul_taunr(408)
    jmp .LB18

.LT19:
    cmpq $19,%rcx
    jg .LT20

    /* if there are nineteen more blocks before processing the last block */
    fe1305_mul_taunr(432)
    jmp .LB19

.LT20:
    cmpq $20,%rcx
    jg .LT21

    /* if there are twenty more blocks before processing the last block */
    fe1305_mul_taunr(456)
    jmp .LB20

.LT21:
    cmpq $21,%rcx
    jg .LT22

    /* if there are twenty one more blocks before processing the last block */
    fe1305_mul_taunr(480)
    jmp .LB21

.LT22:
    cmpq $22,%rcx
    jg .LT23

    /* if there are twenty two more blocks before processing the last block */
    fe1305_mul_taunr(504)
    jmp .LB22

.LT23:
    cmpq $23,%rcx
    jg .LT24

    /* if there are twenty three more blocks before processing the last block */
    fe1305_mul_taunr(528)
    jmp .LB23

.LT24:
    /* if there are at least twenty four more blocks before processing the last block */
    fe1305_mul_taunr(552)
    jmp .LB24

/* tail routines .LB1 .. .LB23.
   .LBn (n >= 2) handles the n - 1 blocks at 0 .. 16*(n-2) bytes from
   rsi, pairing them with key table offsets 24*(n-2) down to 0, invokes
   fe1305_reduce_5l once, advances rsi by 16*(n-1) and continues at
   .LB0, where the last block is handled. */
.LB1:
    fe1305_mul_taur(0)
    fe1305_reduce_5l()

    jmp .LB0

.LB2:
    fe1305_mul_tau(0,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $16,%rsi
    jmp .LB0

.LB3:
    fe1305_mul_taun(0,24)
    fe1305_add_product()
    fe1305_mul_tau(16,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $32,%rsi
    jmp .LB0

.LB4:
    fe1305_mul_taun(0,48)
    fe1305_add_product()
    fe1305_mul_taun(16,24)
    fe1305_add_product()
    fe1305_mul_tau(32,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $48,%rsi
    jmp .LB0

.LB5:
    fe1305_mul_taun(0,72)
    fe1305_add_product()
    fe1305_mul_taun(16,48)
    fe1305_add_product()
    fe1305_mul_taun(32,24)
    fe1305_add_product()
    fe1305_mul_tau(48,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $64,%rsi
    jmp .LB0

.LB6:
    fe1305_mul_taun(0,96)
    fe1305_add_product()
    fe1305_mul_taun(16,72)
    fe1305_add_product()
    fe1305_mul_taun(32,48)
    fe1305_add_product()
    fe1305_mul_taun(48,24)
    fe1305_add_product()
    fe1305_mul_tau(64,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $80,%rsi
    jmp .LB0

.LB7:
    fe1305_mul_taun(0,120)
    fe1305_add_product()
    fe1305_mul_taun(16,96)
    fe1305_add_product()
    fe1305_mul_taun(32,72)
    fe1305_add_product()
    fe1305_mul_taun(48,48)
    fe1305_add_product()
    fe1305_mul_taun(64,24)
    fe1305_add_product()
    fe1305_mul_tau(80,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $96,%rsi
    jmp .LB0

.LB8:
    fe1305_mul_taun(0,144)
    fe1305_add_product()
    fe1305_mul_taun(16,120)
    fe1305_add_product()
    fe1305_mul_taun(32,96)
    fe1305_add_product()
    fe1305_mul_taun(48,72)
    fe1305_add_product()
    fe1305_mul_taun(64,48)
    fe1305_add_product()
    fe1305_mul_taun(80,24)
    fe1305_add_product()
    fe1305_mul_tau(96,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $112,%rsi
    jmp .LB0

.LB9:
    fe1305_mul_taun(0,168)
    fe1305_add_product()
    fe1305_mul_taun(16,144)
    fe1305_add_product()
    fe1305_mul_taun(32,120)
    fe1305_add_product()
    fe1305_mul_taun(48,96)
    fe1305_add_product()
    fe1305_mul_taun(64,72)
    fe1305_add_product()
    fe1305_mul_taun(80,48)
    fe1305_add_product()
    fe1305_mul_taun(96,24)
    fe1305_add_product()
    fe1305_mul_tau(112,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $128,%rsi
    jmp .LB0

.LB10:
    fe1305_mul_taun(0,192)
    fe1305_add_product()
    fe1305_mul_taun(16,168)
    fe1305_add_product()
    fe1305_mul_taun(32,144)
    fe1305_add_product()
    fe1305_mul_taun(48,120)
    fe1305_add_product()
    fe1305_mul_taun(64,96)
    fe1305_add_product()
    fe1305_mul_taun(80,72)
    fe1305_add_product()
    fe1305_mul_taun(96,48)
    fe1305_add_product()
    fe1305_mul_taun(112,24)
    fe1305_add_product()
    fe1305_mul_tau(128,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $144,%rsi
    jmp .LB0

.LB11:
    fe1305_mul_taun(0,216)
    fe1305_add_product()
    fe1305_mul_taun(16,192)
    fe1305_add_product()
    fe1305_mul_taun(32,168)
    fe1305_add_product()
    fe1305_mul_taun(48,144)
    fe1305_add_product()
    fe1305_mul_taun(64,120)
    fe1305_add_product()
    fe1305_mul_taun(80,96)
    fe1305_add_product()
    fe1305_mul_taun(96,72)
    fe1305_add_product()
    fe1305_mul_taun(112,48)
    fe1305_add_product()
    fe1305_mul_taun(128,24)
    fe1305_add_product()
    fe1305_mul_tau(144,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $160,%rsi
    jmp .LB0

.LB12:
    fe1305_mul_taun(0,240)
    fe1305_add_product()
    fe1305_mul_taun(16,216)
    fe1305_add_product()
    fe1305_mul_taun(32,192)
    fe1305_add_product()
    fe1305_mul_taun(48,168)
    fe1305_add_product()
    fe1305_mul_taun(64,144)
    fe1305_add_product()
    fe1305_mul_taun(80,120)
    fe1305_add_product()
    fe1305_mul_taun(96,96)
    fe1305_add_product()
    fe1305_mul_taun(112,72)
    fe1305_add_product()
    fe1305_mul_taun(128,48)
    fe1305_add_product()
    fe1305_mul_taun(144,24)
    fe1305_add_product()
    fe1305_mul_tau(160,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $176,%rsi
    jmp .LB0

.LB13:
    fe1305_mul_taun(0,264)
    fe1305_add_product()
    fe1305_mul_taun(16,240)
    fe1305_add_product()
    fe1305_mul_taun(32,216)
    fe1305_add_product()
    fe1305_mul_taun(48,192)
    fe1305_add_product()
    fe1305_mul_taun(64,168)
    fe1305_add_product()
    fe1305_mul_taun(80,144)
    fe1305_add_product()
    fe1305_mul_taun(96,120)
    fe1305_add_product()
    fe1305_mul_taun(112,96)
    fe1305_add_product()
    fe1305_mul_taun(128,72)
    fe1305_add_product()
    fe1305_mul_taun(144,48)
    fe1305_add_product()
    fe1305_mul_taun(160,24)
    fe1305_add_product()
    fe1305_mul_tau(176,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $192,%rsi
    jmp .LB0

.LB14:
    fe1305_mul_taun(0,288)
    fe1305_add_product()
    fe1305_mul_taun(16,264)
    fe1305_add_product()
    fe1305_mul_taun(32,240)
    fe1305_add_product()
    fe1305_mul_taun(48,216)
    fe1305_add_product()
    fe1305_mul_taun(64,192)
    fe1305_add_product()
    fe1305_mul_taun(80,168)
    fe1305_add_product()
    fe1305_mul_taun(96,144)
    fe1305_add_product()
    fe1305_mul_taun(112,120)
    fe1305_add_product()
    fe1305_mul_taun(128,96)
    fe1305_add_product()
    fe1305_mul_taun(144,72)
    fe1305_add_product()
    fe1305_mul_taun(160,48)
    fe1305_add_product()
    fe1305_mul_taun(176,24)
    fe1305_add_product()
    fe1305_mul_tau(192,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $208,%rsi
    jmp .LB0

.LB15:
    fe1305_mul_taun(0,312)
    fe1305_add_product()
    fe1305_mul_taun(16,288)
    fe1305_add_product()
    fe1305_mul_taun(32,264)
    fe1305_add_product()
    fe1305_mul_taun(48,240)
    fe1305_add_product()
    fe1305_mul_taun(64,216)
    fe1305_add_product()
    fe1305_mul_taun(80,192)
    fe1305_add_product()
    fe1305_mul_taun(96,168)
    fe1305_add_product()
    fe1305_mul_taun(112,144)
    fe1305_add_product()
    fe1305_mul_taun(128,120)
    fe1305_add_product()
    fe1305_mul_taun(144,96)
    fe1305_add_product()
    fe1305_mul_taun(160,72)
    fe1305_add_product()
    fe1305_mul_taun(176,48)
    fe1305_add_product()
    fe1305_mul_taun(192,24)
    fe1305_add_product()
    fe1305_mul_tau(208,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $224,%rsi
    jmp .LB0

.LB16:
    fe1305_mul_taun(0,336)
    fe1305_add_product()
    fe1305_mul_taun(16,312)
    fe1305_add_product()
    fe1305_mul_taun(32,288)
    fe1305_add_product()
    fe1305_mul_taun(48,264)
    fe1305_add_product()
    fe1305_mul_taun(64,240)
    fe1305_add_product()
    fe1305_mul_taun(80,216)
    fe1305_add_product()
    fe1305_mul_taun(96,192)
    fe1305_add_product()
    fe1305_mul_taun(112,168)
    fe1305_add_product()
    fe1305_mul_taun(128,144)
    fe1305_add_product()
    fe1305_mul_taun(144,120)
    fe1305_add_product()
    fe1305_mul_taun(160,96)
    fe1305_add_product()
    fe1305_mul_taun(176,72)
    fe1305_add_product()
    fe1305_mul_taun(192,48)
    fe1305_add_product()
    fe1305_mul_taun(208,24)
    fe1305_add_product()
    fe1305_mul_tau(224,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $240,%rsi
    jmp .LB0

.LB17:
    fe1305_mul_taun(0,360)
    fe1305_add_product()
    fe1305_mul_taun(16,336)
    fe1305_add_product()
    fe1305_mul_taun(32,312)
    fe1305_add_product()
    fe1305_mul_taun(48,288)
    fe1305_add_product()
    fe1305_mul_taun(64,264)
    fe1305_add_product()
    fe1305_mul_taun(80,240)
    fe1305_add_product()
    fe1305_mul_taun(96,216)
    fe1305_add_product()
    fe1305_mul_taun(112,192)
    fe1305_add_product()
    fe1305_mul_taun(128,168)
    fe1305_add_product()
    fe1305_mul_taun(144,144)
    fe1305_add_product()
    fe1305_mul_taun(160,120)
    fe1305_add_product()
    fe1305_mul_taun(176,96)
    fe1305_add_product()
    fe1305_mul_taun(192,72)
    fe1305_add_product()
    fe1305_mul_taun(208,48)
    fe1305_add_product()
    fe1305_mul_taun(224,24)
    fe1305_add_product()
    fe1305_mul_tau(240,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $256,%rsi
    jmp .LB0

.LB18:
    fe1305_mul_taun(0,384)
    fe1305_add_product()
    fe1305_mul_taun(16,360)
    fe1305_add_product()
    fe1305_mul_taun(32,336)
    fe1305_add_product()
    fe1305_mul_taun(48,312)
    fe1305_add_product()
    fe1305_mul_taun(64,288)
    fe1305_add_product()
    fe1305_mul_taun(80,264)
    fe1305_add_product()
    fe1305_mul_taun(96,240)
    fe1305_add_product()
    fe1305_mul_taun(112,216)
    fe1305_add_product()
    fe1305_mul_taun(128,192)
    fe1305_add_product()
    fe1305_mul_taun(144,168)
    fe1305_add_product()
    fe1305_mul_taun(160,144)
    fe1305_add_product()
    fe1305_mul_taun(176,120)
    fe1305_add_product()
    fe1305_mul_taun(192,96)
    fe1305_add_product()
    fe1305_mul_taun(208,72)
    fe1305_add_product()
    fe1305_mul_taun(224,48)
    fe1305_add_product()
    fe1305_mul_taun(240,24)
    fe1305_add_product()
    fe1305_mul_tau(256,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $272,%rsi
    jmp .LB0

.LB19:
    fe1305_mul_taun(0,408)
    fe1305_add_product()
    fe1305_mul_taun(16,384)
    fe1305_add_product()
    fe1305_mul_taun(32,360)
    fe1305_add_product()
    fe1305_mul_taun(48,336)
    fe1305_add_product()
    fe1305_mul_taun(64,312)
    fe1305_add_product()
    fe1305_mul_taun(80,288)
    fe1305_add_product()
    fe1305_mul_taun(96,264)
    fe1305_add_product()
    fe1305_mul_taun(112,240)
    fe1305_add_product()
    fe1305_mul_taun(128,216)
    fe1305_add_product()
    fe1305_mul_taun(144,192)
    fe1305_add_product()
    fe1305_mul_taun(160,168)
    fe1305_add_product()
    fe1305_mul_taun(176,144)
    fe1305_add_product()
    fe1305_mul_taun(192,120)
    fe1305_add_product()
    fe1305_mul_taun(208,96)
    fe1305_add_product()
    fe1305_mul_taun(224,72)
    fe1305_add_product()
    fe1305_mul_taun(240,48)
    fe1305_add_product()
    fe1305_mul_taun(256,24)
    fe1305_add_product()
    fe1305_mul_tau(272,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $288,%rsi
    jmp .LB0

.LB20:
    fe1305_mul_taun(0,432)
    fe1305_add_product()
    fe1305_mul_taun(16,408)
    fe1305_add_product()
    fe1305_mul_taun(32,384)
    fe1305_add_product()
    fe1305_mul_taun(48,360)
    fe1305_add_product()
    fe1305_mul_taun(64,336)
    fe1305_add_product()
    fe1305_mul_taun(80,312)
    fe1305_add_product()
    fe1305_mul_taun(96,288)
    fe1305_add_product()
    fe1305_mul_taun(112,264)
    fe1305_add_product()
    fe1305_mul_taun(128,240)
    fe1305_add_product()
    fe1305_mul_taun(144,216)
    fe1305_add_product()
    fe1305_mul_taun(160,192)
    fe1305_add_product()
    fe1305_mul_taun(176,168)
    fe1305_add_product()
    fe1305_mul_taun(192,144)
    fe1305_add_product()
    fe1305_mul_taun(208,120)
    fe1305_add_product()
    fe1305_mul_taun(224,96)
    fe1305_add_product()
    fe1305_mul_taun(240,72)
    fe1305_add_product()
    fe1305_mul_taun(256,48)
    fe1305_add_product()
    fe1305_mul_taun(272,24)
    fe1305_add_product()
    fe1305_mul_tau(288,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $304,%rsi
    jmp .LB0

.LB21:
    fe1305_mul_taun(0,456)
    fe1305_add_product()
    fe1305_mul_taun(16,432)
    fe1305_add_product()
    fe1305_mul_taun(32,408)
    fe1305_add_product()
    fe1305_mul_taun(48,384)
    fe1305_add_product()
    fe1305_mul_taun(64,360)
    fe1305_add_product()
    fe1305_mul_taun(80,336)
    fe1305_add_product()
    fe1305_mul_taun(96,312)
    fe1305_add_product()
    fe1305_mul_taun(112,288)
    fe1305_add_product()
    fe1305_mul_taun(128,264)
    fe1305_add_product()
    fe1305_mul_taun(144,240)
    fe1305_add_product()
    fe1305_mul_taun(160,216)
    fe1305_add_product()
    fe1305_mul_taun(176,192)
    fe1305_add_product()
    fe1305_mul_taun(192,168)
    fe1305_add_product()
    fe1305_mul_taun(208,144)
    fe1305_add_product()
    fe1305_mul_taun(224,120)
    fe1305_add_product()
    fe1305_mul_taun(240,96)
    fe1305_add_product()
    fe1305_mul_taun(256,72)
    fe1305_add_product()
    fe1305_mul_taun(272,48)
    fe1305_add_product()
    fe1305_mul_taun(288,24)
    fe1305_add_product()
    fe1305_mul_tau(304,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $320,%rsi
    jmp .LB0

.LB22:
    fe1305_mul_taun(0,480)
    fe1305_add_product()
    fe1305_mul_taun(16,456)
    fe1305_add_product()
    fe1305_mul_taun(32,432)
    fe1305_add_product()
    fe1305_mul_taun(48,408)
    fe1305_add_product()
    fe1305_mul_taun(64,384)
    fe1305_add_product()
    fe1305_mul_taun(80,360)
    fe1305_add_product()
    fe1305_mul_taun(96,336)
    fe1305_add_product()
    fe1305_mul_taun(112,312)
    fe1305_add_product()
    fe1305_mul_taun(128,288)
    fe1305_add_product()
    fe1305_mul_taun(144,264)
    fe1305_add_product()
    fe1305_mul_taun(160,240)
    fe1305_add_product()
    fe1305_mul_taun(176,216)
    fe1305_add_product()
    fe1305_mul_taun(192,192)
    fe1305_add_product()
    fe1305_mul_taun(208,168)
    fe1305_add_product()
    fe1305_mul_taun(224,144)
    fe1305_add_product()
    fe1305_mul_taun(240,120)
    fe1305_add_product()
    fe1305_mul_taun(256,96)
    fe1305_add_product()
    fe1305_mul_taun(272,72)
    fe1305_add_product()
    fe1305_mul_taun(288,48)
    fe1305_add_product()
    fe1305_mul_taun(304,24)
    fe1305_add_product()
    fe1305_mul_tau(320,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $336,%rsi
    jmp .LB0

.LB23:
    fe1305_mul_taun(0,504)
    fe1305_add_product()
    fe1305_mul_taun(16,480)
    fe1305_add_product()
    fe1305_mul_taun(32,456)
    fe1305_add_product()
    fe1305_mul_taun(48,432)
    fe1305_add_product()
    fe1305_mul_taun(64,408)
    fe1305_add_product()
    fe1305_mul_taun(80,384)
    fe1305_add_product()
    fe1305_mul_taun(96,360)
    fe1305_add_product()
    fe1305_mul_taun(112,336)
    fe1305_add_product()
    fe1305_mul_taun(128,312)
    fe1305_add_product()
    fe1305_mul_taun(144,288)
    fe1305_add_product()
    fe1305_mul_taun(160,264)
    fe1305_add_product()
    fe1305_mul_taun(176,240)
    fe1305_add_product()
    fe1305_mul_taun(192,216)
    fe1305_add_product()
    fe1305_mul_taun(208,192)
    fe1305_add_product()
    fe1305_mul_taun(224,168)
    fe1305_add_product()
    fe1305_mul_taun(240,144)
    fe1305_add_product()
    fe1305_mul_taun(256,120)
    fe1305_add_product()
    fe1305_mul_taun(272,96)
    fe1305_add_product()
    fe1305_mul_taun(288,72)
    fe1305_add_product()
    fe1305_mul_taun(304,48)
    fe1305_add_product()
    fe1305_mul_taun(320,24)
    fe1305_add_product()
    fe1305_mul_tau(336,0)
    fe1305_add_product()
    fe1305_reduce_5l()

    addq $352,%rsi
    /* .LB23 has no jump: it falls through into .LB0 */

/* last block of a multi-block message: load it into (r14 : r13 : r12)
   with one extra bit set just above its final byte, then add it to
   (r10 : r9 : r8) at .L4 */
.LB0:
    /* if the last block is full */
    cmpq $0,64(%rsp)
    je .L3

    /* if the last block has 8 bytes */
    cmpq $64,64(%rsp)
    je .L2

    /* if the last block has 1 to 7 bytes; je leaves the flags alone,
       so jl still tests the cmpq against 64 above */
    jl .L1

    /* else if the last block has 9 to 15 bytes */

    /* first chunk of message block = (r12) */
    movq 0(%rsi),%r12
    addq $8,%rsi

    /* rbx = 128 - bits; rcx = (bits - 64) / 8 = bytes beyond the
       first eight, copied into the zeroed quadword at 104(%rsp) */
    movq $128,%rbx
    subq 64(%rsp),%rbx
    movq $64,%rcx
    subq %rbx,%rcx
    shrq $3,%rcx
    leaq 104(%rsp),%rdi
    rep movsb (%rsi),(%rdi)

    /* second chunk of message block = (r13) */
    movq 104(%rsp),%r13

    /* r11 = (all ones >> (128 - bits)) + 1, a single bit at position
       bits - 64, which is then set in r13 */
    movq $-1,%r11
    movq %rbx,%rcx
    shrq %cl,%r11
    addq $1,%r11
    orq %r11,%r13
    movq $0,%r14
    jmp .L4

.L1:
    /* 1 to 7 bytes: copy bits / 8 bytes into the zeroed quadword */
    movq 64(%rsp),%rcx
    shrq $3,%rcx
    leaq 104(%rsp),%rdi
    rep movsb (%rsi),(%rdi)

    /* first chunk of message block = (r12) */
    movq 104(%rsp),%r12

    /* r11 = (all ones >> (64 - bits)) + 1, a single bit at position
       bits, which is then set in r12 */
    movq $-1,%r11
    movb $64,%cl
    subb 64(%rsp),%cl
    shrq %cl,%r11
    addq $1,%r11
    orq %r11,%r12

    /* second chunk of message block = (r13) */
    movq $0,%r13
    movq $0,%r14
    jmp .L4

.L2:
    /* exactly 8 bytes: the extra bit is bit 0 of the second quadword */
    movq 0(%rsi),%r12
    movq $1,%r13
    movq $0,%r14
    jmp .L4

.L3:
    /* full 16-byte block: the extra bit is bit 0 of the third quadword */
    movq 0(%rsi),%r12
    movq 8(%rsi),%r13
    movq $1,%r14

.L4:
    /* (r10 : r9 : r8) += (r14 : r13 : r12) */
    addq %r12,%r8
    adcq %r13,%r9
    adcq %r14,%r10

    /* rdi was used as the copy destination above; reload the key table
       address saved at 112(%rsp) before the next macro */
    movq 112(%rsp),%rdi

    fe1305_mul_taur(0)
    fe1305_reduce_5l()

    jmp .L9

/* single-block message: the padded block is multiplied directly by
   the key (r15 : r14) using mulx/adcx/adox */
.L5:
    /* if the single message block is full */
    cmpq $0,64(%rsp)
    je .L8

    /* if the single message block has 1 to 7 bytes */
    cmpq $8,72(%rsp)
    jl .L6

    /* if the single message block has 8 bytes; jl leaves the flags
       alone, so je still tests the cmpq against 8 above */
    je .L7

    /* else if the single message block has 9 to 15 bytes */

    /* first chunk of message block = (r13) */
    movq 0(%rsi),%r13
    addq $8,%rsi

    /* rbx = 128 - bits; rcx = (bits - 64) / 8 = bytes beyond the
       first eight, copied into the zeroed quadword at 104(%rsp) */
    movq $128,%rbx
    subq 64(%rsp),%rbx
    movq $64,%rcx
    subq %rbx,%rcx
    shrq $3,%rcx
    leaq 104(%rsp),%rdi
    rep movsb (%rsi),(%rdi)

    /* second chunk of message block = (rax), with a single bit set at
       position bits - 64 */
    movq 104(%rsp),%rax
    movq $-1,%r11
    movq %rbx,%rcx
    shrq %cl,%r11
    addq $1,%r11
    orq %r11,%rax

    /* integer multiplication: (rax : r13) * (r15 : r14);
       each xorq zeroes a register and also clears CF and OF, which
       starts the adcx (CF) and adox (OF) carry chains */
    xorq %r11,%r11
    movq %r13,%rdx

    mulx %r14,%r8,%r9
    mulx %r15,%rbx,%r10
    adcx %rbx,%r9
    adcx %r11,%r10

    xorq %r12,%r12
    movq %rax,%rdx

    mulx %r14,%rbx,%rbp
    adcx %rbx,%r9
    adox %rbp,%r10

    mulx %r15,%rbx,%rbp
    adcx %rbx,%r10
    adox %rbp,%r11
    adcx %r12,%r11

    /* reduction on the integer product (r11 : r10 : r9 : r8);
       r12 is zero here and only propagates carries */
    movq %r10,%rbx
    andq mask2(%rip),%r10
    andq mask2c(%rip),%rbx
    addq %rbx,%r8
    adcq %r11,%r9
    adcq %r12,%r10
    shrd $2,%r11,%rbx
    shrq $2,%r11
    addq %rbx,%r8
    adcq %r11,%r9
    adcq %r12,%r10

    jmp .L9

.L6:
    /* 1 to 7 bytes: copy bits / 8 bytes into the zeroed quadword */
    movq 64(%rsp),%rcx
    shrq $3,%rcx
    leaq 104(%rsp),%rdi
    rep movsb (%rsi),(%rdi)

    /* message block = (r13), with a single bit set at position bits */
    movq 104(%rsp),%r13
    movq $-1,%r11
    movb $64,%cl
    subb 64(%rsp),%cl
    shrq %cl,%r11
    addq $1,%r11
    orq %r11,%r13

    /* integer multiplication: r13 * (r15 : r14) = (r10 : r9 : r8) */
    xorq %r11,%r11
    movq %r13,%rdx

    mulx %r14,%r8,%r9
    mulx %r15,%rbx,%r10
    adcx %rbx,%r9
    adcx %r11,%r10

    jmp .L9

.L7:
    /* integer multiplication: the 8-byte block times (r15 : r14) */
    xorq %r11,%r11
    movq 0(%rsi),%rdx

    mulx %r14,%r8,%r9
    mulx %r15,%rbx,%r10
    adcx %rbx,%r9
    adcx %r11,%r10

    /* the extra bit sits at position 64, so the key itself is added
       one quadword higher: r14 into r9, r15 into r10 */
    xorq %rax,%rax
    adcx %r14,%r9
    adox %rax,%r10
    adcx %r15,%r10
    adox %rax,%r11
    adcx %rax,%r11

    /* reduction on the integer product (r11 : r10 : r9 : r8) */
    movq %r10,%r13
    andq mask2(%rip),%r10
    andq mask2c(%rip),%r13
    addq %r13,%r8
    adcq %r11,%r9
    adcq $0,%r10
    shrd $2,%r11,%r13
    shrq $2,%r11
    addq %r13,%r8
    adcq %r11,%r9
    adcq $0,%r10

    jmp .L9

.L8:
    /* integer multiplication: the full 16-byte block times (r15 : r14) */
    xorq %r11,%r11
    movq 0(%rsi),%rdx

    mulx %r14,%r8,%r9
    mulx %r15,%rbx,%r10
    adcx %rbx,%r9
    adcx %r11,%r10

    xorq %r12,%r12
    movq 8(%rsi),%rdx

    mulx %r14,%rbx,%rbp
    adcx %rbx,%r9
    adox %rbp,%r10

    mulx %r15,%rbx,%rbp
    adcx %rbx,%r10
    adox %rbp,%r11
    adcx %r12,%r11

    /* the extra bit sits at position 128, so the key itself is added
       two quadwords higher: r14 into r10, r15 into r11 */
    xorq %rax,%rax
    adcx %r14,%r10
    adox %rax,%r11
    adcx %r15,%r11
    adox %rax,%r12
    adcx %rax,%r12

    /* reduction on the integer product (r12 : r11 : r10 : r9 : r8) */
    movq %r10,%rbx
    andq mask2(%rip),%r10
    andq mask2c(%rip),%rbx
    addq %rbx,%r8
    adcq %r11,%r9
    adcq %r12,%r10
    shrd $2,%r11,%rbx
    shrd $2,%r12,%r11
    shrq $2,%r12
    addq %rbx,%r8
    adcq %r11,%r9
    adcq %r12,%r10

.L9:
    /* final reduction on (r10 : r9 : r8): r10 is masked with mask2 and
       5 * (r10 >> 2) is added back into the low limb */
    movq %r10,%r11
    shrq $2,%r11
    andq mask2(%rip),%r10
    imul $5,%r11,%r11
    addq %r11,%r8
    adcq $0,%r9
    adcq $0,%r10

    /* freeze the reduced field element (r10 : r9 : r8): subtract
       (p2 : p1 : p0) and keep the copy made beforehand if the
       subtraction went negative.  shlq $62 moves bit 2 of the top limb
       into CF for the cmovc instructions; no branch is taken. */
    movq %r8,%r11
    movq %r9,%r12
    movq %r10,%r13

    subq p0(%rip),%r8
    sbbq p1(%rip),%r9
    sbbq p2(%rip),%r10

    movq %r10,%rcx
    shlq $62,%rcx

    cmovc %r11,%r8
    cmovc %r12,%r9
    cmovc %r13,%r10

    /* add last 16 bytes of the key */
    addq 88(%rsp),%r8
    adcq 96(%rsp),%r9
    adcq $0,%r10

    /* store the low 128 bits of the result (two quadwords) at the
       address passed in rdi; r10 is not stored */
    movq 56(%rsp),%rdi
    movq %r8,0(%rdi)
    movq %r9,8(%rdi)

    /* restore the saved registers; the entry value of rsp goes back
       through r11 */
    movq 0(%rsp),%r11
    movq 8(%rsp),%r12
    movq 16(%rsp),%r13
    movq 24(%rsp),%r14
    movq 32(%rsp),%r15
    movq 40(%rsp),%rbx
    movq 48(%rsp),%rbp

    movq %r11,%rsp

    ret