/* -rw-r--r-- 42539 lib1305-20250407/crypto_onetimeauth/poly1305/amd64-maa44-g32/poly1305_maa44_g32.S raw */
/* assembly to compute poly1305 using precomputed key powers and
applying lazy reduction over a group of 32 field elements */
#include "crypto_asm_hidden.h"
// linker define poly1305_maa44_g32
// linker use mask44
// linker use mask42
// linker use pmask1
// linker use pmask2
// linker use pmask3
// linker use pmask4
// linker use pmask5
// linker use upmask1
// linker use upmask2
// linker use upmask3
// linker use upmask4
// linker use upmask5
// linker use twoexp41
// linker use p0
// linker use p1
// linker use p2
#define mask44 CRYPTO_SHARED_NAMESPACE(mask44)
#define mask42 CRYPTO_SHARED_NAMESPACE(mask42)
#define pmask1 CRYPTO_SHARED_NAMESPACE(pmask1)
#define pmask2 CRYPTO_SHARED_NAMESPACE(pmask2)
#define pmask3 CRYPTO_SHARED_NAMESPACE(pmask3)
#define pmask4 CRYPTO_SHARED_NAMESPACE(pmask4)
#define pmask5 CRYPTO_SHARED_NAMESPACE(pmask5)
#define upmask1 CRYPTO_SHARED_NAMESPACE(upmask1)
#define upmask2 CRYPTO_SHARED_NAMESPACE(upmask2)
#define upmask3 CRYPTO_SHARED_NAMESPACE(upmask3)
#define upmask4 CRYPTO_SHARED_NAMESPACE(upmask4)
#define upmask5 CRYPTO_SHARED_NAMESPACE(upmask5)
#define twoexp41 CRYPTO_SHARED_NAMESPACE(twoexp41)
#define p0 CRYPTO_SHARED_NAMESPACE(p0)
#define p1 CRYPTO_SHARED_NAMESPACE(p1)
#define p2 CRYPTO_SHARED_NAMESPACE(p2)
#include "poly1305_asm.h"
/*
 * poly1305_maa44_g32 -- entry point, exported with and without a leading
 * underscore.
 *
 * Register use on entry, as far as this file shows:
 *   rsi = pointer to the message bytes (read and advanced further down)
 *   rdx = pointer to the key table; it is copied to rdi and to 112(%rsp),
 *         and the 16 bytes at offsets 768..783 are copied to the stack
 *   rcx = number of 16-byte message blocks, counting the last one
 *   r8  = length of the last block in bits, 0 meaning a full block
 *         (tested against $0 and $64 at .LB0)
 *   r9  = length of the last block in bytes (tested against $8 at .L5)
 *   rdi = saved at 56(%rsp); NOTE(review): presumably the output pointer,
 *         its use lies outside this part of the file -- confirm
 *
 * Stack frame (rsp is rounded down to a multiple of 32, then 160 bytes are
 * reserved):
 *     0  caller's rsp            56  rdi
 *     8  r12                     64  r8  (bits in last block)
 *    16  r13                     72  r9  (bytes in last block)
 *    24  r14                     80  rcx (block counter)
 *    32  r15                     88  key bytes 768..775
 *    40  rbx                     96  key bytes 776..783
 *    48  rbp                    104  scratch qword, zeroed here
 *                               112  rdx (key table pointer)
 */
.p2align 5
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32)
.global _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32)
.global CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32)
_CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32):
CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g32):
/* keep the caller's rsp in r11 so it can be stored in the aligned frame */
movq %rsp,%r11
andq $-32,%rsp
subq $160,%rsp
movq %r11,0(%rsp)
/* save the callee-saved registers used below */
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
/* save the arguments whose registers get reused */
movq %rdi,56(%rsp)
movq %r8,64(%rsp)
movq %r9,72(%rsp)
/* store last 16 bytes of the key */
movq 768(%rdx),%r14
movq 776(%rdx),%r15
movq %r14,88(%rsp)
movq %r15,96(%rsp)
/* initialize a quad-word on the stack with 0 */
movq $0,104(%rsp)
/* if the message has a single block */
cmpq $1,%rcx
je .L5
/* multi-block path: remember the block count and the key table pointer */
movq %rcx,80(%rsp)
movq %rdx,%rdi
movq %rdx,112(%rsp)
/* clear r8..r13; the last-block code at .L4 adds the message limbs into
   r8, r10 and r12, so these presumably hold the accumulator used by the
   fe1305_* macros -- confirm against poly1305_asm.h */
movq $0,%r8
movq $0,%r9
movq $0,%r10
movq $0,%r11
movq $0,%r12
movq $0,%r13
/* dispatch on the block count: rcx == n jumps to .LBn for n = 2..31, any
   other value falls through to .LB32.
   NOTE(review): rcx == 0 would also fall through to .LB32; presumably the
   caller never passes 0 -- confirm */
cmpq $2,%rcx
je .LB2
cmpq $3,%rcx
je .LB3
cmpq $4,%rcx
je .LB4
cmpq $5,%rcx
je .LB5
cmpq $6,%rcx
je .LB6
cmpq $7,%rcx
je .LB7
cmpq $8,%rcx
je .LB8
cmpq $9,%rcx
je .LB9
cmpq $10,%rcx
je .LB10
cmpq $11,%rcx
je .LB11
cmpq $12,%rcx
je .LB12
cmpq $13,%rcx
je .LB13
cmpq $14,%rcx
je .LB14
cmpq $15,%rcx
je .LB15
cmpq $16,%rcx
je .LB16
cmpq $17,%rcx
je .LB17
cmpq $18,%rcx
je .LB18
cmpq $19,%rcx
je .LB19
cmpq $20,%rcx
je .LB20
cmpq $21,%rcx
je .LB21
cmpq $22,%rcx
je .LB22
cmpq $23,%rcx
je .LB23
cmpq $24,%rcx
je .LB24
cmpq $25,%rcx
je .LB25
cmpq $26,%rcx
je .LB26
cmpq $27,%rcx
je .LB27
cmpq $28,%rcx
je .LB28
cmpq $29,%rcx
je .LB29
cmpq $30,%rcx
je .LB30
cmpq $31,%rcx
je .LB31
/* .LB32: one full pass over 31 message blocks.  Each
   fe1305_mul_taun(m,k) / fe1305_add_product() pair takes m = 0..480 in
   steps of 16 (presumably the message offset from rsi) and k = 720..0 in
   steps of 24 (presumably the key-table offset); the macros come from
   poly1305_asm.h.  One fe1305_reduce_3l_128bits follows, rsi is advanced
   by 496 = 31*16, and the counter at 80(%rsp) is lowered by 32.  What
   happens next depends on the new counter value in rcx:
     0  -> .LB0 (only the last block is left)
     1  -> fold in one more block, then .LB1
     >1 -> fold in one more block, then fall through to the .LT chain */
.LB32:
fe1305_mul_taun(0,720)
fe1305_add_product()
fe1305_mul_taun(16,696)
fe1305_add_product()
fe1305_mul_taun(32,672)
fe1305_add_product()
fe1305_mul_taun(48,648)
fe1305_add_product()
fe1305_mul_taun(64,624)
fe1305_add_product()
fe1305_mul_taun(80,600)
fe1305_add_product()
fe1305_mul_taun(96,576)
fe1305_add_product()
fe1305_mul_taun(112,552)
fe1305_add_product()
fe1305_mul_taun(128,528)
fe1305_add_product()
fe1305_mul_taun(144,504)
fe1305_add_product()
fe1305_mul_taun(160,480)
fe1305_add_product()
fe1305_mul_taun(176,456)
fe1305_add_product()
fe1305_mul_taun(192,432)
fe1305_add_product()
fe1305_mul_taun(208,408)
fe1305_add_product()
fe1305_mul_taun(224,384)
fe1305_add_product()
fe1305_mul_taun(240,360)
fe1305_add_product()
fe1305_mul_taun(256,336)
fe1305_add_product()
fe1305_mul_taun(272,312)
fe1305_add_product()
fe1305_mul_taun(288,288)
fe1305_add_product()
fe1305_mul_taun(304,264)
fe1305_add_product()
fe1305_mul_taun(320,240)
fe1305_add_product()
fe1305_mul_taun(336,216)
fe1305_add_product()
fe1305_mul_taun(352,192)
fe1305_add_product()
fe1305_mul_taun(368,168)
fe1305_add_product()
fe1305_mul_taun(384,144)
fe1305_add_product()
fe1305_mul_taun(400,120)
fe1305_add_product()
fe1305_mul_taun(416,96)
fe1305_add_product()
fe1305_mul_taun(432,72)
fe1305_add_product()
fe1305_mul_taun(448,48)
fe1305_add_product()
fe1305_mul_taun(464,24)
fe1305_add_product()
fe1305_mul_taun(480,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
/* skip the 31 blocks just consumed and update the stored block counter */
addq $496,%rsi
movq 80(%rsp),%rcx
subq $32,%rcx
movq %rcx,80(%rsp)
/* if there are no blocks left before processing the last block */
cmpq $0,%rcx
je .LB0
/* at least one more block precedes the last block: run
   fe1305_add_msg_block(0) on it (presumably adds the block at 0(%rsi) to
   the accumulator -- macro defined in poly1305_asm.h) and step past it */
fe1305_add_msg_block(0)
addq $16,%rsi
/* if that was the only block before the last block */
cmpq $1,%rcx
je .LB1
/* .LT2 .. .LT32: re-dispatch after a 31-block pass.  Here rcx >= 2 is the
   number of blocks that were still ahead of the last block when the pass
   ended; the first of them has already been handled by
   fe1305_add_msg_block(0) in .LB32.  For rcx == n (n = 2..31) the code
   runs fe1305_mul_taunr(24*(n-1)) and jumps to .LBn; for rcx >= 32 it runs
   fe1305_reduce_3l_64bits and fe1305_mul_taunr(744) and starts another
   pass at .LB32.  The macros are defined in poly1305_asm.h; the argument
   is presumably a key-table offset in 24-byte steps -- confirm.
   The comparisons use jg, i.e. rcx is treated as a signed value. */
.LT2:
cmpq $2,%rcx
jg .LT3
/* if there are two more blocks before processing the last block */
fe1305_mul_taunr(24)
jmp .LB2
.LT3:
cmpq $3,%rcx
jg .LT4
/* if there are three more blocks before processing the last block */
fe1305_mul_taunr(48)
jmp .LB3
.LT4:
cmpq $4,%rcx
jg .LT5
/* if there are four more blocks before processing the last block */
fe1305_mul_taunr(72)
jmp .LB4
.LT5:
cmpq $5,%rcx
jg .LT6
/* if there are five more blocks before processing the last block */
fe1305_mul_taunr(96)
jmp .LB5
.LT6:
cmpq $6,%rcx
jg .LT7
/* if there are six more blocks before processing the last block */
fe1305_mul_taunr(120)
jmp .LB6
.LT7:
cmpq $7,%rcx
jg .LT8
/* if there are seven more blocks before processing the last block */
fe1305_mul_taunr(144)
jmp .LB7
.LT8:
cmpq $8,%rcx
jg .LT9
/* if there are eight more blocks before processing the last block */
fe1305_mul_taunr(168)
jmp .LB8
.LT9:
cmpq $9,%rcx
jg .LT10
/* if there are nine more blocks before processing the last block */
fe1305_mul_taunr(192)
jmp .LB9
.LT10:
cmpq $10,%rcx
jg .LT11
/* if there are ten more blocks before processing the last block */
fe1305_mul_taunr(216)
jmp .LB10
.LT11:
cmpq $11,%rcx
jg .LT12
/* if there are eleven more blocks before processing the last block */
fe1305_mul_taunr(240)
jmp .LB11
.LT12:
cmpq $12,%rcx
jg .LT13
/* if there are twelve more blocks before processing the last block */
fe1305_mul_taunr(264)
jmp .LB12
.LT13:
cmpq $13,%rcx
jg .LT14
/* if there are thirteen more blocks before processing the last block */
fe1305_mul_taunr(288)
jmp .LB13
.LT14:
cmpq $14,%rcx
jg .LT15
/* if there are fourteen more blocks before processing the last block */
fe1305_mul_taunr(312)
jmp .LB14
.LT15:
cmpq $15,%rcx
jg .LT16
/* if there are fifteen more blocks before processing the last block */
fe1305_mul_taunr(336)
jmp .LB15
.LT16:
cmpq $16,%rcx
jg .LT17
/* if there are sixteen more blocks before processing the last block */
fe1305_mul_taunr(360)
jmp .LB16
.LT17:
cmpq $17,%rcx
jg .LT18
/* if there are seventeen more blocks before processing the last block */
fe1305_mul_taunr(384)
jmp .LB17
.LT18:
cmpq $18,%rcx
jg .LT19
/* if there are eighteen more blocks before processing the last block */
fe1305_mul_taunr(408)
jmp .LB18
.LT19:
cmpq $19,%rcx
jg .LT20
/* if there are nineteen more blocks before processing the last block */
fe1305_mul_taunr(432)
jmp .LB19
.LT20:
cmpq $20,%rcx
jg .LT21
/* if there are twenty more blocks before processing the last block */
fe1305_mul_taunr(456)
jmp .LB20
.LT21:
cmpq $21,%rcx
jg .LT22
/* if there are twenty one more blocks before processing the last block */
fe1305_mul_taunr(480)
jmp .LB21
.LT22:
cmpq $22,%rcx
jg .LT23
/* if there are twenty two more blocks before processing the last block */
fe1305_mul_taunr(504)
jmp .LB22
.LT23:
cmpq $23,%rcx
jg .LT24
/* if there are twenty three more blocks before processing the last block */
fe1305_mul_taunr(528)
jmp .LB23
.LT24:
cmpq $24,%rcx
jg .LT25
/* if there are twenty four more blocks before processing the last block */
fe1305_mul_taunr(552)
jmp .LB24
.LT25:
cmpq $25,%rcx
jg .LT26
/* if there are twenty five more blocks before processing the last block */
fe1305_mul_taunr(576)
jmp .LB25
.LT26:
cmpq $26,%rcx
jg .LT27
/* if there are twenty six more blocks before processing the last block */
fe1305_mul_taunr(600)
jmp .LB26
.LT27:
cmpq $27,%rcx
jg .LT28
/* if there are twenty seven more blocks before processing the last block */
fe1305_mul_taunr(624)
jmp .LB27
.LT28:
cmpq $28,%rcx
jg .LT29
/* if there are twenty eight more blocks before processing the last block */
fe1305_mul_taunr(648)
jmp .LB28
.LT29:
cmpq $29,%rcx
jg .LT30
/* if there are twenty nine more blocks before processing the last block */
fe1305_mul_taunr(672)
jmp .LB29
.LT30:
cmpq $30,%rcx
jg .LT31
/* if there are thirty more blocks before processing the last block */
fe1305_mul_taunr(696)
jmp .LB30
.LT31:
cmpq $31,%rcx
jg .LT32
/* if there are thirty one more blocks before processing the last block */
fe1305_mul_taunr(720)
jmp .LB31
.LT32:
/* if there are at least thirty two more blocks before processing the last block */
fe1305_reduce_3l_64bits()
fe1305_mul_taunr(744)
jmp .LB32
/* .LB1: reached from .LB32 when exactly one block preceded the last block
   and it has just been handled by fe1305_add_msg_block(0).  Runs
   fe1305_mul_taunr(0) and fe1305_reduce_3l_128bits (both defined in
   poly1305_asm.h), then handles the last block at .LB0. */
.LB1:
fe1305_mul_taunr(0)
fe1305_reduce_3l_128bits()
jmp .LB0
/* .LB2: 1 block precedes the last block.  A single fe1305_mul_taun(0,0) /
   fe1305_add_product() pair is run (macros from poly1305_asm.h; the
   arguments are presumably the message offset from rsi and the key-table
   offset).  After fe1305_reduce_3l_128bits, rsi is advanced by 16 and
   control goes to .LB0 for the last block. */
.LB2:
fe1305_mul_taun(0,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $16,%rsi
jmp .LB0
/* .LB3: 2 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..16 in steps of 16 (presumably the
   message offset from rsi) and k = 24..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 32 = 2*16 and control goes
   to .LB0 for the last block. */
.LB3:
fe1305_mul_taun(0,24)
fe1305_add_product()
fe1305_mul_taun(16,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $32,%rsi
jmp .LB0
/* .LB4: 3 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..32 in steps of 16 (presumably the
   message offset from rsi) and k = 48..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 48 = 3*16 and control goes
   to .LB0 for the last block. */
.LB4:
fe1305_mul_taun(0,48)
fe1305_add_product()
fe1305_mul_taun(16,24)
fe1305_add_product()
fe1305_mul_taun(32,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $48,%rsi
jmp .LB0
/* .LB5: 4 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..48 in steps of 16 (presumably the
   message offset from rsi) and k = 72..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 64 = 4*16 and control goes
   to .LB0 for the last block. */
.LB5:
fe1305_mul_taun(0,72)
fe1305_add_product()
fe1305_mul_taun(16,48)
fe1305_add_product()
fe1305_mul_taun(32,24)
fe1305_add_product()
fe1305_mul_taun(48,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $64,%rsi
jmp .LB0
/* .LB6: 5 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..64 in steps of 16 (presumably the
   message offset from rsi) and k = 96..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 80 = 5*16 and control goes
   to .LB0 for the last block. */
.LB6:
fe1305_mul_taun(0,96)
fe1305_add_product()
fe1305_mul_taun(16,72)
fe1305_add_product()
fe1305_mul_taun(32,48)
fe1305_add_product()
fe1305_mul_taun(48,24)
fe1305_add_product()
fe1305_mul_taun(64,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $80,%rsi
jmp .LB0
/* .LB7: 6 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..80 in steps of 16 (presumably the
   message offset from rsi) and k = 120..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 96 = 6*16 and control goes
   to .LB0 for the last block. */
.LB7:
fe1305_mul_taun(0,120)
fe1305_add_product()
fe1305_mul_taun(16,96)
fe1305_add_product()
fe1305_mul_taun(32,72)
fe1305_add_product()
fe1305_mul_taun(48,48)
fe1305_add_product()
fe1305_mul_taun(64,24)
fe1305_add_product()
fe1305_mul_taun(80,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $96,%rsi
jmp .LB0
/* .LB8: 7 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..96 in steps of 16 (presumably the
   message offset from rsi) and k = 144..0 in steps of 24 (presumably the
   key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 112 = 7*16 and control goes
   to .LB0 for the last block. */
.LB8:
fe1305_mul_taun(0,144)
fe1305_add_product()
fe1305_mul_taun(16,120)
fe1305_add_product()
fe1305_mul_taun(32,96)
fe1305_add_product()
fe1305_mul_taun(48,72)
fe1305_add_product()
fe1305_mul_taun(64,48)
fe1305_add_product()
fe1305_mul_taun(80,24)
fe1305_add_product()
fe1305_mul_taun(96,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $112,%rsi
jmp .LB0
/* .LB9: 8 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..112 in steps of 16 (presumably
   the message offset from rsi) and k = 168..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 128 = 8*16 and control goes
   to .LB0 for the last block. */
.LB9:
fe1305_mul_taun(0,168)
fe1305_add_product()
fe1305_mul_taun(16,144)
fe1305_add_product()
fe1305_mul_taun(32,120)
fe1305_add_product()
fe1305_mul_taun(48,96)
fe1305_add_product()
fe1305_mul_taun(64,72)
fe1305_add_product()
fe1305_mul_taun(80,48)
fe1305_add_product()
fe1305_mul_taun(96,24)
fe1305_add_product()
fe1305_mul_taun(112,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $128,%rsi
jmp .LB0
/* .LB10: 9 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..128 in steps of 16 (presumably
   the message offset from rsi) and k = 192..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 144 = 9*16 and control goes
   to .LB0 for the last block. */
.LB10:
fe1305_mul_taun(0,192)
fe1305_add_product()
fe1305_mul_taun(16,168)
fe1305_add_product()
fe1305_mul_taun(32,144)
fe1305_add_product()
fe1305_mul_taun(48,120)
fe1305_add_product()
fe1305_mul_taun(64,96)
fe1305_add_product()
fe1305_mul_taun(80,72)
fe1305_add_product()
fe1305_mul_taun(96,48)
fe1305_add_product()
fe1305_mul_taun(112,24)
fe1305_add_product()
fe1305_mul_taun(128,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $144,%rsi
jmp .LB0
/* .LB11: 10 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..144 in steps of 16 (presumably
   the message offset from rsi) and k = 216..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 160 = 10*16 and control
   goes to .LB0 for the last block. */
.LB11:
fe1305_mul_taun(0,216)
fe1305_add_product()
fe1305_mul_taun(16,192)
fe1305_add_product()
fe1305_mul_taun(32,168)
fe1305_add_product()
fe1305_mul_taun(48,144)
fe1305_add_product()
fe1305_mul_taun(64,120)
fe1305_add_product()
fe1305_mul_taun(80,96)
fe1305_add_product()
fe1305_mul_taun(96,72)
fe1305_add_product()
fe1305_mul_taun(112,48)
fe1305_add_product()
fe1305_mul_taun(128,24)
fe1305_add_product()
fe1305_mul_taun(144,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $160,%rsi
jmp .LB0
/* .LB12: 11 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..160 in steps of 16 (presumably
   the message offset from rsi) and k = 240..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 176 = 11*16 and control
   goes to .LB0 for the last block. */
.LB12:
fe1305_mul_taun(0,240)
fe1305_add_product()
fe1305_mul_taun(16,216)
fe1305_add_product()
fe1305_mul_taun(32,192)
fe1305_add_product()
fe1305_mul_taun(48,168)
fe1305_add_product()
fe1305_mul_taun(64,144)
fe1305_add_product()
fe1305_mul_taun(80,120)
fe1305_add_product()
fe1305_mul_taun(96,96)
fe1305_add_product()
fe1305_mul_taun(112,72)
fe1305_add_product()
fe1305_mul_taun(128,48)
fe1305_add_product()
fe1305_mul_taun(144,24)
fe1305_add_product()
fe1305_mul_taun(160,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $176,%rsi
jmp .LB0
/* .LB13: 12 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..176 in steps of 16 (presumably
   the message offset from rsi) and k = 264..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 192 = 12*16 and control
   goes to .LB0 for the last block. */
.LB13:
fe1305_mul_taun(0,264)
fe1305_add_product()
fe1305_mul_taun(16,240)
fe1305_add_product()
fe1305_mul_taun(32,216)
fe1305_add_product()
fe1305_mul_taun(48,192)
fe1305_add_product()
fe1305_mul_taun(64,168)
fe1305_add_product()
fe1305_mul_taun(80,144)
fe1305_add_product()
fe1305_mul_taun(96,120)
fe1305_add_product()
fe1305_mul_taun(112,96)
fe1305_add_product()
fe1305_mul_taun(128,72)
fe1305_add_product()
fe1305_mul_taun(144,48)
fe1305_add_product()
fe1305_mul_taun(160,24)
fe1305_add_product()
fe1305_mul_taun(176,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $192,%rsi
jmp .LB0
/* .LB14: 13 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..192 in steps of 16 (presumably
   the message offset from rsi) and k = 288..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 208 = 13*16 and control
   goes to .LB0 for the last block. */
.LB14:
fe1305_mul_taun(0,288)
fe1305_add_product()
fe1305_mul_taun(16,264)
fe1305_add_product()
fe1305_mul_taun(32,240)
fe1305_add_product()
fe1305_mul_taun(48,216)
fe1305_add_product()
fe1305_mul_taun(64,192)
fe1305_add_product()
fe1305_mul_taun(80,168)
fe1305_add_product()
fe1305_mul_taun(96,144)
fe1305_add_product()
fe1305_mul_taun(112,120)
fe1305_add_product()
fe1305_mul_taun(128,96)
fe1305_add_product()
fe1305_mul_taun(144,72)
fe1305_add_product()
fe1305_mul_taun(160,48)
fe1305_add_product()
fe1305_mul_taun(176,24)
fe1305_add_product()
fe1305_mul_taun(192,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $208,%rsi
jmp .LB0
/* .LB15: 14 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..208 in steps of 16 (presumably
   the message offset from rsi) and k = 312..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 224 = 14*16 and control
   goes to .LB0 for the last block. */
.LB15:
fe1305_mul_taun(0,312)
fe1305_add_product()
fe1305_mul_taun(16,288)
fe1305_add_product()
fe1305_mul_taun(32,264)
fe1305_add_product()
fe1305_mul_taun(48,240)
fe1305_add_product()
fe1305_mul_taun(64,216)
fe1305_add_product()
fe1305_mul_taun(80,192)
fe1305_add_product()
fe1305_mul_taun(96,168)
fe1305_add_product()
fe1305_mul_taun(112,144)
fe1305_add_product()
fe1305_mul_taun(128,120)
fe1305_add_product()
fe1305_mul_taun(144,96)
fe1305_add_product()
fe1305_mul_taun(160,72)
fe1305_add_product()
fe1305_mul_taun(176,48)
fe1305_add_product()
fe1305_mul_taun(192,24)
fe1305_add_product()
fe1305_mul_taun(208,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $224,%rsi
jmp .LB0
/* .LB16: 15 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..224 in steps of 16 (presumably
   the message offset from rsi) and k = 336..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 240 = 15*16 and control
   goes to .LB0 for the last block. */
.LB16:
fe1305_mul_taun(0,336)
fe1305_add_product()
fe1305_mul_taun(16,312)
fe1305_add_product()
fe1305_mul_taun(32,288)
fe1305_add_product()
fe1305_mul_taun(48,264)
fe1305_add_product()
fe1305_mul_taun(64,240)
fe1305_add_product()
fe1305_mul_taun(80,216)
fe1305_add_product()
fe1305_mul_taun(96,192)
fe1305_add_product()
fe1305_mul_taun(112,168)
fe1305_add_product()
fe1305_mul_taun(128,144)
fe1305_add_product()
fe1305_mul_taun(144,120)
fe1305_add_product()
fe1305_mul_taun(160,96)
fe1305_add_product()
fe1305_mul_taun(176,72)
fe1305_add_product()
fe1305_mul_taun(192,48)
fe1305_add_product()
fe1305_mul_taun(208,24)
fe1305_add_product()
fe1305_mul_taun(224,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $240,%rsi
jmp .LB0
/* .LB17: 16 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..240 in steps of 16 (presumably
   the message offset from rsi) and k = 360..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 256 = 16*16 and control
   goes to .LB0 for the last block. */
.LB17:
fe1305_mul_taun(0,360)
fe1305_add_product()
fe1305_mul_taun(16,336)
fe1305_add_product()
fe1305_mul_taun(32,312)
fe1305_add_product()
fe1305_mul_taun(48,288)
fe1305_add_product()
fe1305_mul_taun(64,264)
fe1305_add_product()
fe1305_mul_taun(80,240)
fe1305_add_product()
fe1305_mul_taun(96,216)
fe1305_add_product()
fe1305_mul_taun(112,192)
fe1305_add_product()
fe1305_mul_taun(128,168)
fe1305_add_product()
fe1305_mul_taun(144,144)
fe1305_add_product()
fe1305_mul_taun(160,120)
fe1305_add_product()
fe1305_mul_taun(176,96)
fe1305_add_product()
fe1305_mul_taun(192,72)
fe1305_add_product()
fe1305_mul_taun(208,48)
fe1305_add_product()
fe1305_mul_taun(224,24)
fe1305_add_product()
fe1305_mul_taun(240,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $256,%rsi
jmp .LB0
/* .LB18: 17 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..256 in steps of 16 (presumably
   the message offset from rsi) and k = 384..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 272 = 17*16 and control
   goes to .LB0 for the last block. */
.LB18:
fe1305_mul_taun(0,384)
fe1305_add_product()
fe1305_mul_taun(16,360)
fe1305_add_product()
fe1305_mul_taun(32,336)
fe1305_add_product()
fe1305_mul_taun(48,312)
fe1305_add_product()
fe1305_mul_taun(64,288)
fe1305_add_product()
fe1305_mul_taun(80,264)
fe1305_add_product()
fe1305_mul_taun(96,240)
fe1305_add_product()
fe1305_mul_taun(112,216)
fe1305_add_product()
fe1305_mul_taun(128,192)
fe1305_add_product()
fe1305_mul_taun(144,168)
fe1305_add_product()
fe1305_mul_taun(160,144)
fe1305_add_product()
fe1305_mul_taun(176,120)
fe1305_add_product()
fe1305_mul_taun(192,96)
fe1305_add_product()
fe1305_mul_taun(208,72)
fe1305_add_product()
fe1305_mul_taun(224,48)
fe1305_add_product()
fe1305_mul_taun(240,24)
fe1305_add_product()
fe1305_mul_taun(256,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $272,%rsi
jmp .LB0
/* .LB19: 18 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..272 in steps of 16 (presumably
   the message offset from rsi) and k = 408..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 288 = 18*16 and control
   goes to .LB0 for the last block. */
.LB19:
fe1305_mul_taun(0,408)
fe1305_add_product()
fe1305_mul_taun(16,384)
fe1305_add_product()
fe1305_mul_taun(32,360)
fe1305_add_product()
fe1305_mul_taun(48,336)
fe1305_add_product()
fe1305_mul_taun(64,312)
fe1305_add_product()
fe1305_mul_taun(80,288)
fe1305_add_product()
fe1305_mul_taun(96,264)
fe1305_add_product()
fe1305_mul_taun(112,240)
fe1305_add_product()
fe1305_mul_taun(128,216)
fe1305_add_product()
fe1305_mul_taun(144,192)
fe1305_add_product()
fe1305_mul_taun(160,168)
fe1305_add_product()
fe1305_mul_taun(176,144)
fe1305_add_product()
fe1305_mul_taun(192,120)
fe1305_add_product()
fe1305_mul_taun(208,96)
fe1305_add_product()
fe1305_mul_taun(224,72)
fe1305_add_product()
fe1305_mul_taun(240,48)
fe1305_add_product()
fe1305_mul_taun(256,24)
fe1305_add_product()
fe1305_mul_taun(272,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $288,%rsi
jmp .LB0
/* .LB20: 19 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..288 in steps of 16 (presumably
   the message offset from rsi) and k = 432..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 304 = 19*16 and control
   goes to .LB0 for the last block. */
.LB20:
fe1305_mul_taun(0,432)
fe1305_add_product()
fe1305_mul_taun(16,408)
fe1305_add_product()
fe1305_mul_taun(32,384)
fe1305_add_product()
fe1305_mul_taun(48,360)
fe1305_add_product()
fe1305_mul_taun(64,336)
fe1305_add_product()
fe1305_mul_taun(80,312)
fe1305_add_product()
fe1305_mul_taun(96,288)
fe1305_add_product()
fe1305_mul_taun(112,264)
fe1305_add_product()
fe1305_mul_taun(128,240)
fe1305_add_product()
fe1305_mul_taun(144,216)
fe1305_add_product()
fe1305_mul_taun(160,192)
fe1305_add_product()
fe1305_mul_taun(176,168)
fe1305_add_product()
fe1305_mul_taun(192,144)
fe1305_add_product()
fe1305_mul_taun(208,120)
fe1305_add_product()
fe1305_mul_taun(224,96)
fe1305_add_product()
fe1305_mul_taun(240,72)
fe1305_add_product()
fe1305_mul_taun(256,48)
fe1305_add_product()
fe1305_mul_taun(272,24)
fe1305_add_product()
fe1305_mul_taun(288,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $304,%rsi
jmp .LB0
/* .LB21: 20 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..304 in steps of 16 (presumably
   the message offset from rsi) and k = 456..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 320 = 20*16 and control
   goes to .LB0 for the last block. */
.LB21:
fe1305_mul_taun(0,456)
fe1305_add_product()
fe1305_mul_taun(16,432)
fe1305_add_product()
fe1305_mul_taun(32,408)
fe1305_add_product()
fe1305_mul_taun(48,384)
fe1305_add_product()
fe1305_mul_taun(64,360)
fe1305_add_product()
fe1305_mul_taun(80,336)
fe1305_add_product()
fe1305_mul_taun(96,312)
fe1305_add_product()
fe1305_mul_taun(112,288)
fe1305_add_product()
fe1305_mul_taun(128,264)
fe1305_add_product()
fe1305_mul_taun(144,240)
fe1305_add_product()
fe1305_mul_taun(160,216)
fe1305_add_product()
fe1305_mul_taun(176,192)
fe1305_add_product()
fe1305_mul_taun(192,168)
fe1305_add_product()
fe1305_mul_taun(208,144)
fe1305_add_product()
fe1305_mul_taun(224,120)
fe1305_add_product()
fe1305_mul_taun(240,96)
fe1305_add_product()
fe1305_mul_taun(256,72)
fe1305_add_product()
fe1305_mul_taun(272,48)
fe1305_add_product()
fe1305_mul_taun(288,24)
fe1305_add_product()
fe1305_mul_taun(304,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $320,%rsi
jmp .LB0
/* .LB22: 21 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..320 in steps of 16 (presumably
   the message offset from rsi) and k = 480..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 336 = 21*16 and control
   goes to .LB0 for the last block. */
.LB22:
fe1305_mul_taun(0,480)
fe1305_add_product()
fe1305_mul_taun(16,456)
fe1305_add_product()
fe1305_mul_taun(32,432)
fe1305_add_product()
fe1305_mul_taun(48,408)
fe1305_add_product()
fe1305_mul_taun(64,384)
fe1305_add_product()
fe1305_mul_taun(80,360)
fe1305_add_product()
fe1305_mul_taun(96,336)
fe1305_add_product()
fe1305_mul_taun(112,312)
fe1305_add_product()
fe1305_mul_taun(128,288)
fe1305_add_product()
fe1305_mul_taun(144,264)
fe1305_add_product()
fe1305_mul_taun(160,240)
fe1305_add_product()
fe1305_mul_taun(176,216)
fe1305_add_product()
fe1305_mul_taun(192,192)
fe1305_add_product()
fe1305_mul_taun(208,168)
fe1305_add_product()
fe1305_mul_taun(224,144)
fe1305_add_product()
fe1305_mul_taun(240,120)
fe1305_add_product()
fe1305_mul_taun(256,96)
fe1305_add_product()
fe1305_mul_taun(272,72)
fe1305_add_product()
fe1305_mul_taun(288,48)
fe1305_add_product()
fe1305_mul_taun(304,24)
fe1305_add_product()
fe1305_mul_taun(320,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $336,%rsi
jmp .LB0
/* .LB23: 22 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..336 in steps of 16 (presumably
   the message offset from rsi) and k = 504..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 352 = 22*16 and control
   goes to .LB0 for the last block. */
.LB23:
fe1305_mul_taun(0,504)
fe1305_add_product()
fe1305_mul_taun(16,480)
fe1305_add_product()
fe1305_mul_taun(32,456)
fe1305_add_product()
fe1305_mul_taun(48,432)
fe1305_add_product()
fe1305_mul_taun(64,408)
fe1305_add_product()
fe1305_mul_taun(80,384)
fe1305_add_product()
fe1305_mul_taun(96,360)
fe1305_add_product()
fe1305_mul_taun(112,336)
fe1305_add_product()
fe1305_mul_taun(128,312)
fe1305_add_product()
fe1305_mul_taun(144,288)
fe1305_add_product()
fe1305_mul_taun(160,264)
fe1305_add_product()
fe1305_mul_taun(176,240)
fe1305_add_product()
fe1305_mul_taun(192,216)
fe1305_add_product()
fe1305_mul_taun(208,192)
fe1305_add_product()
fe1305_mul_taun(224,168)
fe1305_add_product()
fe1305_mul_taun(240,144)
fe1305_add_product()
fe1305_mul_taun(256,120)
fe1305_add_product()
fe1305_mul_taun(272,96)
fe1305_add_product()
fe1305_mul_taun(288,72)
fe1305_add_product()
fe1305_mul_taun(304,48)
fe1305_add_product()
fe1305_mul_taun(320,24)
fe1305_add_product()
fe1305_mul_taun(336,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $352,%rsi
jmp .LB0
/* .LB24: 23 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..352 in steps of 16 (presumably
   the message offset from rsi) and k = 528..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 368 = 23*16 and control
   goes to .LB0 for the last block. */
.LB24:
fe1305_mul_taun(0,528)
fe1305_add_product()
fe1305_mul_taun(16,504)
fe1305_add_product()
fe1305_mul_taun(32,480)
fe1305_add_product()
fe1305_mul_taun(48,456)
fe1305_add_product()
fe1305_mul_taun(64,432)
fe1305_add_product()
fe1305_mul_taun(80,408)
fe1305_add_product()
fe1305_mul_taun(96,384)
fe1305_add_product()
fe1305_mul_taun(112,360)
fe1305_add_product()
fe1305_mul_taun(128,336)
fe1305_add_product()
fe1305_mul_taun(144,312)
fe1305_add_product()
fe1305_mul_taun(160,288)
fe1305_add_product()
fe1305_mul_taun(176,264)
fe1305_add_product()
fe1305_mul_taun(192,240)
fe1305_add_product()
fe1305_mul_taun(208,216)
fe1305_add_product()
fe1305_mul_taun(224,192)
fe1305_add_product()
fe1305_mul_taun(240,168)
fe1305_add_product()
fe1305_mul_taun(256,144)
fe1305_add_product()
fe1305_mul_taun(272,120)
fe1305_add_product()
fe1305_mul_taun(288,96)
fe1305_add_product()
fe1305_mul_taun(304,72)
fe1305_add_product()
fe1305_mul_taun(320,48)
fe1305_add_product()
fe1305_mul_taun(336,24)
fe1305_add_product()
fe1305_mul_taun(352,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $368,%rsi
jmp .LB0
/* .LB25: 24 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..368 in steps of 16 (presumably
   the message offset from rsi) and k = 552..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 384 = 24*16 and control
   goes to .LB0 for the last block. */
.LB25:
fe1305_mul_taun(0,552)
fe1305_add_product()
fe1305_mul_taun(16,528)
fe1305_add_product()
fe1305_mul_taun(32,504)
fe1305_add_product()
fe1305_mul_taun(48,480)
fe1305_add_product()
fe1305_mul_taun(64,456)
fe1305_add_product()
fe1305_mul_taun(80,432)
fe1305_add_product()
fe1305_mul_taun(96,408)
fe1305_add_product()
fe1305_mul_taun(112,384)
fe1305_add_product()
fe1305_mul_taun(128,360)
fe1305_add_product()
fe1305_mul_taun(144,336)
fe1305_add_product()
fe1305_mul_taun(160,312)
fe1305_add_product()
fe1305_mul_taun(176,288)
fe1305_add_product()
fe1305_mul_taun(192,264)
fe1305_add_product()
fe1305_mul_taun(208,240)
fe1305_add_product()
fe1305_mul_taun(224,216)
fe1305_add_product()
fe1305_mul_taun(240,192)
fe1305_add_product()
fe1305_mul_taun(256,168)
fe1305_add_product()
fe1305_mul_taun(272,144)
fe1305_add_product()
fe1305_mul_taun(288,120)
fe1305_add_product()
fe1305_mul_taun(304,96)
fe1305_add_product()
fe1305_mul_taun(320,72)
fe1305_add_product()
fe1305_mul_taun(336,48)
fe1305_add_product()
fe1305_mul_taun(352,24)
fe1305_add_product()
fe1305_mul_taun(368,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $384,%rsi
jmp .LB0
/* .LB26: 25 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..384 in steps of 16 (presumably
   the message offset from rsi) and k = 576..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 400 = 25*16 and control
   goes to .LB0 for the last block. */
.LB26:
fe1305_mul_taun(0,576)
fe1305_add_product()
fe1305_mul_taun(16,552)
fe1305_add_product()
fe1305_mul_taun(32,528)
fe1305_add_product()
fe1305_mul_taun(48,504)
fe1305_add_product()
fe1305_mul_taun(64,480)
fe1305_add_product()
fe1305_mul_taun(80,456)
fe1305_add_product()
fe1305_mul_taun(96,432)
fe1305_add_product()
fe1305_mul_taun(112,408)
fe1305_add_product()
fe1305_mul_taun(128,384)
fe1305_add_product()
fe1305_mul_taun(144,360)
fe1305_add_product()
fe1305_mul_taun(160,336)
fe1305_add_product()
fe1305_mul_taun(176,312)
fe1305_add_product()
fe1305_mul_taun(192,288)
fe1305_add_product()
fe1305_mul_taun(208,264)
fe1305_add_product()
fe1305_mul_taun(224,240)
fe1305_add_product()
fe1305_mul_taun(240,216)
fe1305_add_product()
fe1305_mul_taun(256,192)
fe1305_add_product()
fe1305_mul_taun(272,168)
fe1305_add_product()
fe1305_mul_taun(288,144)
fe1305_add_product()
fe1305_mul_taun(304,120)
fe1305_add_product()
fe1305_mul_taun(320,96)
fe1305_add_product()
fe1305_mul_taun(336,72)
fe1305_add_product()
fe1305_mul_taun(352,48)
fe1305_add_product()
fe1305_mul_taun(368,24)
fe1305_add_product()
fe1305_mul_taun(384,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $400,%rsi
jmp .LB0
/* .LB27: 26 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..400 in steps of 16 (presumably
   the message offset from rsi) and k = 600..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 416 = 26*16 and control
   goes to .LB0 for the last block. */
.LB27:
fe1305_mul_taun(0,600)
fe1305_add_product()
fe1305_mul_taun(16,576)
fe1305_add_product()
fe1305_mul_taun(32,552)
fe1305_add_product()
fe1305_mul_taun(48,528)
fe1305_add_product()
fe1305_mul_taun(64,504)
fe1305_add_product()
fe1305_mul_taun(80,480)
fe1305_add_product()
fe1305_mul_taun(96,456)
fe1305_add_product()
fe1305_mul_taun(112,432)
fe1305_add_product()
fe1305_mul_taun(128,408)
fe1305_add_product()
fe1305_mul_taun(144,384)
fe1305_add_product()
fe1305_mul_taun(160,360)
fe1305_add_product()
fe1305_mul_taun(176,336)
fe1305_add_product()
fe1305_mul_taun(192,312)
fe1305_add_product()
fe1305_mul_taun(208,288)
fe1305_add_product()
fe1305_mul_taun(224,264)
fe1305_add_product()
fe1305_mul_taun(240,240)
fe1305_add_product()
fe1305_mul_taun(256,216)
fe1305_add_product()
fe1305_mul_taun(272,192)
fe1305_add_product()
fe1305_mul_taun(288,168)
fe1305_add_product()
fe1305_mul_taun(304,144)
fe1305_add_product()
fe1305_mul_taun(320,120)
fe1305_add_product()
fe1305_mul_taun(336,96)
fe1305_add_product()
fe1305_mul_taun(352,72)
fe1305_add_product()
fe1305_mul_taun(368,48)
fe1305_add_product()
fe1305_mul_taun(384,24)
fe1305_add_product()
fe1305_mul_taun(400,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $416,%rsi
jmp .LB0
/* .LB28: 27 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..416 in steps of 16 (presumably
   the message offset from rsi) and k = 624..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 432 = 27*16 and control
   goes to .LB0 for the last block. */
.LB28:
fe1305_mul_taun(0,624)
fe1305_add_product()
fe1305_mul_taun(16,600)
fe1305_add_product()
fe1305_mul_taun(32,576)
fe1305_add_product()
fe1305_mul_taun(48,552)
fe1305_add_product()
fe1305_mul_taun(64,528)
fe1305_add_product()
fe1305_mul_taun(80,504)
fe1305_add_product()
fe1305_mul_taun(96,480)
fe1305_add_product()
fe1305_mul_taun(112,456)
fe1305_add_product()
fe1305_mul_taun(128,432)
fe1305_add_product()
fe1305_mul_taun(144,408)
fe1305_add_product()
fe1305_mul_taun(160,384)
fe1305_add_product()
fe1305_mul_taun(176,360)
fe1305_add_product()
fe1305_mul_taun(192,336)
fe1305_add_product()
fe1305_mul_taun(208,312)
fe1305_add_product()
fe1305_mul_taun(224,288)
fe1305_add_product()
fe1305_mul_taun(240,264)
fe1305_add_product()
fe1305_mul_taun(256,240)
fe1305_add_product()
fe1305_mul_taun(272,216)
fe1305_add_product()
fe1305_mul_taun(288,192)
fe1305_add_product()
fe1305_mul_taun(304,168)
fe1305_add_product()
fe1305_mul_taun(320,144)
fe1305_add_product()
fe1305_mul_taun(336,120)
fe1305_add_product()
fe1305_mul_taun(352,96)
fe1305_add_product()
fe1305_mul_taun(368,72)
fe1305_add_product()
fe1305_mul_taun(384,48)
fe1305_add_product()
fe1305_mul_taun(400,24)
fe1305_add_product()
fe1305_mul_taun(416,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $432,%rsi
jmp .LB0
/* .LB29: 28 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..432 in steps of 16 (presumably
   the message offset from rsi) and k = 648..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 448 = 28*16 and control
   goes to .LB0 for the last block. */
.LB29:
fe1305_mul_taun(0,648)
fe1305_add_product()
fe1305_mul_taun(16,624)
fe1305_add_product()
fe1305_mul_taun(32,600)
fe1305_add_product()
fe1305_mul_taun(48,576)
fe1305_add_product()
fe1305_mul_taun(64,552)
fe1305_add_product()
fe1305_mul_taun(80,528)
fe1305_add_product()
fe1305_mul_taun(96,504)
fe1305_add_product()
fe1305_mul_taun(112,480)
fe1305_add_product()
fe1305_mul_taun(128,456)
fe1305_add_product()
fe1305_mul_taun(144,432)
fe1305_add_product()
fe1305_mul_taun(160,408)
fe1305_add_product()
fe1305_mul_taun(176,384)
fe1305_add_product()
fe1305_mul_taun(192,360)
fe1305_add_product()
fe1305_mul_taun(208,336)
fe1305_add_product()
fe1305_mul_taun(224,312)
fe1305_add_product()
fe1305_mul_taun(240,288)
fe1305_add_product()
fe1305_mul_taun(256,264)
fe1305_add_product()
fe1305_mul_taun(272,240)
fe1305_add_product()
fe1305_mul_taun(288,216)
fe1305_add_product()
fe1305_mul_taun(304,192)
fe1305_add_product()
fe1305_mul_taun(320,168)
fe1305_add_product()
fe1305_mul_taun(336,144)
fe1305_add_product()
fe1305_mul_taun(352,120)
fe1305_add_product()
fe1305_mul_taun(368,96)
fe1305_add_product()
fe1305_mul_taun(384,72)
fe1305_add_product()
fe1305_mul_taun(400,48)
fe1305_add_product()
fe1305_mul_taun(416,24)
fe1305_add_product()
fe1305_mul_taun(432,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $448,%rsi
jmp .LB0
/* .LB30: 29 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..448 in steps of 16 (presumably
   the message offset from rsi) and k = 672..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 464 = 29*16 and control
   goes to .LB0 for the last block. */
.LB30:
fe1305_mul_taun(0,672)
fe1305_add_product()
fe1305_mul_taun(16,648)
fe1305_add_product()
fe1305_mul_taun(32,624)
fe1305_add_product()
fe1305_mul_taun(48,600)
fe1305_add_product()
fe1305_mul_taun(64,576)
fe1305_add_product()
fe1305_mul_taun(80,552)
fe1305_add_product()
fe1305_mul_taun(96,528)
fe1305_add_product()
fe1305_mul_taun(112,504)
fe1305_add_product()
fe1305_mul_taun(128,480)
fe1305_add_product()
fe1305_mul_taun(144,456)
fe1305_add_product()
fe1305_mul_taun(160,432)
fe1305_add_product()
fe1305_mul_taun(176,408)
fe1305_add_product()
fe1305_mul_taun(192,384)
fe1305_add_product()
fe1305_mul_taun(208,360)
fe1305_add_product()
fe1305_mul_taun(224,336)
fe1305_add_product()
fe1305_mul_taun(240,312)
fe1305_add_product()
fe1305_mul_taun(256,288)
fe1305_add_product()
fe1305_mul_taun(272,264)
fe1305_add_product()
fe1305_mul_taun(288,240)
fe1305_add_product()
fe1305_mul_taun(304,216)
fe1305_add_product()
fe1305_mul_taun(320,192)
fe1305_add_product()
fe1305_mul_taun(336,168)
fe1305_add_product()
fe1305_mul_taun(352,144)
fe1305_add_product()
fe1305_mul_taun(368,120)
fe1305_add_product()
fe1305_mul_taun(384,96)
fe1305_add_product()
fe1305_mul_taun(400,72)
fe1305_add_product()
fe1305_mul_taun(416,48)
fe1305_add_product()
fe1305_mul_taun(432,24)
fe1305_add_product()
fe1305_mul_taun(448,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $464,%rsi
jmp .LB0
/* .LB31: 30 blocks precede the last block.  Each fe1305_mul_taun(m,k) /
   fe1305_add_product() pair takes m = 0..464 in steps of 16 (presumably
   the message offset from rsi) and k = 696..0 in steps of 24 (presumably
   the key-table offset); the macros come from poly1305_asm.h.  After one
   fe1305_reduce_3l_128bits, rsi is advanced by 480 = 30*16.  There is no
   jump at the end: execution falls through into .LB0. */
.LB31:
fe1305_mul_taun(0,696)
fe1305_add_product()
fe1305_mul_taun(16,672)
fe1305_add_product()
fe1305_mul_taun(32,648)
fe1305_add_product()
fe1305_mul_taun(48,624)
fe1305_add_product()
fe1305_mul_taun(64,600)
fe1305_add_product()
fe1305_mul_taun(80,576)
fe1305_add_product()
fe1305_mul_taun(96,552)
fe1305_add_product()
fe1305_mul_taun(112,528)
fe1305_add_product()
fe1305_mul_taun(128,504)
fe1305_add_product()
fe1305_mul_taun(144,480)
fe1305_add_product()
fe1305_mul_taun(160,456)
fe1305_add_product()
fe1305_mul_taun(176,432)
fe1305_add_product()
fe1305_mul_taun(192,408)
fe1305_add_product()
fe1305_mul_taun(208,384)
fe1305_add_product()
fe1305_mul_taun(224,360)
fe1305_add_product()
fe1305_mul_taun(240,336)
fe1305_add_product()
fe1305_mul_taun(256,312)
fe1305_add_product()
fe1305_mul_taun(272,288)
fe1305_add_product()
fe1305_mul_taun(288,264)
fe1305_add_product()
fe1305_mul_taun(304,240)
fe1305_add_product()
fe1305_mul_taun(320,216)
fe1305_add_product()
fe1305_mul_taun(336,192)
fe1305_add_product()
fe1305_mul_taun(352,168)
fe1305_add_product()
fe1305_mul_taun(368,144)
fe1305_add_product()
fe1305_mul_taun(384,120)
fe1305_add_product()
fe1305_mul_taun(400,96)
fe1305_add_product()
fe1305_mul_taun(416,72)
fe1305_add_product()
fe1305_mul_taun(432,48)
fe1305_add_product()
fe1305_mul_taun(448,24)
fe1305_add_product()
fe1305_mul_taun(464,0)
fe1305_add_product()
fe1305_reduce_3l_128bits()
addq $480,%rsi
/* .LB0: load the final message block, choosing a path by its length.
   64(%rsp) is the %r8 argument saved at function entry.  The code below
   treats it as a bit count: it is compared with 64 and shifted right by 3
   to obtain a byte count for rep movsb.  The value 0 selects the
   full-block path.
   Every path reaches .L4 with the block in (rbp : rdi : rax). */
.LB0:
/* if the last block is full (saved length is 0) */
cmpq $0,64(%rsp)
je .L3
/* if the last block has exactly 8 bytes (64 bits) */
cmpq $64,64(%rsp)
je .L2
/* if the last block has 1 to 7 bytes; jl reuses the flags of the compare
   with 64 above, since je does not modify them */
jl .L1
/* else the last block has 9 to 15 bytes */
/* first chunk of message block = (rax): the first 8 bytes, read directly */
movq 0(%rsi),%rax
addq $8,%rsi
/* rbp = 128 - bits: number of high bits of the second quadword that are
   not covered by message bytes */
movq $128,%rbp
subq 64(%rsp),%rbp
/* rcx = (bits - 64) / 8: number of message bytes beyond the first 8 */
movq $64,%rcx
subq %rbp,%rcx
shrq $3,%rcx
/* copy those bytes into the scratch quadword at 104(%rsp).
   NOTE(review): the bytes of that quadword which are not overwritten are
   assumed to be zero already; the store that clears it is not visible
   here -- confirm. */
leaq 104(%rsp),%rdi
rep movsb (%rsi),(%rdi)
/* second chunk of message block = (rdi) */
movq 104(%rsp),%rdi
/* r11 = (all-ones >> (128 - bits)) + 1 = 2^(bits - 64): a single 1 bit
   placed just above the last copied byte */
movq $-1,%r11
movq %rbp,%rcx
shrq %cl,%r11
addq $1,%r11
orq %r11,%rdi
/* third word of the block is zero on this path */
movq $0,%rbp
jmp .L4
/* .L1: final block of 1 to 7 bytes (reached from .LB0 when the saved bit
   count at 64(%rsp) is below 64).  Leaves the block in (rbp : rdi : rax)
   and jumps to .L4. */
.L1:
/* rcx = bits / 8 = number of message bytes to copy */
movq 64(%rsp),%rcx
shrq $3,%rcx
/* copy them into the scratch quadword at 104(%rsp).
   NOTE(review): the remaining bytes of that quadword are assumed to be
   zero already; the store that clears it is not visible here -- confirm. */
leaq 104(%rsp),%rdi
rep movsb (%rsi),(%rdi)
/* first chunk of message block = (rax) */
movq 104(%rsp),%rax
/* r11 = (all-ones >> (64 - bits)) + 1 = 2^bits: a single 1 bit placed just
   above the last copied byte.  Only %cl is used as the shift count. */
movq $-1,%r11
movb $64,%cl
subb 64(%rsp),%cl
shrq %cl,%r11
addq $1,%r11
orq %r11,%rax
/* second chunk of message block = (rdi): empty for a block this short */
movq $0,%rdi
/* third word of the block is zero on this path */
movq $0,%rbp
jmp .L4
/* .L2: final block of exactly 8 bytes (reached from .LB0 when the saved
   bit count at 64(%rsp) equals 64). */
.L2:
/* first chunk = the 8 message bytes */
movq 0(%rsi),%rax
/* second chunk = 1: a single 1 bit directly above the 64 message bits */
movq $1,%rdi
/* third word of the block is zero on this path */
movq $0,%rbp
jmp .L4
/* .L3: final block is full (reached from .LB0 when 64(%rsp) is 0).
   Reads both quadwords of the block and falls through into .L4. */
.L3:
movq 0(%rsi),%rax
movq 8(%rsi),%rdi
/* third word = 1: a single 1 bit directly above the 128 message bits */
movq $1,%rbp
/* .L4: split the block (rbp : rdi : rax) into three limbs, add them to
   the accumulator (r12 : r10 : r8), multiply and reduce through the
   macros, then jump to the final carry/freeze code at .L10.
   The pmask constants are defined elsewhere; the shift counts place the
   limb boundaries at bits 44 and 88 of the three-quadword value:
     limb0 =   rax & pmask1
     limb1 = ((rax & pmask2) >> 44) | ((rdi & pmask3) << 20)
     limb2 = ((rdi & pmask4) >> 24) | ((rbp & pmask5) << 40) */
.L4:
movq %rax,%r13
andq pmask1(%rip),%rax
movq %rdi,%r11
andq pmask2(%rip),%r13
shrq $44,%r13
andq pmask3(%rip),%rdi
shlq $20,%rdi
orq %r13,%rdi
andq pmask4(%rip),%r11
shrq $24,%r11
andq pmask5(%rip),%rbp
shlq $40,%rbp
orq %r11,%rbp
/* accumulator += block, limb by limb (no carry propagation here) */
addq %rax,%r8
addq %rdi,%r10
addq %rbp,%r12
/* reload %rdi from the stack; it was overwritten above.
   NOTE(review): 112(%rsp) is written outside this view; presumably it holds
   the pointer that fe1305_mul_taunr uses to locate the key -- confirm. */
movq 112(%rsp),%rdi
fe1305_mul_taunr(0)
fe1305_reduce_3l_128bits()
/* NOTE(review): .L10 uses %rbp as a mask without loading it; on this path
   it is presumably left holding mask44 by the reduction macro -- confirm
   against the macro definition. */
jmp .L10
/* .L5: path for a message consisting of a single block (per the comments
   below; the branch that jumps here is outside this view).
   Loads three key quadwords, called k0, k1, k2 in the comments below, picks
   a path by block length, and for a 9 to 15 byte block computes the three
   128-bit column sums of block * key before jumping to .L9.
   NOTE(review): %rdx is assumed to still hold the third function argument
   at this point -- confirm nothing before .L5 overwrites it. */
.L5:
/* k0 = r14, k1 = r15, k2 = rbx */
movq 0(%rdx),%r14
movq 8(%rdx),%r15
movq 16(%rdx),%rbx
/* if the single message block is full (saved %r8 value is 0) */
cmpq $0,64(%rsp)
je .L8
/* if the single message block has 1 to 7 bytes.
   NOTE(review): this test reads 72(%rsp), the saved %r9 argument, and
   compares it with 8, while the rest of the code uses 64(%rsp) as a bit
   count; presumably %r9 carries the same length in bytes -- confirm
   against the caller. */
cmpq $8,72(%rsp)
jl .L6
/* if the single message block has 8 bytes (same flags as the compare
   above) */
je .L7
/* else the single message block has 9 to 15 bytes */
/* first chunk of message block = (rax): the first 8 bytes, read directly */
movq 0(%rsi),%rax
addq $8,%rsi
/* r8 = 128 - bits: number of high bits of the second quadword that are
   not covered by message bytes */
movq $128,%r8
subq 64(%rsp),%r8
/* rcx = (bits - 64) / 8: number of message bytes beyond the first 8 */
movq $64,%rcx
subq %r8,%rcx
shrq $3,%rcx
/* copy those bytes into the scratch quadword at 104(%rsp).
   NOTE(review): the bytes not overwritten are assumed to be zero already;
   the store that clears the slot is not visible here -- confirm. */
leaq 104(%rsp),%rdi
rep movsb (%rsi),(%rdi)
/* second chunk of message block = (rbp) */
movq 104(%rsp),%rbp
/* r11 = 2^(bits - 64): a single 1 bit just above the last copied byte */
movq $-1,%r11
movq %r8,%rcx
shrq %cl,%r11
addq $1,%r11
orq %r11,%rbp
/* split (rbp : rax) into limbs with boundaries at bits 44 and 88:
     m0 = rax, m1 = rdi, m2 = rbp after the masks and shifts below */
movq %rax,%r9
andq pmask1(%rip),%rax
movq %rbp,%rdi
andq pmask2(%rip),%r9
shrq $44,%r9
andq pmask3(%rip),%rdi
shlq $20,%rdi
orq %r9,%rdi
andq pmask4(%rip),%rbp
shrq $24,%rbp
/* 104(%rsp) is reused from here on to hold m0, because mulq overwrites
   %rax (and %rdx) */
movq %rax,104(%rsp)
/* (r9 : r8) = m0 * k0 + 20 * m1 * k2 + 20 * m2 * k1.
   NOTE(review): the factor 20 presumably folds products of weight 2^132
   back to weight 1 (2^132 = 4 * 2^130 and 2^130 = 5 modulo 2^130 - 5). */
mulq %r14
movq %rax,%r8
movq %rdx,%r9
movq %rdi,%rax
imul $20,%rax,%rax
mulq %rbx
addq %rax,%r8
adcq %rdx,%r9
movq %rbp,%rax
imul $20,%rax,%rax
/* keep 20 * m2 in 112(%rsp) for the next column */
movq %rax,112(%rsp)
mulq %r15
addq %rax,%r8
adcq %rdx,%r9
/* r9 = bits 44 and up of the 128-bit sum; r8 keeps the low quadword */
shld $20,%r8,%r9
/* (r11 : r10) = m0 * k1 + m1 * k0 + 20 * m2 * k2 */
movq 104(%rsp),%rax
mulq %r15
movq %rax,%r10
movq %rdx,%r11
movq %rdi,%rax
mulq %r14
addq %rax,%r10
adcq %rdx,%r11
movq 112(%rsp),%rax
mulq %rbx
addq %rax,%r10
adcq %rdx,%r11
/* r11 = bits 44 and up of the 128-bit sum */
shld $20,%r10,%r11
/* (r13 : r12) = m0 * k2 + m1 * k1 + m2 * k0 */
movq 104(%rsp),%rax
mulq %rbx
movq %rax,%r12
movq %rdx,%r13
movq %rdi,%rax
mulq %r15
addq %rax,%r12
adcq %rdx,%r13
movq %rbp,%rax
mulq %r14
addq %rax,%r12
adcq %rdx,%r13
/* r13 = bits 42 and up of the 128-bit sum */
shld $22,%r12,%r13
jmp .L9
/* .L6: single message block of 1 to 7 bytes (reached from .L5).
   The block has only two non-zero limbs (m0, m1), so every column sum has
   two terms.  Key quadwords: k0 = r14, k1 = r15, k2 = rbx.  Ends by
   jumping to .L9. */
.L6:
/* read the remainder bytes onto stack: rcx = bits / 8 bytes are copied
   into the scratch quadword at 104(%rsp).
   NOTE(review): the bytes not overwritten are assumed to be zero already;
   the store that clears the slot is not visible here -- confirm. */
movq 64(%rsp),%rcx
shrq $3,%rcx
leaq 104(%rsp),%rdi
rep movsb (%rsi),(%rdi)
/* message block = (rax) */
movq 104(%rsp),%rax
/* r11 = (all-ones >> (64 - bits)) + 1 = 2^bits: a single 1 bit just above
   the last copied byte.  Only %cl is used as the shift count. */
movq $-1,%r11
movb $64,%cl
subb 64(%rsp),%cl
shrq %cl,%r11
addq $1,%r11
orq %r11,%rax
/* m0 = rax & pmask1, m1 = (rax & pmask2) >> 44 */
movq %rax,%rdi
andq pmask1(%rip),%rax
andq pmask2(%rip),%rdi
shrq $44,%rdi
/* 104(%rsp) is reused to hold m0, because mulq overwrites %rax */
movq %rax,104(%rsp)
/* (r9 : r8) = m0 * k0 + 20 * m1 * k2 */
mulq %r14
movq %rax,%r8
movq %rdx,%r9
movq %rdi,%rax
imul $20,%rax,%rax
mulq %rbx
addq %rax,%r8
adcq %rdx,%r9
/* r9 = bits 44 and up of the 128-bit sum */
shld $20,%r8,%r9
/* (r11 : r10) = m0 * k1 + m1 * k0 */
movq 104(%rsp),%rax
mulq %r15
movq %rax,%r10
movq %rdx,%r11
movq %rdi,%rax
mulq %r14
addq %rax,%r10
adcq %rdx,%r11
/* r11 = bits 44 and up of the 128-bit sum */
shld $20,%r10,%r11
/* (r13 : r12) = m0 * k2 + m1 * k1 */
movq 104(%rsp),%rax
mulq %rbx
movq %rax,%r12
movq %rdx,%r13
movq %rdi,%rax
mulq %r15
addq %rax,%r12
adcq %rdx,%r13
/* r13 = bits 42 and up of the 128-bit sum */
shld $22,%r12,%r13
jmp .L9
/* .L7: single message block of exactly 8 bytes (reached from .L5).
   The block is the 8 message bytes with a single 1 bit directly above
   them.  Only the limbs m0 and m1 enter the products.  Key quadwords:
   k0 = r14, k1 = r15, k2 = rbx.  Ends by jumping to .L9. */
.L7:
/* message block = (rdi : rax) with rdi = 1 */
movq 0(%rsi),%rax
movq $1,%rdi
/* m0 = rax & pmask1
   m1 = ((rax & pmask2) >> 44) | ((rdi & pmask3) << 20) */
movq %rax,%r9
andq pmask1(%rip),%rax
movq %rdi,%rbp
andq pmask2(%rip),%r9
shrq $44,%r9
andq pmask3(%rip),%rdi
shlq $20,%rdi
orq %r9,%rdi
/* rbp = (1 & pmask4) >> 24; this value is not read by the products below
   and %rbp is reloaded at .L9 */
andq pmask4(%rip),%rbp
shrq $24,%rbp
/* keep m0 in 104(%rsp), because mulq overwrites %rax */
movq %rax,104(%rsp)
/* (r9 : r8) = m0 * k0 + 20 * m1 * k2 */
mulq %r14
movq %rax,%r8
movq %rdx,%r9
movq %rdi,%rax
imul $20,%rax,%rax
mulq %rbx
addq %rax,%r8
adcq %rdx,%r9
/* r9 = bits 44 and up of the 128-bit sum */
shld $20,%r8,%r9
/* (r11 : r10) = m0 * k1 + m1 * k0 */
movq 104(%rsp),%rax
mulq %r15
movq %rax,%r10
movq %rdx,%r11
movq %rdi,%rax
mulq %r14
addq %rax,%r10
adcq %rdx,%r11
/* r11 = bits 44 and up of the 128-bit sum */
shld $20,%r10,%r11
/* (r13 : r12) = m0 * k2 + m1 * k1 */
movq 104(%rsp),%rax
mulq %rbx
movq %rax,%r12
movq %rdx,%r13
movq %rdi,%rax
mulq %r15
addq %rax,%r12
adcq %rdx,%r13
/* r13 = bits 42 and up of the 128-bit sum */
shld $22,%r12,%r13
jmp .L9
/* .L8: single message block that is full (reached from .L5 when 64(%rsp)
   is 0).  Splits the 16 message bytes plus a 1 bit above them into three
   limbs and computes the three 128-bit column sums of block * key.
   Key quadwords: k0 = r14, k1 = r15, k2 = rbx.  Falls through into .L9. */
.L8:
/* message block = (rbp : rdi : rax), with rbp = 1 */
movq 0(%rsi),%rax
movq 8(%rsi),%rdi
movq $1,%rbp
/* limbs with boundaries at bits 44 and 88 of the three-quadword value:
     m0 =   rax & pmask1
     m1 = ((rax & pmask2) >> 44) | ((rdi & pmask3) << 20)
     m2 = ((rdi & pmask4) >> 24) | ((rbp & pmask5) << 40)
   held in rax, rdi, rbp respectively */
movq %rax,%r9
andq pmask1(%rip),%rax
movq %rdi,%r11
andq pmask2(%rip),%r9
shrq $44,%r9
andq pmask3(%rip),%rdi
shlq $20,%rdi
orq %r9,%rdi
andq pmask4(%rip),%r11
shrq $24,%r11
andq pmask5(%rip),%rbp
shlq $40,%rbp
orq %r11,%rbp
/* keep m0 in 104(%rsp), because mulq overwrites %rax */
movq %rax,104(%rsp)
/* (r9 : r8) = m0 * k0 + 20 * m1 * k2 + 20 * m2 * k1.
   NOTE(review): the factor 20 presumably folds products of weight 2^132
   back to weight 1 (2^132 = 4 * 2^130 and 2^130 = 5 modulo 2^130 - 5). */
mulq %r14
movq %rax,%r8
movq %rdx,%r9
movq %rdi,%rax
imul $20,%rax,%rax
mulq %rbx
addq %rax,%r8
adcq %rdx,%r9
movq %rbp,%rax
imul $20,%rax,%rax
/* keep 20 * m2 in 112(%rsp) for the next column */
movq %rax,112(%rsp)
mulq %r15
addq %rax,%r8
adcq %rdx,%r9
/* r9 = bits 44 and up of the 128-bit sum */
shld $20,%r8,%r9
/* (r11 : r10) = m0 * k1 + m1 * k0 + 20 * m2 * k2 */
movq 104(%rsp),%rax
mulq %r15
movq %rax,%r10
movq %rdx,%r11
movq %rdi,%rax
mulq %r14
addq %rax,%r10
adcq %rdx,%r11
movq 112(%rsp),%rax
mulq %rbx
addq %rax,%r10
adcq %rdx,%r11
/* r11 = bits 44 and up of the 128-bit sum */
shld $20,%r10,%r11
/* (r13 : r12) = m0 * k2 + m1 * k1 + m2 * k0 */
movq 104(%rsp),%rax
mulq %rbx
movq %rax,%r12
movq %rdx,%r13
movq %rdi,%rax
mulq %r15
addq %rax,%r12
adcq %rdx,%r13
movq %rbp,%rax
mulq %r14
addq %rax,%r12
adcq %rdx,%r13
/* r13 = bits 42 and up of the 128-bit sum */
shld $22,%r12,%r13
/* .L9: first reduction step for the single-block paths (.L5 to .L8).
   On entry r8, r10, r12 hold the low quadwords of the three column sums
   and r9, r11, r13 hold the parts shifted out above bits 44, 44 and 42.
   Each low word is masked to its limb width and the part shifted out of
   the column below is added in; the part above the top limb is multiplied
   by 5 and added to the lowest limb.  Leaves mask44 in %rbp, which .L10
   uses, and falls through into .L10. */
.L9:
/* reduction on (r12 : r10 : r8) */
movq mask44(%rip),%rbp
andq %rbp,%r8
andq %rbp,%r10
addq %r9,%r10
andq mask42(%rip),%r12
addq %r11,%r12
/* wrap the overflow of the top limb around with a factor of 5 */
imul $5,%r13,%r13
addq %r13,%r8
/* .L10: common tail of the function.
   In:  (r12 : r10 : r8) = accumulator in three limbs (boundaries at bits
        44 and 88), %rbp = mask44 (loaded at .L9 on the single-block
        paths; on the path coming from .L4 it is presumably left there by
        the reduction macro -- confirm).
   Steps: carry propagation, conversion to three base-2^64 words, freeze
   against (p2 : p1 : p0), addition of the 16 key bytes saved at
   88(%rsp)/96(%rsp), store of the low 128 bits through the output pointer
   saved at 56(%rsp), then restore of the callee-saved registers and the
   original stack pointer saved at function entry. */
.L10:
/* carry limb 0 -> limb 1 */
movq %r8,%rdx
shrq $44,%rdx
addq %r10,%rdx
andq %rbp,%r8
/* carry limb 1 -> limb 2 */
movq %rdx,%r10
shrq $44,%rdx
addq %r12,%rdx
andq %rbp,%r10
/* overflow above bit 42 of limb 2 wraps around to limb 0 with a factor
   of 5 */
movq %rdx,%r12
shrq $42,%rdx
imul $5,%rdx,%rdx
addq %rdx,%r8
andq mask42(%rip),%r12
/* get back the element in base 2^{64}:
     word0 =  (limb0 & upmask1)        | ((limb1 & upmask2) << 44)
     word1 = ((limb1 & upmask3) >> 20) | ((limb2 & upmask4) << 24)
     word2 =  (limb2 & upmask5) >> 40 */
andq upmask1(%rip),%r8
movq %r10,%r9
andq upmask2(%rip),%r9
shlq $44,%r9
orq %r9,%r8
andq upmask3(%rip),%r10
shrq $20,%r10
movq %r12,%r11
andq upmask4(%rip),%r11
shlq $24,%r11
orq %r11,%r10
andq upmask5(%rip),%r12
/* limb 2 starts at bit 88, so bit 128 of the value is bit 40 of the limb.
   The shift must therefore be 40 (the inverse of the "shlq $40" used with
   pmask5 when a block is packed); a smaller shift would leave word2
   scaled up and break the comparison with p2 below. */
shrq $40,%r12
/* freeze the reduced field element (r12 : r10 : r8): keep a copy,
   subtract (p2 : p1 : p0), and restore the copy if the subtraction went
   negative */
movq %r8,%r9
movq %r10,%r11
movq %r12,%r13
subq p0(%rip),%r8
sbbq p1(%rip),%r10
sbbq p2(%rip),%r12
/* shlq $62 shifts bit 2 of the top-word difference into CF.
   NOTE(review): with a top word of at most two bits this bit is set
   exactly when the subtraction borrowed, which is what the cmovc
   instructions below rely on -- confirm the range of p2 and word2. */
movq %r12,%rcx
shlq $62,%rcx
cmovc %r9,%r8
cmovc %r11,%r10
cmovc %r13,%r12
/* add last 16 bytes of the key (saved at 88(%rsp) and 96(%rsp)) */
addq 88(%rsp),%r8
adcq 96(%rsp),%r10
/* the carry into r12 is computed but r12 is not stored; it is overwritten
   by the register restore below */
adcq $0,%r12
/* store first 128 bits (16 bytes) of the result through the output
   pointer saved at 56(%rsp) */
movq 56(%rsp),%rdi
movq %r8,0(%rdi)
movq %r10,8(%rdi)
/* restore the registers saved at function entry; 0(%rsp) holds the
   original stack pointer */
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
movq %r11,%rsp
ret