/* x86-64 assembly (AT&T/GAS syntax, SysV AMD64 ABI) to compute the Poly1305 key powers */

#include "crypto_asm_hidden.h"
// linker define poly1305_maa44_g16_key
// linker define poly1305_maa44_g16_keypowers
// linker use mask44
// linker use mask42
// linker use pmask1
// linker use pmask2
// linker use pmask3
// linker use pmask4

#define mask44  CRYPTO_SHARED_NAMESPACE(mask44)
#define mask42  CRYPTO_SHARED_NAMESPACE(mask42)
#define pmask1  CRYPTO_SHARED_NAMESPACE(pmask1)
#define pmask2  CRYPTO_SHARED_NAMESPACE(pmask2)
#define pmask3  CRYPTO_SHARED_NAMESPACE(pmask3)
#define pmask4  CRYPTO_SHARED_NAMESPACE(pmask4)

#include "poly1305_keypowers_asm.h"

	.p2align 5
	
/*
 * poly1305_maa44_g16_key(uint64_t k[3])  -- in-place key conversion
 *
 * ABI:  SysV AMD64 (AT&T syntax)
 * In:   rdi -> 16-byte Poly1305 key (two little-endian 64-bit words)
 * Out:  the 128-bit key rewritten in place at 0/8/16(%rdi) as three
 *       unsaturated limbs ("tau"); from the 44/20/24 shift counts this
 *       is presumably the radix-2^44 split 44 + 44 + 40 bits.
 * Clobbers: rax, r8, r9, r10, flags.
 *
 * NOTE(review): the exact bit ranges depend on the pmask1..pmask4
 * constants defined in another translation unit -- confirm against
 * their definitions (expected: pmask1 = low 44 bits, pmask2 = bits
 * 44..63, pmask3 = low 24 bits, pmask4 = bits 24..63).
 */
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key)
.global _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key)
.global CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key)
_CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key):
CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_key):

	/* key: r8 = key word 0 (bits 0..63), r9 = key word 1 (bits 64..127) */
	movq    0(%rdi),%r8
	movq    8(%rdi),%r9
	
	/* tau: slice the two 64-bit words into three limbs */
	movq    %r8,%r10
	/* limb 0: low bits of word 0 (presumably bits 0..43) */
	andq    pmask1(%rip),%r8
	movq    %r9,%rax
	/* high bits of word 0 (presumably bits 44..63), moved down ... */
	andq    pmask2(%rip),%r10
	shrq    $44,%r10
	/* ... and limb 1 = (low bits of word 1) << 20 | those 20 bits */
	andq    pmask3(%rip),%r9
	shlq    $20,%r9
	orq     %r10,%r9
	/* limb 2: remaining high bits of word 1, shifted down by 24 */
	andq    pmask4(%rip),%rax
	shrq    $24,%rax

	/* store the three limbs back over the input key */
	movq    %r8,0(%rdi)
	movq    %r9,8(%rdi)
	movq    %rax,16(%rdi)

	ret

		
	.p2align 5
	
/*
 * poly1305_maa44_g16_keypowers(limbs, n)
 *
 * ABI:  SysV AMD64 (AT&T syntax)
 * In:   rdi -> table of 3-limb (24-byte) field elements; slot 0
 *              (offsets 0/8/16) already holds tau (the converted key).
 *       rsi =  early-exit bound: the routine computes tau^2 .. tau^(rsi-1),
 *              capped at tau^16 (falls through to tau^16 if no cmp hits).
 * Out:  tau^k written to table slot k-1, i.e. offset 24*(k-1)(%rdi)
 *       (grounded by the loads below: tau^2 at 24, tau^4 at 72,
 *       tau^8 at 168, tau^16 result from slot index 15).
 *
 * NOTE(review): fe1305_tau_squaren(i) and fe1305_mul_tau_taun(i,j) come
 * from poly1305_keypowers_asm.h; presumably squaren(i) squares the value
 * in r14/r15/rbx into slot i, and mul_tau_taun(i,j) multiplies slot i by
 * tau into slot j -- confirm against the macro header.  r12/r13 are saved
 * here although not used in the visible code; presumably the macros
 * clobber them.
 */
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers)
.global _CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers)
.global CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers)
_CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers):
CRYPTO_SHARED_NAMESPACE(poly1305_maa44_g16_keypowers):
	
	/* keep the caller's rsp in r11, then align to 32 and reserve a
	   64-byte spill area for the callee-saved registers */
	movq 	%rsp,%r11
	andq    $-32,%rsp
	subq 	$64,%rsp

	/* save original rsp plus all callee-saved GPRs we (or the macros)
	   may touch */
	movq 	%r11,0(%rsp)
	movq 	%r12,8(%rsp)
	movq 	%r13,16(%rsp)
	movq 	%r14,24(%rsp)
	movq 	%r15,32(%rsp)
	movq 	%rbx,40(%rsp)
	movq 	%rbp,48(%rsp)

	/* tau = key: load the three limbs of tau into r14/r15/rbx */
	movq    0(%rdi),%r14
	movq    8(%rdi),%r15
	movq    16(%rdi),%rbx	
		
	/* tau^2 */	
	fe1305_tau_squaren(1)
	cmp	$3,%rsi
	je	.L	
	
	/* tau^3 */	
	fe1305_mul_tau_taun(1,2)
	cmp	$4,%rsi
	je	.L
	
	/* tau^4: reload tau^2 (slot 1) and square it into slot 3 */
	movq    24(%rdi),%r14
	movq    32(%rdi),%r15
	movq    40(%rdi),%rbx		
	fe1305_tau_squaren(3)
	cmp	$5,%rsi
	je	.L		
	
	/* tau^5 */	
	fe1305_mul_tau_taun(3,4)
	cmp	$6,%rsi
	je	.L	
	
	/* tau^6 */	
	fe1305_mul_tau_taun(4,5)
	cmp	$7,%rsi
	je	.L	
	
	/* tau^7 */	
	fe1305_mul_tau_taun(5,6)
	cmp	$8,%rsi
	je	.L
	
	/* tau^8: reload tau^4 (slot 3) and square it into slot 7 */
	movq    72(%rdi),%r14
	movq    80(%rdi),%r15
	movq    88(%rdi),%rbx		
	fe1305_tau_squaren(7)
	cmp	$9,%rsi
	je	.L	
	
	/* tau^9 */	
	fe1305_mul_tau_taun(7,8)
	cmp	$10,%rsi
	je	.L	
	
	/* tau^10 */	
	fe1305_mul_tau_taun(8,9)
	cmp	$11,%rsi
	je	.L	
	
	/* tau^11 */	
	fe1305_mul_tau_taun(9,10)
	cmp	$12,%rsi
	je	.L	
	
	/* tau^12 */	
	fe1305_mul_tau_taun(10,11)
	cmp	$13,%rsi
	je	.L	
	
	/* tau^13 */	
	fe1305_mul_tau_taun(11,12)
	cmp	$14,%rsi
	je	.L	
	
	/* tau^14 */	
	fe1305_mul_tau_taun(12,13)
	cmp	$15,%rsi
	je	.L	
	
	/* tau^15 */	
	fe1305_mul_tau_taun(13,14)
	cmp	$16,%rsi
	je	.L
	
	/* tau^16: reload tau^8 (slot 7) and square it into slot 15 */
	movq    168(%rdi),%r14
	movq    176(%rdi),%r15
	movq    184(%rdi),%rbx	
	fe1305_tau_squaren(15)
/* common exit: restore callee-saved registers and the caller's rsp */
.L:

	movq 	0(%rsp),%r11
	movq 	8(%rsp),%r12
	movq 	16(%rsp),%r13
	movq 	24(%rsp),%r14
	movq 	32(%rsp),%r15
	movq 	40(%rsp),%rbx
	movq 	48(%rsp),%rbp

	movq 	%r11,%rsp

	ret