#ifndef __POLY1305KEYPOWERSASM__
#define __POLY1305KEYPOWERSASM__

/*
 * Field arithmetic used for computing the powers of the Poly1305 key
 * tau.  An element of GF(2^130 - 5) is stored as three unsaturated
 * 64-bit limbs of 44, 44 and 42 bits (masked by mask44 and mask42);
 * the table of key powers is addressed through rdi with a stride of
 * 24 bytes per element.
 */
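
/*
 * For reference, a rough, untested C sketch of the multiplication and
 * carry reduction that both macros below implement (the squaring macro
 * is the f == g specialization).  The function name, the u128 typedef
 * and the array layout are illustrative assumptions, not part of this
 * code.  The constants come from 2^130 = 5 (mod p), hence
 * 2^132 = 20 (mod p); inputs are assumed reduced, i.e. within
 * 44/44/42 bits.
 *
 *	#include <stdint.h>
 *
 *	typedef unsigned __int128 u128;
 *
 *	static void fe1305_mul(uint64_t h[3], const uint64_t f[3],
 *			       const uint64_t g[3])
 *	{
 *		const uint64_t m44 = ((uint64_t)1 << 44) - 1;
 *		const uint64_t m42 = ((uint64_t)1 << 42) - 1;
 *
 *		// schoolbook products; cross terms that overflow the
 *		// 130-bit range are wrapped back with the factor 20
 *		u128 d0 = (u128)f[0]*g[0] + (u128)(20*f[1])*g[2]
 *					  + (u128)(20*f[2])*g[1];
 *		u128 d1 = (u128)f[0]*g[1] + (u128)f[1]*g[0]
 *					  + (u128)(20*f[2])*g[2];
 *		u128 d2 = (u128)f[0]*g[2] + (u128)f[1]*g[1]
 *					  + (u128)f[2]*g[0];
 *
 *		// mask to 44/44/42 bits; the top carry wraps times 5
 *		uint64_t h0 = ((uint64_t)d0 & m44) + 5*(uint64_t)(d2 >> 42);
 *		uint64_t h1 = ((uint64_t)d1 & m44) + (uint64_t)(d0 >> 44);
 *		uint64_t h2 = ((uint64_t)d2 & m42) + (uint64_t)(d1 >> 44);
 *
 *		// one more carry chain, as in the assembly below
 *		h1 += h0 >> 44; h0 &= m44;
 *		h2 += h1 >> 44; h1 &= m44;
 *		h0 += 5*(h2 >> 42); h2 &= m42;
 *
 *		h[0] = h0; h[1] = h1; h[2] = h2;
 *	}
 */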

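/*
 * fe1305_tau_squaren(y): squares the field element held in the
 * registers (r14, r15, rbx), referred to as (t0, t1, t2) in the
 * comments below, and stores the result at 24*y(%rdi).  The squared
 * value is also left in (r14, r15, rbx), so repeated invocations step
 * tau^n -> tau^(2n).  Clobbers rax, rcx, rdx and r8-r13.
 */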
#define fe1305_tau_squaren(y)				\
							\
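	/* r9:r8 = t0*t0; r14 becomes 2*t0 for the cross terms */	\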
	movq    %r14,%rax;				\
	shlq	$1,%r14;				\
	mulq	%rax;					\
	movq    %rax,%r8;				\
	movq    %rdx,%r9;				\
							\
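	/* r11:r10 = t1*(2*t0) */				\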
	movq    %r15,%rax;				\
	mulq	%r14;					\
	movq    %rax,%r10;				\
	movq    %rdx,%r11;				\
							\
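	/* r13:r12 = t1*t1 */					\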
	movq    %r15,%rax;				\
	mulq	%rax;					\
	movq    %rax,%r12;				\
	movq    %rdx,%r13;				\
							\
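	/* r9:r8 += (20*t2)*(2*t1); rcx = 20*t2; 2^132 = 20 (mod p) */	\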
	movq    %rbx,%rax;				\
	imul    $20,%rax,%rax;				\
	movq	%rax,%rcx;				\
	shlq	$1,%r15;				\
	mulq	%r15;					\
	addq    %rax,%r8;				\
	adcq    %rdx,%r9;				\
							\
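	/* r9 = limb 0 carry: (r9:r8) >> 44 */			\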
	shld 	$20,%r8,%r9;				\
							\
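	/* r11:r10 += (20*t2)*t2 */				\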
	movq    %rbx,%rax;				\
	mulq	%rcx;					\
	addq    %rax,%r10;				\
	adcq    %rdx,%r11;				\
							\
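	/* r11 = limb 1 carry: (r11:r10) >> 44 */		\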
	shld 	$20,%r10,%r11;				\
							\
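	/* r13:r12 += (2*t0)*t2 */				\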
	movq    %rbx,%rax;				\
	mulq	%r14;					\
	addq    %rax,%r12;				\
	adcq    %rdx,%r13;				\
							\
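	/* r13 = limb 2 carry: (r13:r12) >> 42 */		\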
	shld 	$22,%r12,%r13;				\
							\
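	/* mask the limbs to 44/44/42 bits and fold the carries;	\
	   the top carry wraps around to limb 0 times 5 since	\
	   2^130 = 5 (mod p) */					\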
	movq 	mask44(%rip),%rax;			\
							\
	andq  	%rax,%r8;				\
							\
	andq  	%rax,%r10;				\
	addq  	%r9,%r10;				\
							\
	andq  	mask42(%rip),%r12;			\
	addq  	%r11,%r12;				\
							\
	imul    $5,%r13,%r13;				\
	addq	%r13,%r8;				\
							\
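	/* one final carry chain; (r14, r15, rbx) = reduced result */	\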
	movq  	%r8,%r14;				\
	shrq 	$44,%r8;				\
	addq  	%r10,%r8;				\
	andq  	%rax,%r14;				\
							\
	movq  	%r8,%r15;				\
	shrq 	$44,%r8;				\
	addq  	%r12,%r8;				\
	andq 	%rax,%r15;				\
							\
	movq  	%r8,%rbx;				\
	shrq 	$42,%r8;				\
	imul    $5,%r8,%r8;				\
	addq    %r8,%r14;				\
	andq 	mask42(%rip),%rbx;			\
							\
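	/* store tau^(2n) at table slot y */			\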
	movq    %r14,24*y+0(%rdi);			\
	movq    %r15,24*y+8(%rdi);			\
	movq    %rbx,24*y+16(%rdi);


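/*
 * fe1305_mul_tau_taun(x, y): multiplies the element at 0(%rdi), i.e.
 * tau itself, by the element tau^x at 24*x(%rdi) and stores the
 * product at 24*y(%rdi).  Clobbers rax, rdx, rbp, rbx and r8-r15;
 * note that rbp (callee-saved in the SysV ABI) is used as scratch, so
 * the enclosing function must save and restore it.
 */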
#define fe1305_mul_tau_taun(x,y)			\
							\
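	/* (t0, t1, t2) = tau^x from table slot x */		\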
	movq	24*x+0(%rdi),%r14;			\
	movq	24*x+8(%rdi),%r15;			\
	movq	24*x+16(%rdi),%rbx;			\
							\
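	/* limb 0: r9:r8 = f0*t0 + (20*f1)*t2 + (20*f2)*t1,	\
	   where (f0, f1, f2) = tau at 0(%rdi); rbp = 20*f2 */	\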
	movq    0(%rdi),%rax;				\
	mulq	%r14;					\
	movq    %rax,%r8;				\
	movq    %rdx,%r9;				\
							\
	movq    8(%rdi),%rax;				\
	imul    $20,%rax,%rax;				\
	mulq	%rbx;					\
	addq    %rax,%r8;				\
	adcq    %rdx,%r9;				\
							\
	movq    16(%rdi),%rax;				\
	imul    $20,%rax,%rax;				\
	movq    %rax,%rbp;				\
	mulq	%r15;					\
	addq    %rax,%r8;				\
	adcq    %rdx,%r9;				\
							\
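	/* r9 = limb 0 carry: (r9:r8) >> 44 */			\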
	shld 	$20,%r8,%r9;				\
							\
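	/* limb 1: r11:r10 = f0*t1 + f1*t0 + (20*f2)*t2 */	\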
	movq    0(%rdi),%rax;				\
	mulq	%r15;					\
	movq    %rax,%r10;				\
	movq    %rdx,%r11;				\
							\
	movq    8(%rdi),%rax;				\
	mulq	%r14;					\
	addq    %rax,%r10;				\
	adcq    %rdx,%r11;				\
							\
	movq    %rbp,%rax;				\
	mulq	%rbx;					\
	addq    %rax,%r10;				\
	adcq    %rdx,%r11;				\
							\
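	/* r11 = limb 1 carry: (r11:r10) >> 44 */		\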
	shld 	$20,%r10,%r11;				\
							\
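	/* limb 2: r13:r12 = f0*t2 + f1*t1 + f2*t0 */		\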
	movq    0(%rdi),%rax;				\
	mulq	%rbx;					\
	movq    %rax,%r12;				\
	movq    %rdx,%r13;				\
							\
	movq    8(%rdi),%rax;				\
	mulq	%r15;					\
	addq    %rax,%r12;				\
	adcq    %rdx,%r13;				\
							\
	movq    16(%rdi),%rax;				\
	mulq	%r14;					\
	addq    %rax,%r12;				\
	adcq    %rdx,%r13;				\
							\
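	/* r13 = limb 2 carry: (r13:r12) >> 42 */		\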
	shld 	$22,%r12,%r13;				\
							\
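	/* same 44/44/42 masking and carry folding as above */	\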
	movq 	mask44(%rip),%rax;			\
							\
	andq  	%rax,%r8;				\
							\
	andq  	%rax,%r10;				\
	addq  	%r9,%r10;				\
							\
	andq  	mask42(%rip),%r12;			\
	addq  	%r11,%r12;				\
							\
	imul    $5,%r13,%r13;				\
	addq	%r13,%r8;				\
							\
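	/* final carry chain; (r8, r10, r12) = reduced product */	\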
	movq  	%r8,%rdx;				\
	shrq 	$44,%rdx;				\
	addq  	%r10,%rdx;				\
	andq  	%rax,%r8;				\
							\
	movq  	%rdx,%r10;				\
	shrq 	$44,%rdx;				\
	addq  	%r12,%rdx;				\
	andq 	%rax,%r10;				\
							\
	movq  	%rdx,%r12;				\
	shrq 	$42,%rdx;				\
	imul    $5,%rdx,%rdx;				\
	addq    %rdx,%r8;				\
	andq 	mask42(%rip),%r12;			\
							\
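	/* store tau*tau^x at table slot y */			\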
	movq    %r8,24*y+0(%rdi);			\
	movq    %r10,24*y+8(%rdi);			\
	movq    %r12,24*y+16(%rdi);

#endif