//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

/////////////////////////////////////////////////////////////////////////////
// Based upon:
//
// Approximate Math Library for SSE / SSE2
//  Header File
//  Version 2.0
//  Author Alex Klimovitski, Intel GmbH
/////////////////////////////////////////////////////////////////////////////
#include <emmintrin.h>

#include "AMaths.h"
#include "AMaths_internal.h"

#ifdef AMATHS_ASM

// am_tan_eps: packed single-precision tangent approximation (four lanes at once).
//
// Outline, as implemented below:
//   1. Split x into |x| and its sign.
//   2. k = trunc(|x| * _ps_am_4_o_pi); j = k + (k & 1) rounds k up to an even
//      integer; y = |x| - j * _ps_am_pi_o_4 is the reduced argument.
//   3. t = ys + ys * y^2 * P(y^2) / Q(y^2), where ys is y with x's sign applied,
//        P(z) = (p2*z + p1)*z + p0
//        Q(z) = (((z + q3)*z + q2)*z + q1)*z + q0
//      and 1/Q comes from rcpps plus one Newton-Raphson step.
//   4. Lanes whose j has bit 1 clear return t; the others return -1/t
//      (consistent with tan(y + pi/2) = -1/tan(y)).
//   5. If a lane needs -1/t while y^2 == 0, the l_pole path substitutes
//      _ps_tan_poleval (carrying t's sign) for the reciprocal before negating.
//
// Input : read from xmm0 (the body never loads x from the stack).
// Output: left in xmm0.
// Uses  : xmm0-xmm7, eax, EFLAGS. Nothing is saved or restored.
// All _ps_* / _epi32_* constants are defined outside this function (presumably
// in AMaths_internal.h); the meanings given below are inferred from their names
// -- TODO confirm.
// NOTE(review): the function takes x from xmm0 and also pops 16 bytes of
// arguments with "ret 16". Confirm that this matches how the target compiler
// passes a __m128 parameter for a __stdcall function.
__m128 __declspec(naked) __stdcall am_tan_eps(__m128 x)  // any x
{
	__asm
	{
		// --- Split sign and magnitude; scale to units of pi/4 ---
		movaps	xmm7, xmm0	// xmm7 = x
		andps	xmm0, _ps_am_inv_sign_mask	// xmm0 = |x| (presumably clears the sign bit)
		andps	xmm7, _ps_am_sign_mask	// xmm7 = sign bit(s) of x
		movaps	xmm1, xmm0	// xmm1 = |x|, kept for the subtraction below
		mulps	xmm0, _ps_am_4_o_pi	// xmm0 = |x| * (4/pi, by name)

		// --- Integer index k and its low bits ---
		cvttps2dq	xmm0, xmm0	// xmm0 = k = trunc(xmm0) as int32
		movdqa	xmm4, _epi32_1
		movdqa	xmm5, _epi32_7

		pand	xmm4, xmm0	// xmm4 = k & 1
		pand	xmm5, xmm0	// xmm5 = k & 7
		movaps	xmm3, _ps_am_1	// xmm3 = clamp limit used by minps below
		paddd	xmm0, xmm4	// xmm0 = j = k + (k & 1), always even
		paddd	xmm5, xmm4	// xmm5 = (k & 7) + (k & 1)

		cvtdq2ps	xmm0, xmm0	// xmm0 = (float)j

		// --- Reduced argument y and y^2; start both polynomials ---
		mulps	xmm0, _ps_am_pi_o_4	// xmm0 = j * (pi/4, by name)
		xorps	xmm6, xmm6	// xmm6 = 0, compared against y^2 later
		subps	xmm1, xmm0	// xmm1 = y = |x| - j*pi/4
		movaps	xmm2, _ps_tan_p2	// xmm2 = numerator accumulator, starts at p2
		minps	xmm1, xmm3	// clamp y to at most _ps_am_1
		movaps	xmm3, _ps_tan_q3	// xmm3 = denominator accumulator, starts at q3
		movaps	xmm0, xmm1	// xmm0 = y
		mulps	xmm1, xmm1	// xmm1 = y^2

		// --- Interleaved Horner evaluation of P (xmm2) and Q (xmm3) in y^2 ---
		mulps	xmm2, xmm1	// P: p2*y^2
		addps	xmm3, xmm1	// Q: y^2 + q3
		addps	xmm2, _ps_tan_p1
		mulps	xmm3, xmm1
		mulps	xmm2, xmm1
		addps	xmm3, _ps_tan_q2
		addps	xmm2, _ps_tan_p0
		mulps	xmm3, xmm1
		mulps	xmm2, xmm1	// xmm2 = y^2 * P(y^2)
		addps	xmm3, _ps_tan_q1
		xorps	xmm0, xmm7	// xmm0 = ys: y with x's sign bit(s) XORed in
		mulps	xmm3, xmm1
		pand	xmm5, _epi32_2	// keep only bit 1 of (k & 7) + (k & 1)
		addps	xmm3, _ps_tan_q0	// xmm3 = Q(y^2)
		mulps	xmm2, xmm0	// xmm2 = ys * y^2 * P(y^2)

		// --- 1/Q by rcpps + one Newton-Raphson step; build selection masks ---
		cmpneqps	xmm6, xmm1	// xmm6 = lane mask: y^2 != 0
		rcpps	xmm4, xmm3	// xmm4 = r ~ 1/Q
		pxor	xmm7, xmm7	// xmm7 = 0 (sign of x no longer needed)
		mulps	xmm3, xmm4	// Q*r
		pcmpeqd	xmm5, xmm7	// xmm5 = lane mask: bit 1 clear -> result is t itself
		mulps	xmm3, xmm4	// Q*r*r
		addps	xmm4, xmm4	// 2r
		orps	xmm6, xmm5	// xmm6 = lanes that are safe: y^2 != 0, or reciprocal not needed
		subps	xmm4, xmm3	// xmm4 = 2r - Q*r^2, refined 1/Q

		// --- t = ys + ys*y^2*P/Q ---
		mulps	xmm2, xmm4
		movaps	xmm1, _ps_am_sign_mask	// xmm1 = sign mask, used to negate 1/t below
		movmskps	eax, xmm6	// eax = one bit per lane of the "safe" mask
		addps	xmm2, xmm0	// xmm2 = t

		// --- 1/t by rcpps + one Newton-Raphson step ---
		// The flags set by cmp are consumed by jne below; the SSE instructions
		// in between do not modify EFLAGS.
		rcpps	xmm4, xmm2	// xmm4 = r ~ 1/t
		cmp		eax, 0xf	// are all four lanes safe?
		movaps	xmm0, xmm2	// xmm0 = t
		mulps	xmm2, xmm4	// t*r
		mulps	xmm2, xmm4	// t*r*r
		addps	xmm4, xmm4	// 2r
		subps	xmm4, xmm2	// xmm4 = 2r - t*r^2, refined 1/t
		jne		l_pole	// at least one lane needs 1/t with y^2 == 0

		// --- Common path: no pole in any lane ---
		xorps	xmm4, xmm1	// xmm4 = -1/t

		// Per-lane select: t where xmm5 is set, -1/t elsewhere.
		andps	xmm0, xmm5
		andnps	xmm5, xmm4
		orps	xmm0, xmm5

		ret		16	// result in xmm0; pops 16 bytes of arguments

		// --- Pole path: replace 1/t by a signed _ps_tan_poleval in unsafe lanes ---
l_pole:
		movaps	xmm7, xmm1	// xmm7 = sign mask (xmm1 is overwritten next)
		movaps	xmm3, _ps_tan_poleval
		andps	xmm1, xmm0	// xmm1 = sign bit(s) of t
		orps	xmm3, xmm1	// xmm3 = poleval OR sign of t
		andps	xmm4, xmm6	// keep 1/t in safe lanes
		andnps	xmm6, xmm3	// signed poleval in unsafe lanes
		orps	xmm4, xmm6	// xmm4 = merged reciprocal

		xorps	xmm4, xmm7	// negate, as on the common path

		// Per-lane select: t where xmm5 is set, negated reciprocal elsewhere.
		andps	xmm0, xmm5
		andnps	xmm5, xmm4
		orps	xmm0, xmm5

		ret		16	// result in xmm0; pops 16 bytes of arguments
	}
}

#endif