//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

/////////////////////////////////////////////////////////////////////////////
// Based upon:
//
// Approximate Math Library for SSE / SSE2
//  Header File
//  Version 2.0
//  Author Alex Klimovitski, Intel GmbH
/////////////////////////////////////////////////////////////////////////////
#include <emmintrin.h>

#include "AMaths.h"
#include "AMaths_internal.h"

#ifdef AMATHS_ASM

// am_tan_ss: scalar approximate tangent, hand-written MSVC x86 inline assembly.
//
// In:     low lane of xmm0 holds x (the code reads its argument from xmm0).
// Out:    low lane of xmm0 holds the approximation of tan(x).
// Clobb:  eax, ecx, edx, xmm0-xmm7, EFLAGS.  No memory is written.
//
// Method, as implemented below:
//   1. Split x into |x| and its sign bit.
//   2. j = trunc(|x| * _ps_am_4_o_pi), k = j rounded up to the next even
//      integer, r = min(|x| - k * _ps_am_pi_o_4, _ps_am_1), z = r * r.
//   3. t = s + s * z * P(z) / Q(z), where s is r with the sign of x applied,
//      P(z) = (p2*z + p1)*z + p0 and Q(z) = (((z + q3)*z + q2)*z + q1)*z + q0.
//      1/Q is obtained from rcpss plus one Newton-Raphson step.
//   4. If bit 1 of (j & 7) + (j & 1) is clear the result is t; otherwise the
//      result is -1/t (rcpss plus one Newton-Raphson step), or a signed
//      _ps_tan_poleval when z compares equal to zero.
//
// NOTE(review): the constant names suggest 4/pi, pi/4, 1.0 and a large "pole"
// value; their actual values are defined outside this file - confirm there.
// NOTE(review): cvttss2si returns 0x80000000 when |x| * _ps_am_4_o_pi does not
// fit in a signed 32-bit integer, so very large |x| is not reduced
// meaningfully despite the "any x" remark.
// NOTE(review): "ret 16" is kept exactly as in the original; it assumes 16
// bytes of argument space are popped by the callee - confirm against the
// compiler's handling of __m128 parameters for __stdcall.
//
// The sign bits and the octant value are moved between integer and XMM
// registers with movd (SSE2) and are held in eax/ecx/edx only.  The function
// never adjusts esp, so memory below esp is not reserved and must not be used
// as scratch, and no callee-saved register (esi) has to be spilled there.
__m128 __declspec(naked) __stdcall am_tan_ss(__m128 x)  // any x
{
	__asm
	{
		movd	eax, xmm0						// eax = raw bits of x
		movss	xmm1, _ps_am_inv_sign_mask
		andps	xmm0, xmm1						// xmm0 = |x|
		and		eax, 0x80000000					// eax = sign bit of x only
		movss	xmm1, xmm0						// xmm1 = |x| (kept for the reduction)
		mulss	xmm0, _ps_am_4_o_pi
		movd	xmm7, eax						// xmm7 = sign mask of x; eax is free again

		cvttss2si	edx, xmm0					// edx = j = trunc(|x| * _ps_am_4_o_pi)

		movss	xmm5, _ps_am_1

		mov		ecx, 0x1
		mov		eax, 0x7

		and		ecx, edx						// ecx = j & 1
		and		eax, edx						// eax = j & 7
		add		edx, ecx						// edx = k = j rounded up to even
		add		eax, ecx						// eax = (j & 7) + (j & 1)

		cvtsi2ss	xmm0, edx
		xorps	xmm6, xmm6						// xmm6 = 0.0, used by the pole check

		mulss	xmm0, _ps_am_pi_o_4
		// ZF from this test is consumed by "jz l_cont" below; every instruction
		// in between is an SSE instruction that leaves EFLAGS untouched.
		test	eax, 0x2
		subss	xmm1, xmm0						// xmm1 = |x| - k * _ps_am_pi_o_4
		movss	xmm2, _ps_tan_p2
		minss	xmm1, xmm5						// r = min(reduced argument, _ps_am_1)
		movss	xmm3, _ps_tan_q3
		movss	xmm0, xmm1						// xmm0 = r
		mulss	xmm1, xmm1						// xmm1 = z = r * r

		// Horner evaluation of P (xmm2) and Q (xmm3), interleaved.
		mulss	xmm2, xmm1
		addss	xmm3, xmm1
		addss	xmm2, _ps_tan_p1
		mulss	xmm3, xmm1
		mulss	xmm2, xmm1
		addss	xmm3, _ps_tan_q2
		addss	xmm2, _ps_tan_p0
		mulss	xmm3, xmm1
		mulss	xmm2, xmm1						// xmm2 = z * P(z)
		addss	xmm3, _ps_tan_q1
		xorps	xmm0, xmm7						// xmm0 = s = r with the sign of x applied
		mulss	xmm3, xmm1
		mulss	xmm2, xmm0						// xmm2 = s * z * P(z)
		addss	xmm3, _ps_tan_q0				// xmm3 = Q(z)

		// 1/Q: rcpss estimate e refined by one Newton-Raphson step, 2e - Q*e*e.
		rcpss	xmm4, xmm3
		mulss	xmm3, xmm4
		mulss	xmm3, xmm4
		addss	xmm4, xmm4
		subss	xmm4, xmm3

		mulss	xmm2, xmm4						// xmm2 = s * z * P(z) / Q(z)
		jz		l_cont							// bit 1 clear: result is t itself
		addss	xmm2, xmm0						// xmm2 = t
		comiss	xmm6, xmm1						// ZF = 1 when z compares equal to 0 (or is unordered)

		rcpss	xmm4, xmm2						// estimate of 1/t (does not touch EFLAGS)
		movss	xmm0, _ps_am_sign_mask
		jz		l_pole
		// 1/t refined by one Newton-Raphson step, then negated via the sign mask.
		mulss	xmm2, xmm4
		mulss	xmm2, xmm4
		addss	xmm4, xmm4
		subss	xmm4, xmm2
		xorps	xmm0, xmm4						// xmm0 = -(1/t)

		ret		16

l_pole:
		// Result is _ps_tan_poleval carrying the opposite sign of t.
		movss	xmm1, _ps_tan_poleval
		movss	xmm3, xmm0						// xmm3 = sign mask
		andps	xmm0, xmm2						// xmm0 = sign bit of t
		orps	xmm0, xmm1						// merge sign of t into the pole value

		xorps	xmm0, xmm3						// flip the sign

		ret		16

l_cont:
		addss	xmm0, xmm2						// xmm0 = s + s * z * P(z) / Q(z) = t
		ret		16
	}
}

#endif