//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

/////////////////////////////////////////////////////////////////////////////
// Based upon:
//
// Approximate Math Library for SSE / SSE2
//  Header File
//  Version 2.0
//  Author Alex Klimovitski, Intel GmbH
/////////////////////////////////////////////////////////////////////////////
#include <emmintrin.h>

#include "AMaths.h"
#include "AMaths_internal.h"

#ifdef AMATHS_ASM

//-------------------------------------------------------------------------------------
// am_sincos_eps
//
// Evaluates two packed single-precision results from x in one pass and stores them
// through the two pointer arguments: xmm0 is written to *s and xmm6 to *c (per the
// names, the sine and cosine of x).
//
// Outline of what the instruction stream does:
//   1. Split x into (x & _ps_am_inv_sign_mask) and (x & _ps_am_sign_mask).
//   2. Scale the first part by _ps_am_2_o_pi, truncate to an integer q, and form the
//      remainder f = scaled - (float)q, clamped to at most _ps_am_1.
//   3. Use (q & _epi32_1) == 0 as a per-lane mask to choose between f and
//      (_ps_am_1 - f) as the polynomial argument of each output (swapped between the two).
//   4. Build a sign word for each output from (q & _epi32_2) and ((q + _epi32_1) & _epi32_2),
//      shifted left by 30; the first is additionally XORed with the sign part of x.
//   5. Evaluate y * (p0 + y^2 * (p1 + y^2 * (p2 + y^2 * p3))) for both arguments with
//      the _ps_sincos_p0.._ps_sincos_p3 coefficients, where y carries the sign word.
//
// NOTE(review): the constants are defined outside this section. From their names they
// are presumably: inv_sign_mask = all bits but the sign, sign_mask = sign bit only,
// 2_o_pi = 2/pi, am_1 = 1.0f, _epi32_1 / _epi32_2 = integers 1 and 2 per lane. Under
// that reading q is the quadrant index, the shift by 30 moves bit 1 into the float
// sign-bit position, and the polynomial works on a quadrant-normalised argument in
// [0, 1]. Confirm against AMaths_internal.h.
//
// The function is naked: no prologue/epilogue is generated, so arguments are read
// relative to esp at entry and the ret operand removes the argument area.
// x is consumed from xmm0 on entry.
// Registers written: xmm0-xmm7, eax, edx.
// Both stores use movaps, so s and c must point to 16-byte aligned memory.
//-------------------------------------------------------------------------------------
void __declspec(naked) __stdcall am_sincos_eps(__m128 x, __m128* s, __m128* c)  // any x
{
	__asm
	{
		// --- split sign / magnitude and scale ---
		movaps	xmm7, xmm0						// xmm7 = copy of x
		andps	xmm0, _ps_am_inv_sign_mask		// xmm0 = x with mask applied (presumably |x|)
		andps	xmm7, _ps_am_sign_mask			// xmm7 = masked sign part of x
		mulps	xmm0, _ps_am_2_o_pi				// xmm0 = scaled magnitude

		pxor	xmm3, xmm3						// xmm3 = 0, comparison operand for pcmpeqd below
		movdqa	xmm5, _epi32_1
		movaps	xmm4, _ps_am_1

		// --- integer part q, remainder f, select mask and sign words ---
		cvttps2dq	xmm2, xmm0					// xmm2 = q = truncated integer part
		pand	xmm5, xmm2						// xmm5 = q & _epi32_1
		pcmpeqd	xmm5, xmm3						// xmm5 = all-ones where (q & _epi32_1) == 0, else 0
		movdqa	xmm3, _epi32_1
		movdqa	xmm1, _epi32_2
		cvtdq2ps	xmm6, xmm2					// xmm6 = (float)q
		paddd	xmm3, xmm2						// xmm3 = q + _epi32_1
		pand	xmm2, xmm1						// xmm2 = q & _epi32_2
		pand	xmm3, xmm1						// xmm3 = (q + _epi32_1) & _epi32_2
		subps	xmm0, xmm6						// xmm0 = f = scaled - (float)q
		pslld	xmm2, (31 - 1)					// shift left by 30: bit 1 lands in bit 31
		minps	xmm0, xmm4						// clamp f to at most _ps_am_1
		// Naked function: esp still points at the return address (4 bytes).
		// NOTE(review): the extra 16 presumably skips a stack slot reserved for x - confirm.
		mov		eax, [esp + 4 + 16]				// eax = destination for xmm0 (s)
		mov		edx, [esp + 4 + 16 + 4]			// edx = destination for xmm6 (c)
		subps	xmm4, xmm0						// xmm4 = _ps_am_1 - f
		pslld	xmm3, (31 - 1)					// same shift for the second sign word

		// --- per-lane select of the two polynomial arguments ---
		movaps	xmm6, xmm4						// xmm6 = _ps_am_1 - f
		xorps	xmm2, xmm7						// first sign word ^= sign part of x
		movaps	xmm7, xmm5						// xmm7 = copy of select mask (sign part no longer needed)
		andps	xmm6, xmm7						// xmm6 = mask & (1 - f)
		andnps	xmm7, xmm0						// xmm7 = ~mask & f
		andps	xmm0, xmm5						// xmm0 = mask & f
		andnps	xmm5, xmm4						// xmm5 = ~mask & (1 - f)
		movaps	xmm4, _ps_sincos_p3
		orps	xmm6, xmm7						// xmm6 = mask ? (1 - f) : f   (argument for *c)
		orps	xmm0, xmm5						// xmm0 = mask ? f : (1 - f)   (argument for *s)
		movaps	xmm5, _ps_sincos_p2

		// --- Horner evaluation, both outputs interleaved ---
		movaps	xmm1, xmm0						// xmm1 = y_s
		movaps	xmm7, xmm6						// xmm7 = y_c
		mulps	xmm0, xmm0						// xmm0 = y_s^2
		mulps	xmm6, xmm6						// xmm6 = y_c^2
		orps	xmm1, xmm2						// xmm1 = y_s with its sign word applied
		orps	xmm7, xmm3						// xmm7 = y_c with its sign word applied
		movaps	xmm2, xmm0						// xmm2 = y_s^2 (kept for later multiplies)
		movaps	xmm3, xmm6						// xmm3 = y_c^2 (kept for later multiplies)
		mulps	xmm0, xmm4						// p3 * y^2
		mulps	xmm6, xmm4
		movaps	xmm4, _ps_sincos_p1
		addps	xmm0, xmm5						// + p2
		addps	xmm6, xmm5
		movaps	xmm5, _ps_sincos_p0
		mulps	xmm0, xmm2						// * y^2
		mulps	xmm6, xmm3
		addps	xmm0, xmm4						// + p1
		addps	xmm6, xmm4
		mulps	xmm0, xmm2						// * y^2
		mulps	xmm6, xmm3
		addps	xmm0, xmm5						// + p0
		addps	xmm6, xmm5
		mulps	xmm0, xmm1						// * signed y_s
		mulps	xmm6, xmm7						// * signed y_c

		// --- store results (aligned stores) ---
		movaps	[eax], xmm0
		movaps	[edx], xmm6

		// Pops 32 bytes of arguments on return.
		// NOTE(review): 16 + 4 + 4 presumably covers x, s and c; the trailing 8 looks like
		// padding of the argument area - confirm against the compiler's __stdcall layout.
		ret		16 + 4 + 4 + 8
	}
}

#endif