//
// Perspective correct mapper by
// 	   MasterBoy / Winter in July , Mistery , ZeroDefects
//
// contains : subpixel, subtexel accuracy and scanline subdivision
//
// email: cobra11@netvision.net.il
// icq  : 14054887
//

// Length of one destination row in bytes. Pk_Triangle and DrawSegment use it
// as the row stride when addressing the destination buffer.
#define XRES 320

// log2 of the span length DrawSegment uses when it subdivides a scanline.
#define SUB_DIVIDE_SHIFT 4

// Span length in pixels: 1 << SUB_DIVIDE_SHIFT, i.e. 16.
#define SUB_DIVIDE_SIZE  (1<<SUB_DIVIDE_SHIFT)

// One triangle corner as consumed by Pk_Triangle.
typedef struct
{
	float x;	// horizontal position in the destination buffer
	float y;	// vertical position (scanline) in the destination buffer
	float z;	// depth; Pk_Triangle divides by it to interpolate in 1/z
	float u;	// texture coordinate that selects the low byte of the texel index
	float v;	// texture coordinate that selects the high byte of the texel index
} PK_Vertex;

// Shared rasteriser state: Pk_Triangle fills it in, DrawSegment reads it and
// advances it once per scanline.

// Edge positions on the current scanline and their change per scanline.
float Left_dXdY;
float Right_dXdY;
float LeftX;
float RightX;

// u/z at the left edge and its change per scanline.
float Left_dUdY;
float LeftU;

// v/z at the left edge and its change per scanline.
float Left_dVdY;
float LeftV;

// 1/z at the left edge and its change per scanline.
float Left_dZdY;
float LeftZ;

// Change of u/z, v/z and 1/z per pixel along a scanline.
float PK_dudx;
float PK_dvdx;
float PK_dzdx;

// The per-pixel gradients multiplied by SUB_DIVIDE_SIZE (step per span).
float PK_dudx_;
float PK_dvdx_;
float PK_dzdx_;

// dest_ptr: start of the current destination row.
// dest:     running write position inside that row.
unsigned char *dest_ptr;
unsigned char *dest;

// Distance from `a` up to the next integer (0 when `a` is already integral).
// It is used to pre-step edge and span interpolants onto the first scanline
// or pixel that ceil() selects.
// The argument is parenthesised so that compound expressions such as
// SUB_PIX(x + dx) expand correctly. It is still evaluated twice, so it must
// not have side effects.
#define SUB_PIX(a) (ceil(a)-(a))

// Draws scanlines y1 .. y2-1 of the triangle half described by the global
// edge state (LeftX, RightX, LeftU, ...), sampling `textmap`.
void DrawSegment(long y1, long y2, unsigned char *textmap);

// Exchanges the values stored at `a` and `b`; the body is supplied by the
// `#pragma aux swap_` at the end of this file.
void swap_ (void * a, void * b);

void Pk_Triangle(PK_Vertex *v1, PK_Vertex *v2, PK_Vertex *v3, unsigned char *textmap, unsigned char *where)
{
	float u_a, v_a, z_a,
	      u_b, v_b, z_b,
	      u_c, v_c, z_c;

        if (v1->y > v2->y) swap_(&v1, &v2);
	if (v1->y > v3->y) swap_(&v1, &v3);
        if (v2->y > v3->y) swap_(&v2, &v3);



        long y1i=ceil(v1->y);
	long y2i=ceil(v2->y);
        long y3i=ceil(v3->y);

	if (y1i==y3i) return;

	z_a = 1.0 / v1->z;
	z_b = 1.0 / v2->z;
	z_c = 1.0 / v3->z;
	u_a = v1->u * z_a;
	u_b = v2->u * z_b;
	u_c = v3->u * z_c;
	v_a = v1->v * z_a;
	v_b = v2->v * z_b;
	v_c = v3->v * z_c;

	float prestep;

	dest_ptr = &where[y1i*XRES];

	float dXdY_V1V3=(v3->x - v1->x) / (v3->y - v1->y);
	float dXdY_V2V3=(v3->x - v2->x) / (v3->y - v2->y);
	float dXdY_V1V2=(v2->x - v1->x) / (v2->y - v1->y);

	float dUdY_V1V3=(u_c - u_a) / (v3->y - v1->y);
	float dUdY_V2V3=(u_c - u_b) / (v3->y - v2->y);
	float dUdY_V1V2=(u_b - u_a) / (v2->y - v1->y);

	float dVdY_V1V3=(v_c - v_a) / (v3->y - v1->y);
	float dVdY_V2V3=(v_c - v_b) / (v3->y - v2->y);
	float dVdY_V1V2=(v_b - v_a) / (v2->y - v1->y);

	float dZdY_V1V3=(z_c - z_a) / (v3->y - v1->y);
	float dZdY_V2V3=(z_c - z_b) / (v3->y - v2->y);
	float dZdY_V1V2=(z_b - z_a) / (v2->y - v1->y);

	float denom = ((v3->x - v1->x) * (v2->y - v1->y) - (v2->x - v1->x) * (v3->y - v1->y));

	if (!denom) return;

	denom = 1.0f / denom;

	PK_dudx=((u_c - u_a) * (v2->y - v1->y) - (u_b - u_a) * (v3->y - v1->y))*denom;
	PK_dvdx=((v_c - v_a) * (v2->y - v1->y) - (v_b - v_a) * (v3->y - v1->y))*denom;
	PK_dzdx=((z_c - z_a) * (v2->y - v1->y) - (z_b - z_a) * (v3->y - v1->y))*denom;

	PK_dudx_=PK_dudx*SUB_DIVIDE_SIZE;
	PK_dvdx_=PK_dvdx*SUB_DIVIDE_SIZE;
	PK_dzdx_=PK_dzdx*SUB_DIVIDE_SIZE;



	bool mid = dXdY_V1V3<dXdY_V1V2;
	// if dXdY_V1V3 slope bigger than dXdY_V1V2
	// then v2 is at the left side of triangle
        if (!mid) {
	         // v2 at the left side

		prestep = SUB_PIX(v1->y);
                if (y1i==y2i) {
		
			Left_dUdY = dUdY_V2V3;
			Left_dVdY = dVdY_V2V3;
			Left_dZdY = dZdY_V2V3;
			Left_dXdY = dXdY_V2V3;
			Right_dXdY = dXdY_V1V3;

			LeftU = u_b + SUB_PIX(v2->y)*Left_dUdY;
			LeftV = v_b + SUB_PIX(v2->y)*Left_dVdY;
			LeftZ = z_b + SUB_PIX(v2->y)*Left_dZdY;
			LeftX = v2->x + SUB_PIX(v2->y)*Left_dXdY;
			RightX = v1->x + prestep*Right_dXdY;

			DrawSegment(y1i, y3i, textmap);
			return;
		}

		Right_dXdY = dXdY_V1V3;

		if (y1i<y2i) {
		
			Left_dUdY = dUdY_V1V2;
			Left_dVdY = dVdY_V1V2;
			Left_dZdY = dZdY_V1V2;
			Left_dXdY = dXdY_V1V2;

			LeftU = u_a + prestep*Left_dUdY;
			LeftV = v_a + prestep*Left_dVdY;
			LeftZ = z_a + prestep*Left_dZdY;
			LeftX = v1->x + prestep*Left_dXdY;
			RightX = v1->x + prestep*Right_dXdY;
			DrawSegment(y1i, y2i, textmap);
		 }

		if (y2i<y3i) {
		
			Left_dXdY = dXdY_V2V3;
			Left_dUdY = dUdY_V2V3;
			Left_dVdY = dVdY_V2V3;
			Left_dZdY = dZdY_V2V3;

			LeftU = u_b + SUB_PIX(v2->y)*Left_dUdY;
			LeftV = v_b + SUB_PIX(v2->y)*Left_dVdY;
			LeftZ = z_b + SUB_PIX(v2->y)*Left_dZdY;
			LeftX = v2->x + SUB_PIX(v2->y)*Left_dXdY;
			DrawSegment(y2i, y3i, textmap);
		}

	}
	 else

	if (mid) {
	         // v2 at the right side

		prestep = SUB_PIX(v1->y);

		if (y1i==y2i) {
		
			Left_dUdY = dUdY_V1V3;
			Left_dVdY = dVdY_V1V3;
			Left_dZdY = dZdY_V1V3;
			Left_dXdY = dXdY_V1V3;
			Right_dXdY = dXdY_V2V3;

			LeftU = u_a + prestep*Left_dUdY;
			LeftV = v_a + prestep*Left_dVdY;
			LeftZ = z_a + prestep*Left_dZdY;
			LeftX = v1->x + prestep*Left_dXdY;
			RightX = v2->x + SUB_PIX(v2->y)*Right_dXdY;
			DrawSegment(y1i, y3i, textmap);
			return;
		}

		Left_dXdY = dXdY_V1V3;
		Left_dUdY = dUdY_V1V3;
		Left_dVdY = dVdY_V1V3;
		Left_dZdY = dZdY_V1V3;

		if (y1i<y2i) {
		
			Right_dXdY = dXdY_V1V2;

			LeftU = u_a + prestep*Left_dUdY;
			LeftV = v_a + prestep*Left_dVdY;
			LeftZ = z_a + prestep*Left_dZdY;
			LeftX = v1->x + prestep*Left_dXdY;
			RightX = v1->x + prestep*Right_dXdY;
			DrawSegment(y1i, y2i, textmap);
		}

		if (y2i<y3i) {
		
			Right_dXdY = dXdY_V2V3;
			RightX = v2->x + SUB_PIX(v2->y)*Right_dXdY;
			DrawSegment(y2i, y3i, textmap);
		}

	}

}


		// ecx = x2-x1
		// eax = u1  16:16
		// edx = v1  16:16
		// esi = texturemap
		// edi = destination

// Inline-assembly body (`#pragma aux` form) for affine_tline: an affine
// texture span writer.
//
// - Does nothing when the pixel count in ecx is zero or negative.
// - ebp is saved and restored around the loop; inside it holds a negative
//   index that counts up to zero while edi points just past the span.
// - For every pixel the texel offset is
//       ((v & 0xff0000) >> 8) + ((u & 0xff0000) >> 16)
//   i.e. the integer part of v selects the high byte and the integer part
//   of u the low byte. One byte is copied from esi + offset to the
//   destination, then [DU] and [DV] are added to u and v.
// - NOTE(review): DU and DV are memory operands that are not defined in
//   this part of the file, and nothing here calls affine_tline (DrawSegment
//   does the equivalent loop in C) -- confirm whether it is still used.
#pragma aux affine_tline=\
"push ebp"\
"test ecx,ecx"\
"jle @eend"\
"mov ebp,ecx"\
"add edi,ebp"\
"xor ebp,-1"\
"inc ebp"\
"@inner:"\
"mov ebx,eax"\
"mov ecx,edx"\
"and ebx,0xff0000"\
"and ecx,0xff0000"\
"shr ebx,16"\
"shr ecx,8"\
"add ecx,ebx"\
"mov bl,[esi+ecx]"\
"mov [edi+ebp],bl"\
"add eax,[DU]"\
"add edx,[DV]"\
"inc ebp"\
"jnz @inner"\
"@eend:"\
"pop ebp"\
parm [ecx] [eax] [edx] [esi] [edi] modify [eax ebx ecx edx edi];
//    dx    u     v     src    dst

// Draw scanlines y1 .. y2-1 of the current triangle half.
//
//   y1, y2  : first scanline and one past the last scanline
//   textmap : texture, indexed with the integer part of U (8 bits) as the low
//             byte and the integer part of V (8 bits) as the high byte
//
// Reads and advances the global edge state (LeftX, RightX, LeftU, LeftV,
// LeftZ and their per-scanline gradients) and moves dest_ptr down by one row
// (XRES bytes) per scanline. Each scanline is cut into spans of
// SUB_DIVIDE_SIZE pixels: the perspective divide is done only at span ends
// and U/V are interpolated linearly (16.16 fixed point) in between.
// No clipping is done here.
inline void DrawSegment(long y1, long y2, unsigned char *textmap)
{
	float u, v, z, Z;
	long du, dv, width;
	long U, V, U1, V1, U2, V2;
	long x1, x2, y, x;

	for (y=y1;y<y2;y++)
	{
		// First pixel on or right of each edge.
		x1 = ceil(LeftX);
		x2 = ceil(RightX);

		dest = dest_ptr+x1;

		// Sub-texel correction: move the interpolants from the exact
		// edge position to the first pixel.
		u = LeftU + SUB_PIX(LeftX)*PK_dudx;
		v = LeftV + SUB_PIX(LeftX)*PK_dvdx;
		z = LeftZ + SUB_PIX(LeftX)*PK_dzdx;

		// Perspective divide at the span start, scaled to 16.16.
		Z = 65536.0 / z;
		U2 = u*Z;
		V2 = v*Z;

		width = x2 - x1;

		// Full spans of SUB_DIVIDE_SIZE pixels.
		while (width>=SUB_DIVIDE_SIZE)
		{
			u+=PK_dudx_;
			v+=PK_dvdx_;
			z+=PK_dzdx_;

			U1 = U2;
			V1 = V2;

			// Exact texture coordinates at the end of the span.
			Z = 65536.0 / z;
			U2 = u*Z;
			V2 = v*Z;

			du = (U2 - U1) >> SUB_DIVIDE_SHIFT;
			dv = (V2 - V1) >> SUB_DIVIDE_SHIFT;
			U=U1;
			V=V1;
			x = SUB_DIVIDE_SIZE;

			while (x--)
			{
				*dest++=textmap[( (U&0xFF0000)>>16 ) + ( (V & 0xFF0000)>>8 )];
				U+=du;
				V+=dv;
			}

			width-=SUB_DIVIDE_SIZE;
		}

		// Remaining pixels (fewer than one full span).
		if (width>0)
		{
			U1 = U2;
			V1 = V2;

			u+=(PK_dudx*width);
			v+=(PK_dvdx*width);
			z+=(PK_dzdx*width);

			Z = 65536.0 / z;
			U2 = u*Z;
			V2 = v*Z;

			du = (U2 - U1) / width;
			dv = (V2 - V1) / width;
			U=U1;
			V=V1;

			while (width--)
			{
				// U is masked exactly as in the full-span loop, so
				// coordinates outside 0..255 wrap instead of
				// indexing outside the texture.
				*dest++=textmap[( (U&0xFF0000)>>16 ) + ( (V & 0xFF0000)>>8 )];
				U+=du;
				V+=dv;
			}
		}

		// Step the edges and the destination row to the next scanline.
		LeftU+=Left_dUdY;
		LeftV+=Left_dVdY;
		LeftZ+=Left_dZdY;
		LeftX+=Left_dXdY;
		RightX+=Right_dXdY;
		dest_ptr+=XRES;
	}

}

// Inline-assembly body (`#pragma aux` form) for swap_(a, b).
// `a` arrives in eax and `b` in edx; the 32-bit value at each address is
// loaded (into ebx and ecx, the two scratch registers listed under `modify`)
// and stored back at the other address.
// NOTE(review): exactly 4 bytes are exchanged. Pk_Triangle uses this to swap
// PK_Vertex pointers, which presumably assumes 32-bit pointers -- confirm
// for the target memory model.
#pragma aux swap_ = \
	"mov ebx, [eax]"\
	"mov ecx, [edx]"\
	"mov [edx], ebx"\
	"mov [eax], ecx"\
	parm [eax] [edx] \
	modify [ebx ecx];

