;

align 4
;Projection parameters read by f_matrix_project (32-bit floats).

;Multiplied into the translated x / y before the perspective divide.
x_scale         dd 256.0
y_scale         dd 213.333333333

;Translation added to the rotated point before it is stored and projected.
x_add           dd 0.0
y_add           dd -450.0
z_add           dd 3500.0

;Added to x/z and y/z after the perspective divide
;(presumably the centre of a 320x200 screen -- confirm).
x_middle        dd 160.0
y_middle        dd 100.0

;
;Rotate, translate and perspective-project an array of 3D points.
;
;For every point p = (px, py, pz), read from [esi], [esi + 4], [esi + 8]:
;       x = row0 . p + x_add
;       y = row1 . p + y_add
;       z = row2 . p + z_add
;       [edx], [edx + 4], [edx + 8] = x, y, z
;       [edi]     = x * x_scale / z + x_middle
;       [edi + 4] = y * y_scale / z + y_middle
;       [edi + 8] = 1 / z
;All three arrays are walked with a stride of 16 bytes per point.
;
;In:
;       ebx: 3*3 matrix of 32-bit floats
;       ecx: num pts (0 is allowed: nothing is read or written)
;       esi: pointer to source pts
;       edx: pointer to transformed pts (use for lighting, clipping etc.)
;       edi: pointer to transformed & projected pts (use for drawing)
;Out:
;       ecx = 0; esi, edx, edi advanced by 16 * num pts
;Changes:
;       eax, flags
;Notes:
;       - All 8 FPU registers are in use at the deepest point, so the FPU
;         stack must be empty on entry; it is empty again on exit.
;       - z is not tested before the divide, so z = 0 is not handled here.
;       - The cache-warming loads read the dword at offset +16 of the current
;         point in each array; on the last point that is the first dword past
;         the end of each array, which must therefore be readable.
;       - Flags set by "dec ecx" are consumed by the final "jnz"; only FPU
;         instructions (which leave EFLAGS alone) may sit between them.

align 32
proc    f_matrix_project n

        test    ecx, ecx        ;empty list: the bottom-tested loop below would
        jz      @@done          ;otherwise wrap ecx and run ~2^32 iterations

align 32                        ;keep the loop top on a 32-byte boundary
@@project:
        ;Disabled per-point test; @@skip below is only reachable if this
        ;is re-enabled.
        ;cmp     [b esi + 12], 1
        ;jne     @@skip

        ;x terms
        fld     [d esi]         ;3
        fmul    [d ebx]         ;4-6
        fld     [d esi + 4]     ;5 
        fmul    [d ebx + 4]     ;6-8
        fld     [d esi + 8]     ;7
        fmul    [d ebx + 8]     ;8-10   ;x3, x2, x1

        ;y terms
        fld     [d esi]         ;11
        fmul    [d ebx + 12]    ;12-14
        fld     [d esi + 4]     ;13
        fmul    [d ebx + 16]    ;14-16
        fld     [d esi + 8]     ;15
        fmul    [d ebx + 20]    ;16-18  ;y3, y2, y1, x3, x2, x1

        fxch    st(5)           ;16     ;x1, y2, y1, x3, x2, y3
        faddp   st(4), st       ;17-19  ;y2, y1, x3, x1x2, y3
        fxch    st(2)           ;17     ;x3, y1, y2, x1x2, y3
        fadd    [x_add]         ;18-20  ;x3dx, y1, y2, x1x2, y3

        ;z terms (FPU stack is full after the last fld)
        fld     [d esi]         ;19
        fmul    [d ebx + 24]    ;20-22
        fld     [d esi + 4]     ;21
        fmul    [d ebx + 28]    ;22-24
        fld     [d esi + 8]     ;23
        fmul    [d ebx + 32]    ;24-26  ;z3, z2, z1, x3dx, y1, y2, x1x2, y3

        fxch    st(4)           ;24     ;y1, z2, z1, x3dx, z3, y2, x1x2, y3
        faddp   st(5), st       ;25-27  ;z2, z1, x3dx, z3, y1y2, x1x2, y3
        fxch    st(6)           ;25     ;y3, z1, x3dx, z3, y1y2, x1x2, z2
        fadd    [y_add]         ;26-28  ;y3dy, z1, x3dx, z3, y1y2, x1x2, z2

        fxch    st(5)           ;26     ;x1x2, z1, x3dx, z3, y1y2, y3dy, z2
        faddp   st(2), st       ;27-29  ;z1, x, z3, y1y2, y3dy, z2

        faddp   st(5), st       ;28-30  ;x, z3, y1y2, y3dy, z1z2
        fxch    st(1)           ;28     ;z3, x, y1y2, y3dy, z1z2
        fadd    [z_add]         ;29-31  ;z3dz, x, y1y2, y3dy, z1z2
        fxch    st(2)           ;29     ;y1y2, x, z3dz, y3dy, z1z2
        faddp   st(3), st       ;30-32  ;x, z3dz, y, z1z2

        fxch    st(1)           ;30     ;z3dz, x, y, z1z2
        faddp   st(3), st       ;32-34  ;x, y, z
                                ;wait for z3dz

        ;store the transformed (unprojected, unscaled) point
        fst     [d edx]         ;33-34
        fxch    st(1)           ;33     ;y, x, z
        fst     [d edx + 4]     ;35-36
        fxch    st(2)           ;35
        fst     [d edx + 8]     ;37-38  ;z, x, y
        
        fxch    st(1)           ;37     ;x, z, y
        fmul    [x_scale]         ;39-41
        fxch    st(2)           ;39     ;y, z, x

        fld1                    ;40     ;1, y, z, x
        fdivrp  st(2), st       ;41-80  ;y, 1/z, x

        mov     eax, [esi + 16] ;42     ;load some cache lines during fdiv
        mov     eax, [edx + 16] ;42
        mov     eax, [edi + 16] ;43
        add     edx, 16         ;43     ;update pointers
        add     esi, 16         ;44
        add     edi, 16         ;44
        dec     ecx             ;45     ;update counter (ZF used by jnz below)

        fmul    [y_scale]         ;79-81

        ;edi already points at the next point, hence the "- 16" below
        fxch    st(1)           ;79     ;1/z, y, x
        fst     [d edi + 8 - 16];80-82
                                ;wait for fdiv (need result 1 cycle early)

        fxch    st(2)           ;80     ;x, y, 1/z
        fmul    st, st(2)       ;82-84  ;x/z, y, 1/z

        fxch    st(1)           ;82     ;y, x/z, 1/z
        fmulp   st(2), st       ;84-86  ;x/z, y/z
                                ;wait for fmul (1 cycle between fmuls)
        
        fadd    [x_middle]      ;85-87  
        fxch    st(1)           ;85     ;y/z, x/z
        fadd    [y_middle]      ;87-89
                                ;wait for y/z
        fxch    st(1)           ;87     ;x/z, y/z

        fstp    [d edi - 16]    ;88-90  ;y/z
                                ;wait for x/z (need result 1 cycle early)
        fstp    [d edi + 4 - 16];89-91
                                ;wait for y/z (need result 1 cycle early)

        jnz     @@project       ;92

@@done:
        ret

        ;Step over one point without transforming it. Unreachable while the
        ;test at the top of the loop stays commented out.
@@skip:
        add     edx, 16
        add     esi, 16
        add     edi, 16
        dec     ecx
        jnz     @@project

        ret

endp

;
;Build a 3*3 rotation matrix from three angles and scale it.
;
;Unscaled matrix, row by row (c = cos, s = sin of the x / y / z angle):
;       cy*cz               -sz      sy*cz
;       cx*cy*sz + sx*sy    cx*cz    cx*sy*sz - sx*cy
;       sx*cy*sz - cx*sy    sx*cz    sx*sy*sz + cx*cy
;With the usual column-vector rotation matrices this is Rx * Rz * Ry.
;
;In:
;       st(0): scale
;       ebx: x ang
;       ecx: y ang
;       edx: z ang
;       edi: matrix
;Out:
;       [edi]: 9 floats, rotation * scale
;       st(0) (the scale) has been popped by f_scale_mtx
;Changes:
;       eax, esi (= edi), cosx..sinz
;Assumes f_cos / f_sin (defined elsewhere) take the angle in eax, return a
;32-bit float bit pattern in eax and preserve ebx, ecx, edx and the FPU
;stack -- confirm against their definitions.

align 32
proc    f_gen_matrix n    ; x, z, y

        ;--- cos / sin of each angle into the scratch dwords ---
        mov     eax, ebx                ;x angle
        call    f_cos
        mov     [cosx], eax
        mov     eax, ebx
        call    f_sin
        mov     [sinx], eax

        mov     eax, ecx                ;y angle
        call    f_cos
        mov     [cosy], eax
        mov     eax, ecx
        call    f_sin
        mov     [siny], eax

        mov     eax, edx                ;z angle
        call    f_cos
        mov     [cosz], eax
        mov     eax, edx
        call    f_sin
        mov     [sinz], eax

        ;--- row 0 ---
        fld     [cosz]                  ;m00 = cy * cz
        fmul    [cosy]
        fstp    [d edi]

        fld     [sinz]                  ;m01 = -sz
        fchs
        fstp    [d edi + 4]

        fld     [cosz]                  ;m02 = sy * cz
        fmul    [siny]
        fstp    [d edi + 8]

        ;--- row 1 ---
        fld     [cosy]                  ;m10 = (cx * cy) * sz + sx * sy
        fmul    [cosx]
        fmul    [sinz]
        fld     [siny]
        fmul    [sinx]
        faddp   st(1), st
        fstp    [d edi + 12]

        fld     [cosz]                  ;m11 = cx * cz
        fmul    [cosx]
        fstp    [d edi + 16]

        fld     [siny]                  ;m12 = (cx * sy) * sz - sx * cy
        fmul    [cosx]
        fmul    [sinz]
        fld     [cosy]
        fmul    [sinx]
        fsubp   st(1), st               ;st(1) - st(0): triple product minus pair
        fstp    [d edi + 20]

        ;--- row 2 ---
        fld     [cosy]                  ;m20 = (sx * cy) * sz - cx * sy
        fmul    [sinx]
        fmul    [sinz]
        fld     [siny]
        fmul    [cosx]
        fsubp   st(1), st               ;st(1) - st(0): triple product minus pair
        fstp    [d edi + 24]

        fld     [cosz]                  ;m21 = sx * cz
        fmul    [sinx]
        fstp    [d edi + 28]

        fld     [cosy]                  ;m22 = cx * cy + (sx * sy) * sz
        fmul    [cosx]
        fld     [siny]
        fmul    [sinx]
        fmul    [sinz]
        faddp   st(1), st
        fstp    [d edi + 32]

        ;--- scale in place; the scale is still in st(0) ---
        mov     esi, edi
        call    f_scale_mtx
        ret

endp

align 4
;Scratch dwords for f_gen_matrix: the values returned in eax by f_cos for the
;x, y and z angles, later reloaded with fld as 32-bit floats.
;Static storage, so f_gen_matrix is not re-entrant.
cosx   dd ?
cosy   dd ?
cosz   dd ?

;Same, for the values returned by f_sin.
sinx   dd ?
siny   dd ?
sinz   dd ?

;
;Multiply every element of a 3*3 float matrix by a scalar.
;
;In:
;       st(0): scale (popped; rounded to a 32-bit float before use)
;       esi: mtx
;       edi: dest mtx (may be the same as esi: each row of three is read
;            completely before it is written)
;Out:
;       edi: mtx * scale
;All integer registers are preserved.

align 32
proc    f_scale_mtx n

        push    eax                     ;make a dword slot on the stack;
        fstp    [d esp]                 ;park the scale there, freeing st(0)

        ;row 0: three products, then store them in pop order
        fld     [d esi]
        fmul    [d esp]
        fld     [d esi + 4]
        fmul    [d esp]
        fld     [d esi + 8]
        fmul    [d esp]                 ;m2*s, m1*s, m0*s
        fstp    [d edi + 8]
        fstp    [d edi + 4]
        fstp    [d edi]

        ;row 1
        fld     [d esi + 12]
        fmul    [d esp]
        fld     [d esi + 16]
        fmul    [d esp]
        fld     [d esi + 20]
        fmul    [d esp]                 ;m5*s, m4*s, m3*s
        fstp    [d edi + 20]
        fstp    [d edi + 16]
        fstp    [d edi + 12]

        ;row 2
        fld     [d esi + 24]
        fmul    [d esp]
        fld     [d esi + 28]
        fmul    [d esp]
        fld     [d esi + 32]
        fmul    [d esp]                 ;m8*s, m7*s, m6*s
        fstp    [d edi + 32]
        fstp    [d edi + 28]
        fstp    [d edi + 24]

        pop     eax                     ;drop the slot, restoring eax
        ret

endp

;

align 4
;Uninitialised storage of 12 dwords; not referenced in this part of the file.
;NOTE(review): the 3*3 matrices handled here occupy 9 dwords -- presumably the
;remaining 3 are padding or a translation; confirm against the users.
f_objmatrix             dd 12 dup ( ? )

;
