;
; Rutina de rellenado Phong por environment mapping
;

.386
COMMENT %                  P1
                          +
                         /  \               (P1,P2,P3) dato
                      P4/-----\P2            P4 se calcula
                       /      /
                      /    /
                     /  /
                    //
                   + P3
%

; Build switch: when ONLY_EVEN is defined (remove the leading ';'), the scan
; loops skip every scan line whose y is odd.
;ONLY_EVEN=1


; Clip rectangle used for the trivial rejection tests and the span clipping.
MAX_X = 319
MIN_Y = 0
MAX_Y = 199

; Marker bytes stored in scan_filling_dir. The values are arbitrary
; (original author's note: "but it is sure to confuse whoever reads it :)").
TO_THE_RITE = 0EFh
TO_THE_LEFT = 05Ah



;

_DATA   SEGMENT PARA PUBLIC USE32 'DATA'

        EXTRN _frame_buffer_adr:DWORD, _env_map_adr:DWORD
        PUBLIC _ex1, _ey1, _ex2, _ey2, _ex3, _ey3
        PUBLIC _u1, _u2, _u3, _v1, _v2, _v3
        PUBLIC _frame_buffer_scan_width

;
; Input data (written by the caller before calling phong_fill_)
;
; Misc
; Scan-line width; multiplied by y to obtain the offset of the first scan.
_frame_buffer_scan_width DW 320

; Projected coordinates of the vertices.
; Each public coordinate is a DWORD; the xN / yN WORD labels alias its low
; 16 bits, which the routine uses for its 16-bit compares and arithmetic.
x1 LABEL WORD
_ex1     DD ?
y1 LABEL WORD
_ey1     DD ?

x2 LABEL WORD
_ex2     DD ?
y2 LABEL WORD
_ey2     DD ?

x3 LABEL WORD
_ex3     DD ?
y3 LABEL WORD
_ey3     DD ?

; Spherical angles (u,v) of the three vertices.
; The uNvN DWORD labels alias each (u,v) pair so that a pair can be moved
; with a single 32-bit access when vertices are swapped.
u1v1 LABEL DWORD
_u1      DW ?
_v1      DW ?

u2v2 LABEL DWORD
_u2      DW ?
_v2      DW ?

u3v3 LABEL DWORD
_u3      DW ?
_v3      DW ?

;
; Working variables of the routine
;
; y deltas between vertices: dy21 = y2 - y1, dy31 = y3 - y1, dy32 = y3 - y2
dy21    DD ?
dy31    DD ?
dy32    DD ?

; Point P4: the point on edge P1-P3 at the height of P2
x4      DW ?
y4      DW ?
u4      DW ?
v4      DW ?

; Rasterising state
current_y               DW ?    ; y of the scan line being processed
current_frame_buffer_pointer DD ? ; offset of the current scan line (frame buffer base is added later)
scan_filling_dir        DB ?, ? ; TO_THE_RITE or TO_THE_LEFT; only the first byte is accessed (second presumably padding)

; Values interpolated down the edges. The x values are 16.16 fixed point
; (loaded with "shl 16", read back with "sar 16").
long_side_u             DW ?
long_side_v             DW ?
long_side_x             DD ?
short_side_x            DD ?

; Per-scan-line increments of the values above
long_side_inc_u         DW ?
long_side_inc_v         DW ?
long_side_inc_x         DD ?
short_side_inc_x        DD ?

; Per-pixel increments of (u,v) along a scan line
pixel_inc_u             DW ?
pixel_inc_v             DW ?

_DATA   ENDS
DGROUP  GROUP _DATA
;
_TEXT           SEGMENT PARA PUBLIC USE32 'CODE'
                ASSUME CS:_TEXT, DS:_DATA

                PUBLIC phong_fill_
phong_fill_     PROC
                ; Entry point. All inputs come from the public variables in
                ; _DATA; every general register is preserved (pushad/popad).
                pushad

                ; Trivial rejection in x, part 1:
                ; ax = max(x1, x2, x3); give up if even that is left of x = 0
                mov     ax,[x1]
                mov     dx,[x2]
                cmp     ax,dx
                jge     short L1
                mov     ax,dx
L1:             mov     dx,[x3]
                cmp     ax,dx
                jge     short L2
                mov     ax,dx
L2:             test    ax,ax
                js      near ptr L34

                ; Trivial rejection in x, part 2:
                ; bx = min(x1, x2, x3); give up if even that is right of MAX_X
                mov     bx,[x1]
                mov     dx,[x2]
                cmp     bx,dx
                jle     short L3
                mov     bx,dx
L3:             mov     dx,[x3]
                cmp     bx,dx
                jle     short L4
                mov     bx,dx
L4:             cmp     bx,MAX_X
                jg      near ptr L34

                ; Sort the vertices so that y1 <= y2 <= y3 with three
                ; compare-and-swap steps: (1,2), (2,3), (1,2). A swap
                ; exchanges x, y and the packed (u,v) pair of both vertices.
                ; Note: the first compare is 16-bit, the other two 32-bit.
L5:             mov     ax,[y1]
                cmp     ax,[y2]
                jle     short L6
                ; P1 <-> P2
                mov     eax,[_ex1]
                xchg    eax,[_ex2]
                mov     [_ex1],eax
                mov     eax,[_ey1]
                xchg    eax,[_ey2]
                mov     [_ey1],eax
                mov     eax,[u1v1]
                xchg    eax,[u2v2]
                mov     [u1v1],eax

L6:             mov     eax,[_ey2]
                cmp     eax,[_ey3]
                jle     short L7
                ; P2 <-> P3
                mov     eax,[_ex2]
                xchg    eax,[_ex3]
                mov     [_ex2],eax
                mov     eax,[_ey2]
                xchg    eax,[_ey3]
                mov     [_ey2],eax
                mov     eax,[u2v2]
                xchg    eax,[u3v3]
                mov     [u2v2],eax

L7:             mov     eax,[_ey1]
                cmp     eax,[_ey2]
                jle     short L8
                ; P1 <-> P2
                mov     eax,[_ex1]
                xchg    eax,[_ex2]
                mov     [_ex1],eax
                mov     eax,[_ey1]
                xchg    eax,[_ey2]
                mov     [_ey1],eax
                mov     eax,[u1v1]
                xchg    eax,[u2v2]
                mov     [u1v1],eax
L8:
                ; Trivial rejection in y. The vertices are sorted, so the
                ; triangle is invisible when its top is below MAX_Y or its
                ; bottom is above MIN_Y.
                cmp     dword ptr [_ey1],MAX_Y
                jg      near ptr L34
                cmp     dword ptr [_ey3],MIN_Y
                jl      near ptr L34

                ; Edge heights. y1 == y2 (flat top) is handled separately
                ; at L35; in that case dy21 is left untouched.
                mov     ebx,[_ey1]
                mov     eax,[_ey2]
                sub     eax,ebx
                jz      near ptr L35
                mov     [dy21],eax

                mov     eax,[_ey3]
                mov     ecx,eax
                sub     eax,ebx
                jnz     short L12
                inc     eax                     ; never divide by zero
L12:            mov     [dy31],eax

                sub     ecx,[_ey2]
                mov     [dy32],ecx

L13:
                ; Start the long edge (P1-P3) interpolators at P1
                mov     ax,[_u1]
                mov     [long_side_u],ax
                mov     ax,[_v1]
                mov     [long_side_v],ax

                ; Both edges start at x1 (16.16 fixed point), scanning
                ; downwards from y1
                mov     eax,[_ex1]
                shl     eax,16
                mov     [long_side_x],eax
                mov     [short_side_x],eax
                mov     ax,[y1]
                mov     [current_y],ax

                ; Per-scan increments of u, v and x along the long edge ...
                mov     ax,[_u3]
                sub     ax,[_u1]
                cwde
                cdq
                idiv    [dy31]
                mov     [long_side_inc_u],ax

                mov     ax,[_v3]
                sub     ax,[_v1]
                cwde
                cdq
                idiv    [dy31]
                mov     [long_side_inc_v],ax

                mov     eax,[_ex3]
                sub     eax,[_ex1]
                shl     eax,16
                cdq
                idiv    [dy31]
                mov     [long_side_inc_x],eax

                ; ... and of x along the upper short edge (P1-P2)
                mov     eax,[_ex2]
                sub     eax,[_ex1]
                shl     eax,16
                cdq
                idiv    [dy21]
                mov     [short_side_inc_x],eax

                ; Point P4 = the long edge evaluated at y2:
                ;   value4 = value1 + (value3 - value1) * dy21 / dy31
                mov     ax,[_u3]
                sub     ax,[_u1]
                cwde
                imul    [dy21]
                idiv    [dy31]
                add     ax,[_u1]
                mov     [u4],ax

                mov     ax,[_v3]
                sub     ax,[_v1]
                cwde
                imul    [dy21]
                idiv    [dy31]
                add     ax,[_v1]
                mov     [v4],ax

                mov     ax,[x3]
                sub     ax,[x1]
                cwde
                imul    [dy21]
                idiv    [dy31]
                add     ax,[x1]
                mov     [x4],ax

                ; Which side of P2 does the long edge pass on? (ax = x4)
                cmp     ax,[x2]
                jl      short L14               ; x4 <  x2
                je      short L17               ; x4 == x2
                jmp     L18                     ; x4 >  x2

;
; The next three sections are mutually exclusive. Each one selects the
; direction in which scans are filled (always starting at the long edge)
; and computes the per-pixel increments of (u,v) along a scan:
;       pixel_inc = (value at P2 - value at P4) / |x2 - x4|
;
;**** x4 < x2: the long edge (P1-P3) passes left of P2 -> fill rightwards
L14:            mov     [scan_filling_dir],TO_THE_RITE
                movsx   ecx,[x2]
                movsx   eax,[x4]
                sub     ecx,eax                 ; ecx = x2 - x4
                jnz     short L15
                inc     ecx                     ; guard against a zero divisor
L15:            movzx   eax,[_u2]
                movzx   ebx,[u4]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_u],ax
                movzx   eax,[_v2]
                movzx   ebx,[v4]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_v],ax
                jmp     L21

;**** x4 == x2: the short and the long edge overlap
L17:            mov     [scan_filling_dir],TO_THE_RITE
                mov     [pixel_inc_u],1
                mov     [pixel_inc_v],1
                jmp     short L21

;**** x4 > x2: the long edge passes right of P2 -> fill leftwards
L18:            mov     [scan_filling_dir],TO_THE_LEFT
                movsx   ecx,[x4]
                movsx   eax,[x2]
                sub     ecx,eax                 ; ecx = x4 - x2
                jnz     short L19
                inc     ecx                     ; guard against a zero divisor
L19:            movzx   eax,[_u2]
                movzx   ebx,[u4]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_u],ax
                movzx   eax,[_v2]
                movzx   ebx,[v4]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_v],ax
;
L21:


                ; Offset of the first scan line inside the frame buffer:
                ; current_y * scan width (the buffer base is added per scan)
                movzx   eax,[_frame_buffer_scan_width]
                movsx   ebx,[current_y]
                imul    eax,ebx
                mov     [current_frame_buffer_pointer],eax

                ; Number of scan lines to draw: y3 - y1, but at least one
                mov     cx,[y3]
                sub     cx,[y1]
                jnz     short L22
                inc     cx


;
; Main loop, executed once per scan line
;
L22:            push    cx                      ; keep the scan counter; cx is reused below
                ; At the break point (y == y2) the short side switches from
                ; edge P1-P2 to edge P2-P3, so its x slope is recomputed:
                ;   ((x3 - x2) << 16) / (y3 - y2), with dy32 = y3 - y2
                mov     ax,[y2]
                cmp     ax,[current_y]
                jne     short L23
                        mov     eax,[_ex3]
                        sub     eax,[_ex2]
                        shl     eax,16
                        cdq
                        idiv    [dy32]
                        mov     [short_side_inc_x],eax
L23:

IFDEF ONLY_EVEN
                ; Draw even scan lines only
                test    [current_y],1
                jnz     NEAR PTR L33
ENDIF
                ; Skip scan lines above or below the clip window
                cmp     [current_y],MIN_Y
                jl      near ptr L33
                cmp     [current_y],MAX_Y
                jg      near ptr L33

                ; edi = offset of the long-side pixel of this scan line
                ; (relative to the start of the frame buffer)
                mov     edi,[long_side_x]
                sar     edi,16
                add     edi,[current_frame_buffer_pointer]

                ; Scans filled from right to left are handled at L28
                cmp     [scan_filling_dir],TO_THE_LEFT
                je      near ptr L28

                ; ---- Rightward fill: the span starts at the long side ----
                ; Fetch the (u,v) of the first pixel: bp = u, si = v
                mov     bp,[long_side_u]
                mov     si,[long_side_v]

                ; Integer x of both span ends (16.16 fixed point -> integer):
                ; cx = short side (where the span ends), bx = long side (where it starts)
                mov     ecx,[short_side_x]
                sar     ecx,10H
                mov     ebx,[long_side_x]
                sar     ebx,10H

                ; Skip the scan when the span is completely outside
                ; the clip window
                cmp     bx,MAX_X
                jg      near ptr L33
                cmp     cx,0
                jl      near ptr L33

                ; Clip the span on the right
                cmp     cx,MAX_X
                jle     short L24
                mov     cx,MAX_X
L24:
                ; Clip the span on the left
                cmp     bx,0
                jge     short L26
                        ; edx = -bx = number of pixels cut off. Move the
                        ; destination to x = 0 and step (u,v) once per
                        ; skipped pixel.
                        xor     edx,edx
                        sub     dx,bx
                        mov     bx,0
                        add     edi,edx
L25:                    add     bp,[pixel_inc_u]
                        add     si,[pixel_inc_v]
                        dec     edx
                        jne     short L25
L26:
                ; Number of pixels to fill: ecx = |cx - bx| + 1
                sub     cx,bx
                cmp     cx,0
                jge     short L27
                neg     cx
L27:
                inc     cx
                movzx   ecx,cx
COMMENT %
                ; Skip the scan if it is above or below
                ; the clip window
                cmp     [current_y],MIN_Y
                jl      near ptr L33
                cmp     [current_y],MAX_Y
                jg      near ptr L33
%
                ; Load the registers with the values the inner loop needs:
                ;   ebx = v in the high word, u in the low word
                ;   edx = pixel_inc_v in the high word, pixel_inc_u in the low word
                mov     ebx,esi
                shl     ebx,16
                mov     bx,bp
                mov     dx,[pixel_inc_v]
                shl     edx,16
                mov     dx,[pixel_inc_u]

                ; Jump into the middle of the unrolled loop so that exactly
                ; ecx of its 320 (0140H) slots run. eax = index of the first
                ; slot; edi is moved back by the same amount because slot I
                ; stores to [edi+I].
                mov     eax,0140H
                sub     eax,ecx
                sub     edi,eax

                        ; Multiply eax by 18 (16x + 2x), the size in bytes
                        ; of one unrolled slot; ebp is used as scratch
                        ;mov     ecx,eax
                        mov     ebp,eax
                        shl     eax,4   ;*16
                        ;shl     ecx,2   ;*4
                        add     ebp,ebp ;*2
                        ;add     eax,ecx
                        add     eax,ebp

                ; eax = entry address, esi = environment map, edi = absolute
                ; destination address
                add     eax,OFFSET Unrolled1
                mov     esi,[_env_map_adr]
                add     edi,[_frame_buffer_adr]
                call    eax
                jmp     near ptr L33            ; Close the scan loop



; ---- Leftward fill: the span starts at the long side and runs towards
; ---- lower x. bp = u, si = v of the first pixel.
L28:            mov     bp,[long_side_u]
                mov     si,[long_side_v]

                ; Integer x of both span ends:
                ; cx = long side (where the span starts), bx = short side (where it ends)
                mov     ecx,[long_side_x]
                sar     ecx,16
                mov     ebx,[short_side_x]
                sar     ebx,16

                ; Skip the scan when the span is completely outside the
                ; clip window
                cmp     cx,0
                jl      near ptr L33
                cmp     bx,MAX_X
                jg      near ptr L33

                ; Clip the span on the left
                cmp     bx,0
                jge     short L29
                mov     bx,0
L29:

                ; Clip the span on the right (the starting end)
                cmp     cx,MAX_X
                jle     short L31
                        ; edx = cx - MAX_X = number of pixels cut off. Move
                        ; the destination back to x = MAX_X and step (u,v)
                        ; once per skipped pixel.
                        movzx   edx,cx
                        mov     ecx,MAX_X
                        sub     edx,MAX_X
                        sub     edi,edx
L30:                    add     bp,[pixel_inc_u]
                        add     si,[pixel_inc_v]
                        dec     edx
                        jne     short L30
L31:

                ; Number of pixels to fill: ecx = |cx - bx| + 1
                sub     cx,bx
                cmp     cx,0
                jge     short L32
                neg     cx
L32:            inc     cx
                movzx   ecx,cx

                ; Vertical clip test (the same test is also made at the top
                ; of the scan loop, before this path is entered)
                cmp     [current_y],MIN_Y
                jl      near ptr L33
                cmp     [current_y],MAX_Y
                jg      near ptr L33

                ; Load the registers with the values the inner loop needs:
                ;   ebx = v in the high word, u in the low word
                ;   edx = pixel_inc_v in the high word, pixel_inc_u in the low word
                mov     ebx,esi
                shl     ebx,16
                mov     bx,bp
                mov     dx,[pixel_inc_v]
                shl     edx,16
                mov     dx,[pixel_inc_u]

                ; Enter the unrolled loop so that exactly ecx of its 320
                ; (0140H) slots run. eax = index of the first slot; slot I of
                ; Unrolled2 stores to [edi-I], so edi is moved forward by eax.
                mov     eax,0140H
                sub     eax,ecx
                add     edi,eax

                        ; Multiply eax by 18 (16x + 2x), the size in bytes
                        ; of one unrolled slot; ebp is used as scratch
                        ;mov     ecx,eax
                        mov     ebp,eax
                        shl     eax,4   ;*16
                        ;shl     ecx,2   ;*4
                        add     ebp,ebp ;*2
                        ;add     eax,ecx
                        add     eax,ebp

                ; eax = entry address, esi = environment map, edi = absolute
                ; destination address
                add     eax,OFFSET Unrolled2
                mov     esi,[_env_map_adr]
                add     edi,[_frame_buffer_adr]
                call    eax
L33:
                ; Step every interpolated value to the next scan line
                mov     ax,[long_side_inc_u]
                add     [long_side_u],ax
                mov     ax,[long_side_inc_v]
                add     [long_side_v],ax
                mov     eax,[long_side_inc_x]
                add     [long_side_x],eax
                mov     eax,[short_side_inc_x]
                add     [short_side_x],eax

                ; Advance to the next scan line by the configured scan width.
                ; The initial offset is computed from _frame_buffer_scan_width,
                ; so the per-scan step must use the same value rather than a
                ; hard-coded 320, or any other width skews the image.
                movzx   eax,[_frame_buffer_scan_width]
                add     [current_frame_buffer_pointer],eax
                inc     [current_y]

                ; Scan counter was pushed at L22
                pop     cx
                dec     cx
                jne     near ptr L22            ; Loop!

                ; Common exit, also used by the trivial-rejection tests
L34:            popad
                ret

; Second case - flat-top triangle: y1 == y2 <= y3.
; P1 and P2 are first ordered so that x1 <= x2.

L35:            mov     ax,[x2]
                cmp     ax,[x1]
                jge     short L36
                ; P1 <-> P2 (x, y and the packed (u,v) pair)
                mov     eax,[_ex1]
                xchg    eax,[_ex2]
                mov     [_ex1],eax
                mov     eax,[_ey1]
                xchg    eax,[_ey2]
                mov     [_ey1],eax
                mov     eax,[u1v1]
                xchg    eax,[u2v2]
                mov     [u1v1],eax
L36:            ; (u,v) walked down edge P1-P3 start at P1
                mov     ax,[_u1]
                mov     [long_side_u],ax
                mov     ax,[_v1]
                mov     [long_side_v],ax

                ; Both edges start on the top scan line: P1 and P2
                mov     eax,[_ex1]
                shl     eax,16
                mov     [long_side_x],eax
                mov     eax,[_ex2]
                shl     eax,16
                mov     [short_side_x],eax
                mov     ax,[y1]
                mov     [current_y],ax

                ; dy31 = y3 - y1, forced to 1 when the triangle has no height
                mov     eax,[_ey3]
                sub     eax,[_ey1]
                jnz     @@DY31OK
                inc     eax
@@DY31OK:
                mov     [dy31],eax
                mov     ecx,eax                 ; ecx = dy31 for the divisions below

                ; Per-scan increments of (u,v,x) along P1-P3 ...
                mov     ax,[_u3]
                sub     ax,[_u1]
                cwde
                cdq
                idiv    ecx
                mov     [long_side_inc_u],ax

                mov     ax,[_v3]
                sub     ax,[_v1]
                cwde
                cdq
                idiv    ecx
                mov     [long_side_inc_v],ax

                mov     eax,[_ex3]
                sub     eax,[_ex1]
                shl     eax,16
                cdq
                idiv    ecx
                mov     [long_side_inc_x],eax

                ; ... and of x along P2-P3 (same height, since y1 == y2)
                mov     eax,[_ex3]
                sub     eax,[_ex2]
                shl     eax,16
                cdq
                idiv    ecx
                mov     [short_side_inc_x],eax

                ; Per-pixel increments of (u,v), measured along the top
                ; edge: (value2 - value1) / (x2 - x1), divisor forced
                ; non-zero
                movsx   ecx,[x2]
                movsx   eax,[x1]
                sub     ecx,eax
                jnz     short L37
                inc     ecx
L37:            movzx   eax,[_u2]
                movzx   ebx,[_u1]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_u],ax
                movzx   eax,[_v2]
                movzx   ebx,[_v1]
                sub     eax,ebx
                cdq
                idiv    ecx
                mov     [pixel_inc_v],ax

                ; Offset of the first scan line inside the frame buffer
                movzx   eax,[_frame_buffer_scan_width]
                movsx   ebx,[current_y]
                imul    eax,ebx
                mov     [current_frame_buffer_pointer],eax

                ; Number of scan lines to draw: y3 - y1, but at least one
                mov     cx,[y3]
                sub     cx,[y1]
                jnz     short L39
                inc     cx

;
; Main loop, once per scan line - flat-top case (y1 == y2).
; Spans are always filled rightwards, starting at the P1-P3 edge.
;
L39:            push    cx                      ; keep the scan counter; cx is reused below
IFDEF ONLY_EVEN
                ; Draw even scan lines only
                mov     ax,[current_y]
                test    ax,1
                jnz     NEAR PTR L44
ENDIF
                ; edi = offset of the first pixel of the span (long side),
                ; relative to the start of the frame buffer
                mov     edi,[current_frame_buffer_pointer]
                mov     eax,[long_side_x]
                sar     eax,10H
                add     edi,eax

                ; (u,v) of the first pixel: bp = u, si = v
                mov     bp,[long_side_u]
                mov     si,[long_side_v]

                ; Integer x of both span ends:
                ; cx = short side (where the span ends), bx = long side (where it starts)
                mov     ecx,[short_side_x]
                sar     ecx,10H
                mov     ebx,[long_side_x]
                sar     ebx,10H

                ; Skip the scan when the span is completely outside the
                ; clip window; otherwise clip it on the right
                cmp     bx,MAX_X
                jg      near ptr L44
                cmp     cx,0
                jl      near ptr L44
                cmp     cx,MAX_X
                jle     short L40
                mov     cx,MAX_X
L40:

                ; Clip the span on the left
                cmp     bx,0
                jge     short L42
                        ; edx = -bx = number of pixels cut off. Move the
                        ; destination to x = 0 and step (u,v) once per
                        ; skipped pixel.
                        xor     edx,edx
                        sub     dx,bx
                        xor     ebx,ebx
                        add     edi,edx
L41:                    add     bp,[pixel_inc_u]
                        add     si,[pixel_inc_v]
                        dec     dx
                        jne     short L41


                ; Number of pixels to fill: ecx = |cx - bx| + 1
L42:            sub     cx,bx
                cmp     cx,0
                jge     short L43
                neg     cx
L43:
                inc     cx
                movzx   ecx,cx

                ; Skip scan lines above or below the clip window
                cmp     [current_y],MIN_Y
                jl      near ptr L44
                cmp     [current_y],MAX_Y
                jg      near ptr L44

                ; Load the registers with the values the inner loop needs:
                ;   ebx = v in the high word, u in the low word
                ;   edx = pixel_inc_v in the high word, pixel_inc_u in the low word
                mov     ebx,esi
                shl     ebx,16
                mov     bx,bp
                mov     dx,[pixel_inc_v]
                shl     edx,16
                mov     dx,[pixel_inc_u]

                ; Enter the unrolled loop so that exactly ecx of its 320
                ; (0140H) slots run. eax = index of the first slot; edi is
                ; moved back by the same amount because slot I stores to
                ; [edi+I].
                mov     eax,0140H
                sub     eax,ecx
                sub     edi,eax

                        ; Multiply eax by 18 (16x + 2x), the size in bytes
                        ; of one unrolled slot; ebp is used as scratch
                        ;mov     ecx,eax
                        mov     ebp,eax
                        shl     eax,4
                        ;shl     ecx,2
                        add     ebp,ebp
                        ;add     eax,ecx
                        add     eax,ebp

                ; eax = entry address, esi = environment map, edi = absolute
                ; destination address
                add     eax,OFFSET Unrolled3
                mov     esi,[_env_map_adr]
                add     edi,[_frame_buffer_adr]
                call    eax


L44:            ; Step every interpolated value to the next scan line
                mov     ax,[long_side_inc_u]
                add     [long_side_u],ax
                mov     ax,[long_side_inc_v]
                add     [long_side_v],ax
                mov     eax,[long_side_inc_x]
                add     [long_side_x],eax
                mov     eax,[short_side_inc_x]
                add     [short_side_x],eax

                ; Advance to the next scan line by the configured scan width.
                ; The initial offset is computed from _frame_buffer_scan_width,
                ; so the per-scan step must use the same value rather than a
                ; hard-coded 320, or any other width skews the image.
                movzx   eax,[_frame_buffer_scan_width]
                add     [current_frame_buffer_pointer],eax
                inc     [current_y]

                ; Scan counter was pushed at L39
                pop     cx
                dec     cx
                jne     near ptr L39

                ; Flat-top triangle finished: restore the registers saved on
                ; entry and return
                popad
                ret

; Let's throw readers off a bit... These two strings are emitted into the
; code segment after the final ret; nothing in this file references them.
DB 'Square root of negative number', 0
DB 'Ray traced too long', 0

; Unrolled span filler for left-to-right spans (Unrolled1 and Unrolled3 are
; the same code). It is entered with "call eax" at slot (320 - count), so
; that exactly "count" slots run before the retn.
;
; On entry:
;   ebx = v in the high word, u in the low word
;   edx = per-pixel increments of v (high word) and u (low word)
;   esi = environment map address
;   edi = destination address, biased so that slot I stores to [edi+I]
; eax is used as scratch.
;
; Each slot builds the map index in eax as (high byte of v) * 256 +
; (high byte of u), steps (u,v), loads one byte from [esi+eax] and stores it.
ALIGN 16

        Unrolled1:
        Unrolled3:
        ; UNROLLED LOOP
        I = 0
        REPT 320
                mov     eax,ebx
                shr     eax,16
                mov     al,bh
                add     ebx,edx
                mov     al,[esi+eax]
                ;mov     [edi+I],al
                ; Hand-encoded (88h 87h = mov [edi+disp32],al) with a full
                ; 32-bit displacement - presumably so that every slot has the
                ; same length, which the callers rely on (slot index * 18).
                db 88h, 87h
                dd I
        I = I + 1
        ENDM
        ; END UNROLLED LOOP
        retn

; Unrolled span filler for right-to-left spans. It is entered with
; "call eax" at slot (320 - count), so that exactly "count" slots run before
; the retn.
;
; On entry:
;   ebx = v in the high word, u in the low word
;   edx = per-pixel increments of v (high word) and u (low word)
;   esi = environment map address
;   edi = destination address, biased so that slot I stores to [edi-I]
; eax is used as scratch.
;
; Each slot builds the map index in eax as (high byte of v) * 256 +
; (high byte of u), steps (u,v), loads one byte from [esi+eax] and stores it.
ALIGN 16

        Unrolled2:
        ; UNROLLED LOOP
        I = 0
        REPT 320
                mov     eax,ebx
                shr     eax,16
                mov     al,bh
                add     ebx,edx
                mov     al,[esi+eax]
                ;mov     [edi+I],al
                ; Hand-encoded (88h 87h = mov [edi+disp32],al) with a full
                ; 32-bit displacement - presumably so that every slot has the
                ; same length, which the caller relies on (slot index * 18).
                ; I counts down here, so successive slots store to
                ; decreasing addresses.
                db 88h, 87h
                dd I
        I = I - 1
        ENDM
        ; END UNROLLED LOOP
        retn


COMMENT %
        Es igual que el unrolled1, mirar arriba
        Unrolled3:
        ; UNROLLED LOOP
        I = 0
        REPT 320
                mov     eax,ebx
                shr     eax,16
                mov     al,bh
                mov     al,[esi+eax]
                ;mov     [edi+I],al
                db 88h, 87h
                dd I
                add     ebx,edx
        I = I + 1
        ENDM
        ; END UNROLLED LOOP
        retn
%
        ENDP

_TEXT   ENDS

        END
