;-----------------------------------------------------------
;Bubble Universe DOS - port & minor mods by Kuemmel 10/2023
;
;learn about the original here:
;https://stardot.org.uk/forums/viewtopic.php?t=25833
;seems to be from ZXDunny, but even he said he didn't
;know where he got the algorithm from
;-----------------------------------------------------------
;CONST n=200,r=TAU/235:
;x, y, v, t=0, sz=200,sw=SCRW/sz,sh=SCRH/sz:
;WINDOW DEPTH 0,32:
;ORIGIN -sw,-sh TO sw,sh:
;SCREEN LOCK:
;DO:
;  CLS 0:
;  FOR i=0 TO n; j=0 TO n:
;    u=SIN(i+v)+SIN(r*i+x),
;    v=COS(i+v)+COS(r*i+x),
;    x=u+t:
;    PLOT INK RGBTOINT(i,j,99);u,v:
;  NEXT j;i:
;  t+=.025:
;  WAIT SCREEN:
;LOOP

org 100h

;---assemble-time constants
x_res         = 640             ; visible width in pixels
y_res         = 480             ; visible height in pixels
vbe_mode      = 0x4112          ; VBE mode 0x112 (640x480) with bit 14 = linear frame buffer request
mode_info_buf = 0x200           ; offset of the 256-byte VBE mode information block

;---video setup (VBE code by JIN-X)
; Fetch the mode information block into es:mode_info_buf, then switch to the
; mode. The main loop later reads the dword at mode_info_buf+0x28 and uses it
; as the frame buffer address. Neither call's status in ax is checked.
; The plot code writes one dword per pixel, i.e. it assumes a 32 bpp mode.
        mov     ax,0x4f01               ; VBE 01h: get mode info (cx = mode, es:di = buffer)
        mov     cx,vbe_mode
        mov     di,mode_info_buf
        int     0x10
        mov     bx,cx                   ; reuse the mode number; relies on cx surviving the call
        mov     ax,0x4f02               ; VBE 02h: set video mode (bx = mode)
        int     0x10

;---CPU mode setup (code by JIN-X)
; Order matters: es is loaded before the machine status word is changed.
        cli                             ; interrupts stay off for the rest of the program
        pop     es                      ; es = word on top of the stack (presumably the 0 DOS
                                        ; pushes for a .COM program, giving es a base of 0)
        mov     eax,0x40603             ; one constant serves both lmsw and cr4
        lmsw    ax                      ; low MSW bits = 0011b: PE + MP
        mov     cr4,eax                 ; bits 0,1,9,10,18: VME, PVI, OSFXSR, OSXMMEXCPT, OSXSAVE
        xor     ecx,ecx                 ; ecx = 0 selects XCR0; also leaves cx = 0 for the main loop
        xgetbv                          ; edx:eax = XCR0
        or      al,0x7                  ; enable x87, SSE and AVX state
        xsetbv

;---register / FPU state expected by mainloop
        mov     si,data_stuff           ; si = base of the constant table for the whole run
        fninit                          ; empty FPU stack
        fldz                            ; st0 = t = 0
        fldz                            ; st0 = u = 0, st1 = t
        fldz                            ; st0 = v = 0, st1 = u, st2 = t
        vzeroall                        ; all ymm = 0; ymm0 is the zero source for screen clearing

mainloop:
        mov     edi,[si+(0x200-data_stuff+0x28)]    ; si = data_stuff, so this loads the dword at
                                                    ; 0x228 (mode info block + 0x28) = frame buffer base

; Per-frame plot loop. cx is 0 on entry and doubles as the point index:
; ch = i, cl = j. It runs until cx wraps back to 0 (65536 points).
; word/dword [bp+si] is a scratch slot for moving integers to and from the FPU.
; FPU stack at the top of every iteration: st0 = v, st1 = u, st2 = t.
plot_loop:
        ;--- bubble universe step ---------- st0          st1          st2       st3       st4  st5
        movzx   bx,ch                   ; bx = i
        mov     word[bp+si],bx          ; scratch = i (rewritten every point; shorter than an outer loop)
        fiadd   word[bp+si]             ; v+i          u            t
        fsincos                         ; cos(v+i)     sin(v+i)     u         t
        fld     dword[si+6]             ; r            cos(v+i)     sin(v+i)  u         t
        fimul   word[bp+si]             ; r*i          cos(v+i)     sin(v+i)  u         t
        fadd    st0,st3                 ; r*i+u        cos(v+i)     sin(v+i)  u         t
        fadd    st0,st4                 ; a=r*i+u+t    cos(v+i)     sin(v+i)  u         t
        fsincos                         ; cos(a)       sin(a)       cos(v+i)  sin(v+i)  u    t
        faddp   st2,st0                 ; sin(a)       v'=cos+cos   sin(v+i)  u         t
        faddp   st2,st0                 ; v'           u'=sin+sin   u         t
        fxch    st1                     ; u'           v'           u         t
        fstp    st2                     ; v'           u'           t        (old u overwritten by u')

        ;--- pixel index: ebx = (y_res/2 + v'*sy) * x_res + x_res/2 + u'*sx
        fld     st0                     ; v'           v'           u'        t
        fimul   word[si+2]              ; v'*sy        v'           u'        t
        fistp   dword[bp+si]            ; v'           u'           t        (scratch = y offset)
        mov     ebx,(y_res/2)           ; vertical centre
        add     ebx,dword[bp+si]
        imul    ebx,ebx,x_res           ; row start, in pixels
        fld     st1                     ; u'           v'           u'        t
        fimul   word[si]                ; u'*sx        v'           u'        t
        fistp   dword[bp+si]            ; v'           u'           t        (scratch = x offset)
        add     ebx,(x_res/2)           ; horizontal centre
        add     ebx,dword[bp+si]

        ;--- pixel colour
        fld     st0                     ; v'           v'           u'        t
        fimul   word[si+4]              ; v'*cm        v'           u'        t
        fabs                            ; |v'*cm|      v'           u'        t
        fistp   word[bp+si]             ; v'           u'           t        (scratch = |v'*cm|)
        mov     ax,cx                   ; ah = i, al = j
        shl     eax,8                   ; byte 2 = i (red), byte 1 = j (green), byte 0 free
        mov     al,byte[bp+si]          ; byte 0 (blue) = low byte of |v'*cm|; not in the
                                        ; original algorithm, adds a bit more colour
        cmp     al,ah                   ; order the two low channels so that blue >= green
        ja      .ordered
        xchg    al,ah                   ; blue <= green (unsigned): swap them
.ordered:

        ;--- plot the 32-bit pixel
        mov     [es:edi+ebx*4],eax
        inc     cx                      ; next j; carries into i
        jnz     plot_loop               ; falls through with cx = 0

;---end of frame: advance time, pace the animation, clear, poll the keyboard
; Entry state (left by the plot loop): cx = 0, FPU stack st0 = v', st1 = u', st2 = t,
; edi = frame buffer base, si = data_stuff.

;advance the time parameter
        fld     dword[si+10]            ;dt           v'          u'          t
        faddp   st3,st0                 ;v'           u'          t+dt

;frame pacing: let 3 vertical retraces pass before the next frame
        mov     dx,0x03da               ;VGA input status register 1
        mov     cl,3                    ;ch is 0 here, so cx = 3 retraces
vsync:
        in      al,dx
        test    al,8                    ;bit 3 set = inside vertical retrace
        jnz     vsync                   ;wait until the current retrace has ended
vsync2:
        in      al,dx
        test    al,8
        jz      vsync2                  ;wait until the next retrace starts
        loop    vsync

;clear the whole frame buffer every frame: x_res*y_res*4 bytes, 32 bytes per store.
;ymm0 is still zero from the vzeroall in the init code.
;
;NOTE: bp must not be modified anywhere in the program. It is the base of the
;[bp+si] scratch slot used by the plot loop; a per-frame `inc bp` moves that
;slot one byte further each frame, so within 65536 frames it sweeps the whole
;64 KiB segment, including the program image and the VBE mode info block at
;0x200 that holds the frame buffer pointer.
;
;Optional variant (disabled): clear only every second frame. It needs a frame
;counter that is NOT bp, e.g. a spare byte in memory:
;   inc  <counter>
;   test <counter>,1
;   jnz  skip_clear_screen
        mov     cx,x_res*y_res*4/32
clear_screen_loop:
        vmovdqa [es:edi],ymm0
        add     edi,32                  ;edi is reloaded at mainloop, so it may run off the end
        loop    clear_screen_loop       ;leaves cx = 0, as the plot loop expects
skip_clear_screen:

;exit or loop: keep running unless the byte read from the keyboard port is 0x01 (ESC make code)
        in      al,0x60
        cbw
        dec     ax                      ;ax = 0 only when al was 1
        jnz     mainloop

;shutdown. ecx is 0 here: its upper half was zeroed in the init code and only cx
;has been written since, and cx is 0 after the clear loop.
        xchg    eax,ecx                 ;eax = 0 (also clears ah for the mode-set call)
        mov     cr0,eax                 ;write 0 to cr0: clears PE (set by lmsw at start-up) and PG/CD/NW
        mov     al,0x3                  ;ax = 0x0003: BIOS set video mode 3 (text)
        int     0x10
        int     0x20                    ;terminate via int 0x20; a plain ret is not possible
                                        ;because the init code popped the word on the stack into es

; Constant table, addressed through si (= data_stuff) for the whole run.
; The field order is fixed: the code uses the literal offsets shown below.
data_stuff:
        dw      83                      ; [si+0]  sx = x scale factor = sy*0.75 because (4:3)/(16:9)
        dw      110                     ; [si+2]  sy = y scale factor
        dw      127                     ; [si+4]  cm = colour multiplier for the blue channel
        dd      0.0245436926            ; [si+6]  r  = PI*2/256     ***could be reduced to 2 bytes
        dd      0.007                   ; [si+10] dt = time step added to t once per frame
                                        ;                           ***could be reduced to 2 bytes

