// [ Chaos Equations ] 256-byte intro for ARM / RISC OS
// (c) 2021 Jin X (t.me/jinxonik, jin_x@list.ru)

// Party 800*600/8bpp version without fade

//// SETTINGS //////////////////////////////////////////////////////////////////////////////////////////////////////////

// Number of time steps computed for each displayed frame (loaded into r5 as
// the next_step loop counter).
way_length	=	1024
// Number of points iterated per time step (loaded into r2 as the next_star
// loop counter; the counter value also feeds the pixel colour).
star_count	=	768

//// CODE //////////////////////////////////////////////////////////////////////////////////////////////////////////////

.include "riscos_swi.inc"

.syntax unified
.thumb

// Entry point: one-time initialisation.
// Selects the screen mode, removes the cursor, creates a VFP context, reads
// the screen address and loads the registers that the rest of the program
// treats as constants:
//   r3 = coefs, r4 = screen_buffer, r7 = RNG factor, s9 = tmax = 3.0
//   r2 = 0 (reused as the clear value by the first copy_and_clear pass)
// r6 (the RNG state) is not initialised here: the seeding code below is
// commented out, so the seed is whatever r6 holds when new_equation is
// first reached.
// NOTE(review): instruction bytes of this program are also read as data
// (rng_factor below, dt_large = start + 0x3C), so changing the size or order
// of any instruction here changes those constants.
start:
		// Set screen mode
		movs	r0,0			// r0 = 0: OS_ScreenMode reason code
		movs	r1,32			// r1 = 32: mode specifier, passed as a plain number
						// NOTE(review): earlier comments called this a mode string;
						// presumably numbered mode 32 is the 800*600/8bpp mode
						// named in the file header - confirm
	rng_factor:				// RNG factor (data from code): the word formed by the two swi opcodes below
		swi	OS_ScreenMode		// set screen mode
		swi	OS_RemoveCursors	// remove cursor

		// Create VFP context
		lsls	r0,r1,26		// r0 = r1 << 26 = 0x80000000 (assumes the SWIs above left r1 = 32 - TODO confirm)
		adds	r0,3			// r0 = 0x80000003
		movs	r2,0			// r2 = 0 (also the value later stored to clear the buffer)
		movw	r10,VFPSupport_CreateContext & 0xFFFF
		movt	r10,VFPSupport_CreateContext >> 16 // r10 = SWI number
		swi	OS_CallASWI		// call SWI number r10 (used for values > 0xFF); r0, r1 = 32, r2 = 0 are its arguments

		// Read the screen address in place: the request block at screen_address
		// is also used as the result block
		adr	r0,screen_address
		movs	r1,r0			// write = read address
		swi	OS_ReadVduVariables	// screen address at screen_address

		// Other preparations
		adr	r3,coefs		// r3 = coefficient array
		adr	r4,screen_buffer	// r4 = screen buffer address

		// Seeding from the clock is disabled; r6 is used as the seed as-is
//		swi	OS_ReadMonotonicTime	// number of centiseconds since the last hard reset
//		movs	r6,r0			// r6 = random seed
		ldr	r7,rng_factor		// r7 = RNG factor (loads the instruction word at rng_factor)

		vmov	s9,3.0			// s9 = tmax = 3.0

// Generates a new equation: fills the 12 words at coefs with pseudo-random
// floats in the range -1..1 and resets the time variable t (s6) to -3.0.
// In:  r3 = coefs, r6 = RNG state, r7 = RNG factor
// Out: coefs[0..11] written, r6 advanced 12 times, s6 = -3.0
// Clobbers: r0, r1, s0, flags. Falls through into mainloop.
// NOTE(review): dt_large is defined as start + 0x3C, which appears to point at
// instruction bytes inside this block - confirm with a listing before
// changing anything here.
new_equation:	// New equation
		movs	r1,11*4			// r1 = byte offset of the last coefficient (11+1 coefs)
	next_coef:
		muls	r6,r7
		adds	r6,1			// r6 = r6 * r7 + 1 = random number
		vmov	s0,r6			// move the raw 32-bit integer into s0
		vcvt.f32.s32 s0,s0,31		// convert int to float and scale to range -1..1 (31 fraction bits)
		adds	r0,r1,r3		// r0 = address of coefs[r1 / 4]
		vstr	s0,[r0]			// store value to coefs
		subs	r1,4			// next lower offset; carry stays set until r1 goes below 0
		bcs	next_coef		// loop 12 times (offsets 44, 40, ..., 0)

		vmov	s6,-3.0			// s6 = t0 = -3.0

// Start of a frame: copies the off-screen buffer to the screen, one word
// (4 pixels at 8bpp) at a time from the highest offset downwards, and clears
// the buffer behind it.
// In:  r4 = screen_buffer, r2 = value stored to clear the buffer. r2 is 0
//      here: it is zeroed in start and afterwards left at 0 by the next_star
//      countdown (assuming the SWIs in between preserve r2).
// Clobbers: r0, r1, r5, flags.
// NOTE(review): the flags tested by bcs come from the subs at the top of the
// loop, and the body runs before the branch. After the pass with r1 = 0 the
// loop is taken once more with r1 = -4 (carry clear), so the body executes one
// extra time at offset -4. That pass stores to r5 - 4 (4 bytes below the
// screen address) and zeroes the word at r4 - 4, which is coefs + 44, the
// last of the 12 coefficients (screen_buffer = coefs + 12*4). As a result that
// coefficient is 0 whenever the calculations run. Possibly intentional
// ("11+1 coefs") - confirm before changing the branch condition.
mainloop:
		// Copy to screen and clear buffer
		ldr	r5,screen_address	// r5 = screen address
		ldr	r1,screen_size		// screen size in bytes
copy_and_clear:
		subs	r1,4			// step down one word; sets the carry tested by bcs below
		ldr	r0,[r4,r1]		// read 4 pixels
		str	r0,[r5,r1]		// store 4 pixels to screen
		str	r2,[r4,r1]		// clear 4 pixels in screen buffer
	fade_skip:				// not referenced in this file; presumably a leftover from the fade variant
		bcs	copy_and_clear		// loop for whole screen (ldr/str leave the flags from subs intact)

		// Calculations
		// Draws one frame into the screen buffer: way_length time steps, each
		// iterating the current equation star_count times from the start point
		// x = y = t and plotting every point that lands on the 800x600 screen.
		// Register roles inside this block:
		//   r5 = remaining time steps, r2 = remaining stars (also colour source)
		//   r3 = coefs (temporarily bit-inverted as a pass flag), r4 = screen_buffer
		//   r7 = RNG factor, r10/r11 = scratch
		//   s0,s1 (d0) = x', y'   s2,s3 (d1) = x, y   s4,s5 (d2) = x*x, y*y
		//   s6,s7 (d3) = t, t*t   s8 = dt for this step   s9 = tmax
		//   s10..s15 (d5..d7) = six coefficients of the current pass
		movs	r5,way_length
	next_step:
		vldr	s8,dt_large		// s8 = dt (large step); replaced by dt_small below if any point is plotted

		vmov	s2,s6			// s2 = x = t
		vmov	s3,s6			// s3 = y = t
		vmul.f32 s7,s6,s6		// s7 = t*t

		movs	r2,star_count
	next_star:
		vmul.f32 d2,d1,d1		// s4 = s2*s2 = x*x, s5 = s3*s3 = y*y
		vmov.f32 d0,0.0			// s0, s1 = x', y' = 0
		mov	r0,r3			// coefficient array
		// Two passes over six coefficients each. In the comments below the
		// text before "|" describes the first pass, the text after it the second.
	calc_again:
		vldmia	r0!,{d5,d6,d7}		// s10, s11 = coefs1, s12, s13 = coefs2, s14, s15 = coefs3
		vmla.f32 d0,d1,d5		// s0, s1 += s2, s3 * s10, s11 (x', y' | y', x' += x, y * coefs1)
		vmla.f32 d0,d2,d6		// s0, s1 += s4, s5 * s12, s13 (x', y' | y', x' += x*x, y*y * coefs2)
		vmla.f32 d0,d3,d7		// s0, s1 += s6, s7 * s14, s15 (x', y' | y', x' += t, t*t * coefs3)
		vmov	r11,r10,s0,s1		// r11 = s0, r10 = s1
		vmov	s0,s1,r10,r11		// swap s0 <--> s1 (s0 = y', s1 = x' | s0 = x', s1 = y')
		mvns	r3,r3			// toggle r3 between coefs and ~coefs; N flag marks the first pass
						// (assumes the coefs address has bit 31 clear - TODO confirm)
		bmi	calc_again		// repeat 1 more time for swapped x, y

		vmov	d1,d0			// s2, s3 = s0, s1 (x, y = x', y')
		vcvt.s32.f32 d0,d1,9		// convert floats to ints (s2, s3 --> s0, s1) and multiply 512x

		vmov	r0,r1,s0,s1		// r0, r1 = x_int, y_int
		cbnz	r0,x_ok
		cbz	r1,off_screen		// skip if r0 == r1 == 0 (wrong value)
	x_ok:
		adds	r0,800/2		// center x
		cmp	r0,800
		bhs	off_screen		// unsigned compare: rejects both x < 0 and x >= 800

		rsbs	r1,600/2		// center y (r1 = 300 - y_int, so positive y goes up)
		cmp	r1,600
		bhs	off_screen		// unsigned compare: rejects both y < 0 and y >= 600

		movs	r10,800			// r10 = bytes per row (the SWI number set in start is no longer needed)
		mla	r0,r1,r10,r0		// r0 = r0 + r1 * 800 (x_int + y_int * 800)
		mul	r1,r2,r7		// r1 = color = star number * RNG factor
		strb	r1,[r4,r0]		// put pixel (low byte of r1)

		vldr	s8,dt_small		// s8 = dt (small step)
	off_screen:
		subs	r2,1
		bne	next_star		// loop star_count times

		vadd.f32 s6,s8			// t += dt
		vcmp.f32 s6,s9			// t >= tmax?
		vmrs	APSR_nzcv,FPSCR		// copy the VFP compare flags to the CPU flags
		bhs	new_equation		// yes, new equation (it falls through to mainloop, which shows what was drawn so far)

		subs	r5,1
		bne	next_step		// loop way_length times

		vldr	s0,dt_return
		vsub.f32 s6,s0			// decrease t back

		// Keyboard check and exit
		swi	OS_ReadEscapeState	// is ESC pressed?
		bcc	mainloop		// carry clear = no, show this frame and draw the next one

		swi	OS_Exit			// yes, exit to OS

//// DATA //////////////////////////////////////////////////////////////////////////////////////////////////////////////

.align	2					// data align (by word = 4 bytes)

// dt_large reserves no storage: it names the word at offset 0x3C from start,
// i.e. instruction bytes read as a float by vldr.
// NOTE(review): the value therefore depends on the exact encoding and layout
// of the code before that offset; ~0.000126 is the figure given by the
// original comment - confirm against a listing after any code change.
dt_large = start + 0x3C				// high delta t (for off-screen pixels), ~ 0.000126

dt_small:
.single	0.00005					// low delta t (for on-screen pixels)

dt_return:
.single	0.0384					// t decrease value (subtracted from t after each frame's way_length steps)

screen_size:
.word	800*600					// screen size in bytes (one byte per pixel)

// Request block for OS_ReadVduVariables. start passes this address as both the
// input and the output block, so the first word is overwritten with the value
// read and is then loaded by mainloop as the screen address.
screen_address:
.word	148					// input block to read screen address
.word	-1					// request block terminator

// coefs marks the end of the assembled image; no storage is reserved for it.
// new_equation writes 12 words here.
coefs:						// equation coefficients

// The off-screen buffer starts right after the 12 coefficient words and also
// has no storage reserved in the image.
// NOTE(review): presumably relies on the memory following the loaded image
// being writable for at least 12*4 + 800*600 bytes - confirm.
screen_buffer = coefs + 12*4			// screen buffer address
