/*
 * Copyright (c) 2021, Jeffrey Lee
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met: 
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#ifndef DEMO
#include <stdlib.h>
#endif
#include <float.h>
#ifndef DEMO
#include "kernel.h"
#include "swis.h"
#endif

#define NUM_VORONOI 32

#include "triangulate.cc"

#include "osinterf.h"

#include "scalar.h"
#include "soa2.h"
#include "soa4.h"
using namespace scalar;

#include "plane2d.h"

typedef plane2d<vec2f> plane2df;

/*
 * Build-configuration switches, intended for use in #if directives.
 *   USE_MEMSET - 1 when this file must supply its own memset() (DEMO builds).
 *   PRINTRATE  - 1 when the frame-rate readout is compiled in (non-DEMO builds).
 *
 * These are defined as plain 0/1 values: a macro that expands to
 * "defined(X)" has undefined behaviour when evaluated by #if, so the DEMO
 * test is done once here instead.
 */
#ifdef DEMO
#define USE_MEMSET 1
#define PRINTRATE 0
#else
#define USE_MEMSET 0
#define PRINTRATE 1
#endif

#ifndef DEMO
/* Value returned in R1 by the Sound_AttachVoice call in sound_init();
 * handed back to Sound_AttachVoice by restoresound(). */
int old_voice;

/*
 * Exit handler (registered with atexit() by sound_init()).
 * Issues Sound_Configure with R0=1 and R1-R4=0, then Sound_AttachVoice for
 * channel 1 with the saved old_voice.
 * NOTE(review): presumably this drops back to a single sound channel and
 * re-attaches the voice that was on channel 1 at start-up; note that the
 * original channel count is not saved, 1 is simply assumed - confirm.
 * Errors from the SWIs are ignored (_swix return values are discarded).
 */
static void restoresound()
{
	_swix(Sound_Configure,_INR(0,4),1,0,0,0,0);
	_swix(Sound_AttachVoice,_INR(0,1),1,old_voice);
}

/*
 * Prepare the sound system for the soundtrack.
 *
 * Saves the value Sound_AttachVoice returns for channel 1 in old_voice,
 * asks Sound_Configure for 4 (R0=4, other registers 0), registers
 * restoresound() to run at exit, and then attaches one named percussion
 * voice to each of channels 1-4.  SWI errors are ignored.
 */
static void sound_init()
{
	/* Voice for channel N is voice_names[N-1] */
	static const char *const voice_names[] = {
		"Percussion-Soft",
		"Percussion-Medium",
		"Percussion-Snare",
		"Percussion-Noise",
	};

	_swix(Sound_AttachVoice,_INR(0,1)|_OUT(1),1,0,&old_voice);
	_swix(Sound_Configure,_INR(0,4),4,0,0,0,0);
	atexit(restoresound);

	for (int channel = 1; channel <= 4; channel++)
	{
		_swix(Sound_AttachNamedVoice,_INR(0,1),channel,voice_names[channel-1]);
	}
}
#endif

/*
 * Trigger a sound by issuing SWI 0x40186 directly via inline assembly.
 *
 * r0val is passed in r0; r1 is always the constant 0x144000.
 * NOTE(review): 0x40186 is presumably Sound_ControlPacked (r0 = amplitude<<16
 * | channel, r1 = duration<<16 | pitch) - confirm against the PRM.
 * NOTE(review): this is the non-X form of the SWI and the asm statement
 * declares no outputs or clobbers, so it relies on the SWI preserving
 * registers and not raising an error - confirm.
 */
static inline void playsound(int r0val)
{
	int r1val = 0x144000;
	/* Pin the two arguments to the registers the SWI reads them from */
	register int r0valr asm ("r0") = r0val;
	register int r1valr asm ("r1") = r1val;
	asm ("swi 0x40186" : : "r" (r0valr), "r" (r1valr));
}

/*
 * Screen description, filled in by screen_setup().
 *
 * The struct doubles as the in/out block for readvduvariables():
 * screen_setup() first stores a variable number in each field (11, 12, 148,
 * 6, terminated by -1) and the call replaces them with the values read, so
 * the field order and sizes must not change.
 * NOTE(review): 11/12/148/6 are presumably the RISC OS VDU variables
 * XWindLimit, YWindLimit, ScreenStart and LineLength - confirm.
 */
typedef struct {
	unsigned int width;	/* Screen width in pixels (value read + 1) */
	unsigned int height;	/* Screen height in pixels (value read + 1) */
	char *screen;		/* Base address used for plotting */
	unsigned int stride;	/* Bytes from one row to the next */
	int term;		/* -1 list terminator for readvduvariables() */
} vduvars;

/* Current screen geometry; see screen_setup() */
vduvars vdu;
/*
 * Block passed to setmode().  Elements 1 and 2 are overwritten with the
 * current screen width and height by screen_setup(); if the first setmode()
 * fails, element 5 is overwritten with -1 and the call is retried.
 * NOTE(review): presumably a RISC OS mode selector block (flags, xres, yres,
 * log2bpp=5, frame rate=-1, then a mode-variable pair 0 -> 4<<12 and a -1
 * terminator), so the retry drops the mode-variable pair - confirm.
 */
int mode[] = {1,0,0,5,-1,0,4<<12,-1};

/*
 * One Voronoi site: where it is, how it is moving and what colour its
 * cell is drawn in.  All members are public.
 */
struct voronoi
{
	vec2f pos;	/* Current position of the site */
	vec2f dir;	/* Velocity; main() adds dir*delta-time to pos each frame */
	vec3f col;	/* Colour used when plotting the cell */
	vec3f col0;	/* Blend endpoint weighted by delta */
	vec3f col1;	/* Blend endpoint weighted by (1-delta) */
	float delta;	/* Blend factor between col1 and col0 */
};

/*
 * Factor applied to plane distances before they are fed to dist_funcK();
 * plot_voronoi_row() also uses 1/SCALE as the inset of the solid-filled
 * interior from each cell edge.
 */
#define SCALE 40.0f

/*
 * Read the current screen geometry, switch to the mode described by the
 * global mode[] block at that resolution, then read the geometry again so
 * that vdu describes the new mode.
 *
 * Returns 0 on success.  If setmode() fails twice (the second attempt is
 * made with mode[5] set to -1) the second call's result is returned, cast
 * to int.
 */
static int screen_setup()
{
	for (int pass = 0; pass < 2; ++pass)
	{
		/* Each field holds the number of the variable to read; the call
		   overwrites it with the value */
		vdu.width = 11;
		vdu.height = 12;
		vdu.screen = (char *) 148;
		vdu.stride = 6;
		vdu.term = -1;
		readvduvariables(&vdu);
		/* Values read are one less than the pixel dimensions */
		++vdu.width;
		++vdu.height;

		/* Only the first pass changes mode; the second just re-reads */
		if (pass != 0)
			continue;

		mode[1] = vdu.width;
		mode[2] = vdu.height;
		if (!setmode(mode))
			continue;

		/* First attempt failed: truncate the block and try once more */
		mode[5] = -1;
		void *err = setmode(mode);
		if (err)
			return (int) err;
	}
	return 0;
}

/*
 * Deterministic pseudo-random float in [0,1).
 *
 * A 32-bit linear congruential generator (multiplier 22695477, increment 1)
 * with a fixed seed; each call returns the low 24 bits of the state *before*
 * it is advanced, divided by 2^24.
 */
static float randf()
{
	static unsigned int state = 0xc1d846a;
	const unsigned int current = state;
	state = current*22695477+1;
	return (float)(current & 0xffffff)/16777216.0f;
}

#if USE_MEMSET
/*
 * Minimal memset() with C linkage, compiled only when USE_MEMSET is true.
 * NOTE(review): presumably present because DEMO builds do not link the C
 * library but the compiler may still emit calls to memset - confirm.
 *
 * Stores (char)val into each of the len bytes at dest, working from the
 * last byte down to the first, and returns dest.
 */
extern "C" {
void *memset(void *dest,int val,size_t len)
{
	/* len-- tests the old value, so the body sees indices len-1 .. 0 */
	while(len--)
	{
		((char *)dest)[len] = val;
	}
	return dest;
}
}
#endif

/*
 * Build the plane that separates site a from site b.
 *
 * The plane is constructed from the normalised direction b->a and the
 * midpoint of the two sites.  The midpoint is also written to *pos.
 */
static plane2df separating_plane(const voronoi &a,const voronoi &b,vec2f *pos)
{
	vec2f normal = normalise(a.pos-b.pos);
	vec2f midpoint = (a.pos+b.pos)*0.5f;
	*pos = midpoint;
	return plane2df(normal, midpoint);
}

#define PRECALC_N0
//#define PRECALC_DELTA // Adds a bit too much bloat for not much benefit

class vor_plane {
public:
	plane2df p;
	vec2f pos;

	inline void precalc(float coord_step)
	{
		vec2f n = p.normal();
#ifdef PRECALC_N0
		if (abs(n[0]) > 0.001f)
			m_recp_n0 = recp(n[0]);
		else
			m_recp_n0 = 0.0f;
#endif
#ifdef PRECALC_DELTA
		m_delta = n[0]*SCALE*coord_step;
#endif
	}

	inline vec1f recp_n0() const
	{
#ifdef PRECALC_N0
		return m_recp_n0;
#else
		return recp(p.normal()[0]);
#endif
	}

	inline vec1f delta(float coord_step) const
	{
#ifdef PRECALC_DELTA
		return m_delta;
#else
		return p.normal()[0]*SCALE*coord_step;
#endif
	}

private:
#ifdef PRECALC_N0
	float m_recp_n0;
#endif
#ifdef PRECALC_DELTA
	float m_delta;
#endif
};

typedef struct {
	vor_plane p[NUM_VORONOI];
	int num;
} vor_planes;

/* SOA */

/*
 * Working data for dist_funcK(): for each active plane, the current
 * distance value (type T) and the scalar amount added to it after every
 * evaluation.
 */
template<typename T> struct plane_dataT {
	vec1f delta[NUM_VORONOI];	/* Per-plane increment */
	T dist[NUM_VORONOI];		/* Per-plane running distance */
};

/*
 * Evaluate the edge-shading factor for the current pixel group and advance
 * every plane's running distance to the next group.
 *
 * For each of the first N entries of pd this takes the stored distance d,
 * clamps it below at 0, forms mls(one,d,d) and multiplies the results
 * together; the product, clamped below at 0, is returned.  As a side effect
 * each pd->dist[i] has its pd->delta[i] added to it.
 * NOTE(review): mls(a,b,c) is presumably a - b*c (multiply-subtract) -
 * confirm against the vector headers.
 *
 * pd - plane data; entries [0,N) are read and updated
 * N  - number of active planes.  With no planes (N < 1) the empty product
 *      1.0 is returned and pd is left untouched.
 */
template<typename T>
static inline T dist_funcK(plane_dataT<T> *pd,int N)
{
	T one = 1.0f;
	/* The first iteration below is peeled and the loop terminates on
	   pointer equality, so an empty set must be rejected up front:
	   otherwise an unset entry would be read and the loop would run off
	   the end of the arrays. */
	if (N < 1)
		return one;
	T d1,d2;
	T dist;
	const vec1f *deltas = pd->delta;
	T *dists = pd->dist;
	T *end = dists+N;
	/* Unroll the loop a little bit: the first plane initialises the
	   product directly, saving a multiply by 1 */
	d1 = *dists;
	d2 = max(d1,0.0f);
	d2 = mls(one,d2,d2);
	dist = d2;
	*dists++ = d1+T::vld1(deltas++);
	while (dists != end)
	{
		d1 = *dists;
		d2 = max(d1,0.0f);
		d2 = mls(one,d2,d2);
		dist *= d2;
		*dists++ = d1+T::vld1(deltas++);
	}
	return max(dist,0.0f);
}

/* Background intensity that cell colours are blended towards at cell edges
 * (see plot_voronoi_row); main() sets it to 255/256 on a pulse note and
 * multiplies it by 0.9 every frame. */
static float bgcol;

/*
 * Plot one screen row of one Voronoi cell.
 *
 * Template parameters:
 *   F1   - vector float type holding one value per pixel of a group
 *   F3   - matching colour type
 *   I1   - type written to the screen for one pixel group
 *   SEP2 - log2 of the number of pixels handled per step (1<<SEP2)
 *
 * vor        - array of all sites; only vor[i].col is read
 * i          - index of the cell being drawn
 * coord      - coordinate of the row; element 1 is the row's Y, element 0
 *              is overwritten locally with the left edge of the span
 * coord_step - coordinate distance between adjacent pixels
 * y          - row number, used to address the screen
 * planes     - bounding planes of cell i
 *
 * Returns false if the cell does not cover any pixels of this row (nothing
 * is drawn), true otherwise.
 *
 * The span between the outer plane crossings is drawn with the cell colour
 * blended towards bgcol by dist_funcK(); the middle part of the span that is
 * at least 1/SCALE inside every nearby plane is filled with the plain cell
 * colour without evaluating dist_funcK().
 * Screen addressing assumes 4 bytes per pixel (left_px*4).
 */
template<typename F1,typename F3,typename I1,const int SEP2>
static bool plot_voronoi_row(const voronoi *vor,int i,vec2f coord,float coord_step,int y,const vor_planes &planes)
{
	/* Left and right coords of the interior */
	vec1f left = 0.0f,right = 2.0f;
	for(int j=0;j<planes.num;j++)
	{
		/* Calculate crossing point */
		vec2f n = planes.p[j].p.normal();
		if (abs(n[0]) > 0.001f)
		{
			vec1f crossing = -(coord[1]*n[1] + planes.p[j].p.dist())*planes.p[j].recp_n0();
			/* Is this a crossing in to or out of the cell? */
			if (n[0] > 0.0f)
			{
				/* Crossing in */
				left = max(left,crossing);
			}
			else
			{
				/* Crossing out */
				right = min(right,crossing);
			}
		}
		else if (planes.p[j].p.dist(coord) < 0.0f)
		{
			/* Horizontal plane, and we're on the wrong side of it */
			return false;
		}
	}
	/* Convert the span to pixels, widened outwards to whole pixel groups
	   and clipped to the right-hand edge of the screen */
	float pix_scale = recp(coord_step);
	int left_px = left*pix_scale;
	int right_px = right*pix_scale;
	const int SEPmask = (1<<SEP2)-1;
	left_px = left_px &~SEPmask;
	right_px = (right_px+SEPmask)&~SEPmask;
	if (right_px > (int) vdu.width)
		right_px = vdu.width;
	if (left_px >= right_px)
		return false;
	I1 *screen = (I1 *) (vdu.screen + vdu.stride*y + left_px*4);
	coord.set_elem(0, left_px*coord_step);

	/* Second pass: gather the planes that come close enough to the span to
	   affect shading, and find the inner span that none of them affect */
	plane_dataT<F1> pd;
	int k=0;
	left = 0.0f; right = 2.0f;
	float magic_min_value = coord_step*(-SCALE*(1<<SEP2));
	for(int j=0;j<planes.num;j++)
	{
		{
			/* One lane per pixel of the first group; pd.delta ends up as
			   the per-group step, i.e. delta*(1<<SEP2) for SEP2 of 1 or 2 */
			vec1f delta = planes.p[j].delta(coord_step);
			vec1f dist = planes.p[j].p.dist(coord)*SCALE;
			for(int l=0;l<(1<<SEP2);l++)
			{
				pd.dist[k].set_elem(l,dist);
				dist += delta;
			}
			if (SEP2 == 2)
				delta += delta;
			pd.delta[k] = delta+delta;
		}

		/* Scaled distance at the first and last group of the span; keep the
		   plane only if either end is within shading range.  A rejected
		   plane's slot k is simply overwritten by the next candidate. */
		float d0 = pd.dist[k][0];
		float d1 = pd.dist[k][0] + pd.delta[k]*((right_px-left_px)>>SEP2);
		if (((d0 >= magic_min_value) && (d0 < 1.0f)) || ((d1 >= magic_min_value) && (d1 < 1.0f)))
		{
			/* Factor the "1-d2" out of dist_funcK */
			pd.dist[k] = F1(1.0f)-pd.dist[k];
			pd.delta[k] = -pd.delta[k];
			k++;
			/* Calculate inner edge */
			plane2df p2(planes.p[j].p.normal(),planes.p[j].p.dist() - (1.0f/SCALE));
			vec2f n = p2.normal();
			if (abs(n[0]) > 0.001f)
			{
				vec1f crossing = -(coord[1]*n[1] + p2.dist())*planes.p[j].recp_n0();
				/* Is this a crossing in to or out of the cell? */
				if (n[0] > 0.0f)
				{
					/* Crossing in */
					left = max(left,crossing);
				}
				else
				{
					/* Crossing out */
					right = min(right,crossing);
				}
			}
			else if (p2.dist(coord) < 0.0f)
			{
				/* Horizontal plane, and we're on the wrong side of it */
				left = right = 1.0f;
			}
		}
	}
	/* Inner (solid) span in pixels, shrunk inwards to whole pixel groups.
	   If it is empty or starts beyond the outer span, collapse it onto
	   right_px so that the solid-fill path is never entered. */
	int left_px2 = left*pix_scale;
	int right_px2 = right*pix_scale;
	left_px2 = (left_px2+SEPmask) & ~SEPmask;
	right_px2 = right_px2 & ~SEPmask;
	if (right_px2 > (int) vdu.width)
		right_px2 = vdu.width;
	if ((left_px2 > right_px) || (left_px2 >= right_px2))
	{
		left_px2 = right_px;
		right_px2 = right_px;
	}

	int x;
	F3 col4 = vor[i].col;
	F1 bgcol2 = bgcol;
	/* Shaded colour is bgcol + (col - bgcol)*dist */
	F3 col5 = col4-bgcol2;

	/* Disabled alternative: three separate loops (left edge, solid middle,
	   right edge) */
#if 0 // 100 bytes

	for(x=left_px;x<left_px2;x+=(1<<SEP2))
	{
		F1 dist = dist_funcK(&pd,k);
		F3 col = mla(F3(bgcol2),col5,dist);
		*screen++ = rgb_fast(col);
	}
	if (left_px2 >= right_px2)
	{
		return true;
	}
	I1 coli = rgb_fast(col4);
	for(;x<right_px2;x+=(1<<SEP2))
	{
		*screen++ = coli;
	}
	if (right_px2 >= right_px)
	{
		return true;
	}
	for(int j=0;j<k;j++)
	{
		pd.dist[j] += pd.delta[j]*((x-left_px2)>>SEP2);
	}
	for(;x<right_px;x+=(1<<SEP2))
	{
		F1 dist = dist_funcK(&pd,k);
		F3 col = mla(F3(bgcol2),col5,dist);
		*screen++ = rgb_fast(col);
	}

#else

	/* Active version: a single shaded loop that, on reaching the start of
	   the solid span, fills it in one go and then skips the plane distances
	   forward by the number of groups filled */
	for(x=left_px;x<right_px;x+=(1<<SEP2))
	{
		if (x == left_px2)
		{
			I1 coli = rgb_fast(col4);
			for(;x<right_px2;x+=(1<<SEP2))
			{
				*screen++ = coli;
			}
			if (right_px2 >= right_px)
			{
				return true;
			}
			for(int j=0;j<k;j++)
			{
				pd.dist[j] += pd.delta[j]*((x-left_px2)>>SEP2);
			}
		}
		F1 dist = dist_funcK(&pd,k);
		F3 col = mla(F3(bgcol2),col5,dist);
		*screen++ = rgb_fast(col);
	}

#endif

	return true;
}

/*
 * Draw every Voronoi cell to the screen, one row at a time.
 *
 * vor        - the NUM_VORONOI sites
 * planes     - bounding planes for each site (planes[i] belongs to vor[i])
 * coord_step - coordinate distance between adjacent pixels/rows
 * rate       - value handed to printrate() when PRINTRATE is enabled
 *
 * Two 32-bit masks track progress per cell: a bit in `seen` is set once a
 * cell has produced at least one row, and a bit in `done` is set when a
 * previously seen cell first fails to produce a row, after which that cell
 * is skipped for the rest of the frame.  The masks hold one bit per cell,
 * so NUM_VORONOI must not exceed 32.
 */
static void plot_voronoi(const voronoi *vor,const vor_planes planes[NUM_VORONOI],float coord_step,int rate)
{
	vec2f coord = vec2f(0,0);
	uint32_t seen = 0;
	uint32_t done = 0;
	for(unsigned int y=0;y<vdu.height;y++)
	{
#ifdef VSYNC
		static int fdelta = 0;
		if (y==vdu.height>>1)
		{
			int cfs;
			_swix(OS_Byte,_INR(0,2)|_OUT(1),176,0,0,&cfs);
			fdelta++;
			fdelta += (int8_t)(cfs&255);
			if (cfs == 255)
				_swix(OS_Byte,_IN(0),19);
		}
#endif
#if PRINTRATE
		if(y==8)
		{
			printrate(rate);
		}
#endif
		for(int i=0;i<NUM_VORONOI;i++)
		{
			/* Build the mask in an unsigned 32-bit type: with 32 cells i
			   reaches 31, and shifting a signed 1 into the sign bit is not
			   portable */
			const uint32_t bit = (uint32_t)1 << i;
			if (done & bit)
				continue;
			/* 4-pixel alternative:
			   plot_voronoi_row<soa4::vec1f,soa4::vec3f,soa4::vec1i,2>(vor,i,coord,coord_step,y,planes[i]) */
			if (plot_voronoi_row<soa2::vec1f,soa2::vec3f,soa2::vec1i,1>(vor,i,coord,coord_step,y,planes[i]))
				seen |= bit;
			else if (seen & bit)
				done |= bit;
		}
		coord.set_elem(1,coord[1] + coord_step);
	}
}

/*
 * Record the boundary between sites i and j, once per unordered pair.
 *
 * vor        - the sites
 * planes     - per-site plane lists; one plane is appended to planes[i] and
 *              its negation to planes[j]
 * coord_step - passed through to vor_plane::precalc()
 * seen       - per-site bitmask of partners already handled; for a pair the
 *              bit for the higher index is stored in the lower index's word
 * i, j       - indices of the two sites (either order)
 *
 * The masks hold one bit per site, so NUM_VORONOI must not exceed 32.
 */
static void add_plane(voronoi vor[NUM_VORONOI], vor_planes planes[NUM_VORONOI], float coord_step,uint32_t *seen,int i,int j)
{
	/* Normalise the pair so that i <= j */
	if (i > j)
	{
		int t = i; i = j; j = t;
	}
	/* Unsigned 32-bit mask: j can be 31, and shifting a signed 1 into the
	   sign bit is not portable */
	const uint32_t bit = (uint32_t)1 << j;
	if (seen[i] & bit)
		return;
	seen[i] |= bit;
	/* Add the first plane */
	int k = planes[i].num++;
	planes[i].p[k].p = separating_plane(vor[i],vor[j],&planes[i].p[k].pos);
	planes[i].p[k].precalc(coord_step);
	/* Flip it to create the second plane */
	int k2 = planes[j].num++;
	planes[j].p[k2].p = -planes[i].p[k].p;
	planes[j].p[k2].pos = planes[i].p[k].pos;
	planes[j].p[k2].precalc(coord_step);
}

/*
 * Rebuild the bounding planes of every cell from the current site
 * positions.
 *
 * The sites are first sorted in place by ascending X, then passed to
 * Triangulate(); every edge of every resulting triangle becomes a pair of
 * planes via add_plane().
 *
 * vor        - the sites; reordered by this call
 * planes     - output; every planes[i].num is reset before planes are added
 * coord_step - passed through to add_plane()
 */
static inline void do_triangulate(voronoi vor[NUM_VORONOI], vor_planes planes[NUM_VORONOI], float coord_step)
{
	/* Insertion sort on the X coordinate */
	for (int next = 1; next < NUM_VORONOI; next++)
	{
		voronoi key = vor[next];
		int slot = next;
		/* Shift larger entries up until key's position is found */
		while ((slot > 0) && !(vor[slot-1].pos[0] <= key.pos[0]))
		{
			vor[slot] = vor[slot-1];
			slot--;
		}
		if (slot != next)
			vor[slot] = key;
	}

	/* Hand the positions to the triangulator (which gets 3 spare entries) */
	XYZ points[NUM_VORONOI+3];
	for (int n = 0; n < NUM_VORONOI; n++)
	{
		points[n].x = vor[n].pos[0];
		points[n].y = vor[n].pos[1];
	}
	ITRIANGLE tris[NUM_VORONOI*3];
	int ntri;
	Triangulate(NUM_VORONOI,points,tris,&ntri);

	/* Start every cell with no planes and no recorded partners */
	uint32_t seen[NUM_VORONOI];
	for (int n = 0; n < NUM_VORONOI; n++)
	{
		planes[n].num = 0;
		seen[n] = 0;
	}

	/* Each triangle edge joins two neighbouring sites */
	for (int t = 0; t < ntri; t++)
	{
		const ITRIANGLE &tri = tris[t];
		add_plane(vor,planes,coord_step,seen,tri.p1,tri.p2);
		add_plane(vor,planes,coord_step,seen,tri.p2,tri.p3);
		add_plane(vor,planes,coord_step,seen,tri.p3,tri.p1);
	}
}

/* Y placement of sites, as used by main():
 *   YMIN/YRANGE   - initial spawn: Y is YMIN plus up to YRANGE
 *   YCHECK        - a site whose Y drops below this is respawned
 *   YMIN2/YRANGE2 - respawn: Y is YMIN2 plus up to YRANGE2
 */
#define YMIN -0.25f
#define YRANGE 1.25f
#define YCHECK -0.25f
#define YMIN2 1.125f
#define YRANGE2 0.125f

/*
 * (Re)initialise a site with a random position, colour and velocity.
 *
 * vor    - site to initialise; every member is written
 * ymin   - lowest Y the site may be given
 * yrange - Y is ymin plus a random amount below yrange
 * xrange - X is a random amount below xrange
 *
 * The velocity has X in [-1,1) and Y in (-0.35,-0.1].  delta is set to 1 so
 * that the colour blend in main() starts out entirely on col0.
 *
 * NOTE(review): the order in which the randf() calls inside each
 * constructor argument list are evaluated is chosen by the compiler.
 */
void spawn(voronoi *vor,float ymin,float yrange,float xrange)
{
	vor->pos = vec2f(randf()*xrange,randf()*yrange+ymin);
	vor->col = vec3f(randf(),randf(),randf());
	vor->dir = vec2f(randf()*2.0f-1.0f,randf()*-0.25f-0.1f);
	vor->col0 = vor->col;
	/* col1 is multiplied by (1-delta) == 0 on the next blend, but it must
	   still hold a defined, finite value: a freshly declared site would
	   otherwise be read uninitialised (and 0*NaN or 0*Inf is NaN) */
	vor->col1 = vor->col;
	vor->delta = 1.0f;
}

/* HACK: Make this non-const so that GCC won't unroll a loop and add 300-400 bytes of code.
 * Holds the same value as NUM_VORONOI; used as the bound of the per-site loops in main(). */
int num_voronoi = NUM_VORONOI;

typedef uint8_t note_t;

#define DUR 24
#define FLAG_PULSE 64
#define FLAG_HIT 128
#define DUR_MASK (DUR | (DUR<<1))

#define NOTE(n,f,d) ((n+1) + (FLAG_HIT | f) + DUR*d)

static const note_t notes[] = {
NOTE(0,FLAG_PULSE,2),NOTE(1,0,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,2),NOTE(1,FLAG_PULSE,2),
NOTE(0,FLAG_PULSE,2),NOTE(1,0,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,1),NOTE(1,0,1),NOTE(1,0,1),NOTE(1,0,1),
NOTE(0,FLAG_PULSE,2),NOTE(1,0,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,2),NOTE(1,FLAG_PULSE,2),
NOTE(0,FLAG_PULSE,2),NOTE(1,0,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,1),NOTE(1,0,1),NOTE(1,0,1),NOTE(1,0,1),
NOTE(0,FLAG_PULSE,2),NOTE(1,0,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,2),NOTE(1,FLAG_PULSE,2),
NOTE(3,FLAG_PULSE,1),NOTE(3,0,1),NOTE(3,FLAG_PULSE,1),NOTE(3,0,1),
NOTE(2,FLAG_PULSE,1),NOTE(2,0,1),NOTE(2,FLAG_PULSE,1),NOTE(2,0,1),
NOTE(1,FLAG_PULSE,1),NOTE(1,0,1),NOTE(1,FLAG_PULSE,1),NOTE(1,0,1),
NOTE(0,FLAG_PULSE,1),NOTE(0,0,1),NOTE(0,FLAG_PULSE,1),NOTE(0,0,1),
};

#define NOTEPARAM(X) (((*X) & 7) - (15<<16))
#define NOTEDUR(X) (*X)

/* Separating planes between the different voronoi: planes[i] holds the
 * bounding planes of cell i.  Rebuilt every frame by do_triangulate(). */
vor_planes planes[NUM_VORONOI];

/*
 * Program entry point: set up the screen, spawn the sites and run the
 * animation/soundtrack loop until readescapestate() reports true.
 *
 * Returns the non-zero result of screen_setup() if the screen could not be
 * set up, otherwise 0.  argc/argv are unused.
 */
int main(int argc,char **argv)
{
	int i = screen_setup();
	if (i)
	{
		return i;
	}
 
	/* Coordinates are scaled so that the screen height is 1.0; the width is
	   therefore the aspect ratio */
	float coord_step = recp(vdu.height);
	float aspect = ((float)vdu.width)*coord_step;

	voronoi vor[NUM_VORONOI];
	for(int i=0;i<num_voronoi;i++)
	{
		spawn(&vor[i],YMIN,YRANGE,aspect);
	}

#ifndef DEMO
	sound_init();
	/* NOTE(review): OS_Byte 106 presumably selects the mouse pointer shape - confirm intent */
	_swix(OS_Byte,_INR(0,1),106,1);
#endif

#if PRINTRATE
	clock_t t = clock();
	int frames = 0;
#endif
	clock_t t0 = clock();
	int rate=0;

	const note_t *current_note = notes;
	int note_time = 0;
	while (!readescapestate())
	{
		do_triangulate(vor, planes,coord_step);
		plot_voronoi(vor,planes,coord_step,rate);
		/* Time elapsed since the previous frame, in clock() ticks */
		clock_t t1 = clock();
		int idelta = t1-t0;
		note_time -= idelta;
		int flags = 0;
		/* Background flash decays by a fixed factor per frame */
		bgcol *= 0.9f;
		if (note_time <= 0)
		{
			/* Current note has run out: play the next one and schedule the
			   one after it, wrapping at the end of the table */
			flags = NOTEDUR(current_note);
			playsound(NOTEPARAM(current_note));
			note_time += flags & DUR_MASK;
			current_note++;
			if (current_note == notes+(sizeof(notes)/sizeof(note_t)))
				current_note = notes;
			if (flags & FLAG_PULSE)
				bgcol = 255.0f/256.0f;
		}
		/* NOTE(review): assumes clock() ticks are centiseconds, making delta
		   a time in seconds - confirm CLOCKS_PER_SEC on the target */
		float delta = idelta*0.01f;
		t0 = t1;
		for(int i=0;i<num_voronoi;i++)
		{
			/* Blend the displayed colour from col1 towards col0; when the
			   blend completes, pick a new random target and start again */
			vor[i].col = vor[i].col0*vor[i].delta + vor[i].col1*(1.0f-vor[i].delta);
			vor[i].delta += delta;
			if (vor[i].delta >= 1.0f)
			{
				vor[i].col1 = vor[i].col0;
				vor[i].col0 = vec3f(randf(),randf(),randf());
				vor[i].delta = 0.0f;
			}
			/* Move the site, then update its Y velocity: a constant
			   2*delta is added each frame and 0.7 is subtracted on any
			   frame in which a note carrying FLAG_HIT started */
			vor[i].pos += vor[i].dir*delta;
			float yv = vor[i].dir[1];
			yv += delta+delta;
			if (flags & FLAG_HIT)
				yv -= 0.7f;
			vor[i].dir.set_elem(1,yv);
			if (vor[i].pos[1] < YCHECK)
			{
				/* Gone past the YCHECK limit: respawn in the YMIN2 band */
				spawn(&vor[i],YMIN2,YRANGE2,aspect);
			}
			else
			{
				/* Bounce off the left and right edges by forcing the X
				   velocity to point back towards the screen */
				float x = vor[i].pos[0];
				float axd = abs(vor[i].dir[0]);
				if (x < 0.0f)
					vor[i].dir.set_elem(0,axd);
				if (x > aspect)
					vor[i].dir.set_elem(0,-axd);
			}
		}
#if PRINTRATE
		/* Average clock ticks per frame since start-up, scaled by 10000;
		   displayed by plot_voronoi() on the next frame */
		clock_t t2 = clock();
		frames++;
		rate = (10000*(t2-t))/frames;
#endif
	}
	return 0;
}
