// YM emulator for the NGPC
// Mic, 2013

#include "ym_emu.h"
#include "ngpc.h"


/*
 * Output level for each of the 16 YM volume settings (index 0 = silent,
 * index 15 = loudest).
 *
 * The curve is roughly logarithmic, approximately
 *     80 * pow(10, (i - 15) / 6.67)
 * The full-scale value of 80 means that three channels at maximum level
 * add up to 240, which still fits in one byte.
 */
static const unsigned char YM_VOL_TB[] = {
	 0,  0,  1,  1,  2,  3,  5,  6,
	 8, 10, 14, 20, 28, 40, 56, 80
};


// RAM copy of the machine code of ym_emu_run(). ym_emu_init() fills it,
// optionally patches it, and points pfn_ym_emu_run at it.
char ramCode[0x500];

u8 *pYmRegStream,	// The actual register stream in the YM file (i.e. past the header)
   *pWriteBuffer,	// Pointer to where to write in the PCM output buffer
   *pVolTblRam;		// Pointer to the YM volume table in RAM

// Register values of the frame that the next ym_emu_run() call will render
u8  ymRegs[16];
// Index of the frame currently held in ymRegs
u16 ymFrame;
// Number of frames in the tune; also the distance between two consecutive
// registers of the same frame in the register stream
u32 numFrames;
// Scratch offset into the register stream used while loading a frame
u32 dataOffs;

// "Position" counters for channels A-C, noise and the envelope
// generator (fixed point). They count downwards; when a counter goes
// negative the matching period is added to its upper bits.
u32 posA, posB, posC, posN, posEnve;

// Envelope generator parameters, derived from the envelope shape register.
// Each one is either 0 or 15. The assembly only reads their low byte.
u16 attack, alt, hold;

// NOTE: the phase variables are 16 bits wide, but the assembly loads and
// stores them with byte-sized accesses.
u16 phaseA, phaseB, phaseC;	// Current phase (on / off) for channels A-C
u8  toneA,  toneB,  toneC;	// Tone disable flag for channels A-C (0 or 1)
u8  noiseA, noiseB, noiseC;	// Noise disable flag for channels A-C (0 or 1)
u8  modeA,  modeB,  modeC;	// Mode flag for channels A-C (0 = fixed volume, 0x10 = use EG)
u8  volA,   volB,   volC;	// Volume for channels A-C (for when mode == 0)
u8  enve;					// Current output from the EG

// Per channel: points at enve when the channel uses the EG, otherwise at
// the channel's own volX variable
u8 *pVolA, *pVolB, *pVolC;

u32 lfsr;					// Shift register used for noise generation


// Entry point of the RAM copy of ym_emu_run(); set by ym_emu_init()
pfn_emu_run_t pfn_ym_emu_run;



void ym_emu_init(const unsigned char *pYmData, unsigned char *pPcmBuffer) {
	u16  i;
	char *src, *dest;
	u8   channels;

	pYmRegStream = (u8*)pYmData + 0x22;
	while (*pYmRegStream++);		// Skip song name
	while (*pYmRegStream++);		// Skip author name
	while (*pYmRegStream++);		// Skip song comment

	pWriteBuffer = pPcmBuffer;

	// Copy the emulation code to RAM to avoid the 8-bit ROM bus
	src = (char*)ym_emu_run;
	dest = ramCode;
	for (i = 0; i < 0x500; i++)	*(dest++) = *(src++);

	if (pYmData[0x17] == 0x1B) {
		// This is probably a ZX Spectrum tune with an AY clock of 1773400 Hz
		// (0x001B0F58 big-endian). Adjust the emulation speed accordingly.
		ramCode[0x296-8] = ramCode[0x2B5-8] = ramCode[0x2D4-8] = 0xCC;
		ramCode[0x297-8] = ramCode[0x2B6-8] = ramCode[0x2D5-8] = 0xDA;
		ramCode[0x298-8] = ramCode[0x2B7-8] = ramCode[0x2D6-8] = 0x0D;
		ramCode[0x2F8-8] = 0x66; ramCode[0x2F9-8] = 0xED; ramCode[0x2FA-8] = 0x06;
		ramCode[0x346-8] = 0xDA; ramCode[0x347-8] = 0x0D;
	}

	// Copy the volume table to RAM at 0x5800
	pVolTblRam = (u8*)0x5800;
	for (i = 0; i < 16; i++) pVolTblRam[i] = YM_VOL_TB[i];

	pfn_ym_emu_run = (pfn_emu_run_t)ramCode;

	ymFrame = 0;
	lfsr = 0x10000;
	phaseA = phaseB = phaseC = 0;

	numFrames = (u16)pYmData[0x0E] << 8;
	numFrames += pYmData[0x0F];

	dataOffs = ymFrame;
	for (i = 0; i < 16; i++) {
		ymRegs[i] = pYmRegStream[dataOffs];
		dataOffs += numFrames;
	}

	hold = alt = attack = 0;
}


/*
 * Report where the emulator will write its next sample, i.e. the current
 * value of the PCM write pointer maintained by ym_emu_init() and
 * ym_emu_run().
 */
unsigned char *ym_emu_buffer_ptr() {
	unsigned char *pNextSample = pWriteBuffer;

	return pNextSample;
}


/*
 * Render one frame of the tune and advance to the next frame.
 *
 * The function decodes the register values in ymRegs into the per-channel
 * flags and volumes, runs the sample loop (320 samples per call) in
 * TLCS-900 assembly, stores the generator state back into the globals and
 * finally loads the registers of the next frame from the register stream,
 * wrapping to frame 0 after the last one.
 *
 * IMPORTANT: ym_emu_init() copies the compiled bytes of this function into
 * ramCode and patches that copy at hard-coded offsets. Any change to the
 * code of this function (C or assembly) moves those offsets.
 *
 * The YMREG_* register indices are defined outside this file.
 */
void ym_emu_run() {
	u16 i;

	// An envelope shape of 0xFF is skipped; any other value (re)starts the
	// envelope with the parameters encoded in its low four bits.
	if (ymRegs[YMREG_ENVE_SHAPE] != 0xFF) {
		// Bit 2: attack
		if (ymRegs[YMREG_ENVE_SHAPE] & 4)
			attack = 15;
		else
			attack = 0;

		// Bit 3 set: hold and alternate come from bits 0 and 1.
		// Bit 3 clear: always hold, and alternate follows attack.
		if (ymRegs[YMREG_ENVE_SHAPE] & 8) {
			hold = (ymRegs[YMREG_ENVE_SHAPE] & 1) ? 15 : 0;
			alt  = (ymRegs[YMREG_ENVE_SHAPE] & 2) ? 15 : 0;
		} else {
			hold = 15;
			alt = attack;
		}
		// Initial EG level: table entry 15 when attack == 0, entry 0 otherwise
		enve = pVolTblRam[attack ^ 15];
	}

	// Bit 4 of a level register selects the EG instead of the fixed volume
	modeA = ymRegs[YMREG_LEVEL_A] & 0x10;
	modeB = ymRegs[YMREG_LEVEL_B] & 0x10;
	modeC = ymRegs[YMREG_LEVEL_C] & 0x10;

	// Mixer bits 0-2: tone disable for A-C, normalised to 0 / 1
	toneA = (ymRegs[YMREG_MIXER] & 1);
	toneB = (ymRegs[YMREG_MIXER] & 2) >> 1;
	toneC = (ymRegs[YMREG_MIXER] & 4) >> 2;

	// Mixer bits 3-5: noise disable for A-C, normalised to 0 / 1
	noiseA = (ymRegs[YMREG_MIXER] & 8)  >> 3;
	noiseB = (ymRegs[YMREG_MIXER] & 16) >> 4;
	noiseC = (ymRegs[YMREG_MIXER] & 32) >> 5;

	// Fixed volumes: 4-bit level translated through the volume table
	volA = pVolTblRam[(ymRegs[YMREG_LEVEL_A] & 0x0F)];
	volB = pVolTblRam[(ymRegs[YMREG_LEVEL_B] & 0x0F)];
	volC = pVolTblRam[(ymRegs[YMREG_LEVEL_C] & 0x0F)];

	// The sample loop reads each channel's level through these pointers, so
	// channels in EG mode see every update of enve made inside the loop.
	pVolA = modeA ? &enve : &volA;
	pVolB = modeB ? &enve : &volB;
	pVolC = modeC ? &enve : &volC;


	// Amounts subtracted from the position counters for every sample
	__asm(" TONE_STEP  equ 1024000");
	__asm(" ENVE_STEP  equ TONE_STEP/2");
	__asm(" NOISE_STEP equ TONE_STEP/256");

	// ---- Load the generator state into registers ----
	__asm(" ld   xiz, (_pWriteBuffer)");

	// Tone periods: 12 bits taken from the register pairs 0/1, 2/3 and 4/5
	__asm(" ld   hl,(_ymRegs)");
	__asm(" and  hl,0xFFF");		// rwa1 = A speed
	__asm(" ld   rwa1,hl");

	__asm(" ld   hl,(_ymRegs+2)");
	__asm(" and  hl,0xFFF");		// rbc0 = B speed
	__asm(" ld   rbc0,hl");

	__asm(" ld   hl,(_ymRegs+4)");
	__asm(" and  hl,0xFFF");		// rde1 = C speed
	__asm(" ld   rde1,hl");

	// Noise period: low 5 bits of register 6
	__asm(" ld   hl,(_ymRegs+6)");
	__asm(" and  hl,0x1F");
	__asm(" ld   qbc1,hl");			// qbc1 = N speed

	__asm(" ld   xwa, (_pVolA)");
	__asm(" ld   xhl2,xwa");		// xhl2 = pVolA
	__asm(" ld   xwa, (_pVolB)");
	__asm(" ld   xbc2,xwa");		// xbc2 = pVolB
	__asm(" ld   xwa, (_pVolC)");
	__asm(" ld   xde2,xwa");		// xde2 = pVolC

	// The noise output always starts at 0 for each call
	__asm(" ld   qw2,0");  			// qw2 = noise

	__asm(" ld   xwa,(_posN)");
	__asm(" ld   xhl1,xwa");		// xhl1 = posN

	__asm(" ld   xwa,(_posB)");
	__asm(" ld   xhl0,xwa");		// xhl0 = posB
	__asm(" ld   xwa,(_posC)");
	__asm(" ld   xde0,xwa");		// xde0 = posC

	__asm(" ld   xwa,(_posEnve)");
	__asm(" ld   xwa0,xwa");		// xwa0 = posEnve

	// The EG step counter and attack value are seeded here on every call and
	// are not written back after the loop.
	__asm(" ld   qd1,15");			// stepEnve
	__asm(" ld   qwa1,320");		// samples per frame

	__asm(" ld   a,(_attack)");
	__asm(" ld   qe1,a");			// qe1 = attack

	__asm(" ld   xiy,(_posA)");		// xiy = posA
	__asm(" ld   xde,(_lfsr)");		// xde = lfsr

	// qb = 1 when no channel uses the EG (EG updates are then skipped),
	// 0 when at least one channel has its mode bit (0x10) set
	__asm(" ld   a,(_modeA)");
	__asm(" or   a,(_modeB)");
	__asm(" or   a,(_modeC)");
	__asm(" srl  4,a");
	__asm(" xor  a,1");
	__asm(" ld   qb,a");

	__asm(" ld   a,(_noiseA)");
	__asm(" ld   qc,a");			// qc = noiseA
	__asm(" ld   a,(_noiseB)");
	__asm(" ld   rb1,a");			// rb1 = noiseB
	__asm(" ld   a,(_noiseC)");
	__asm(" ld   rc1,a");			// rc1 = noiseC

	// Envelope period: 16 bits from registers 11 and 12
	__asm(" ld   wa,(_ymRegs+11)");
	__asm(" ld   qbc0,wa");			// qbc0 = enve speed

	// Keep the tone disable flags (qa, ql1, qh1) and the precomputed
	// "phase | tone disable" values for the mixer
	__asm(" ld   a,(_toneA)");
	__asm(" ld   qa,a");			// qa = toneA
	__asm(" or   a,(_phaseA)");
	__asm(" ld   ra2,a");			// ra2 = phaseA | toneA
	__asm(" ld   a,(_toneB)");
	__asm(" ld   ql1,a");			// ql1 = toneB
	__asm(" or   a,(_phaseB)");
	__asm(" ld   qa2,a");			// qa2 = phaseB | toneB
	__asm(" ld   a,(_toneC)");
	__asm(" ld   qh1,a");			// qh1 = toneC
	__asm(" or   a,(_phaseC)");
	__asm(" ld   rw2,a");			// rw2 = phaseC | toneC

	// ixh = phaseA, ixl = phaseB, qixl = phaseC
	__asm(" ld   h,(_phaseA)");
	__asm(" ld   l,(_phaseB)");
	__asm(" ld   ix,hl");
	__asm(" ld   a,(_phaseC)");
	__asm(" ld   qixl,a");

	// xhl addresses the volume table; the EG replaces only the low byte (l)
	// to select an entry
	__asm(" ld   xhl,0x5800");		// volume table

	__asm(" ld   a,(_hold)");
	__asm(" ld   qixh,a");			// qixh = hold
	__asm(" ld   a,(_alt)");
	__asm(" ld   qw,a");			// qw = alt

/*******************************************************/
    __asm(" ym_loop: ");

	// Channel A: when the position counter goes negative, add the period to
	// its upper 16 bits and toggle the phase
	__asm(" sub  xiy,TONE_STEP");
	__asm(" jr   pl,no_aup");
	__asm(" ld   wa,qiy");
	__asm(" add  wa,rwa1");
	__asm(" ld   qiy,wa");
	__asm(" xor  ixh,1");			// phaseA ^= 1
	__asm(" ld   a,qa");
	__asm(" or   a,ixh");
	__asm(" ld   ra2,a");			// ra2 = phaseA | toneA
	__asm(" no_aup: ");

	// Channel B, same scheme
	__asm(" sub  xhl0,TONE_STEP");
	__asm(" jr   pl,no_bup");
	__asm(" ld   wa,qhl0");
	__asm(" add  wa,rbc0");
	__asm(" ld   qhl0,wa");
	__asm(" xor  ixl,1");			// phaseB ^= 1
	__asm(" ld   a,ql1");
	__asm(" or   a,ixl");
	__asm(" ld   qa2,a");			// qa2 = phaseB | toneB
	__asm(" no_bup: ");

	// Channel C, same scheme
	__asm(" sub  xde0,TONE_STEP");
	__asm(" jr   pl,no_cup");
	__asm(" ld   wa,qde0");
	__asm(" add  wa,rde1");
	__asm(" ld   qde0,wa");
	__asm(" xor  qixl,1");			// phaseC ^= 1
	__asm(" ld   a,qh1");
	__asm(" or   a,qixl");
	__asm(" ld   rw2,a");			// rw2 = phaseC | toneC
	__asm(" no_cup: ");

	// Envelope generator. Skipped while qb != 0, i.e. when no channel uses
	// the EG or once the level is being held.
	__asm(" cp   qb,0");			// is the current EG level being held?
	__asm(" jr   nz,no_eup2");
	__asm(" sub  xwa0,ENVE_STEP");	// A real YM/AY supposedly counts upwards - I count downwards (it's faster)
	__asm(" jr   pl,no_eup2");
	__asm(" ld   wa,qwa0");
	__asm(" add  wa,qbc0");
	__asm(" ld   qwa0,wa");
	__asm(" djnz qd1,no_eup");		// step--
	// The step counter reached 0: end of one envelope cycle
	__asm(" cp   qixh,0");
	__asm(" jr   z,no_hold");
	__asm(" cp   qw,0");
	__asm(" jr   z,no_alt");
	__asm(" xor  qe1,15");			// if (alt) attack ^= 15
	__asm(" no_alt: ");
	__asm(" ld   qb,1");			// hold: stop updating the EG
	__asm(" ld   qd1,0");			// step = 0
	__asm(" jr   no_eup");
	__asm(" no_hold: ");
	// NOTE(review): qd1 is always 0 when this point is reached (DJNZ only
	// falls through at zero), so the "and a,16" test below looks like it can
	// never be non-zero and the attack flip at no_alt2 is never executed.
	// The step is also left at 0, so the next DJNZ would wrap it to 0xFF.
	// Confirm against the intended envelope behaviour.
	__asm(" cp   qw,0");
	__asm(" jr   z,no_alt2");
	__asm(" ld   a,qd1");
	__asm(" and  a,16");
	__asm(" jr   z,no_alt2");
	__asm(" xor  qe1,15");			// attack ^= 15
	__asm(" no_alt2: ");
	__asm(" and  qd1,15");			// step &= 15
	__asm(" no_eup: ");
	__asm(" ld   l,qd1");
	__asm(" xor  l,qe1");
	__asm(" ld   a,(xhl)");
	__asm(" ld   (_enve),a");		// enve = pVolTblRam[step ^ attack]
	__asm(" no_eup2: ");

	// Noise generator: 16-bit position counter; the period is added to its
	// high byte. On each update the LFSR is shifted and its old bit 0
	// becomes the noise output.
	__asm(" sub  rhl1,NOISE_STEP");
	__asm(" jr   pl,no_nup");
	__asm(" ld   w,rh1");
	__asm(" add  w,qc1");
	__asm(" ld   rh1,w");
	__asm(" ld   a,e");
	__asm(" ld   qw2,a");
	__asm(" srl  3,a");
	__asm(" xor  a,e");
	__asm(" and  a,1");
	__asm(" srl  1,xde");
	__asm(" ld   qe,a");		// lfsr = (lfsr >> 1) | ((lfsr.0 ^ lfsr.3) << 16)
	__asm(" and  qw2,1");		// current noise output (0 or 1)
	__asm(" no_nup: ");

	// Mixer: every operand is 0 or 1, so after NEG each channel gate is
	// either 0x00 or 0xFF
	__asm(" ld   a,qw2");
	__asm(" ld   b,a");
	__asm(" ld   c,a");

	__asm(" or   a,qc");		// a = noise | noiseA
	__asm(" and  a,ra2");		// a = (noise | noiseA) & (phaseA | toneA)
	__asm(" neg  a");			// a = channel A off (0) or on (0xFF)

	__asm(" or   b,rb1");
	__asm(" and  b,qa2");
	__asm(" neg  b");			// b = channel B off (0) or on (0xFF)

	__asm(" or   c,rc1");
	__asm(" and  c,rw2");
	__asm(" neg  c");			// c = channel C off (0) or on (0xFF)

	__asm(" and  a,(xhl2)");	// AND by either volA or enve depending on modeA
	__asm(" and  b,(xbc2)");	// same for channel B
	__asm(" and  c,(xde2)");	// same for channel C

	// Sum the three channels into one 8-bit sample
	__asm(" add  a,b");
	__asm(" add  a,c");

	// Copy the sample into both bytes of WA and store it as a 16-bit word,
	// post-incrementing the write pointer
	__asm(" ld   w,a");
	__asm(" ld   (xiz+),wa");
	__asm(" and  iz,0x4FFF");	// the output buffer is assumed to start at 0x4000

	__asm(" sub  qwa1,1");		// one sample done
	__asm(" jrl  nz,ym_loop");
/*******************************************************/

	// ---- Store the generator state back into the globals ----
	__asm(" ld   (_pWriteBuffer),xiz");
	__asm(" ld   (_lfsr),xde");

	__asm(" ld   hl,ix");
	__asm(" ld   (_phaseA),h");
	__asm(" ld   (_phaseB),l");
	__asm(" ld   a,qixl");
	__asm(" ld   (_phaseC),a");

	__asm(" ld   xwa,xhl1");
	__asm(" ld   (_posN),xwa");
	__asm(" ld   (_posA),xiy");
	__asm(" ld   xwa,xhl0");
	__asm(" ld   (_posB),xwa");
	__asm(" ld   xwa,xde0");
	__asm(" ld   (_posC),xwa");
	__asm(" ld   xwa,xwa0");
	__asm(" ld   (_posEnve),xwa");

	// Advance to the next frame (looping at the end of the tune) and fetch
	// its 16 registers; register r of frame f is stored at offset
	// r * numFrames + f in the register stream.
	ymFrame++;
	if (ymFrame >= numFrames) ymFrame = 0;
	dataOffs = ymFrame;
	for (i = 0; i < 16; i++) {
		ymRegs[i] = pYmRegStream[dataOffs];
		dataOffs += numFrames;
	}
}
