; NeoGeo Pocket TLCS-900/H prefetch instruction queue tests
; Mic, 2012


; Tell the AS assembler we want to code for NGPC
;
        cpu 96C141
        maxmode on
        include "stddef96.inc"


        include "hardware.inc"  ; Include hardware defines
        include "system.inc"    ; Include System Call defines
        
;
; Define variables
;


; PUTSTR: print a string literal through Puts.
; The string is embedded, zero-terminated, directly in the code stream and is
; skipped with a JR. The macro then loads XHL with the address of the string,
; D with the palette argument, C with the column and B with the row, and
; calls Puts.
; Overwrites XHL, D, C and B; Puts additionally overwrites XWA and XBC.
; NOTE(review): every expansion defines the same two label names, so this
; relies on the assembler treating labels defined inside a macro body as
; local to each expansion - confirm against the assembler manual.
PUTSTR: MACRO str,pal,colm,row
  jr call_puts
  sstr:    db   str,0
  call_puts:
  ld xhl,sstr
  ld d,pal
  ld c,colm
  ld b,row
  call Puts
 ENDM

; WAITF: wait for a number of vertical blanks.
; Loads W with the macro argument and calls vbWait, which resets VBCOUNTER
; and returns once the counter equals W. W is overwritten with the argument.
WAITF: MACRO frames
    ld w,	frames
    call	vbWait
 ENDM
 
        
; RAM variables, allocated starting at _MAINRAM.
; RAMDB is not defined in this file (presumably a macro from one of the
; includes that reserves one byte per name - TODO confirm).
;   YFRT, XFRT - not referenced by the code visible in this file
;   HIH        - scratch byte used by Puts (palette number shifted left by one)
;   SCOREL     - result byte written by each RAM test routine
;   VBCOUNTER  - incremented by VBlankInt, reset and polled by vbWait
START_OF_RAM    EVAL    _MAINRAM
   RAMDB YFRT
   RAMDB XFRT
   RAMDB HIH
   RAMDB SCOREL
   RAMDB VBCOUNTER


; Address at which the cartridge header is assembled; also the base used for
; the final padding byte that fixes the image size.
HEADER_OFS      EQU     0200000H

; Z80 / sound control definitions. None of the REG_Z80* or ZS_* symbols is
; referenced by the code visible in this file.
REG_Z80SND      equ     0b8h    ; 16bit w - switch the z80 and/or snd chip on or off
ZS_Z0S0         equ     0aaaah  ; Z80 off Snd off
ZS_Z0S1         equ     0aa55h  ; Z80 off Snd on - Snd with direct access from TLCS
ZS_Z1S0         equ     055aah  ; Z80 on Snd off
ZS_Z1S1         equ     05555h  ; Z80 on Snd on - Snd only available via Z80

REG_Z80_NMI     equ     0bah    ; 8bit w - triggers Z80 NMI
REG_Z80_COMM    equ     0bch    ; 8bit r/w - comms with z80 address 0x8000

; The test drivers copy their routines to the literal address 7000h, which
; is the same value as this symbol.
_Z80RAM         equ     7000h   ; start of Z80 RAM

; Presumably the left/right DAC registers, judging by the names only; not
; referenced by the code visible in this file - TODO confirm.
REG_DACL EQU 00A2h
REG_DACR EQU 00A3h

; Not referenced by the code visible in this file.
Z80_COUNTER	equ	7500h




;
; Standard cartridge header
;
    ; Header fields, assembled at HEADER_OFS. With 4-byte DD and 2-byte DW
    ; entries the fields add up to 28 + 4 + 2 + 1 + 1 + 12 + 16 = 64 bytes,
    ; so the vector table that follows starts at HEADER_OFS + 40h.
    org    HEADER_OFS
    db     " LICENSED BY SNK CORPORATION"    ; 28 bytes license string
    dd     Start                             ; Program Counter
    dw     0                                 ; Catalog number
    db     0                                 ; Sub catalog number
    db     10h                               ; colour or b+w (10h = colour)
    db     "TLCS IQ     "                    ; Game name (12 bytes)
    dd     0,0,0,0                           ; padding - reserve for future use

; User Interrupt Vectors

; Table of 18 handler addresses. Start copies all 18 entries, in this order,
; into consecutive 32-bit slots beginning at rSWI3, so the order and the
; count must stay in step with that copy loop. Only the vertical-blank slot
; has a real handler; every other slot points at nada.
UserIntVect:
        dd      nada            ; Software Interrupt (SWI 3)
        dd      nada            ; Software Interrupt (SWI 4)
        dd      nada            ; Software Interrupt (SWI 5)
        dd      nada            ; Software Interrupt (SWI 6)
        dd      nada            ; RTC Alarm Interrupt
        dd      VBlankInt       ; Vertical Blanking Interrupt
        dd      nada            ; Interrupt from Z80
        dd      nada            ; Timer Interrupt (8 bit timer 0)
        dd      nada            ; Timer Interrupt (8 bit timer 1)
        dd      nada            ; Timer Interrupt (8 bit timer 2)
        dd      nada            ; Timer Interrupt (8 bit timer 3)
        dd      nada            ; Serial Transmission Interrupt
        dd      nada            ; Serial Reception Interrupt
        dd      nada            ; (Reserved)
        dd      nada            ; End Micro DMA Int (MicroDMA 0)
        dd      nada            ; End Micro DMA Int (MicroDMA 1)
        dd      nada            ; End Micro DMA Int (MicroDMA 2)
        dd      nada            ; End Micro DMA Int (MicroDMA 3)

; Default handler: returns from the interrupt without doing anything.
nada:   reti

; *** Start of User Code ***

; Program entry point (the header's Program Counter field points here).
; Installs the interrupt vectors, sets up the display window, clears and
; loads tile RAM, clears both tile maps and finally enables interrupts.
; Execution then falls through into Restart.
Start:

        calr    OS_VERSION      ; Initialize NGP or NGPC mode

        set     6,(rUSERA)      ; User Answer

; Install User Interrupt Vectors

        ; Copy the 18 dword entries of UserIntVect to the slots that start
        ; at rSWI3. XIX = source, XIY = destination, B = entry count.
        lda     xix,(UserIntVect)
        lda     xiy,(rSWI3)
        ld      b,18
UIVloop:
        ld      xwa,(xix+)
        ld      (xiy+),xwa
        djnz    b,UIVloop


    ; set up screen size 160x152
    ; (0a0h = 160 and 98h = 152; the first two registers are set to 0)
    ld   (08002h),0
    ld   (08003h),0
    ld   (08004h),0a0h
    ld   (08005h),98h

    ; Zero the first 200h words (400h bytes) of tile RAM. That is exactly
    ; the area below _TILERAM+400h, where the charset is copied next.
    ld   wa,	0
    ld   xhl,	_TILERAM
    ld   bc,	200h
    clear_pattern_ram:
    ld  (xhl+),	wa
    djnz bc,	clear_pattern_ram
    
    ; copy charset to character RAM
    ; BC is a word count for LDIRW, hence the division by 2.
    ld  bc,(ENDCHARSET-CHARSET)/2
    ld  xde, _TILERAM+400h
    ld  xhl, CHARSET
    ldirw (xde+),(xhl+)

    ; Blank the visible part of both tile maps before anything is drawn.
    call ClearScreen
; Enable Interrupts

        ei      0

; Restart: set the colours and draw the two title lines.
; Entered by falling through from Start; execution continues into the first
; test driver below.
Restart:
    ; Background colour: two byte writes to _BGCPAL (layout GRxB), then the
    ; value 80h to register 8118h.
    ld (_BGCPAL), 33h
    ld (_BGCPAL+1), 07h
    ldb (8118h),80h

    ; Copy the PALETTE..ENDPALETTE table to _SCR2PAL, one word at a time.
    ld xhl, PALETTE
    ld xde, _SCR2PAL
    ld bc, (ENDPALETTE-PALETTE)/2
    ldirw (xde+),(xhl+)

    ; Title, rows 0 and 1, palette 4, starting at column 1.
    PUTSTR ".Instruction Queue.",4,1,0
    PUTSTR ".   Test Suite    .",4,1,1

    ;--------------------
    ; Test 1: length of the instruction queue. The expected result is 4.
    PUTSTR "Queue length",4,2,4
    PUTSTR "Expected",2,4,5
    PUTSTR "Actual",2,4,6

    ; The routine modifies its own code, so it is copied from ROM to RAM at
    ; _Z80RAM (7000h) and executed there.
    ld xhl, test
    ld xde, _Z80RAM
    ld bc,  test_end-test
    ldir (xde+),(xhl+)

    ; Wait for one vertical blank, then run the RAM copy.
    WAITF 1
    ld   xwa, _Z80RAM
    call (xwa)

    ; Expected value on row 5, measured value on row 6, both at column 13.
    ; DisplayHex preserves W, so the palette byte is loaded only once.
    ld   w,  2*2
    ld   a,  4
    ld   hl, 0d05h
    call DisplayHex
    ld   a,  (SCOREL)
    ld   hl, 0d06h
    call DisplayHex

    ; Verdict next to the heading.
    cpb (SCOREL),4
    jr  z, length_ok
    PUTSTR "NOK",3,15,4
    jr  length_test_done
    length_ok:
    PUTSTR "OK",1,15,4
    length_test_done:
    ;--------------------

    ;--------------------
    ; Test 2: no queue flush after a branch that is not taken. The expected
    ; result is 1.
    PUTSTR "No flush",4,2,8
    PUTSTR "Expected",2,4,9
    PUTSTR "Actual",2,4,10

    ; The routine modifies its own code, so it is copied from ROM to RAM at
    ; _Z80RAM (7000h) and executed there.
    ld xhl, test2
    ld xde, _Z80RAM
    ld bc,  test2_end-test2
    ldir (xde+),(xhl+)

    ; Wait for one vertical blank, then run the RAM copy.
    WAITF 1
    ld   xwa, _Z80RAM
    call (xwa)

    ; Expected value on row 9, measured value on row 10, both at column 13.
    ; DisplayHex preserves W, so the palette byte is loaded only once.
    ld   w,  2*2
    ld   a,  1
    ld   hl, 0d09h
    call DisplayHex
    ld   a,  (SCOREL)
    ld   hl, 0d0ah
    call DisplayHex

    ; Verdict next to the heading.
    cpb (SCOREL),1
    jr  z, flush_ok
    PUTSTR "NOK",3,15,8
    jr  flush_test_done
    flush_ok:
    PUTSTR "OK",1,15,8
    flush_test_done:
    ;--------------------

   ;--------------------
    ; Test 3: an instruction longer than the queue modifies the code that
    ; follows it. The expected result is 0.
    PUTSTR "Long instr",4,2,12
    PUTSTR "Expected",2,4,13
    PUTSTR "Actual",2,4,14

    ; The routine modifies its own code, so it is copied from ROM to RAM at
    ; _Z80RAM (7000h) and executed there.
    ld xhl, test3
    ld xde, _Z80RAM
    ld bc,  test3_end-test3
    ldir (xde+),(xhl+)

    ; Wait for one vertical blank, then run the RAM copy.
    WAITF 1
    ld   xwa, _Z80RAM
    call (xwa)

    ; Expected value on row 13, measured value on row 14, both at column 13.
    ; DisplayHex preserves W, so the palette byte is loaded only once.
    ld   w,  2*2
    ld   a,  0
    ld   hl, 0d0dh
    call DisplayHex
    ld   a,  (SCOREL)
    ld   hl, 0d0eh
    call DisplayHex

    ; Verdict next to the heading.
    cpb (SCOREL),0
    jr  z, linstr_ok
    PUTSTR "NOK",3,15,12
    jr  linstr_test_done
    linstr_ok:
    PUTSTR "OK",1,15,12
    linstr_test_done:
    ;--------------------
    
    
; Idle loop, entered once all results are on screen.
; Polls the user-shutdown flag and asks the system to power the console off
; as soon as the flag becomes non-zero, so that the power button keeps
; working while the results are displayed. The results stay on screen until
; then; the tests are not re-run.
forever
	cp	(rUSERS),0
	jr	z,forever		; no shutdown requested yet
	ld	rw3,VECT_SHUTDOWN
	calr	SYSTEM_CALL
forever_halt
	jr	forever_halt		; only reached if the shutdown call returns
	

;
; Calculate the length of the instruction queue
;
; Based on the x86 implementation found at http://en.wikipedia.org/wiki/Prefetch_input_queue
;
; Executed from RAM: the driver copies test..test_end to 7000h and calls
; that address, which is why XDE is computed as 7000h+nops-test.
; Result: SCOREL = number of leading filler bytes that could be patched
; without effect + the length of the patching instruction.
test:
    ldf 0               ; work in register bank 0
    ld  ra1,	1       ; A of bank 1 = 1: marks "an INCF was executed"
    ld  a,	0       ; A of bank 0 = 0
    ld  b,	0       ; B = number of filler bytes tried so far
    ld		c,0Ch   ; byte value patched into the filler (INCF)
    ld  xde,	7000h+nops-test ; RAM address of the first filler byte
again:
    ; A+A stays zero while bank 0 is current and becomes non-zero once an
    ; INCF has switched to bank 1 (where A is 1).
    add a,	a
    jrl nz,	found_iq_length
    
    ; Taken branch to the very next instruction, so that the patching
    ; store below is the first instruction fetched after a branch.
    jr	flush_queue
flush_queue:
    ldb (xde),	c	; INCF
nops:
    ; 8 x 8 dwords of zero = 256 filler bytes (00h is treated as NOP, see
    ; the restore instruction below).
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    dd 0,0,0,0,0,0,0,0
    ; Put the filler byte back, then move on to the next one.
    ; NOTE(review): on the pass where the patched INCF does execute, these
    ; three instructions appear to run with bank 1 selected, so the store
    ; goes to whatever address bank 1's XDE holds - confirm this is benign.
    ldb (xde),	00h	; NOP
    inc 1,	b
    inc 1,	xde
    jrl again
found_iq_length:
    ldf 0               ; back to bank 0, where the real counter B lives
    dec 1,	b       ; undo the increment done after the last patch
    ; Set B = Number Of NOPs That Had To Be Modified Before Taking Effect + The Length
    ; Of The Modifying Instruction Itself. This is the length of the instruction queue
    add b,	nops-flush_queue
    ld  (SCOREL),b
    ret
test_end:


;
; Tests that no instruction queue flush is performed after
; a branch that isn't taken, and that the prefetching is
; performed at the correct time.
;
; Executed from RAM: the driver copies test2..test2_end to 7000h and calls
; that address.
; Result: SCOREL = A of whichever register bank is current after the three
; NOP slots, i.e. the number of patched INCFs that were actually executed
; (bank 0 A = 0, bank 1 A = 1, bank 2 A = 2). The driver expects 1.
test2:
    ldf 0               ; work in register bank 0
    ld  ra1,	1       ; A of bank 1 = 1
    ld  ra2,	2       ; A of bank 2 = 2
    ld  a,	0       ; A of bank 0 = 0
    ld  xde,	7000h+no_flush-test2    ; RAM address of the first NOP
    ld  bc,	0C0Ch	; INCF x 2
    rcf                 ; carry clear, so the JR C below is never taken
    jr flush1
flush1:
    ; The instruction queue should now be empty. The two instructions below
    ; take up 4 bytes in total, and therefore will both be loaded into the
    ; instruction queue.
    ; The word store patches the first two of the three NOPs.
    ld  (xde),	bc
    ; By the time the write for the above LD occurs, the first NOP below will
    ; already have been prefetched. Thus, the attempt by the above LD to overwrite
    ; that NOP with an INCF will have no effect.
    jr  c,	no_flush
no_flush:
    ; The modification of this NOP still won't be visible here, since the above JR
    ; wasn't taken and therefore didn't flush the instruction queue.
    nop
    ; ...but this one will get overwritten, and will cause the register bank to switch
    ; from 0 to 1 (making the current A == 1).
    nop
    ; The third NOP is not touched by the store.
    nop
    ld  (SCOREL),a
    ldf 0               ; leave with bank 0 selected again
    ret
test2_end:



;
; Tests what happens when an instruction longer than the instruction queue
; buffer modifies the instructions immediately following it.
;
; Executed from RAM: the driver copies test3..test3_end to 7000h and calls
; that address.
; Result: SCOREL = A of whichever register bank is current after the three
; NOP slots, i.e. the number of patched INCFs that were actually executed.
; The driver expects 0.
test3:
    ldf 0               ; work in register bank 0
    ld  ra1,	1       ; A of bank 1 = 1
    ld  ra2,	2       ; A of bank 2 = 2
    ld  ra3,	3       ; A of bank 3 = 3
    ld  a,	0       ; A of bank 0 = 0
    ; Park the two-byte patch pattern at 7400h so that it can be copied
    ; memory-to-memory by the instruction under test.
    ldw (7400h),0C0Ch	; INCF x 2
    jr flush2
flush2:
    ; This 6-byte instruction overwrites the first two NOPs below, but
    ; since both of them will be in the instruction queue by the time
    ; the write occurs, they'll still be executed as NOPs.
    ldw (7000h+nops3-test3),(7400h)
nops3:    
    nop
    nop
    ; The third NOP is not touched by the store.
    nop
    ld  (SCOREL),a
    ldf 0               ; leave with bank 0 selected again
    ret
test3_end:




; User shutdown? (Power off pressed?)

        ; NOTE(review): nothing visible in this file branches to this code.
        ; It sits directly after the RET that ends test3 and has no label of
        ; its own, and the main program never falls through to it, so this
        ; power-off check appears to be unreachable as the file stands.
        ; Intended behaviour: restart while the user-shutdown flag is zero,
        ; otherwise request a system shutdown.
        cp      (rUSERS),0
        jp      z,Restart      ; no

; Power off NGP

        ld      rw3,VECT_SHUTDOWN
        calr    SYSTEM_CALL

        ; Spin here in case the shutdown call returns.
done:   jr      done


; vbWait
; Resets VBCOUNTER to zero and then spins until it equals W. VBCOUNTER is
; incremented by VBlankInt, so this waits for W vertical blank interrupts
; and only terminates while that interrupt is enabled.
; inputs: w = number of VBIs to wait for
; outputs: none; XWA is saved and restored, flags are changed
; The comparison is for exact equality; with W = 0 the loop normally ends
; on its first pass because the counter has just been cleared.

vbWait:
   push xwa
   ld (VBCOUNTER), 0
vbw1:
   ; The values read from 7600h are discarded (A is reloaded right after).
   ; NOTE(review): their purpose is not evident from this file; together with
   ; the per-instruction counts on the right they may be there to give the
   ; polling loop a known length - confirm before removing.
   ld a,(7600h)		; 4
   ld a,(7600h)		; 4	
   ld a, (VBCOUNTER)	; 4
   cp a, w		; 2
   jr nz, vbw1		; 5 (19)
   pop xwa
   ret

   
; 
; DrawChar
;
; Writes one two-byte tile-map entry at 9800h + 2*x + 40h*y.
; inputs:  a = low byte of the entry (tile number)
;          w = high byte of the entry (palette bits)
;          h = x (column), l = y (row)
; outputs: none; all registers are preserved, flags are changed
;
DrawChar
    push xhl
    push xde
    push xbc
    ld   c, h                   ; keep the column before H is cleared
    ld   b, 0                   ; BC = x
    ld   h, 0                   ; HL = y
    ld   xde, 9800h             ; base of the tile map
    mul  xhl, 40h               ; 40h bytes per map row
    add  de, hl
    mul  xbc, 2                 ; 2 bytes per map entry
    add  de, bc
    ld   (xde), wa              ; A goes to the low byte, W to the high byte
    pop  xbc
    pop  xde
    pop  xhl
    ret

;
; DisplayHex
;
; Draws a byte as two hex digits (e.g. FF) with DrawChar: the high nibble
; at (x, y) and the low nibble at (x+1, y). Digit n uses tile 40h + n.
;
; inputs:  a = value
;          w = high byte for both map entries (callers pass palette * 2)
;          h = x (column), l = y (row)
; outputs: none; all registers are preserved, flags are changed
;
DisplayHex
    push xwa
    push xbc
    push xhl
    ld   c, a                   ; keep the value, A is reused per digit
    srl  4, a                   ; high nibble; the shift clears bits 4-7
    add  a, 40h
    call DrawChar               ; left digit
    add  h, 1                   ; one column to the right
    ld   a, c
    and  a, 0fh                 ; low nibble
    add  a, 40h
    call DrawChar               ; right digit
    pop  xhl
    pop  xbc
    pop  xwa
    ret

; Puts
;
; Draws a zero-terminated string into the tile map at 9800h.
; inputs:  xhl = string, d = palette, c = column, b = row
; outputs: none; XHL and XDE are preserved. XWA, XBC, the flags and the
;          HIH variable are overwritten.
; At most 19 characters are drawn. Character codes below 91 are used as
; they are; all other codes are lowered by 20h first, which maps a-z onto
; A-Z. The tile number is the resulting code plus 9, and the palette
; number shifted left by one is added to the high byte of every entry.
Puts
	push 	xhl
	push 	xde

	sll	d			; palette number * 2
	ld	(HIH),d			; D itself is restored by the POP below

	; XDE = 9800h + 2*column + 40h*row
	ld	xwa,0
	ld	a,c
	add	a,a			; two bytes per map entry (8-bit sum)
	ld	xde,9800h
	add	xde,xwa
	ld	c,b
	ld	b,0
	mul	xbc,40h			; 40h bytes per map row
	add	xde,xbc

	ld	b,19			; character limit
puts_next
	ld 	a,(xhl)
	cp 	a,0
	jr	z,puts_exit		; terminator reached
	inc	xhl
	cp	a,91
	jr	ult,puts_emit		; below 91: use the code as it is
	add	a,0E0h			; otherwise lower it by 20h
puts_emit
	ld	w,0
	add	wa,9			; first tile in charset
	add 	w,(HIH)			; palette
	ld	(xde+),wa		; low byte, then high byte; XDE += 2
	djnz	b,puts_next
puts_exit
	pop	xde
	pop	xhl
	ret
	
	
; DisplayWord
;
; Draws a 16-bit value as four hex digits: the high byte at (x, y) and the
; low byte at (x+2, y), each through DisplayHex.
; inputs:  wa = value
;          d = palette byte, passed on to DisplayHex in W
;          h = x (column), l = y (row)
; outputs: none; all registers are preserved (like DrawChar and
;          DisplayHex), flags are changed
DisplayWord
    push xwa                    ; caller's value, restored on exit
    push xhl                    ; caller's position, restored on exit
    push xwa                    ; working copy for the low byte
    ld a,w                      ; high byte first
    ld w,d
    call DisplayHex             ; preserves HL, so (x, y) is still intact
    pop xwa                     ; WA = original value again
    ld w,d                      ; A still holds the low byte
    add h,2                     ; two columns further right
    call DisplayHex
    pop xhl
    pop xwa
    ret

;
; Clear the background planes
;
; Writes zero to the first 4C0h bytes of _SCR1RAM and then of _SCR2RAM.
; inputs:  none
; outputs: none; XBC and HL are overwritten, flags are changed
;
ClearScreen
   ld xbc, _SCR1RAM
   call cs_fill                 ; first plane
   ld xbc, _SCR2RAM             ; second plane: fall through into the filler
cs_fill
   ; Zero 4C0h bytes starting at XBC.
   ld hl, 4C0h
cs_next
   ld (xbc), 0
   add xbc, 1
   sub hl, 1
   jr nz, cs_next
   ret




; Vertical-blank interrupt handler (referenced from UserIntVect).
; Clears the watchdog and increments VBCOUNTER, which vbWait polls.
; Both instructions operate on memory only, so no general-purpose register
; is modified and none has to be saved or restored.
VBlankInt:
    ; Update Watch Dog Timer to prevent CPU reset
    ld      (rWDCR),WD_CLR

    ; increment a counter
    inc (VBCOUNTER)
    reti

; Entry points reached with CALR from the program code. Their bodies are the
; SystemCallCode and OsVersionCode macros, which are not defined in this
; file (presumably supplied by system.inc - TODO confirm).
; SYSTEM_CALL is used with the call number in RW3 (see the VECT_SHUTDOWN
; request); OS_VERSION is called once at the top of Start.
SYSTEM_CALL:
        SystemCallCode
OS_VERSION:
        OsVersionCode


    ; Graphics data. The startup code refers to CHARSET/ENDCHARSET and
    ; PALETTE/ENDPALETTE, which are not defined in this file (presumably
    ; they come from this include - TODO confirm).
    include "charset.inc"   


    ; make the cartridge size correct
    ; A single byte placed at HEADER_OFS+7FFFh makes the image span 8000h
    ; bytes counted from HEADER_OFS.
    org HEADER_OFS + 007FFFh
    db 0FFh

