;                                     \|/
;                                     O O
; --------------------------------oOO--U--OOo--------------------------------
; -                                                                         -
; -                      - Intro for Coder's Revenge -                      -
; -                    Alain BROBECKER (baah/Arm'sTeack)                   -
; -                                                                 June 96 -
; ---------------------------------------------------------------------------
;
;   Mega THANX to Mr Hill of Archiologics for the 2nd Mb. (Yup it' s easier
; now to code all I have dreamt of...) Keep on being so kind and on coding!
;
;   This source is given for free and is widely commented, so I hope some
; people will look at it and (maybe) improve their own code. Re-use of my
; routines is allowed (though not recommended, cos you' ll understand more
; if you write your owns...) as long as it is not for commercial purposes,
; as long as you credit me and send me a free copy of your proggy. Oh, btw
; the assembler I used is ExtASM 0.50b. You' ll have to make changes in
; macros if you use a newer version of ExtASM.
;
; Briefly, this code features...
;   * Smart N-tuple (N>1) buffering management using self-modified code.
;     Totally useless here, since only half a VBl is used on my Arm2, but
;     you' ll certainly be interested in it if you code 3d.
;   * Sinus-cosinus code generator.
;   * Background texture generator. (Don' t you find this texture nice?)
;   * Smart background restoring. I restore only the part which has been
;     modified, and again this shall interest 3d coders.
;   * Fast box filling and big box copying using self-generated code.
;
;   I must say that everything is optimised for speed primarily, then for
; size. Also I' ve left some 'mistakes' in the code (for example, there' s
; a branch for vsync_routine though it' s used only once) just to increase
; readability. (Maybe some others are here by accident) Time now to let you
; enjoy the code, don' t hesitate to write me...
;
;           Alain BROBECKER         Dracula / Positivity (STe)
;           rte de Dardagny         baah / Arm's Tech (Archie)
;            01630 CHALLEX                           baah (PC)
;               FRANCE

#name       IntroCR                 ; tHa iNcReDibLe kEwl diScMaG...

;------ Constants -----------------------------------------------------------
#set        screennb_min = 2        ; Minimum number of mode13 screen banks.
#set        screennb_max = 8        ; Maximum number of banks we will use.
#set        shift = 16              ; Fixed point shift of the sinus values.
#set        sin_shift = 10          ; 2^sin_shift angles make a full turn.
#set        sin_nb = 1<<(sin_shift-3) ; Nb of angles in [0;pi/4[ (one octant).
#set        bg_M = 8                ; Texture size must be a power of 2, so
#set        bg_N = 1<<bg_M          ;   bg_N=2^bg_M is the size of texture.
#set        bg_middle = 11          ; Bias added to the delta by the emboss.
#set        fractal = 29            ; Value for shift of rnd nb in fracland.
#set        nb_letters = 14         ; Nb of crazy letters.

;------ BSS Offsets ---------------------------------------------------------
; All offsets are relative to the bss label. The stack pointer is set to
; bss+stack and grows downwards, so the stack lies below the sinus table.
#set        stack = 4*128           ; Top of stack.
#set        sinus = stack           ; Sinus table, 10*sin_nb longwords.
#set        background = sinus+10*sin_nb*4 ; Background, 320*256 bytes.
#set        clsboxes = background+320*256 ; 4 longs of coords per screen.
#set        letters_coefs = clsboxes+4*screennb_max*4 ; 6 longs per letter.
#set        end = letters_coefs+6*nb_letters*4 ; End of the bss section.


;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                              MACROS                              *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

;====> Umul64 <=====================
; This macro performs an unsigned 32*32 bits multiply with a 64 bits result,
; built out of four 16*16 bits partial products.
;   [m0|m1]=m2*m3, m0 being the high word and m1 the low word.
;   You can choose [m2|m3]=[m0|m1].
;   m4,m5,m6 are scratch registers, and must differ from all other registers.
;   It destroys m0,m1,m4,m5,m6 and the flags.
; The sum of the two cross products may need 33 bits. Its carry is taken
; from a plain addS (the C flag left by mulS/mlaS is meaningless on early
; Arms), and since the cross sum is weighted by 2^16 this carry is worth
; 2^48, ie it must be added to bit 16 of the high word.
macro umul64 m0,m1,m2,m3,m4,m5,m6
{ mov       m4,m2,lsr #16           ; m4=up(m2).
  sub       m5,m2,m4,lsl #16        ; m5=down(m2).
  mov       m0,m3,lsr #16           ; m0=up(m3).
  sub       m1,m3,m0,lsl #16        ; m1=down(m3).
  mul       m6,m5,m0                ; m6=down(m2)*up(m3).
  mul       m0,m4,m0                ; m0=up(m2)*up(m3).
  mul       m4,m1,m4                ; m4=up(m2)*down(m3).
  addS      m6,m6,m4                ; C|m6=sum of both cross products.
  addCS     m0,m0,#1<<16            ; Carry of the cross sum is worth 2^48.
  mul       m1,m5,m1                ; m1=down(m2)*down(m3).
  addS      m1,m1,m6,lsl #16        ; Add lower half of cross sum, C=carry.
  adc       m0,m0,m6,lsr #16        ; Add upper half of cross sum and carry.
}
;====> Adjust64 <===================
; Brings a 64 bits fixed point value back into a single 32 bits register,
; by shifting it right by the fixed point shift factor c0. (0<c0<32)
;   m0=[m1|m2]>>c0, m1 being the high word and m2 the low word.
;   You can choose m1=m0. Only m0 is modified, the flags are untouched.
; The c0 lower bits of m1 and the 32-c0 upper bits of m2 land on disjoint
; bit fields of m0, so they are simply merged together.
macro adjust64 m0,m1,m2,c0
{ mov       m0,m1,lsl #32-c0        ; Lower bits of high word go on top.
  orr       m0,m0,m2,lsr #c0        ; Upper bits of low word go below.
}
;====> Add64 <======================
; This macro performs a 64 bits addition.
;   [m0|m1]=[m2|m3]+[m4|m5]. You can choose [m2|m3] or [m4|m5]=[m0|m1].
;   The first register of each pair is the high word, the second the low one.
;   It destroys [m0|m1] and the flags. (Flags are those of the low words add)
macro add64 m0,m1,m2,m3,m4,m5
{ addS      m1,m3,m5                ; Add low words, C=carry.
  adc       m0,m2,m4                ; Add high words and the carry.
}
;====> Sub64 <======================
; This macro performs a 64 bits subtraction.
;   [m0|m1]=[m2|m3]-[m4|m5]. You can choose [m2|m3] or [m4|m5]=[m0|m1].
;   The first register of each pair is the high word, the second the low one.
;   It destroys [m0|m1] and the flags. (Flags are those of the low words sub)
macro sub64 m0,m1,m2,m3,m4,m5
{ subS      m1,m3,m5                ; Subtract low words, C=not borrow.
  sbc       m0,m2,m4                ; Subtract high words and the borrow.
}
;====> Random32 <==================
; This macro takes a random number in m0, and makes a new one in m0.
;   Both instructions read the C flag (rrx shifts it in bit 31, adc adds it)
;   so the result also depends on the carry left by the preceding code.
;   None of them has the S suffix, so the flags are not modified.
macro random32 m0
{ eor       m0,m0,m0,rrx            ; m0=m0 eor (C:m0>>1).
  adc       m0,m0,m0,ror #7         ; m0=m0+(m0 ror 7)+C.
}


;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                               CODE                               *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

.proggy_start
  adr       r13,bss+stack           ; Initialise stack pointer.
  stmdb     r13!,{r14}              ; Save exit adress.
;----------------------------------------------------------------------------
; Get screen memory size, and check the number of mode13 screens it will
; allow us to have. Then, we modify the code which deals with multiple
; buffering to suit this ScreenNb. (I change op2 values in mov operations
; that are in vbl_routine, vsync_routine and in copy_screen)
; The strB only overwrites the lowest byte of each of those mov, ie their
; 8 bits immediate value, which is 0 in the source.
  mov       r0,#2                   ; Ask for ScreenMem size.
  swi       OS_ReadDynamicArea      ; r1=current ScreenMem size.
  mov       r0,#0                   ; r0 will count the screen banks.
._count_screens
  subS      r1,r1,#320*256          ; r1-=size of one mode13 screen.
  addGE     r0,r0,#1                ; If r1 still >=0, we have one more bank.
  bGT       _count_screens          ; If r1>0, maybe there is even more...
  cmp       r0,#screennb_min        ; At least the minimum amount?
  adrMI     r0,screenmem_error      ; No, then generate an error box with
  swiMI     OS_GenerateError        ;   the convenient message and quit.
  cmp       r0,#screennb_max        ; More than the maximum amount?
  movGE     r0,#screennb_max        ; Then don' t use extra ones.
  strB      r0,vbl_screennb_mov     ; Patch ScreenNb in the code which
  strB      r0,vsync_screennb_mov   ;   needs it for multiple buffering,
  strB      r0,copy_one_screen_mov  ;   and for the copy on all screens.
;----------------------------------------------------------------------------
; Check if we have enough memory for all our precalcs tables. I use &8000
; as the base adress, because it' s extASM' s default value, but in fact
; I needed bss+end-proggy_start which is not valid in extASM.
  mov       r0,#-1                  ; Read current slot size,
  mov       r1,#-1                  ;   and don' t change next slot size.
  swi       Wimp_SlotSize           ; r0=current slot size.
  adr       r1,bss+end-&8000        ; r1=needed memory size.
  cmp       r0,r1                   ; Enough memory allocated to us?
  adrMI     r0,slotsize_error       ; No, then generate an error box with
  swiMI     OS_GenerateError        ;   the convenient message and quit.
;----------------------------------------------------------------------------
; Clear the bss section, since some routines need it. I assume that bss+end
; is longword aligned. (So you must take care when defining bss offsets)
; The area from bss+stack up to bss+end is cleared from its end downwards,
; r1 being both the byte counter and the offset of the longword to clear.
  adr       r0,bss+stack            ; Begin to clear here.
  mov       r1,#end-stack           ; Nb of bytes to clear.
  mov       r2,#0                   ; Value to fill with.
.clear_bss
  subS      r1,r1,#4                ; One long will be cleared.
  str       r2,[r0,r1]              ; Clear long at offset r1.
  bNE       clear_bss               ; Loop until offset 0 has been cleared.
;----------------------------------------------------------------------------
; Creates the sinus table. As for the inverses creation, the routine has
; already been released through "Memory War".
; First the sin and cos of the sin_nb+1 angles in [0;pi/4] are computed by
; recurrence, with 2^28 fixed point values...
;     cos(N+1)=cosN*cosA-sinN*sinA
;     sin(N+1)=sinN*cosA+cosN*sinA
; and saved with 2^shift fixed point values, sinN at position N and cosN at
; position N+2*sin_nb. (ie at sin(N+pi/2)) Then the rest of the table, which
; is 10*sin_nb longwords big, is completed using symmetries.
.make_sinus
  adr       r0,bss+sinus
  ldr       r1,sinA                 ; r1=sinA*2^28.
  ldr       r2,cosA                 ; r2=cosA*2^28.
  mov       r3,#0                   ; r3=sin0*2^28.
  mov       r4,#1<<28               ; r4=cos0*2^28.
  mov       r5,#sin_nb+1            ; Nb of angles to process.
.make_one_sinus
  mov       r6,r4,lsr #28-shift     ; r6=cosN*2^shift.
  str       r6,[r0,#sin_nb*2*4]     ; Save sin(N+pi/2)=cosN.
  mov       r6,r3,lsr #28-shift     ; r6=sinN*2^shift.
  str       r6,[r0],#4              ; Save sinN.
  umul64 r6,r7,r1,r3,r8,r9,r10      ; [r6|r7]=sinN*sinA.
  umul64 r8,r9,r2,r4,r10,r11,r12    ; [r8|r9]=cosN*cosA.
  sub64 r6,r7,r8,r9,r6,r7           ; [r6|r7]=cos(N+1)=cosN*cosA-sinN*sinA.
  umul64 r3,r8,r3,r2,r9,r10,r11     ; [r3|r8]=sinN*cosA.
  umul64 r4,r9,r4,r1,r10,r11,r12    ; [r4|r9]=cosN*sinA.
  add64 r3,r8,r3,r8,r4,r9           ; [r3|r8]=sin(N+1)=sinN*cosA+cosN*sinA.
  adjust64 r3,r3,r8,28              ; r3=sin(N+1)*2^28.
  adjust64 r4,r6,r7,28              ; r4=cos(N+1)*2^28.
  subS      r5,r5,#1                ; One sinus processed.
  bNE       make_one_sinus
; Here r0 points just after sin(pi/4). Below X goes from pi/4 down to 0,
; one angle per loop, and the pointers walk away from or towards the
; positions pi/4+k*(pi/2).
  sub       r0,r0,#4                ; Complete the table by stupid copy.
  mov       r1,r0                   ; Point on the position which are like
  add       r2,r0,#sin_nb*8         ;  (pi/4+k*(pi/2))   0<=k<=4
  mov       r3,r2
  add       r4,r2,#sin_nb*8
  mov       r5,r4
  add       r6,r4,#sin_nb*8
  mov       r7,r6
  add       r8,r6,#sin_nb*8
  mov       r9,r8
  mov       r10,#sin_nb+1           ; Copy sin_nb+1 values.
._make_sinus_copy
  ldr       r11,[r0],#-4            ; r11=sinX.
  str       r11,[r3],#4             ; sin(pi-X)=sinX.
  str       r11,[r8],#-4            ; sin(2*pi+X)=sinX.
  rsb       r11,r11,#0              ; r11=-sinX.
  str       r11,[r4],#-4            ; sin(pi+X)=-sinX.
  str       r11,[r7],#4             ; sin(2*pi-X)=-sinX.
  ldr       r11,[r2],#-4            ; r11=cosX, saved by the loop above.
  str       r11,[r1],#4             ; sin(pi/2-X)=cosX.
  subS      r10,r10,#1              ; One value copied.
  strNE     r11,[r9],#4             ; sin(2*pi+pi/2-X)=cosX. No copy if r10=0,
  rsb       r11,r11,#0              ;   it would be past the end of table.
  str       r11,[r5],#4             ; sin(pi+pi/2-X)=-cosX.
  str       r11,[r6],#-4            ; sin(pi+pi/2+X)=-cosX.
  bNE       _make_sinus_copy        ; Flags still come from the subS.
;----------------------------------------------------------------------------
; Switch to the good video mode, and some more stupid bits.
; The videoram_adress buffer is used both as the list of variables to read
; and as the buffer for the result, so once the swi is performed it holds
; the value of variable 148. (Presumably the screen start - check PRMs)
  swi       256+22                  ; Vdu 22, set screenmode.
  swi       256+13                  ; Switch to mode 13.
  swi       OS_RemoveCursors        ; Who needs them?
  adr       r0,videoram_adress      ; r0=list of variables to read.
  mov       r1,r0                   ; r1=buffer for the values.
  swi       OS_ReadVduVariables
;----------------------------------------------------------------------------
; Creates the background texture. The main idea is to make a N*N fractal
; landscape, take the modulo of it, smooth, emboss and re-smooth it. The
; emboss and smoothing routines where already released in "Graphics War".
; The landscape is built in the background buffer, and the second screen
; bank is used as a temporary N*N buffer for the smoothed texture.
; NOTE(review): r0 is expected to still point on the landscape when
; recursive_landscape returns - confirm against that routine.
.make_fracland
  adr       r0,bss+background       ; r0=adress of buffer for landscape.
  ldr       r1,random_germ          ; Load the random germ.
  mov       r2,#1                   ; This will be used by routine.
  strB      r2,[r0]                 ; Also save 1 as upper left pixel.
  mov       r3,#bg_M                ; Iteration=bg_M.
  mov       r4,#0                   ; Position of upper left pixel.
  mov       r5,#0
  bl        recursive_landscape
  ldr       r1,videoram_adress      ; Adress of a second buffer.
  add       r1,r1,#320*256          ;   (Second screen bank)
  bl        smooth_texture          ; Smooth texture from r0 into r1.
.emboss_texture
; Embosses the smoothed texture (at r1) into the background buffer (at r0).
; Each new pixel is the difference, modulo 32, of two vertically neighbouring
; source pixels, plus bg_middle, clamped to 0-31.
; NOTE(review): the lower source line was commented as (y+1), but r4 is
; derived from r3=(y-1) and so it is line y. Behaviour left untouched.
  mov       r2,#0                   ; r2=y counter<<(32-bg_M).
._emboss_one_line
  sub       r3,r2,#1<<(32-bg_M)     ; r3=(y-1) mod bg_N <<(32-bg_M). (Wrapping)
  add       r4,r3,#1<<(32-bg_M)     ; r4=r3+one line, ie y <<(32-bg_M).
  add       r3,r1,r3,lsr #(32-2*bg_M); r3 points on src_line up.
  add       r4,r1,r4,lsr #(32-2*bg_M); r4 points on src_line down.
  mov       r5,#bg_N                ; r5=nb of pixels per line.
._emboss_one
  ldrB      r14,[r3],#1             ; r14=pixie up.
  ldrB      r6,[r4],#1              ; r6=pixie down.
  and       r14,r14,#&1f            ; Take the modulo(32).
  and       r6,r6,#&1f
  sub       r6,r6,r14               ; r6=delta.
  addS      r6,r6,#bg_middle        ; Add the middle constant.
  movMI     r6,#0                   ; Make sure intensity is between 0-31.
  cmp       r6,#31
  movGE     r6,#31
  strB      r6,[r0],#1              ; Save it.
  subS      r5,r5,#1                ; One pixel done
  bNE       _emboss_one
  addS      r2,r2,#1<<(32-bg_M)     ; Line done, r2 wraps to 0 after bg_N lines.
  bNE       _emboss_one_line
  sub       r0,r0,#bg_N*bg_N        ; r0 points back on buffer.
  bl        smooth_texture          ; Smooth embossed texture into r1.
  adr       r2,bg_colors            ; Now convert intensities into colors.
; The bg_N*bg_N smoothed texture (at r1) is translated through bg_colors and
; drawn in the 320 bytes wide background (at r0). The first 320-bg_N pixels
; of each line are also drawn bg_N bytes further, so that the texture wraps
; and fills the whole line.
; bg_colors only contains 24 entries, whereas the emboss clamps intensities
; to 0-31. So the intensity is clamped to 23 here, in order to never index
; past the end of the table.
  mov       r3,#bg_N                ; y counter.
._bg_convert_line
  mov       r4,#bg_N                ; x counter, going from bg_N down to 1.
._bg_convert_one
  ldrB      r5,[r1],#1              ; Load intensity of pixel.
  cmp       r5,#23                  ; Beyond the last entry of bg_colors?
  movGT     r5,#23                  ; Then use the last entry.
  ldrB      r5,[r2,r5]              ; Load color.
  cmp       r4,#bg_N-(320-bg_N)     ; Among the 320-bg_N first pixels?
  strGTB    r5,[r0,#bg_N]           ; Then also copy pixel at r0+bg_N.
  strB      r5,[r0],#1              ; Draw pixel.
  subS      r4,r4,#1                ; One pixel done.
  bNE       _bg_convert_one
  add       r0,r0,#320-bg_N         ; Next line.
  subS      r3,r3,#1                ; One line done.
  bNE       _bg_convert_line
;----------------------------------------------------------------------------
; Copy the whole background on all screen banks.
; The immediate value of the mov at copy_one_screen_mov is 0 in the source,
; it is replaced by ScreenNb during the initialisations.
  adr       r0,bss+background       ; Set parameters for the CopyBigBox
  ldr       r1,videoram_adress      ;   routine, so that it copies the
  mov       r2,#0                   ;   whole screen. (r0=source, r1=dest,
  mov       r3,#0                   ;   r2-r5=xleft,yup,xright,ydown)
  mov       r4,#319
  mov       r5,#255
.copy_one_screen_mov
  mov       r6,#0                   ; r6=ScreenNb. (Self-modified)
.copy_one_screen
  bl        CopyBigBox256
  add       r1,r1,#320*256          ; Next screen bank.
  subS      r6,r6,#1                ; One screen copied.
  bNE       copy_one_screen
;----------------------------------------------------------------------------
; Randomly initialise the angles_inc of each letter. In fact, I also put
; values in the multiplicators coefs and sizes since they are just after,
; but it does not matters because those coefs will be calculated.
; Each increment is between 3 and 18, and stands in the lower bits of the
; longword holding the angle. Note that random32 reads the C flag, which
; is the one left by the last subS executed.
  adr       r0,bss+letters_coefs
  mov       r1,#nb_letters*6        ; 6 longs per letters.
  ldr       r2,random_germ          ; r2=current random number.
.init_one_angle
  and       r3,r2,#&f               ; This is the value for the increment.
  add       r3,r3,#3                ; Nicer if there' s no null movements.
  str       r3,[r0],#4              ; Angle=0, with its increment.
  random32  r2                      ; New random number.
  subS      r1,r1,#1                ; One long initialised.
  bNE       init_one_angle
;----------------------------------------------------------------------------
; Enables our Vertical Blanking (VBl) interrupt.
; vbl_routine will be called with r12 pointing on workscr_nb, which is
; immediately followed by displayscr_nb.
  mov       r0,#&10                 ; Claim event vector. (&10)
  adr       r1,vbl_routine          ; Adress of claiming routine.
  adr       r2,workscr_nb           ; Value to pass in r12 when rout is called.
  swi       OS_Claim
  mov       r0,#&e                  ; Enable an event.
  mov       r1,#4                   ; Event 4=VBl.
  swi       OS_Byte
;----------------------------------------------------------------------------
; This is the Crazy Letters part. For each frame...
;   * The box drawn two (or more) frames ago on the workscreen is restored
;     by copying the corresponding part of the background.
;   * Angles, coefs and sizes of all letters are updated.
;   * The new clear box is computed and saved for this workscreen.
;   * All the boxes of all the letters are drawn with FastBox256.
;   * We wait until another screen bank is free, and check for escape.
.one_frame
;  swi OS_WriteS
;  dcb 19,1,24,32,32,128,0,0
  bl        get_workscr_adr         ; r0=workscr_adr | r1=clsbox adress.
  ldmia     r1,{r2-r5}              ; Load xleft,yup,xright,ydown.
  sub       r2,r2,#1                ; Always better to take a bigger box.
  sub       r3,r3,#1
  mov       r1,r0                   ; And go for clearing. r1=destination,
  adr       r0,bss+background       ;   r0=source.
  bl        CopyBigBox256
;  swi OS_WriteS
;  dcb 19,1,24,128,32,32,0,0
; We calculate the sizes and multiplicator coefficients for all the letters,
; and also we compute the total width and max height.
; For each letter, letters_coefs contains 6 longwords which are angle_x,
; angle_y, coef_x, coef_y, size_y and size_x. Angles have their increment
; in their lower bits. (See the comment near the angle variable)
  adr       r0,bss+sinus
  adr       r1,letters_sizes        ; Contains original sizes.
  adr       r2,bss+letters_coefs    ; Contains angles, coefs and sizes.
  mov       r3,#nb_letters
  mov       r4,#0                   ; Will contain full x size.
  mov       r5,#0                   ; Will contain maximum y size.
.calculate_coefs_and_sizes
  ldmia     r2,{r6-r7}              ; Load angles and angles_inc.
  add       r6,r6,r6,lsl #32-sin_shift ; angle_x+=inc_angle_x.
  add       r7,r7,r7,lsl #32-sin_shift ; angle_y+=inc_angle_y.
  ldr       r8,[r0,r6,lsr #32-sin_shift-2] ; r8=sin(angle_x)<<shift.
  ldr       r9,[r0,r7,lsr #32-sin_shift-2] ; r9=sin(angle_y)<<shift.
  add       r8,r8,#7<<(shift-2)     ; r8=coef_x=(7+4*sin(angle_x))<<(shift-2).
  add       r9,r9,#7<<(shift-2)     ; r9=coef_y=(7+4*sin(angle_y))<<(shift-2).
  ldrB      r11,[r1],#1             ; r11=size_x.
  ldrB      r10,[r1],#1             ; r10=size_y.
  mul       r11,r8,r11              ; r11=(size_x*coef_x)<<(shift-2).
  mul       r10,r9,r10              ; r10=(size_y*coef_y)<<(shift-2).
  stmia     r2!,{r6-r11}            ; Save angles, coefs and sizes.
  add       r4,r4,r11,lsr #shift-3  ; Add new size_x to full x size.
  cmp       r5,r10,lsr #shift-3     ; Max_y smaller than new size_y?
  movMI     r5,r10,lsr #shift-3     ; Then size_y is new max.
  subS      r3,r3,#1                ; One letter processed.
  bNE       calculate_coefs_and_sizes
; Now we calculate the coords of cls box. Xleft=r6 will be the x position
; of the first letter we' ll draw.
  ldr       r1,angle                ; Load angle for whole logo position.
  add       r1,r1,r1,lsl #32-sin_shift ; angle+=inc_angle.
  str       r1,angle                ; Save modified angle.
  ldr       r1,[r0,r1,lsr #32-sin_shift-2] ; r1=sin(angle)<<shift.
  rsb       r6,r4,#320              ; r6=320-width.
  mov       r6,r6,asr #1            ; r6=(320-width)/2.
  add       r6,r6,r1,asr #shift-9   ; r6=512*sin(angle)+(320-width)/2=xleft.
  add       r8,r6,r4                ; r8=xleft+width=xright.
  rsb       r7,r5,#256              ; r7=256-height.
  mov       r7,r7,asr #1            ; r7=(256-height)/2=yup.
  add       r9,r7,r5                ; r9=yup+height=ydown.
  bl        get_workscr_adr         ; r0=workscr_adr | r1=clsbox adress.
  stmia     r1,{r6-r9}              ; Save clear box coords.
; Time has come to draw the letters one after another.
; r6 is the x position of the current letter, r0 is the workscreen adress.
  mov       r1,#nb_letters
  sub       r7,r2,#nb_letters*6*4   ; r7 points on letters_coefs.
  adr       r8,crazy_letters        ; r8 points on letters definition.
.draw_one_letter
  stmfd     r13!,{r1}               ; Save the nb of letters left.
  ldrB      r1,[r8],#1              ; Load the color of letter.
  add       r1,r1,r1,lsl #8         ; Copy the color in the
  add       r1,r1,r1,lsl #16        ;   4 bytes of r1.
  ldrB      r9,[r8],#1              ; r9=nb of boxes.
  add       r7,r7,#8                ; Don' t care about angles.
  ldmia     r7!,{r10-r12}           ; Load coef multiplicators and y_size.
  rsb       r12,r12,#256<<(shift-3) ; r12=256-y_size.
  mov       r12,r12,asr #shift-2    ; r12=(256-y_size)/2=yup.
.draw_one_box
  ldrB      r2,[r8],#1              ; r2=x1.
  mul       r2,r10,r2               ; r2=Coef*x1.
  add       r2,r6,r2,asr #shift-3   ; Recentering.
  ldrB      r3,[r8],#1              ; r3=y1.
  mul       r3,r11,r3               ; r3=Coef*y1.
  add       r3,r12,r3,asr #shift-3  ; Recentering.
  ldrB      r4,[r8],#1              ; r4=x2.
  mul       r4,r10,r4               ; r4=Coef*x2.
  add       r4,r6,r4,asr #shift-3   ; Recentering.
  ldrB      r5,[r8],#1              ; r5=y2.
  mul       r5,r11,r5               ; r5=Coef*y2.
  add       r5,r12,r5,asr #shift-3  ; Recentering.
  sub       r4,r4,#1                ; Nicer if we draw (x1;y1)->(x2-1;y2-1).
  sub       r5,r5,#1
  bl        FastBox256
  subS      r9,r9,#1                ; One box drawn.
  bNE       draw_one_box
  ldr       r9,[r7],#4              ; r9=size_x.
  add       r6,r6,r9,lsr #shift-3   ; And add it to x of current letter.
  ldmfd     r13!,{r1}               ; Load the nb of letters left.
  subS      r1,r1,#1                ; One letter drawn.
  bNE       draw_one_letter
;  swi OS_WriteS
;  dcb 19,1,24,1,1,1,0,0
  bl        vsync_routine           ; Wait until next workscr is ready.
  swi       OS_ReadEscapeState      ; Escape key pressed? (C set if so)
  bCC       one_frame               ; No, then loop.
;---------------------------------------------------------------------------
; Disables our Vertical Blanking interrupt and then quit.
; We come here because an escape condition was detected, so it is also
; acknowledged. Else it would still be pending for whoever called us.
  mov       r0,#&d                  ; Disable an event.
  mov       r1,#4                   ; Event 4=VBl.
  swi       OS_Byte
  mov       r0,#&10                 ; Release Event Vector. (&10)
  adr       r1,vbl_routine          ; Give same values as when claiming.
  adr       r2,workscr_nb
  swi       OS_Release
  mov       r0,#&7e                 ; Acknowledge the escape condition.
  swi       OS_Byte
  ldmia     r13!,{pc}               ; That' s all folks.


;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                            MAIN DATAS                            *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

; .............X...........X.............................................
; .............X...........X.............................................
; .XXX..XX...XXX..XX...XX.....XXX......XX...XX..X...X..XX..XXX...XX...XX.
; X....X..X.X..X.X..X.X..X...X........X..X.X..X.X...X.X..X.X..X.X..X.X..X
; X....X..X.X..X.XXXX.X.......XX......X....XXXX.X...X.XXXX.X..X.X..X.XXXX
; X....X..X.X..X.X....X.........X.....X....X.....X.X..X....X..X..XXX.X...
; .XXX..XX...XX...XX..X......XXX......X.....XX....X....XX..X..X....X..XX.
; ..............................................................XXX......
; Original sizes of the letters, one (size_x,size_y) couple per letter, in
; the same units as the boxes coords below.
.letters_sizes
  dcb       5,8 ,5,8 ,5,8 ,5,8 ,5,8, 2,8, 9,8   ; coder's
  dcb       5,8, 5,8, 6,8, 5,8, 5,8, 5,8, 4,8   ; revenge
; Definition of the letters. Each letter is made of the following bytes...
;   color, nb_boxes, then nb_boxes times x1,y1,x2,y2.
; Boxes are drawn from (x1;y1) to (x2-1;y2-1) once coords have been scaled.
.crazy_letters          ; Colors, nb_boxes and boxes coords for each letter.
  dcb       &da  ,3  ,1,2,4,3  ,0,3,1,6  ,1,6,4,7
  dcb       &07  ,4  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,3,4,6
  dcb       &f2  ,4  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,0,4,6
  dcb       &0f  ,5  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,3,4,5  ,1,4,3,5
  dcb       &f6  ,3  ,1,2,3,3  ,0,3,1,7  ,3,3,4,4
  dcb       &2b  ,1  ,0,0,1,2
  dcb       &fc  ,5  ,1,2,4,3  ,0,3,1,4  ,1,4,3,5  ,3,5,4,6  ,0,6,3,7
  dcb       &0b  ,3  ,1,2,3,3  ,0,3,1,7  ,3,3,4,4
  dcb       &d6  ,5  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,3,4,5  ,1,4,3,5
  dcb       &23  ,5  ,0,2,1,5  ,1,5,2,6  ,2,6,3,7  ,3,5,4,6  ,4,2,5,5
  dcb       &de  ,5  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,3,4,5  ,1,4,3,5
  dcb       &27  ,3  ,0,2,3,3  ,0,3,1,7  ,3,3,4,7
  dcb       &fa  ,5  ,1,2,3,3  ,0,3,1,5  ,1,5,3,6  ,3,3,4,7  ,0,7,3,8
  dcb       &2d  ,5  ,1,2,3,3  ,0,3,1,6  ,1,6,3,7  ,3,3,4,5  ,1,4,3,5
; Error blocks given to OS_GenerateError. Each one is made of a longword
; (here 0) followed by the zero terminated message.
.screenmem_error
  dcb       0,0,0,0,"I need at least 160Kb ScreenMem",0
.slotsize_error
  dcb       0,0,0,0,"I need more Memory",0
ALIGN

; Sinus and cosinus of the angle step, used to build the sinus table.
.sinA       dcd 1647089             ; sin((pi/4)/sin_nb)*2^28.
.cosA       dcd 268430403           ; cos((pi/4)/sin_nb)*2^28.

.random_germ                        ; The magical random number.
  dcd       &eb1a2c34

; List of variables for OS_ReadVduVariables. The result is written over
; the list, so the first longword then contains the videoram adress.
.videoram_adress                    ; Values for the swi.
  dcd       148                     ; Number of the variable to read.
  dcd       -1                      ; End of list.

; Colors for the background, indexed by the intensity of texture pixels.
; There are 24 entries in this table.
.bg_colors
  dcb &20,&2c,&21,&2d,&22,&2e,&23,&2f,&4c,&d0,&4d,&d1 ; Green+grey.
  dcb &4e,&d2,&4f,&d3,&f0,&fc,&f1,&fd,&f2,&fe,&f3,&ff

; This is the angle and angle_inc for the whole logo movement. Both these
; parameters are in the same longword, the angle is in the sin_shift upper
; bits, and the increment is in the lower bits, this allowing to have the
; modulo operation gratuitous. The same trick is used for letters angles.
.angle
  dcd       3                       ; Angle=0, increment=3.

; Numbers of the screen bank we draw on and of the displayed one. They go
; from 1 to ScreenNb, and vbl_routine loads both with a single ldmia.
.workscr_nb                         ; This two variables must be left together.
  dcd       2
.displayscr_nb
  dcd       1


;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                             ROUTINES                             *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

; ---------------------------------------------------------------------------
; ---              Routine for Vertical Blank interrupt.                  ---
; ---------------------------------------------------------------------------
;   We check if the next screen which will be displayed is entirely drawn
; (ie display_scr_nb-1<>workscr_nb), and in this case we use a swi to change
; the screen bank to display_scr_nb-1. When displayscr_nb-1 reachs 0 it is
; set back to ScreenNb by using self-modified code. (Op2 in vbl_screennb_mov
; was changed to ScreenNb)
;   At first I was accessing directly the MemC to change the display screen,
; but since this isn' t compatible coding, I had to spent some time with
; ArmOric and his PRMs in order to use a swi during this interrupt, and I
; recommend you get a look at PRMs.
;   When this routine is called we must have r12 pointing on a buffer which
; contains workscr_nb and displayscr_nb. r0 is the event number.
;   The event is always passed on to the other claimants, so r0-r2 are
; given back unchanged. (OS_Byte &71 gives a result in r1 and is documented
; as corrupting r2) Only r12 is modified.
.vbl_routine
  cmp       r0,#4                   ; Event=VBl?
  movNE     pc,r14                  ; No, then it' s none of our business.
  stmfd     r13!,{r0-r2,r14}        ; Save registers.
  ldmia     r12,{r0,r1}             ; r0=workscr_nb | r1=displayscr_nb.
  subS      r1,r1,#1                ; r1=displayscr_nb-1.
.vbl_screennb_mov
  movEQ     r1,#0                   ; If r1=0 then back to ScreenNb.
  cmp       r0,r1                   ; Flags=workscr_nb-(displayscr_nb-1).
  ldmEQfd   r13!,{r0-r2,pc}         ; If equal don' t show next screen.
  str       r1,[r12,#4]             ; Save new displayscr_nb.
  mov       r12,pc                  ; Keep current status/mode.
  orr       r0,r12,#3               ; Derive supervisor version of it.
  teqp      r0,#0                   ; Enter supervisor mode.
  mov       r0,r0                   ; Nop needed after the mode change.
  stmfd     r13!,{r14}              ; Save Supervisor R14
  mov       r0,#&71                 ; Next showscreen, r1=nb of screen bank.
  swi       OS_Byte
  ldmfd     r13!,{r14}              ; Restore Supervisor R14
  teqp      r12,#0                  ; Re-enter original processor mode.
  mov       r0,r0                   ; Nop needed after the mode change.
  ldmfd     r13!,{r0-r2,pc}         ; Could have been so short and so good.

; ---------------------------------------------------------------------------
; ---                Routine for Vertical Synchronisation.                ---
; ---------------------------------------------------------------------------
;   Must be called once the drawing on workscr_nb is over. The workscreen
; becomes workscr_nb-1, or ScreenNb if this reaches 0. (The immediate value
; in vsync_screennb_mov is replaced by ScreenNb during initialisations)
;   Then we loop as long as this new workscreen is the one being displayed,
; displayscr_nb being modified by the vbl_routine.
;   All registers are preserved.
.vsync_routine
  stmfd     r13!,{r0,r14}
  ldr       r14,workscr_nb          ; r14=workscr_nb.
  subS      r14,r14,#1              ; r14=workscr_nb-1.
.vsync_screennb_mov
  movEQ     r14,#0                  ; Back to ScreenNb if 0 was reached.
  str       r14,workscr_nb          ; This is the new workscr_nb.
._wait_vsync
  ldr       r0,displayscr_nb        ; r0=displayscr_nb.
  cmp       r14,r0                  ; New workscreen still displayed?
  bEQ       _wait_vsync             ; Then wait for the vbl_routine.
  ldmfd     r13!,{r0,pc}

; ---------------------------------------------------------------------------
; ---                  Routine to get WorkScreen adress.                  ---
; ---------------------------------------------------------------------------
; On exit we have...
;     r0 = adress of the workscreen.
;        = videoram_adress+(workscr_nb-1)*320*256.
;     r1 = adress of the clsbox of this workscreen.
;        = bss+clsboxes+(workscr_nb-1)*16.
; Each clsbox is 16 bytes long and contains xleft,yup,xright,ydown.
; Other registers and flags are preserved.
.get_workscr_adr
  stmfd     r13!,{r14}
  ldr       r14,workscr_nb
  adr       r1,bss+clsboxes
  sub       r14,r14,#1              ; Screen banks are numbered from 1.
  ldr       r0,videoram_adress
  add       r1,r1,r14,lsl #4        ; 16 bytes per clsbox.
  add       r14,r14,r14,lsl #2      ; r14=(workscr_nb-1)*5.
  add       r0,r0,r14,lsl #14       ; 5<<14=320*256 bytes per screen.
  ldmfd     r13!,{pc}

; ---------------------------------------------------------------------------
; ---                    Routine smoothing a texture                      ---
; ---                           Alain BROBECKER                          ---
; ---------------------------------------------------------------------------
; Every pixel of the result is the weighted mean of the 3*3 source pixels
; around it, the texture wrapping on all its sides...
;         ( 1 2 1 )   ( pix0 pix1 pix2 )
;  1/16 * ( 2 4 2 ) * ( pix3 pix4 pix5 ) = new pix.
;         ( 1 2 1 )   ( pix6 pix7 pix8 )
; Parameters are...
;     r0 = adress of initial N*N texture.
;     r1 = adress of N*N buffer for smoothed result.
; All registers are preserved.
; The x and y counters are kept in the M upper bits of their registers, so
; that the wrapping comes for free when adding or substracting one.
.smooth_texture
  stmfd     r13!,{r0-r9,r14}
  mov       r2,#0                   ; r2=y<<(32-M).
._smooth_line
  sub       r4,r2,#1<<(32-bg_M)     ; r4=((y-1) mod N)<<(32-M).
  add       r6,r2,#1<<(32-bg_M)     ; r6=((y+1) mod N)<<(32-M).
  add       r5,r0,r2,lsr #(32-2*bg_M) ; r5=adress of current source line.
  add       r4,r0,r4,lsr #(32-2*bg_M) ; r4=adress of upper source line.
  add       r6,r0,r6,lsr #(32-2*bg_M) ; r6=adress of lower source line.
  mov       r3,#0                   ; r3=x<<(32-M).
._smooth_one
  sub       r7,r3,#1<<(32-bg_M)     ; r7=((x-1) mod N)<<(32-M).
  add       r8,r3,#1<<(32-bg_M)     ; r8=((x+1) mod N)<<(32-M).
  ldrB      r9,[r4,r3,lsr #(32-bg_M)] ; First the four direct neighbours,
  ldrB      r14,[r6,r3,lsr #(32-bg_M)] ;   up and down,
  add       r9,r9,r14
  ldrB      r14,[r5,r7,lsr #(32-bg_M)] ;   left,
  add       r9,r9,r14
  ldrB      r14,[r5,r8,lsr #(32-bg_M)] ;   and right.
  add       r9,r9,r14
  ldrB      r14,[r5,r3,lsr #(32-bg_M)] ; Middle pixel counts twice as much.
  add       r9,r9,r14,lsl #1        ; r9=2*middle+direct neighbours.
  ldrB      r14,[r4,r7,lsr #(32-bg_M)] ; Double all this when adding the
  add       r9,r14,r9,lsl #1        ;   upper left pixel.
  ldrB      r14,[r4,r8,lsr #(32-bg_M)] ; Then add upper right,
  add       r9,r9,r14
  ldrB      r14,[r6,r7,lsr #(32-bg_M)] ;   lower left,
  add       r9,r9,r14
  ldrB      r14,[r6,r8,lsr #(32-bg_M)] ;   and lower right pixels.
  add       r9,r9,r14
  mov       r9,r9,lsr #4            ; Divide by the sum of coefficients.
  strB      r9,[r1],#1              ; Save smoothed pixel.
  addS      r3,r3,#1<<(32-bg_M)     ; Next pixel, x wraps to 0 at line end.
  bNE       _smooth_one
  addS      r2,r2,#1<<(32-bg_M)     ; Next line, y wraps to 0 at the end.
  bNE       _smooth_line
  ldmfd     r13!,{r0-r9,pc}

; ---------------------------------------------------------------------------
; ---                Routine creating a fractal landscape                 ---
; ---                           Alain BROBECKER                          ---
; ---------------------------------------------------------------------------
; Recursive landscape creation. Considering a point in the landscape and
; the iteration (=width of square) we construct the points m4-m8 and
; go on recursively on all resulting four squares.
;    m0--m4--m1         h4=0.5*(h0+h1)+rnd
;    |   |   |          h5=0.5*(h1+h2)+rnd
;    m7--m8--m5         h6=0.5*(h2+h3)+rnd
;    |   |   |          h7=0.5*(h3+h0)+rnd
;    m3--m6--m2         h8=0.25*(h0+h1+h2+h3)+rnd
; Parameters are...
;     r0=adress of buffer for landscape.
;     r1=random number.
;     r2=1.
;     r3=iteration.
;     r4=posx.
;     r5=posy.
.recursive_landscape
; Fills in the five new points m4-m8 of the square whose top-left corner is
; (r4;r5) and whose width is 2^r3, then recurses on the four subsquares.
; A height of 0 in the buffer means 'point not generated yet', so every
; generated height is clamped to 1..255.
; r3-r5 are preserved, r1 is left holding the last random number drawn and
; r6-r12,r14 are corrupted.
; NOTE(review): the adresses used to WRITE m5 and m6 are built from
; posx+2^iteration and posy+2^iteration without the mod(bg_N) which is
; applied to the corner reads below. On the right/bottom edges they seem to
; land on the next line / just past the N*N area, so confirm the buffer has
; room for this (or wrap them).
  stmfd     r13!,{r3-r5,r14}
; At first, we calculate h4,h5,h6,h7 and h8.
  add       r6,r4,r2,lsl r3
  and       r6,r6,#bg_N-1           ; r6=(posx+2^iteration) mod(bg_N).
  add       r7,r5,r2,lsl r3
  and       r7,r7,#bg_N-1           ; r7=(posy+2^iteration) mod(bg_N).
  add       r9,r4,r7,lsl #bg_M      ; r9 points on m3.
  add       r8,r6,r7,lsl #bg_M      ; r8 points on m2.
  add       r7,r6,r5,lsl #bg_M      ; r7 points on m1.
  add       r6,r4,r5,lsl #bg_M      ; r6 points on m0.
  ldrB      r6,[r0,r6]              ; r6=h0.
  ldrB      r7,[r0,r7]              ; r7=h1.
  ldrB      r8,[r0,r8]              ; r8=h2.
  ldrB      r9,[r0,r9]              ; r9=h3.
  sub       r10,r3,#1
  mov       r10,r2,lsl r10          ; r10=2^(iteration-1)=half width.
; Calculation of m8.
  add       r14,r6,r7
  add       r14,r14,r8
  add       r14,r14,r9              ; r14=h0+h1+h2+h3.
  mov       r14,r14,asr #2          ; r14=0.25*(h0+h1+h2+h3).
  random32  r1                      ; New random number.
  rsb       r11,r3,#fractal+1       ; r11=fractal+1-iteration=shift for rnd.
  addS      r14,r14,r1,asr r11      ; r14=0.25*(h0+h1+h2+h3)+rnd.
  movLE     r14,#1                  ; Clamp so that 1<=r14<=255.
  cmp       r14,#255
  movGE     r14,#255
  add       r12,r5,r10              ; Make r12 point on m8.
  add       r12,r4,r12,lsl #bg_M
  add       r12,r12,r10
  strB      r14,[r0,r12]            ; Save h8.
; Calculation of m6.
  add       r14,r8,r9               ; r14=h2+h3.
  mov       r14,r14,asr #1          ; r14=0.5*(h2+h3).
  random32  r1                      ; New random number.
  rsb       r11,r3,#fractal         ; r11=fractal-iteration=shift for rnd,
                                    ;   also used for m5, m4 and m7 below.
  addS      r14,r14,r1,asr r11      ; r14=0.5*(h2+h3)+rnd.
  movLE     r14,#1                  ; Clamp so that 1<=r14<=255.
  cmp       r14,#255
  movGE     r14,#255
  add       r12,r5,r2,lsl r3        ; Make r12 point on m6.
  add       r12,r4,r12,lsl #bg_M
  add       r12,r12,r10
  strB      r14,[r0,r12]            ; Save h6.
; Calculation of m5.
  add       r14,r7,r8               ; r14=h1+h2.
  mov       r14,r14,asr #1          ; r14=0.5*(h1+h2).
  random32  r1                      ; New random number.
  addS      r14,r14,r1,asr r11      ; r14=0.5*(h1+h2)+rnd.
  movLE     r14,#1                  ; Clamp so that 1<=r14<=255.
  cmp       r14,#255
  movGE     r14,#255
  add       r12,r4,r2,lsl r3        ; Make r12 point on m5.
  add       r12,r12,r5,lsl #bg_M
  add       r12,r12,r10,lsl #bg_M
  ldrB      r8,[r0,r12]             ; Load value at m5. (h2 no more needed)
  cmp       r8,#0                   ; Pixel already set?
  strEQB    r14,[r0,r12]            ; No, then save h5.
; Calculation of m4.
  add       r14,r6,r7               ; r14=h0+h1.
  mov       r14,r14,asr #1          ; r14=0.5*(h0+h1).
  random32  r1                      ; New random number.
  addS      r14,r14,r1,asr r11      ; r14=0.5*(h0+h1)+rnd.
  movLE     r14,#1                  ; Clamp so that 1<=r14<=255.
  cmp       r14,#255
  movGE     r14,#255
  add       r12,r4,r10              ; Make r12 point on m4.
  add       r12,r12,r5,lsl #bg_M
  ldrB      r8,[r0,r12]             ; Load value at m4.
  cmp       r8,#0                   ; Pixel already set?
  strEQB    r14,[r0,r12]            ; No, then save h4.
; Calculation of m7.
  add       r14,r6,r9               ; r14=h0+h3.
  mov       r14,r14,asr #1          ; r14=0.5*(h0+h3).
  random32  r1                      ; New random number.
  addS      r14,r14,r1,asr r11      ; r14=0.5*(h0+h3)+rnd.
  movLE     r14,#1                  ; Clamp so that 1<=r14<=255. (Was #0, which
                                    ;   is the 'not set yet' marker tested by
                                    ;   the 'cmp r8,#0' checks.)
  cmp       r14,#255
  movGE     r14,#255
  add       r12,r5,r10              ; Make r12 point on m7.
  add       r12,r4,r12,lsl #bg_M
  ldrB      r8,[r0,r12]             ; Load value at m7.
  cmp       r8,#0                   ; Pixel already set?
  strEQB    r14,[r0,r12]            ; No, then save h7.
; Second part, recursive call.
  subS      r3,r3,#1
  ldmEQfd   r13!,{r3-r5,pc}         ; Stop recursion when iter=0.
  bl        recursive_landscape     ; Else go on with four subsquares,
                                    ;   first one has start pos=m0.
  add       r4,r4,r2,lsl r3         ;   start pos=m4.
  bl        recursive_landscape
  add       r5,r5,r2,lsl r3         ;   start pos=m8.
  bl        recursive_landscape
  sub       r4,r4,r2,lsl r3         ;   start pos=m7.
  bl        recursive_landscape
  ldmfd     r13!,{r3-r5,pc}

; ---------------------------------------------------------------------------
; ---                Routine copying an aligned 8 bpp box                 ---
; ---                           Alain BROBECKER                   May 96 ---
; ---------------------------------------------------------------------------
; * This routine copies the box between (x1-x1mod4;y1) and (x2+3-x2mod4;y2)
; from the source to the destination (x is rounded to whole longwords, ie
; 4 pixels in 8 bpp). This routine is mostly aimed at screenparts clearing.
; * r13 is saved just after the generated code, so we can use it for the
; ldmia-stmia copy. But we have to generate the instructions which will
; restore it at the end of routine.
; * There are many other tricks (for the generation of 'bGE'...) but I won' t
; explain them since code is widely commented.
;
; Parameters are...
;     r0 = source adress.
;     r1 = destination adress.
;     r2 = x1.    1------+
;     r3 = y1.    |      |
;     r4 = x2.    |      |
;     r5 = y2.    +------2
.CopyBigBox256
; Copies a longword aligned box from r0 to r1, both being 320 bytes wide.
; The code copying one line is generated at _code+4 from the opcodes stored
; at the end of the routine, and is then executed dy+1 times.
; All registers are preserved.
  cmp       r2,#320                 ; At first check if the box is
  cmpLT     r3,#256                 ;   completly out of screen, and in
  movGE     pc,r14                  ;   such case we quit. (x1>=320 or y1>=256)
  cmp       r4,#0
  cmpGE     r5,#0
  movLT     pc,r14                  ; Same if x2<0 or y2<0.
  stmfd     r13!,{r0-r12,r14}       ; Be clean or die.
  cmp       r2,#0                   ; Perform the clipping.
  movLT     r2,#0
  cmp       r3,#0
  movLT     r3,#0
  cmp       r4,#320
  movGE     r4,#319
  cmp       r5,#256
  movGE     r5,#255
  mov       r2,r2,lsr #2            ; r2=x1>>2.
  rsbS      r4,r2,r4,lsr #2         ; r4=x2>>2-x1>>2=nb_longs-1.
  subGES    r14,r5,r3               ; r14=dy=y2-y1.
  ldmMIfd   r13!,{r0-r12,pc}        ; Quit if nb_longs-1<0 or dy<0.
  add       r3,r3,r3,lsl #2         ; r3=y1*5.
  add       r3,r2,r3,lsl #4         ; r3=y1*80+x1>>2.
  add       r0,r0,r3,lsl #2         ; r0=source+y1*320+4*(x1>>2).
  add       r1,r1,r3,lsl #2         ; r1=dest+y1*320+4*(x1>>2).
  add       r2,r4,#1                ; r2=nb_longs.
  rsb       r3,r2,#80               ; r3=nb longs to pass each line=offset/4.
  adr       r4,_code+4              ; Code will be generated here, just after
                                    ;   the 'subS' at _code.
  adr       r5,_opcodes
  ldmia     r5,{r6-r11}             ; Load some opcodes: r6-r7=ldmia+stmia max,
                                    ;   r8-r9=both add, r10='ldr r13,[pc,#0]'
                                    ;   and r11='ldmfd r13!,{r0-r12,pc}'.
._one_copy_max
  subS      r2,r2,#12               ; 12 longs or more left?
  stmGEia   r4!,{r6-r7}             ; Yes then save one ldmia+stmia max.
  bGT       _one_copy_max           ; r2>0? Then test again.
  bEQ       _generate_add           ; r2=0? Then no more copy.
; Here r2 is in [-11;-1] and 12+r2 longs are left.
  add       r5,r5,r2,lsl #3         ; r5 point on opcodes for last copy.
  ldmda     r5,{r6-r7}              ; Load them. (ldmda reads at r5-4 and r5)
  cmp       r2,#-11                 ; Last fill instruction is a str?
  addEQ     r6,r6,r3,lsl #2         ; Then r6='ldr r2,[r0],#offset+4',
  addEQ     r7,r7,r3,lsl #2         ;   and r7='str r2,[r1],#offset+4'.
  stmia     r4!,{r6-r7}             ; Save last fill instruction.
  bEQ       _end_generate_add       ; No need of an add if we have an str.
._generate_add
  cmp       r3,#0                   ; Offset is null?
  addNE     r8,r8,r3                ; No, then r8='add r0,r0,#(offset/4)<<2',
  addNE     r9,r9,r3                ;   r9='add r1,r1,#(offset/4)<<2',
  stmNEia   r4!,{r8-r9}             ;   and save instructions.
._end_generate_add
  adr       r9,_code-2*4            ; Beware the pipeline. (pc=branch adr+8)
  sub       r9,r9,r4                ; r9=offset for the bGE.
  mov       r9,r9,asr #2            ; r9=offset/4. (higher byte=&ff)
  eor       r9,r9,#&55<<24          ; r9=&AAxxxxxx='bGE offset'.
  stmia     r4!,{r9-r11,r13}        ; Save instructions and stack.
; The line loop starts here, generated code loops back on _code with its bGE
; until r14 gets negative, ie dy+1 lines are copied.
._code
  subS      r14,r14,#1              ; One line will be drawn.
  dbd       8*2+4                   ; Space for the code and stack.
                                    ;   (presumably counted in longwords)

; Table of ldr/str and ldmia/stmia pairs copying 1 to 11 longs. The pair for
; n longs is 8*(12-n)+4 bytes before _opcodes.
  ldr       r2,[r0],#4              ; Opcodes for last copy instruction.
  str       r2,[r1],#4
  ldmia     r0!,{r2-r3}
  stmia     r1!,{r2-r3}
  ldmia     r0!,{r2-r4}
  stmia     r1!,{r2-r4}
  ldmia     r0!,{r2-r5}
  stmia     r1!,{r2-r5}
  ldmia     r0!,{r2-r6}
  stmia     r1!,{r2-r6}
  ldmia     r0!,{r2-r7}
  stmia     r1!,{r2-r7}
  ldmia     r0!,{r2-r8}
  stmia     r1!,{r2-r8}
  ldmia     r0!,{r2-r9}
  stmia     r1!,{r2-r9}
  ldmia     r0!,{r2-r10}
  stmia     r1!,{r2-r10}
  ldmia     r0!,{r2-r11}
  stmia     r1!,{r2-r11}
  ldmia     r0!,{r2-r12}
  stmia     r1!,{r2-r12}
  stmia     r0!,{r1-r12}            ; Looks like a filler: the ldmda above
                                    ;   never reads the word at _opcodes-4.
._opcodes
  ldmia     r0!,{r2-r13}            ; Maximum copying instructions. (12 longs)
  stmia     r1!,{r2-r13}
  dcd       &e2800f00               ; Opcode of 'add r0,r0,#0<<2'.
  dcd       &e2811f00               ; Opcode of 'add r1,r1,#0<<2'.
  ldr       r13,[pc,#0]             ; Load stack which is 8 bytes after.
  ldmfd     r13!,{r0-r12,pc}        ; And quit.


; ---------------------------------------------------------------------------
; ---                     Routine drawing a 8 bpp box                     ---
; ---                           Alain BROBECKER                   May 96 ---
; ---------------------------------------------------------------------------
; * This routine draws the box between (x1;y1) and (x2;y2) on the mode13
; screen with the given filling pattern.
; * r13 is saved just after the generated code, so we can use it for the
; ldmia-stmia copy. But we have to generate the instructions which will
; restore it at the end of routine.
; * The last filling instruction (str, strB or add, to modify the address)
; is generated with the stmia used for the endcode generation.
; * Most times (>75%) we won' t need an add to modify offsets, so the str(B)
; case falls straight through and only the rarer add case takes a branch.
; * There are many other tricks (for the generation of 'bGE'..) but I won' t
; explain them since code is widely commented.
;
; Parameters are...
;     r0 = screen adress.
;     r1 = filling pattern.
;     r2 = x1.    1------+
;     r3 = y1.    |      |
;     r4 = x2.    |      |
;     r5 = y2.    +------2
.FastBox256
; Fills the box (x1;y1)-(x2;y2) of a 320 bytes wide screen with pattern r1.
; Boxes lying in one single longword are handled by the _small routines,
; else the code filling one line is generated at _code+4 and then executed
; dy+1 times with r1-r13 all containing the pattern.
; All registers are preserved.
  cmp       r2,#320                 ; At first check if the box is
  cmpLT     r3,#256                 ;   completly out of screen, and in
  movGE     pc,r14                  ;   such case we quit. (x1>=320 or y1>=256)
  cmp       r4,#0
  cmpGE     r5,#0
  movLT     pc,r14                  ; Same if x2<0 or y2<0.
  stmfd     r13!,{r0-r12,r14}       ; Be clean or die.
  cmp       r2,#0                   ; Perform the clipping.
  movLT     r2,#0
  cmp       r3,#0
  movLT     r3,#0
  cmp       r4,#320
  movGE     r4,#319
  cmp       r5,#256
  movGE     r5,#255
  subS      r14,r5,r3               ; r14=dy=y2-y1.
  subGES    r5,r4,r2                ; r5=dx=x2-x1.
  ldmMIfd   r13!,{r0-r12,pc}        ; Quit if dy<0 or dx<0.
  add       r3,r3,r3,lsl #2         ; r3=y1*5.
  add       r0,r0,r3,lsl #6         ; r0=screen+y1*320.
  add       r0,r0,r2                ; r0=screen+y1*320+x1.
  mov       r3,r2,lsr #2            ; r3=x1/4.
  rsbS      r3,r3,r4,lsr #2         ; r3=x2/4-x1/4=nb_longs.
  adr       r7,_small_adr           ; r7 points on adresses for small boxes.
  ldrEQ     pc,[r7,r5,lsl #2]       ; nb_longs=0, then execute small box rout.
                                    ;   (dx is in [0;3] and selects it)
  rsb       r5,r5,#319              ; r5=319-dx=nb of bytes to pass each line.
  adr       r6,_code+4              ; Generate code here.
  ldmdb     r7!,{r8-r11}            ; Load some opcodes: r8=strB, r9=stmia max,
                                    ;   r10='ldr r13,[pc,#0]' and r11=ldmfd.
                                    ;   r7 now points on _opcodes.
; Here we begin to care about first longword filling.
  andS      r2,r2,#%11              ; r2=x1 AND 3.
  bEQ       _first_long_full        ; If x1 AND 3=0, first long is full.
  sub       r3,r3,#1                ; Else first long mustn' t be drawn.
; We need 4-(x1 AND 3) strB for the first longword.
  tst       r2,#%01                 ; Down bit of x1 set?
  strNE     r8,[r6],#4              ; Then we have an odd nb of strB.
  tst       r2,r2,lsl #1            ; bit1 AND bit0 of x1 cleared?
  strEQ     r8,[r6],#4              ; Then x1 AND 3=1 or 2, so we must
  strEQ     r8,[r6],#4              ;   generate two strB more.
._first_long_full
  and       r4,r4,#%11              ; r4=x2 AND 3.
  and       r2,r4,r4,lsr #1         ; r2=bit1 AND bit0 of x2.
  addS      r3,r3,r2                ; If x2 AND 3=%11, last long is full.
  bEQ       _last_longword          ; If nb_longs=0 go to last long.
._one_stmia_max
  subS      r3,r3,#13               ; 13 longs or more left?
  strGE     r9,[r6],#4              ; Yes, then save one stmia max.
  bGT       _one_stmia_max          ; r3>0? Then test again.
  ldrMI     r9,[r7,r3,lsl #2]       ; If r3<0 then load opcode of last long
  strMI     r9,[r6],#4              ;   fill instruction and save it. (It
                                    ;   fills 13+r3 longs)
._last_longword
  teq       r4,#%11                 ; x2 AND 3=%11?
  bEQ       _last_long_full         ; Then last long is full.
; We need (x2 AND 3)+1 strB for the last longword.
  tst       r4,#%01                 ; Down bit clear?
  strEQ     r8,[r6],#4              ; Then we have an odd nb of strB.
  teq       r4,r4,lsl #1            ; bit1 EOR bit0<>0?
  strNE     r8,[r6],#4              ; Then x2 AND 3=1 or 2, then there
  strNE     r8,[r6],#4              ;   are two strB more.
._last_long_full
  ldr       r8,[r6,#-4]!            ; Load last saved instruction. (r6 points
                                    ;   back on it)
  tst       r8,#1<<26               ; Is it a str or strB?
  bEQ       _generate_add           ; No, then we' ll need an add.
  add       r8,r8,r5                ; Yes, then add 319-dx to its offset.
._generate_endcode
  adr       r9,_code-3*4            ; Beware the pipeline and last instruction.
  sub       r9,r9,r6                ; r9=offset for the bGE.
  mov       r9,r9,asr #2            ; r9=offset/4. (higher byte=&ff)
  eor       r9,r9,#&55<<24          ; r9=&AAxxxxxx='bGE offset'.
  stmia     r6!,{r8-r11,r13}        ; Save instructions and stack.
  mov       r2,r1                   ; Put pattern in other longwords.
  mov       r3,r1
  mov       r4,r1
  mov       r5,r1
  mov       r6,r1
  mov       r7,r1
  mov       r8,r1
  mov       r9,r1
  mov       r10,r1
  mov       r11,r1
  mov       r12,r1
  mov       r13,r1
; The line loop starts here, generated code loops back on _code with its bGE
; until r14 gets negative, ie dy+1 lines are drawn.
._code
  subS      r14,r14,#1              ; One line will be drawn.
  dbd       3+6+3+4                 ; Space for the code and stack.
                                    ;   (presumably counted in longwords)

; We come here when last filling instruction is a stmia, r5=319-dx.
._generate_add
  cmp       r5,#0                   ; Offset is null?
  bEQ       _generate_endcode       ; Then go on...
  add       r6,r6,#4                ; Don' t modify loaded instruction.
  mov       r8,#&e28<<20            ; r8=opcode of 'add r0,r0,#0'.
  cmp       r5,#255                 ; Offset bigger than 254?
  addGE     r2,r8,#255              ; Then generate an 'add r0,r0,#255'
  strGE     r2,[r6],#4
  subGE     r5,r5,#255              ;   and substract 255 to offset.
  add       r8,r8,r5                ; r8='add r0,r0,#offset'.
  b         _generate_endcode

; Table of instructions filling 1 to 12 longs, the one for n longs is
; 4*(13-n) bytes before _opcodes.
  str       r1,[r0],#4              ; Opcodes for lasts longs filling.
  stmia     r0!,{r1-r2}
  stmia     r0!,{r1-r3}
  stmia     r0!,{r1-r4}
  stmia     r0!,{r1-r5}
  stmia     r0!,{r1-r6}
  stmia     r0!,{r1-r7}
  stmia     r0!,{r1-r8}
  stmia     r0!,{r1-r9}
  stmia     r0!,{r1-r10}
  stmia     r0!,{r1-r11}
  stmia     r0!,{r1-r12}
; The four following opcodes must stay just before _small_adr, because they
; are loaded with a 'ldmdb' from this adress.
._opcodes
  strB      r1,[r0],#1              ; Byte filling instruction.
  stmia     r0!,{r1-r13}            ; Maximum filling instruction. (13 longs)
  ldr       r13,[pc,#0]             ; Load stack which is 8 bytes after.
  ldmfd     r13!,{r0-r12,pc}        ; And quit.
._small_adr
  dcd       _small1                 ; Adresses for the routines corresponding
  dcd       _small2                 ;   to x1-x2 being in same longword.
  dcd       _small3                 ;   (Indexed by dx)
  dcd       _small4

; Here are the routine for small boxes. Each one fills dx+1 bytes per line,
; goes to next line (320 bytes further) and loops dy+1 times.
._small1
  strB      r1,[r0],#320
  subS      r14,r14,#1
  bGE       _small1
  ldmfd     r13!,{r0-r12,pc}

._small2
  strB      r1,[r0],#1
  strB      r1,[r0],#319
  subS      r14,r14,#1
  bGE       _small2
  ldmfd     r13!,{r0-r12,pc}

._small3
  strB      r1,[r0],#1
  strB      r1,[r0],#1
  strB      r1,[r0],#318
  subS      r14,r14,#1
  bGE       _small3
  ldmfd     r13!,{r0-r12,pc}

; dx=3 in a single longword means x1 is longword aligned, so use a str.
._small4
  str       r1,[r0],#320
  subS      r14,r14,#1
  bGE       _small4
  ldmfd     r13!,{r0-r12,pc}

;----------------------->  THIS MUST BE AT VERY END  <-----------------------
.bss
