/*
Scaling, Rotation, Antialiasing prototyper.

This program was written by Lewis A. Sellers (Minimalist)
of The Minimalist Group, and the MOSOCI Grail Project in
1995-1996 Anno Dominus.

While it is the result of several days of work on my part,
fiddled with here and there over a few months as time permits,
you can use it and the code involved if you wish as long as
you include the standard greetings to me somewhere in your program,
say in the credits.

This program is the primary C/ASM version for DOS 16-bit.  There is
also a PASCAL version which has a little better comments in some
places. There are even newer functions here in the primary C version
however since I decided to reveal the code.

There is also a pure asm (TASM) version of this program which I am tweaking
on as I gain experience with the floating-point op-codes.

You must supply a BMP filename as an argument such as:
C:> ALIAS DEATH.BMP
The BMP it uses must be a 320x200 256 color grayscale image.

Most of this was originally written with borland turbo c++ 3.0 simply because
it was easy to do so. To get a speed increase I tried using Borland C++ 4.52
which I bought a while back. Most of the new Borland stuff is much more
complicated than needs be so I hadn't used it much, but I did get about an
extra two FPS from the recompile.

This program is designed for at least the 486 processors. Preferably pentiums.

Probably it'll end up as some kind of tutorial one day.

--MIN (rhymes with NIN)

  "the me that you know is now made up of wires
   and even when i'm right with you i'm so far away" --NIN
*/

//tell TASM we can use .486 (and FPU) op-codes
#pragma inline
asm .486
asm .487 //there is no 487 FPU but it makes TASM happy :)
#pragma option -wasm- //yes, i know there are op-codes the built-in assembler
                      //doesn't understand. shut up about it.

//this tells the compiler to use the FPU. If you are using a processor
//without a FPU then comment this out and recompile.
//#pragma OPTION -f287

//large memory model
//#pragma option -ml

//8k stack
extern unsigned _stklen = 8192U;

//.h prototypes
//unsigned long farcoreleft(void);

#include <malloc.h>
#include <alloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <time.h>
#include <dos.h>
#include <math.h>

#define FALSE (1==0)
#define TRUE  (1==1)

#define WIDTH      320
#define HEIGHT     200
#define SCREENSIZE WIDTH*HEIGHT

#define FAILURE 0
#define SUCCESS 1

#ifdef __cplusplus
    #define __CPPARGS ...
#else
    #define __CPPARGS
#endif

typedef unsigned char byte;
typedef unsigned int word;
typedef unsigned long dword;

//.h prototypes
//size_t stackavail(void);

//prototypes
void change_timer(void);
void restore_timer(void);
void mode_graphics(void);
void mode_text(void);
int LoadTestImage(char *name);
void setup_fps(void);
void print_fps(long fps);
void bilinear(void);
void trilinear(void);
void hyperlinear(void);
void copycomposite(void);
void clearcomposite(void);
int lines(void);

//TIMER
//interrupt vector number hooked by change_timer()
#define TIMERINTR 8
//0x1234DD = 1193181, the value the tick divisor is derived from
#define PIT_FREQ  0x1234DDL
//number of timer interrupts per second requested from the timer chip
#define frequency 100
//16-bit divisor written to port 0x40 by change_timer().
//NOTE: expands textually to PIT_FREQ/frequency (no parentheses).
#define counter   PIT_FREQ/frequency

long BIOS_ticks; //accumulates `counter` on every tick inside timerhandler()
int second_ticks; //ticks counted by timerhandler() since the last rollover
int second_flag=FALSE; //set TRUE by timerhandler(), polled and cleared by main()

//the handler that owned TIMERINTR before change_timer() replaced it;
//restore_timer() puts it back.
void interrupt far (*BIOStimerhandler)(__CPPARGS);

//Timer interrupt handler, installed on TIMERINTR by change_timer().
//
//It is entered `frequency` times per second and does two things:
// 1. counts ticks and raises second_flag once every `frequency` ticks so
//    that main() can print the frames-per-second figure;
// 2. accumulates the PIT divisor in BIOS_ticks and, every time 65536 PIT
//    clocks have elapsed (the stock 18.2Hz period), hands the interrupt on
//    to the original handler so the DOS time-of-day clock keeps running.
//
//The remainder above 0x10000 is carried over instead of being thrown away,
//otherwise the chained handler would run slightly too slowly.
void interrupt far timerhandler(__CPPARGS)
{
  BIOS_ticks+=counter;
  second_ticks++;

  if(second_ticks>=frequency) {
      second_flag=TRUE;
      second_ticks=0;
  }

  if(BIOS_ticks>=0x10000L) {
      BIOS_ticks-=0x10000L;
      //calling through an `interrupt` function pointer makes the compiler
      //emit the pushf + far call an interrupt handler expects. The original
      //handler acknowledges the interrupt controller itself, so no EOI here.
      BIOStimerhandler();
  }
  else {
      //not chaining this time: acknowledge the interrupt ourselves (EOI)
      outportb(0x20,0x20);
  }
}

//Hook the timer interrupt and reprogram the timer chip so that
//timerhandler() is entered `frequency` times per second.
//Undo with restore_timer().
void change_timer()
{
  //the 16-bit divisor that goes out to port 0x40, low byte first
  const word divisor=(word)(counter);

  //both tick counters start from zero
  second_ticks=0;
  BIOS_ticks=0L;

  //remember whoever owns the vector now, then take it over
  BIOStimerhandler=getvect(TIMERINTR);
  setvect(TIMERINTR,timerhandler);

  //control word first, then the divisor as low byte / high byte
  outportb(0x43,0x34);
  outportb(0x40,(byte)(divisor&0xFF));
  outportb(0x40,(byte)(divisor>>8));
}

//Undo change_timer(): put the timer chip back to its default rate and
//hand the timer interrupt vector back to the handler saved in
//BIOStimerhandler. Must only be called after change_timer().
void restore_timer()
{
  //restore 18.2 frequency of PIT 0
  //(same control word as change_timer(); a divisor of 0,0 is written)
  outportb(0x43,0x34);
  outportb(0x40,0); //divisor low byte
  outportb(0x40,0); //divisor high byte

  //restore the BIOS tick handler
  setvect(TIMERINTR,BIOStimerhandler);
}


//GRAPHICS
#define PI 3.14159265359

//char far *base_screen=(char far *)MK_FP(0xA000,0);
char far *texture; //source image, one byte per pixel; allocated in main(), filled by LoadTestImage()
int far *y320; //lookup table built in main(): y320[n]=n*WIDTH
char far *composite; //off-screen frame buffer; copycomposite() copies it to segment 0xA000

//Switch the display to video mode 13h through int 10h, then write 0xE3 to
//port 0x3C2, which (per the author's note) gives square pixels.
void mode_graphics(void)
{
    asm mov ax,0x13 //to mode 13h
    asm int 0x10
    outportb(0x3c2,0xe3); //put it in square mode
}


//Switch the display back to video mode 03h (text) through int 10h.
void mode_text(void)
{
    asm mov ax,0x03 //back to text
    asm int 0x10;
}


//load in a grayscale 320x200 BMP
int LoadTestImage(char *name)
{
    FILE *bmp;
    int n,x,y;
    word psi;
    word source=(HEIGHT-1)*WIDTH;
    word destination=0;
    byte far *palette;
    byte far *thrash;

    bmp=fopen(name,"rb");
    if(bmp==NULL) return FAILURE;
    fseek(bmp,54,0);

    //from b-g-r-unused dword format to proper RGB 3byte.
    palette=(byte *)farmalloc(1024L+16L);
    if(palette==NULL) {
        mode_text();
//        printf("Not enough memory (%lu) for palette.\n",farcoreleft());
        exit(FAILURE);
    }
    fread(palette,1024,1,bmp);
    outportb(0x3c6,0xff);
    outportb(0x3c8,0);
    for(psi=0,n=0;n<=255;n++,psi+=4)
    {
        outportb(0x3c9,palette[psi+2]>>2);
        outportb(0x3c9,palette[psi+1]>>2);
        outportb(0x3c9,palette[psi]>>2);
    }
    farfree(palette);

    // thrash the dumb MS format...
    thrash=(byte far *)farmalloc(320L*200L);
    if(thrash==NULL) {
        mode_text();
//        printf("Not enough memory (%lu) for temporary bitmap thrashing area.\n",farcoreleft());
        exit(FAILURE);
    }
    fread(thrash,320U*200U,1,bmp);
    for(y=0;y<HEIGHT;y++)
    {
        for(x=0;x<WIDTH;x++) {
            texture[destination+x]=thrash[source+x];
        }
        source-=WIDTH;
        destination+=WIDTH;
    }
    farfree(thrash);

    fclose(bmp);
    return SUCCESS;
}


//8x24 bitmap of the label "FPS", drawn by setup_fps().
//A '#' is a lit pixel, anything else is background. Each row is exactly
//24 characters, so the rows carry no terminating NUL.
byte fpsset[8][24]={
  " ###### ######   ####   ",
  " #      #     # #    #  ",
  " #      #     # #       ",
  " #####  ######   ####   ",
  " #      #            #  ",
  " #      #            #  ",
  " #      #       #    #  ",
  " #      #        ####   "
};

//8x8 bitmaps of the digits 0 to 9, indexed charset[digit][row][column].
//They are written here as '#' and ' ' for readability; setup_fps()
//rewrites them in place to 255 and 0, and print_fps() then copies the
//rows straight into the composite buffer. Each row is exactly 8
//characters, so the rows carry no terminating NUL.
byte charset[10][8][8]={
  //0
 {"  ##### ",
  " #     #",
  " #     #",
  " #     #",
  " #     #",
  " #     #",
  "  ##### ",
  "        "},
  //1
 {"     ## ",
  "      # ",
  "      # ",
  "      # ",
  "      # ",
  "      # ",
  "     ###",
  "        "},
  //2
 {"   #### ",
  "  #    #",
  "       #",
  "    ### ",
  "   #    ",
  "  #    #",
  "  ##### ",
  "        "},
  //3
 {"  ##### ",
  " #     #",
  "       #",
  "   #### ",
  "       #",
  " #     #",
  "  ##### ",
  "        "},
  //4
 {"    ##  ",
  "   # #  ",
  "  #  #  ",
  " ###### ",
  "     #  ",
  "     #  ",
  "    ### ",
  "        "},
  //5
 {" #######",
  " #      ",
  " #      ",
  " ###### ",
  "       #",
  " #     #",
  "  ##### ",
  "        "},
  //6
 {"  ##### ",
  " #      ",
  " #      ",
  " ###### ",
  " #     #",
  " #     #",
  "  ##### ",
  "        "},
  //7
 {"  ######",
  " #     #",
  "      # ",
  "     #  ",
  "    #   ",
  "    #   ",
  "    #   ",
  "        "},
  //8
 {"  ##### ",
  " #     #",
  " #     #",
  "  ##### ",
  " #     #",
  " #     #",
  "  ##### ",
  "        "},

  //9
 {"  ##### ",
  " #     #",
  " #     #",
  "  ######",
  "       #",
  "      # ",
  "  ####  ",
  "        "}
};



//setup the fps counter...
void setup_fps()
{
  int c,x,y;
  byte far *vid=((byte far *)composite)+(8*320);

  //change the 0 to 9 charset from spaces and pound-signs
  //into 0 and 255 (ie, black and white).
  for(c=0;c<=9;c++)
         for(y=0;y<=7;y++)
      for(x=0;x<=7;x++)
        if(charset[c][y][x]=='#')
          charset[c][y][x]=255;
        else
          charset[c][y][x]=0;

  //draw a "FPS" right below where the fps will be.
  //yes, it's slow C code, but here that's ok.
  for(y=0;y<=7;y++)
    for(x=0;x<=23;x++)
      if(fpsset[y][x]=='#')
        vid[y*320+x]=255;
      else
        vid[y*320+x]=0;
}


//Draw `fps` as decimal digits at the top left of the composite buffer,
//using the 8x8 font in `charset` (which setup_fps() must have converted
//to pixel values first), then blank whatever is left of a 6-digit field
//to its right.
//
//  fps  the number to show. The field is 6 digits wide, so the value is
//       clamped to 0..999999: a negative number would index before the
//       font table, and a 7th digit would make the cleanup width negative
//       and let rep stosw run far past the end of the field.
void print_fps(long fps)
{
  char s[12]; //room for any long, even though the clamp keeps it to 6 digits
  char *src;
  word x_offset=0;
  int c;

  if(fps<0L) fps=0L;
  if(fps>999999L) fps=999999L;

  sprintf(s,"%ld",fps);
  src=s;
  while((*src)!='\0') {
    c=(*src)-'0';
    asm {
      cld //movsw below must walk forwards

      les di,composite
      add di,x_offset

      //DS:SI = &charset[c][0][0]; each glyph is 8*8 bytes
      push ds
      mov ax,c
      mov si,offset charset
      mov dx,seg charset
      mov ds,dx
      mov dx,(8*8)
      mul dx
      add si,ax

      mov cx,8
    }
yloop:
         asm {
      //copy one 8-pixel glyph row, then step down one screen line
      push di
      movsw
      movsw
      movsw
      movsw
      pop di
      add di,320
      sub cx,1
      jnz yloop
      pop ds
    }
    x_offset+=8;
    src++;
  }

  //ok. we wrote out the digits. now cleanup anything left over from the
  //last time on the right. the clamp above guarantees at most 6 digits.
  asm {
      cld

      //bx = number of words to clear per row: (48 - x_offset) / 2
      mov bx,8*6
      sub bx,x_offset
      shr bx,1

      les di,composite
      add di,x_offset

      sub ax,ax
      mov cx,8
  }
cloop:
  asm {
      push cx
      push di
      mov cx,bx
      rep stosw
      pop di
      pop cx
      add di,WIDTH
      sub cx,1
      jnz cloop
  }
}


//this is the fixed-point version of the scaling/rotation routine.
//since we want maximum speed while still remaining in C parts of this
//may be hard to read compared to the general rotate/scale routine.
//It is similiar to the mathematical version except it uses 16.16
//fixed-point and computes an initial texture vector then does something
//similiar to a two-level line drawing routine, ok?
//frankly, it's still slow as hell when compared to the full asm version.
//hmm. Is it just C or Borland that is the problem? :)
void FastRotateScale(float scale, float angle)
{
    long sinas=sin(-angle)*65536L*scale;
    long cosas=cos(-angle)*65536L*scale;

    //x' = cos(-angle)+sin(-angle)
    //y' = cos(-angle)-sin(-angle)
    long xc=160L*65536L - (100L*(cosas+sinas));
    long yc=100L*65536L - (100L*(cosas-sinas));

    char far *screen=(char far *)composite;
    int x,y;

    register int tempx,tempy;

    for (y=0;y<HEIGHT;y++) //normally from 0 to 199
    {
        long xlong=xc,ylong=yc; //init x/ylong to topleft of square
        for (x=60;x<60+HEIGHT;x++) //normally from 0 to 319
        {
            tempx=(int)(xlong>>16);
            tempy=(int)(ylong>>16);

            if( (tempx<0) || //clip
                (tempx>=WIDTH) ||
                (tempy<0) ||
                (tempy>=HEIGHT) )
                screen[x]=0; //clip to black
            else
                screen[x]=texture[tempx+y320[tempy]]; //draw texel
            //this also happens to be horrible mangled by borland
            //and produces some very fat slow code.

            xlong+=cosas;ylong-=sinas;
        }
        screen+=WIDTH;
        xc+=sinas;yc+=cosas;
    }
}


//this is the revised floating-point version of the scaling/rotation routine.
//now that I finally have a cpu that actually supports FPU op-code
//(from a 486sx25 to a 486dx4-100) thought I'd try them out.
//note that unlike the pure mathematical version, we compute the vector
//of rotation just once at the beginning of the function.
void RotateScale(float scale, //the scaling factor
            float angle) //the rotation angle
{
    #define x_offset 60
    #define y_offset 0
    #define x_window 200
    #define y_window 200
    #define x_center WIDTH/2
    #define y_center HEIGHT/2

    float sinas=sin(-angle)*scale;
    float cosas=cos(-angle)*scale;

    //x' = cos(-angle)+sin(-angle)
    //y' = cos(-angle)-sin(-angle)
    float xc=x_center - ((x_window>>1)*(cosas+sinas));
    float yc=y_center - ((y_window>>1)*(cosas-sinas));

    float tx, ty;
    int x,y;
    //actually to composite buffer and not to screen...
    //but it can be changed easily.
    char far *screen=composite+x_offset+y_offset*WIDTH;
    for (y=0;y<y_window;y++)
    {
        tx=xc;
        ty=yc;
        for (x=0;x<x_window;x++)
        {
            if( (tx<0.0) ||
                (tx>=(float)WIDTH) ||
                (ty<0.0) ||
                (ty>=(float)HEIGHT) )
                screen[x]=0;
            else
                screen[x]=texture[(int)(tx)+y320[(int)(ty)]];

            tx+=cosas;
            ty-=sinas;
        }
        screen+=WIDTH;
        xc+=sinas;
        yc+=cosas;
    }
}



//This is the pure mathematical version of rotation and scaling.
//It is naturally slow since it has not be optimized to take advantage
//of the way computers currently operate. Nevertheless it can help
//some of you understand what is going on in the other routines.
//
//"How slow can we go?"
void MathematicalRotateScale(float scale, float angle)
{
    #define x_LEFT   0
    #define x_RIGHT  200
    #define y_TOP    0
    #define y_BOTTOM 200

    #define x_offset 60
    #define y_offset 0

    //precompute the cosine and sine values used to speed things up
    float cosas=cos(angle)*scale;
    float sinas=sin(angle)*scale;

    float xc, yc;
    float tx, ty;

    int x,y;

    char far *screen;

    for (y=y_TOP;y<y_BOTTOM;y++)
    {
        for (x=x_LEFT;x<x_RIGHT;x++)
        {
            //compute the center of the texture bitmap
            xc=(float)(x-100);
            yc=(float)(y-100);

            //compute the translation of x and y into texture map
            //x' = x*cos(angle) - y*sin(angle)
            //y' = x*sin(angle) + y*cos(angle)
            tx=( (xc * cosas) - (yc * sinas) ) + 160.0;
            ty=( (xc * sinas) + (yc * cosas) ) + 100.0;

            //compute the pixel on the composite screen buffer to draw to
            screen=composite+((x+x_offset)+(y+y_offset)*WIDTH);

            //clip to black any texels that fall off of our 320x200 map
            if( (tx<0.0) ||
                (tx>=(float)WIDTH) ||
                (ty<0.0) ||
                (ty>=(float)HEIGHT) )
                *screen=0; //clipped to black
            else
                *screen=texture[ (int)(tx) + ( (int)(ty)*320) ]; //texel
        }
    }
}


//Inline-assembly, 16.16 fixed-point version of the rotate/scale routine.
//It follows FastRotateScale() step for step: the FPU is used once to get
//sin(-angle)*scale and cos(-angle)*scale as fixed-point longs, after which
//everything is 32-bit integer adds.
//
//  scale  texture step per screen pixel
//  angle  rotation in radians
//
//Draws 200 pixels per row (WIDTH-x_offset*2) for HEIGHT rows, starting at
//column x_offset of the composite buffer. Texels outside 320x200 are black.
//
//Register use: ES:DI always points to the composite buffer, while FS:BX
//points to the texture. CX counts pixels left in the row, SI/DX hold the
//integer texel x/y.
//
//(Author's note: 16-bit code is awkward for this; even with the added
//overhead of the protection mechanisms it would be faster in protected mode.)
#pragma option -wasm-
long fixedconst=65536L; //multiplier that turns a float into 16.16 fixed point
void TASMFixedRotateScale(float scale, float angle)
{
    #define x_offset 60
    #define y_offset 0

    long sinas, cosas; //per-pixel texture step, 16.16
    long xc, yc;       //texture position at the start of the current row, 16.16
    long xlong, ylong; //texture position of the current pixel, 16.16
    int y;             //current row

    asm {
        //ok. these are precalculates requiring an FPU. An integrated 387 FPU
        //in fact. The results will be... unfortunate on anything earlier.

        // sinas=sin(-angle)*scale*65536;
        fld     dword ptr [angle] //st(0)=angle
        fchs //-angle
        fsin //st(0)=sin(-angle)
        fmul    dword ptr [scale] //st(0)*=scale
        fimul   dword ptr [fixedconst] //st(0)*=65536
        fistp   dword ptr [sinas] //sinas=(long)st(0); pop;

        // cosas=cos(-angle)*scale*65536;
        fld     dword ptr [angle]
        fchs
        fcos
        fmul    dword ptr [scale]
        fimul   dword ptr [fixedconst]
        fistp   dword ptr [cosas]

        // xc=160*65536 - (100*(cosas+sinas));
        mov ecx,160*65536
        mov eax,cosas
        mov edx,100
        add eax,sinas
        mul edx //only the low 32 bits (eax) are used
        sub ecx,eax
        mov [xc],ecx //opt me

        // yc=100*65536 - (100*(cosas-sinas));
        mov ecx,100*65536
        mov eax,cosas
        mov edx,100
        sub eax,sinas
        mul edx //only the low 32 bits (eax) are used
        sub ecx,eax
        mov [yc],ecx //opt me

        //INITs
        les     di, composite
        lfs     bx, texture
        add     di, x_offset

        // for (y=0;y<HEIGHT;y++) //normally from 0 to 199
        mov     word ptr [y],0
    }
yloop: asm {
        // xlong=xc,ylong=yc; //init x/ylong to topleft of square
        mov     eax,[xc]
        mov     edx,[yc]
        mov     [xlong],eax
        mov     [ylong],edx

//////////////////START OF X LOOP
        mov cx,WIDTH-x_offset*2 //width of 200 pixels
    }
xloop: asm {
        //reading the high word of a 16.16 value is the same as >>16
        mov     dx, word ptr [ylong+2] //tempy=ylong>>16
        mov     si, word ptr [xlong+2] //tempx=xlong>>16
        mov     ax,dx //pipeline for the src=x+y*320 below. good on pentium?

        //CLIPPING SECTION
        or      si,si //tempx<0?
        jl      short clip
        cmp     si,WIDTH //tempx>=320?
        jge     short clip
        or      dx,dx //tempy<0?
        jl      short clip
        cmp     dx,HEIGHT //tempy>=200?
        jl      short noclip
    }
clip: asm {
        mov     byte ptr es:[di],0 //outside the texture: black
        jmp     short doneclipping
    }
noclip: asm {
        //compute x,y position into texture: si = x + y*320,
        //with y*320 built as (y<<8)+(y<<6)
        //mov ax,dx //pipelined away
        shl     dx,8 //2c
        shl     ax,6 //2c
        add     si,dx //1c
        add     si,ax //1c

        mov     al,byte ptr fs:[si+bx] //1c+1c get texel
        mov     byte ptr es:[di],al //draw texel
    }
doneclipping: asm {
                  //xlong+=cosas; ylong-=sinas
        mov     edx, [sinas]
        mov     eax, [cosas]
        sub     [ylong],edx
        add     [xlong],eax

        inc     di //screen++;

        sub     cx,1 //reversed x++
        jnz     short xloop //loop x?
        ////////////////END OF X LOOP

        //xc+=sinas; yc+=cosas
        mov     eax,[sinas]
        mov     edx,[cosas]
        add     [xc],eax
        add     [yc],edx

        inc     word ptr [y] //y++

        add di,x_offset*2 //skip the 60+60 untouched columns to reach the next row

        //loop y?
        cmp     word ptr [y],HEIGHT
        jge     short endall
        jmp     yloop
    }
endall: asm {
    }
    return;
}


//Inline-assembly, floating-point version of the rotate/scale routine.
//Marked by the author as under construction.
//
//It follows MathematicalRotateScale(): for every destination pixel the
//full rotation formula is evaluated on the FPU, with xc and yc (the pixel
//position relative to the window center) kept on the FPU stack for the
//whole run: st(0)=xc, st(1)=yc between pixels.
//
//  scale  texture step per screen pixel
//  angle  rotation in radians
//
//Draws 200 pixels per row for rows y_TOP..y_BOTTOM-1, starting at column
//x_offset of the composite buffer. ES:DI points into the composite buffer,
//FS:BX at the texture, CX counts pixels left in the row.
//NOTE(review): tx/ty are produced with fistp, so they are rounded the way
//the FPU is currently set to round rather than truncated like the C
//versions -- confirm whether that difference is intended.
float widthconst=160.0;  //texture center x, added to the rotated x
float heightconst=100.0; //texture center y, added to the rotated y
void TASMFloatingRotateScale(float scale, float angle)
{
    #define x_LEFT   0
    #define x_RIGHT  200
    #define y_TOP    0
    #define y_BOTTOM 200

    #define x_offset 60
    #define y_offset 0

    float cosas; //cos(angle)*scale;
    float sinas; //sin(angle)*scale;

    int y; //current row

    int tx,ty; //integer texel coordinates stored by fistp
    int xc,yc; //initial integer values loaded onto the FPU stack

    asm {
        //ok. these are precalculates requiring an FPU. An integrated 387 FPU
        //in fact. The results will be... unfortunate on anything earlier.

        // sinas=sin(angle)*scale;
        fld  dword ptr [angle] //st(0)=angle
        fsin //st(0)=sin(angle)
        fmul dword ptr [scale] //st(0)*=scale
        fstp dword ptr [sinas] //sinas=st(0); pop;

        // cosas=cos(angle)*scale;
        fld        dword ptr [angle]
        fcos
        fmul dword ptr [scale]
        fstp dword ptr [cosas]
    }

    asm { //INITs
        les     di, composite
        add     di, x_offset
        lfs     bx, texture

        //
        mov     word ptr [yc],y_TOP-100 //yc=(float)(y-100);
        mov     word ptr [xc],x_LEFT-100 //xc=(float)(x-100);

        //center of the texture bitmap, y
        fild    word ptr [yc] //st(1)
        fild    word ptr [xc] //st(0)

        mov     word ptr [y],y_TOP //start of y loop
    }
yloop: asm {
        //start of a row: throw away the old xc (ffree+fincstp acts as a
        //pop) and reload the starting xc, leaving st(0)=xc, st(1)=yc
        ffree   st(0)
        fincstp
        fild    word ptr [xc] //st(0)

        mov     cx,WIDTH-x_offset*2 //start of x loop
    }
xloop: asm {
        //compute the translation of x and y into texture map
        //x' = x*cos(angle) - y*sin(angle)
        //y' = x*sin(angle) + y*cos(angle)

        //tx=( (xc * cosas) - (yc * sinas) ) + 160.0;
        fld   dword ptr [cosas]
        fmul  st,st(1) //xc*cosas

        fld   dword ptr [sinas]
        fmul  st,st(3) //yc*sinas

        fsubp st(1),st
        fadd  dword ptr [widthconst]
        fistp word ptr [tx]

        fwait
        mov   si,[tx] //pick up tx.

        //ty=( (xc * sinas) + (yc * cosas) ) + 100.0;
        fld   dword ptr [cosas]
        fmul  st,st(2) //yc*cosas

        fld   dword ptr [sinas]
        fmul  st,st(2) //xc*sinas

        faddp st(1),st
        fadd  dword ptr [heightconst]
        fistp word ptr [ty]

//CLIPPING SECTION
        or    si,si //tempx<0?
        jl    short clip

        fwait
        mov   dx,[ty] //get ty. hope it's ready by now....

        cmp   si,WIDTH //tempx>=320?
        mov   ax,dx //pipelined code snuck into if branch....
        jge   short clip

        or    dx,dx //tempy<0?
        jl    short clip

        cmp   dx,HEIGHT //tempy>=200?
        jl    short noclip
    }
clip: asm {
        mov   byte ptr es:[di],0 //outside the texture: black
        jmp   short doneclipping
    }
noclip: asm {
        //compute x,y position into texture: si = x + y*320,
        //with y*320 built as (y<<8)+(y<<6)
        //mov ax,dx //pipelined away
        shl   dx,8 //2c
        shl   ax,6 //2c
        add   si,dx //1c
        add   si,ax //1c

        mov   al,byte ptr fs:[si+bx] //1c+1c get texel
        mov   byte ptr es:[di],al //draw texel
    }
doneclipping: asm {
        inc   di //screen++

        fld1  //inc xc
        faddp st(1),st

        sub   cx,1
        jnz   short xloop

        fld1  //inc yc
        faddp st(2),st

        add   di,x_offset*2 //screen+=60*2

        inc   word ptr [y] //y++
        cmp   word ptr [y],y_BOTTOM //loop y?
        jge   short endall
        jmp   yloop
    }
endall: asm {
        //mark the two FPU registers that held xc and yc as empty
        ffree st(1)
        ffree st(0)
    }
    return;
}


//post bilinear antialiasing
//
//Smooths the composite buffer in place. For every pixel in the processed
//area the new value is
//    ( (left + right + up + down)/4 + center ) / 2
//The pass starts at row 1, column 61 and covers 198 pixels on each of 198
//rows, so it never reads outside the buffer. Because it works in place, the
//left and upper neighbours it reads have already been filtered.
//DS is pointed at the composite buffer for the duration and restored on exit.
void bilinear()
{
    asm {
         push ds
         lds di,composite
         add di,(WIDTH+1)+60 //first pixel: row 1, column 61
         mov cx, (HEIGHT-2) //rows to process
    }
yloop:
    asm {
         push cx
         push di
         mov cx, (WIDTH-2) - 60*2 //pixels per row
    }
xloop:
    asm {
        //bx = sum of the four direct neighbours
        sub ax,ax
        sub bx,bx
        mov al,[di-1]
        add bx,ax
        mov al,[di+1]
        add bx,ax
        mov al,[di-WIDTH]
        add bx,ax
        mov al,[di+WIDTH]
        add bx,ax

        shr bx,2 //average of the neighbours

        mov al,[di]
        add bx,ax
        shr bx,1 //average that with the pixel itself

        mov [di],bl
        inc di
        sub cx,1
        jnz xloop

        //back to the start of this row, then down one line
        pop di
        pop cx
        add di,WIDTH
        sub cx,1
        jnz yloop
        pop ds
    }
}


//post trilinear antialiasing
//
//Same in-place pass as bilinear(), but over all eight surrounding pixels:
//    ( (sum of the 8 neighbours)/8 + center ) / 2
//Covers the same area: 198 pixels on each of 198 rows starting at row 1,
//column 61. DS is pointed at the composite buffer and restored on exit.
void trilinear()
{
    asm {
        push ds
        lds di,composite
        add di,(WIDTH+1) + 60 //first pixel: row 1, column 61
        mov cx, (HEIGHT-2) //rows to process
    }
yloop:
    asm {
        push cx
        push di
        mov cx, (WIDTH-2) - 60*2 //pixels per row
    }
xloop:
    asm {
        //bx = left + right + up + down ...
        sub ax,ax
        sub bx,bx
        mov al,[di-1]
        add bx,ax
        mov al,[di+1]
        add bx,ax
        mov al,[di-WIDTH]
        add bx,ax
        mov al,[di+WIDTH]
        add bx,ax

        //... plus the four diagonal neighbours
        mov al,[di-(WIDTH+1)]
        add bx,ax
        mov al,[di-(WIDTH-1)]
        add bx,ax
        mov al,[di+(WIDTH+1)]
        add bx,ax
        mov al,[di+(WIDTH-1)]
        add bx,ax

        shr bx,3 //average of the eight neighbours

        mov al,[di]
        add bx,ax
        shr bx,1 //average that with the pixel itself

        mov [di],bl
        inc di
        sub cx,1
        jnz xloop

        //back to the start of this row, then down one line
        pop di
        pop cx
        add di,WIDTH
        sub cx,1
        jnz yloop
        pop ds
    }
}


//post hyperlinear? antialiasing. This is just an experiment.
//
//Computes the same value as trilinear(),
//    ( (sum of the 8 neighbours)/8 + center ) / 2
//but instead of storing it in the pixel itself it writes it to the four
//diagonal neighbours. Works in place on the composite buffer over the same
//area as the other filters (198 pixels on each of 198 rows starting at
//row 1, column 61). The mloop label is not jumped to anywhere in this
//function.
void hyperlinear()
{
mloop: asm {
        push ds
        lds di,composite
        add di,(WIDTH+1)+60 //first pixel: row 1, column 61
        mov cx,(HEIGHT-2) //rows to process
    }
yloop:
    asm {
        push cx
        push di
        mov cx, (WIDTH-2) - 60*2 //pixels per row
    }
xloop:
    asm {
        //dx = left + right + up + down ...
        sub ax,ax
        sub dx,dx
        mov al,[di-1]
        add dx,ax
        mov al,[di+1]
        add dx,ax
        mov al,[di-WIDTH]
        add dx,ax
        mov al,[di+WIDTH]
        add dx,ax

        //... plus the four diagonal neighbours
        mov al,[di-(WIDTH+1)]
        add dx,ax
        mov al,[di-(WIDTH-1)]
        add dx,ax
        mov al,[di+(WIDTH+1)]
        add dx,ax
        mov al,[di+(WIDTH-1)]
        add dx,ax

        shr dx,3 //average of the eight neighbours

        mov al,[di]
        add dx,ax
        shr dx,1 //average that with the pixel itself

        //smear the result over the four diagonal neighbours
        mov [di-(WIDTH+1)],dl
        mov [di+(WIDTH+1)],dl
        mov [di-(WIDTH-1)],dl
        mov [di+(WIDTH-1)],dl

        inc di
        sub cx,1
        jnz xloop

        //back to the start of this row, then down one line
        pop di
        pop cx

        add di,WIDTH
        sub cx,1
        jnz yloop
        pop ds
    }
}


//Copy the whole composite buffer to segment 0xA000, offset 0,
//as SCREENSIZE/2 16-bit words.
void copycomposite()
{
    asm {
        //ES:DI = A000:0000, the destination
        mov ax,0xa000
        mov es,ax
        sub di,di

        //DS:SI = composite, the source; DS is restored afterwards
        push ds
        lds si,composite
        mov cx,SCREENSIZE/2
        cld
        rep movsw
        pop ds
    }
}


//Fill the whole composite buffer with zero (SCREENSIZE/2 words of 0).
void clearcomposite()
{
    asm {
        les di,composite
        mov cx,SCREENSIZE/2
        sub ax,ax //value to store: 0
        cld
        rep stosw
    }
}


/*
void test()
{
  int m1=7;
  int m2=3;
  float m3=2.5;
  int r1=0;
  float r2=0.0;
  float r3=0.0;

  asm {
                fild word ptr m1
                fimul word ptr m2
                fist word ptr r1
                fld m3
                fmul
                fst dword ptr r2
                fstp qword ptr r3
  }

  printf("r1: %i\n",r1);
  printf("r2: %f\n",r2);
  printf("r3: %lf\n",r3);

  getch();
}
*/


//Program entry point.
//
//  argv[1]  name of a 320x200 256-color grayscale BMP to use as texture.
//
//Prints the banner and key help, allocates the lookup table, texture and
//composite buffers, loads the image, then runs the render loop until ESC:
//read a key, render one frame with the selected rotate/scale routine,
//apply the selected antialiasing pass, copy the composite buffer to the
//screen and, once per second, show the frame count.
//
//Returns FAILURE when no file name was given or the image could not be
//loaded, SUCCESS otherwise. Running out of memory exits with FAILURE.
int main(int argc,char *argv[])
{
    float angle=PI/256.0;
    float angle_v=-PI/128.0; //rotation added to angle every frame
    float scale=1.05;
    int n;
    int alias=0; //selected antialiasing pass, see the switch below
    int fpu=0;   //selected rotate/scale routine, see the switch below
    int key='~',key2=0; //so as not to trip up the arrow keys below
    long frames=0L;

    //inpho
    clrscr();
    printf("Scaling/Rotation/Antialiasing Prototyper by Minimalist 1995-1996.\n");
    printf("Last compiled %s %s. %d lines of C code.\n",__DATE__,__TIME__,lines());
    #ifdef __TURBOC__
        printf("Compiler: Borland C++ version %x.%x. ",
        (__TURBOC__)/256,(__TURBOC__%256));
    #endif
//         printf("Memory:%lu. Stack:%u. ",farcoreleft(),stackavail());
    if(_8087)
        printf("FPU:%d87.",_8087);
    else
        printf("No FPU.");
    printf("\n\n");

    //you did specify a filename didn't you?
    if(argc!=2)
    {
        printf("Use: ALIASC 'filename.bmp'\n");
        printf("Such as ALIASC MISSCATG.BMP\n");
        return(FAILURE);
    }

    //the propaganda
    printf("This program requires at least a 486 with an integrated 387 grade FPU.\n");
    printf("The BMP must be 320x200 256 grayscale.\n\n");

    printf("You may use any of the following keys:\n");
    printf(" ESC will exit the program.\n");

    //the X and A lines used to be swapped relative to what the keys really
    //select (see the key switch and the fps figures printed on exit).
    printf("Scaling and Rotation:\n");
    printf(" N   No floating-point, use fixed-point\n");
    printf(" F   Use floating-point\n");
    printf(" M   Use pure floating-point mathematical routine\n");
    printf(" X   Use TASM pipelined floating-point\n");
    printf(" A   Use TASM pipelined fixed-point\n");

    printf("Antialiasing:\n");
    printf(" 1   No antialiasing\n");
    printf(" 2   Post Bilinear antialiasing\n");
    printf(" 3   Post Trilinear antialiasing\n");
    printf(" 4   Post um.... post ghosting/antialiasing? :-)\n");
    printf("Use left/right arrows to change rotation.\n");
    printf("Zoom with the - + [ and ] keys. Use BACKSPACE to stop.\n\n");

//    test();

    //wait for windows to quit thrashing the hard drive...
    printf("Press any key to start...");
    while (!kbhit()); getch();

    //initialize our table of n*320
    y320=(int far *)farmalloc(2L*200L);
    if(y320==NULL) {
        mode_text();
//        printf("Not enough memory (%lu) for y320 table.\n",farcoreleft());
        exit(FAILURE);
    }
    for(n=0;n<HEIGHT;n++) y320[n]=n*WIDTH;

    //load our texture map
    texture=(char far *)farmalloc(320L*200L);
    if(texture==NULL) {
        mode_text();
//        printf("Not enough memory (%lu) for texture map.\n",farcoreleft());
        exit(FAILURE);
    }

    //switch to graphic mode 320x200 256 colors
    mode_graphics();
    if(LoadTestImage(argv[1])==FAILURE)
    {
        mode_text();
        printf("The file %s does not exist.\n",argv[1]);
        //give back what was allocated so far
        farfree(texture);
        farfree(y320);
        return(FAILURE);
    }

    //create a composite buffer (off-screen double buffer)
    composite=(char far *)farmalloc(320L*200L);
    if(composite==NULL) {
        mode_text();
//        printf("Not enough memory (%lu) for composite buffer.\n",farcoreleft());
        exit(FAILURE);
    }

    //set things up
    clearcomposite();
    setup_fps();
    change_timer();

    //the main loop
    while (key!=27) {
        if(kbhit()) {
            key=getch();
            switch (key) {
                //internal testing frame for yazzie :)
                case 'y':
                case 'Y':
                    angle=PI/2.0;
                    angle_v=0.0;
                    scale=1.0;
                    break;
                //and susy honey
                case 'h':
                case 'H':
                    angle+=PI/4.0;
                    angle_v=0.0;
                    break;
                //and sissy
                case 's':
                case 'S':
                    angle-=PI/4.0;
                    angle_v=0.0;
                    break;
                //and prissy
                case 'z':
                case 'Z':
                    scale=0.5;
                    angle=PI/2.0;
                    angle_v=-0.1;
                    break;

                //rotate/scale routine; the numbers match the switch below
                case 'n':
                case 'N': fpu=0; break;
                case 'f':
                case 'F': fpu=1; break;
                case 'm':
                case 'M': fpu=2; break;
                case 'a':
                case 'A': fpu=3; break;
                case 'x':
                case 'X': fpu=4; break;

                case '1': alias=1; break;
                case '2': alias=2; break;
                case '3': alias=3; break;
                case '4': alias=4; break;

                case '-': scale-=0.05; break;
                case '+': //the help text advertises '+'; accept it as well as '='
                case '=': scale+=0.05; break;
                case '[': scale-=0.5; break;
                case ']': scale+=0.5; break;

                //BACKSPACE: the help text promises a "stop" key. Zooming is
                //not continuous, so stopping the rotation is the assumed
                //meaning -- the same thing the h and s keys do.
                case 8: angle_v=0.0; break;

                case 0: //extended key: the real code follows
                    key2=getch();
                    switch (key2) {
                        case 75: angle_v+=PI/128.0; break; //left arrow
                        case 77: angle_v-=PI/128.0; break; //right arrow
                    }
                    break;
            }
        }

        switch (fpu) {
            case 0: FastRotateScale(scale,angle); break;
            case 1: RotateScale(scale,angle); break;
            case 2: MathematicalRotateScale(scale,angle); break;
            case 3: TASMFixedRotateScale(scale,angle); break;
            case 4: TASMFloatingRotateScale(scale,angle); break;
        }

        switch (alias) {
          //case 1: do nothing....
            case 2: bilinear(); break;
            case 3: trilinear(); break;
            case 4: hyperlinear(); break;
        }

        copycomposite(); //show us the composite buffer!

        angle+=angle_v;
        if(angle>2*PI) angle-=2*PI; //bound our angle...
        if(angle<-2*PI) angle+=2*PI;

        //do the FPS calcing
        frames++;
        if(second_flag) { //been a second yet?
            second_flag=FALSE;
            print_fps(frames); //yes, so see how many frames we've done in that time.
            frames=0L; //and the frames
        }
    }

    //restore things to normal
    restore_timer();
    farfree(composite);
    farfree(texture);
    farfree(y320);

    //parting words....
    mode_text();
    printf("By Minimalist (Lewis A. Sellers) 1995-96. Part of the C/Pascal/Asm package.\n");
    printf("To contact, email: lsellers@1stresource.com (shortly to be lsellers@usit.net).\n");
    printf("or drop by http://www.dwc.edu/grail, site of Grail Operating System Project.\n\n");

    printf("#coders home page: http://www.realtime.net/~dlinvill/coders/index.html\n\n");

    printf("Psst. Testing out the original code or modification you did to it? Then press\n");
    printf("Z to set a standard scale and rotation factor. On a 486dx4 100mhz 256kb cache\n");
    printf("60ns DRAM, and an awful PCI Trio32 S3 graphics card, compiled with ");
    #ifdef __TURBOC__
        printf("Borland\nC++ version %x.%x.",
        (__TURBOC__)/256,(__TURBOC__%256));
    #endif
    printf("\n");

    printf("              M=~4.5fps F=~6fps X=~12fps N=~22fps A=~34fps\n\n");

    printf("         'It was men like me that built the bomb.' --paraphrased, T2\n\n");

    return(SUCCESS);
}


//cute trick of mine for single file programs:
//__LINE__ is the number of the source line it sits on, so the value
//returned is the line number of this function plus one. main() prints it
//as the line count of the program, which holds as long as this function
//stays on the last line of code in the file.
int lines() { return __LINE__+1; }
