Blaster Master + other fixes
[fceu.git] / fce.c
diff --git a/fce.c b/fce.c
index 653a3f6..3cb0632 100644 (file)
--- a/fce.c
+++ b/fce.c
@@ -1,7 +1,7 @@
 /* FCE Ultra - NES/Famicom Emulator
  *
  * Copyright notice for this file:
- *  Copyright (C) 1998 BERO 
+ *  Copyright (C) 1998 BERO
  *  Copyright (C) 2002 Ben Parnell
  *
  * This program is free software; you can redistribute it and/or modify
 #include       "input.h"
 #include       "file.h"
 #include       "crc32.h"
+#include        "ppu.h"
+
+#include        "movie.h"
 
 #define Pal     (PALRAM)
 
-static void FetchSpriteData(void);
-static void FASTAPASS(1) RefreshLine(uint8 *target);
+
+static void (*RefreshLine)(uint8 *P, uint32 vofs) = NULL;
 static void PRefreshLine(void);
-static void FASTAPASS(1) RefreshSprite(uint8 *target);
+
 static void ResetPPU(void);
 static void PowerPPU(void);
 
@@ -62,7 +65,7 @@ int MMC5Hack;
 uint32 MMC5HackVROMMask;
 uint8 *MMC5HackExNTARAMPtr;
 uint8 *MMC5HackVROMPTR;
-uint8 MMC5HackCHRMode=0; 
+uint8 MMC5HackCHRMode=0;
 uint8 MMC5HackSPMode;
 uint8 MMC5HackSPScroll;
 uint8 MMC5HackSPPage;
@@ -81,7 +84,7 @@ uint8 PPUCHRRAM;
 static uint8 deemp=0;
 static int deempcnt[8];
 
-static int tosprite=256;
+int tosprite=256;
 
 FCEUGI FCEUGameInfo;
 void (*GameInterface)(int h);
@@ -97,6 +100,71 @@ static readfunc *AReadG;
 static writefunc *BWriteG;
 static int RWWrap=0;
 
+#ifdef ASM_6502
+static void asmcpu_update(int32 cycles)
+{
+ // Advance the cycle-driven counters (fhcnt, PCMIRQCount) by 'cycles'.
+ //timestamp += ((cycles >> 4) * 43) >> 7; // disabled timestamp update, approximating /= 48
+
+ // some code from x6502.c
+ fhcnt-=cycles;
+ if(fhcnt<=0) // countdown expired: run the frame sound update and re-arm by fhinc
+ {
+  FrameSoundUpdate();
+  fhcnt+=fhinc;
+ }
+
+ if(PCMIRQCount>0) // only count down while a PCM IRQ countdown is pending
+ {
+  PCMIRQCount-=cycles;
+  if(PCMIRQCount<=0)
+  {
+   vdis=1;
+   if((PSG[0x10]&0x80) && !(PSG[0x10]&0x40)) // IRQ is raised only with bit 7 set and bit 6 clear
+   {
+    extern uint8 SIRQStat;
+    SIRQStat|=0x80; // record the IRQ in SIRQStat, then signal the CPU core
+    X6502_IRQBegin(FCEU_IQDPCM);
+   }
+  }
+ }
+}
+
+void asmcpu_unpack(void) // copy the CPU state held in X into nes_registers[]
+{
+       nes_registers[0] = (uint32)X.A << 24; // A in the top byte; cast so the shift is done unsigned
+       nes_registers[1] = X.X;
+       nes_registers[2] = X.Y;
+       pc_base = 0; // reset the base before X6502_Rebase_a() is called on the raw PC
+       nes_registers[3] = X.PC;
+       X6502_Rebase_a();
+       nes_registers[4] = (uint32)X.S << 24; // S in the top byte (unsigned shift, see above)
+       nes_registers[4]|= X.IRQlow << 8;
+       nes_registers[7] = (uint32)X.count << 16;
+
+       // NVUB DIZC: 0x5d keeps V, B, D, I, C in the low byte of nes_registers[4]
+       nes_registers[4]|= X.P & 0x5d;
+       nes_registers[5] = (uint32)X.P << 24; // N ends up in bit 31; cast avoids shifting into int's sign bit
+       if (!(X.P&0x02)) nes_registers[5] |= 1; // Z is stored inverted: low byte non-zero means Z clear
+}
+
+void asmcpu_pack(void) // counterpart of asmcpu_unpack(): copy nes_registers[] back into X
+{
+       X.A = nes_registers[0] >> 24; // A is kept in the top byte
+       X.X = nes_registers[1];
+       X.Y = nes_registers[2];
+       X.PC= nes_registers[3] - pc_base; // subtract pc_base to recover the PC value
+       X.S = nes_registers[4] >> 24; // S is kept in the top byte
+       X.IRQlow = nes_registers[4] >> 8;
+       X.count = (int32) nes_registers[7] >> 16; // cast applies before the shift, so a signed value is shifted
+
+       // NVUB DIZC: 0x5d keeps V, B, D, I, C; N and Z are rebuilt from nes_registers[5] below
+       X.P = nes_registers[4] & 0x5d;
+       if (  nes_registers[5]&0x80000000)  X.P |= 0x80; // N
+       if (!(nes_registers[5]&0x000000ff)) X.P |= 0x02; // Z
+}
+#endif
+
 DECLFW(BNull)
 {
 
@@ -198,28 +266,29 @@ uint8 XOffset=0;
 
 uint32 TempAddr,RefreshAddr;
 
-static int maxsprites=8;
 
 /* scanline is equal to the current visible scanline we're on. */
 
 int scanline;
-static uint32 scanlines_per_frame;
+
+uint8 GameMemBlock[131072] __attribute__ ((aligned (4)));
+uint8 NTARAM[0x800] __attribute__ ((aligned (4)));
+uint8 PALRAM[0x20] __attribute__ ((aligned (4)));
+#if !defined(ASM_6502) || defined(DEBUG_ASM_6502)
+uint8 RAM[0x800] __attribute__ ((aligned (4)));
+#endif
 
 uint8 PPU[4];
 uint8 PPUSPL;
 
-uint8 GameMemBlock[131072];
-uint8 NTARAM[0x800],PALRAM[0x20],SPRAM[0x100],SPRBUF[0x100];
-uint8 RAM[0x800];
-
 uint8 PAL=0;
 
-#define MMC5SPRVRAMADR(V)      &MMC5SPRVPage[(V)>>10][(V)]
+
 #define MMC5BGVRAMADR(V)      &MMC5BGVPage[(V)>>10][(V)]
 #define        VRAMADR(V)      &VPage[(V)>>10][(V)]
+
 static DECLFW(BRAML)
-{  
+{
         RAM[A]=V;
 }
 
@@ -238,7 +307,7 @@ static DECLFR(ARAMH)
         return RAM[A&0x7FF];
 }
 
-           
+
 static DECLFR(A2002)
 {
                         uint8 ret;
@@ -260,11 +329,11 @@ static DECLFR(A2007)
 
                         PPUGenLatch=ret=VRAMBuffer;
                        if(PPU_hook) PPU_hook(tmp);
-                        if(tmp<0x2000) 
+                        if(tmp<0x2000)
                        {
                         VRAMBuffer=VPage[tmp>>10][tmp];
                        }
-                        else 
+                        else
                        {
                         VRAMBuffer=vnapage[(tmp>>10)&0x3][tmp&0x3FF];
                        }
@@ -320,6 +389,7 @@ static DECLFW(B2004)
                 PPUSPL++;
                }
                PPU[3]++;
+
 }
 
 static DECLFW(B2005)
@@ -365,7 +435,7 @@ static DECLFW(B2006)
 }
 
 static DECLFW(B2007)
-{  
+{
                        uint32 tmp=RefreshAddr&0x3FFF;
 
                         PPUGenLatch=V;
@@ -383,7 +453,7 @@ static DECLFW(B2007)
                             VPage[tmp>>10][tmp]=V;
                         }
                         else
-                       {                                               
+                       {
                          if(PPUNTARAM&(1<<((tmp&0xF00)>>10)))
                           vnapage[((tmp&0xF00)>>10)][tmp&0x3FF]=V;
                         }
@@ -393,7 +463,7 @@ static DECLFW(B2007)
 }
 
 static DECLFW(B4014)
-{                        
+{
        uint32 t=V<<8;
        int x;
        for(x=0;x<256;x++)
@@ -401,20 +471,33 @@ static DECLFW(B4014)
        X6502_AddCycles(512);
 }
 
-static void FASTAPASS(1) BGRender(uint8 *target)
+void BGRender(uint8 *target) // draw one background line at target through the RefreshLine pointer
 {
-       uint32 tem;
-        RefreshLine(target);
+       uint32 tem, vofs;
+       vofs=((PPU[0]&0x10)<<8) | ((RefreshAddr>>12)&7); // 0x1000 if PPU[0] bit 4 is set, plus RefreshAddr bits 12-14
+
+        Pal[0]|=64; // bit 6 is set on these four entries only while the line is drawn
+        Pal[4]|=64;
+        Pal[8]|=64;
+        Pal[0xC]|=64;
+        RefreshLine(target-XOffset, vofs); // RefreshLine starts out NULL; SetRefreshLine() must have run first
+        Pal[0]&=63; // &=63 clears bits 6 and 7 again
+        Pal[4]&=63;
+        Pal[8]&=63;
+        Pal[0xC]&=63;
+
         if(!(PPU[1]&2))
         {
-         tem=Pal[0]|(Pal[0]<<8)|(Pal[0]<<16)|(Pal[0]<<24);
-         tem|=0x40404040;
+         tem=Pal[0]|0x40; // PPU[1] bit 1 clear: the first 8 bytes of target get Pal[0]|0x40
+        tem|=tem<<8;
+        tem|=tem<<16; // tem now holds that byte replicated four times
          *(uint32 *)target=*(uint32 *)(target+4)=tem;
         }
 }
 
 #ifdef FRAMESKIP
-static int FSkip=0;
+int FSkip_setting=-1; // auto
+int FSkip=0;
 void FCEUI_FrameSkip(int x)
 {
  FSkip=x;
@@ -426,7 +509,7 @@ static void Loop6502(void)
 {
        uint32 tem;
        int x;
-        uint8 *target=XBuf+(scanline<<8)+(scanline<<4)+8;
+        uint8 *target=XBuf+scanline*320+32;
 
         if(ScreenON || SpriteON)
         {
@@ -464,20 +547,21 @@ static void Loop6502(void)
         {
         #endif
          if(PPU[1]&0x01)
-         { 
+         {
           for(x=63;x>=0;x--)
-           *(uint32 *)&target[x<<2]=(*(uint32*)&target[x<<2])&0xF0F0F0F0;
+           ((uint32 *)target)[x]=((uint32*)target)[x]&0xF0F0F0F0;
          }
           if((PPU[1]>>5)==0x7)
            for(x=63;x>=0;x--)
-            *(uint32 *)&target[x<<2]=((*(uint32*)&target[x<<2])&0x3f3f3f3f)|0x40404040;
+            ((uint32 *)target)[x]=(((uint32*)target)[x]&0x3f3f3f3f)|0x40404040;
           else if(PPU[1]&0xE0)
            for(x=63;x>=0;x--)
-            *(uint32 *)&target[x<<2]=(*(uint32*)&target[x<<2])|0xC0C0C0C0;
+            ((uint32 *)target)[x]=((uint32*)target)[x]|0xC0C0C0C0;
           else
             for(x=63;x>=0;x--)
-             *(uint32 *)&target[x<<2]=(*(uint32*)&target[x<<2])&0x3f3f3f3f;
-
+             ((uint32 *)target)[x]=((uint32*)target)[x]&0x3f3f3f3f;
+         FCEU_dwmemset(target-  8,0x3f3f3f3f,8);
+         FCEU_dwmemset(target+256,0x3f3f3f3f,8);
         #ifdef FRAMESKIP
         }
         #endif
@@ -486,7 +570,7 @@ static void Loop6502(void)
        {
         tem=Pal[0]|(Pal[0]<<8)|(Pal[0]<<16)|(Pal[0]<<24);
         FCEU_dwmemset(target,tem,256);
-       }               
+       }
         if(InputScanlineHook)
          InputScanlineHook(target, scanline);
 }
@@ -519,47 +603,28 @@ static void PRefreshLine(void)
         }
 }
 
-/*              Total of 33 tiles(32 + 1 extra) */
-static void FASTAPASS(1) RefreshLine(uint8 *target)
+/* This high-level graphics MMC5 emulation code was written
+   for MMC5 carts in "CL" mode.  It's probably not totally
+   correct for carts in "SL" mode.
+   */
+static void RefreshLine_MMC5Hack1(uint8 *P, uint32 vofs)
 {
-       uint32 vofs;
-        int X1;
-        register uint8 *P=target; 
-
-       vofs=0;
-       
-        Pal[0]|=64;
-        Pal[4]|=64;
-        Pal[8]|=64;
-        Pal[0xC]|=64;
-
-       vofs=((PPU[0]&0x10)<<8) | ((RefreshAddr>>12)&7);
-        P-=XOffset;
-
-       /* This high-level graphics MMC5 emulation code was written
-          for MMC5 carts in "CL" mode.  It's probably not totally
-          correct for carts in "SL" mode.
-       */
-        if(MMC5Hack && geniestage!=1)
-        {
-        if(MMC5HackCHRMode==0 && (MMC5HackSPMode&0x80))
-        {
-         int8 tochange;
+         int8 tochange, X1;
 
           tochange=MMC5HackSPMode&0x1F;
 
           for(X1=33;X1;X1--,P+=8)
           {
                 uint8 *C;
-                register uint8 cc,zz,zz2;
+                uint8 cc,zz,zz2;
                 uint32 vadr;
 
-                if((tochange<=0 && MMC5HackSPMode&0x40) || 
+                if((tochange<=0 && MMC5HackSPMode&0x40) ||
                   (tochange>0 && !(MMC5HackSPMode&0x40)))
                 {
                  uint8 xs,ys;
 
-                 xs=33-X1; 
+                 xs=33-X1;
                  ys=((scanline>>3)+MMC5HackSPScroll)&0x1F;
                  if(ys>=0x1E) ys-=0x1E;
                  vadr=(MMC5HackExNTARAMPtr[xs|(ys<<5)]<<4)+(vofs&7);
@@ -587,18 +652,19 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                  RefreshAddr++;
                 tochange--;
           }
-        }
-        else if(MMC5HackCHRMode==1 && (MMC5HackSPMode&0x80))
-        {
-          int8 tochange;
+}
+
+static void RefreshLine_MMC5Hack2(uint8 *P, uint32 vofs)
+{
+          int8 tochange, X1;
 
           tochange=MMC5HackSPMode&0x1F;
 
           for(X1=33;X1;X1--,P+=8)
           {
                 uint8 *C;
-                register uint8 cc;
-                register uint8 zz2;
+                uint8 cc;
+                uint8 zz2;
                 uint32 vadr;
 
                 if((tochange<=0 && MMC5HackSPMode&0x40) ||
@@ -606,7 +672,7 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                 {
                  uint8 xs,ys;
 
-                 xs=33-X1; 
+                 xs=33-X1;
                  ys=((scanline>>3)+MMC5HackSPScroll)&0x1F;
                  if(ys>=0x1E) ys-=0x1E;
                  vadr=(MMC5HackExNTARAMPtr[xs|(ys<<5)]<<4)+(vofs&7);
@@ -634,21 +700,23 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                  RefreshAddr++;
                tochange--;
           }
-        }
+}
+
+static void RefreshLine_MMC5Hack3(uint8 *P, uint32 vofs)
+{
+          int8 X1;
 
-         else if(MMC5HackCHRMode==1)
-         {
           for(X1=33;X1;X1--,P+=8)
           {
-                uint8 *C;                                   
-                register uint8 cc;
-                register uint8 zz2;
-                uint32 vadr;  
+                uint8 *C;
+                uint8 cc;
+                uint8 zz2;
+                uint32 vadr;
 
                 C=MMC5HackVROMPTR;
                 zz2=(RefreshAddr>>10)&3;
                 vadr = (vnapage[zz2][RefreshAddr & 0x3ff] << 4) + vofs;
-                C += (((MMC5HackExNTARAMPtr[RefreshAddr & 0x3ff]) & 0x3f & 
+                C += (((MMC5HackExNTARAMPtr[RefreshAddr & 0x3ff]) & 0x3f &
                        MMC5HackVROMMask) << 12) + (vadr & 0xfff);
                 vadr = (MMC5HackExNTARAMPtr[RefreshAddr & 0x3ff] & 0xC0)>> 4;
                 cc = vadr;
@@ -659,13 +727,16 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                 else
                  RefreshAddr++;
           }
-         }
-         else
-         {
+}
+
+static void RefreshLine_MMC5Hack4(uint8 *P, uint32 vofs)
+{
+          int8 X1;
+
           for(X1=33;X1;X1--,P+=8)
           {
                 uint8 *C;
-                register uint8 cc,zz,zz2;
+                uint8 cc,zz,zz2;
                 uint32 vadr;
 
                 zz=RefreshAddr&0x1F;
@@ -676,22 +747,23 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                 cc=((cc >> ((zz&2) + ((RefreshAddr&0x40)>>4))) &3) <<2;
 
                #include "fceline.h"
-                
+
                if((RefreshAddr&0x1f)==0x1f)
                  RefreshAddr^=0x41F;
                 else
                  RefreshAddr++;
-          }          
-         }
-        }       // End if(MMC5Hack) 
+          }
+}
+
+static void RefreshLine_PPU_hook(uint8 *P, uint32 vofs)
+{
+         int8 X1;
 
-        else if(PPU_hook)
-        {
          for(X1=33;X1;X1--,P+=8)
          {
-                uint8 *C;                                   
-                register uint8 cc,zz,zz2;
-                uint32 vadr;  
+                uint8 *C;
+                uint8 cc,zz,zz2;
+                uint32 vadr;
 
                 zz=RefreshAddr&0x1F;
                 zz2=(RefreshAddr>>10)&3;
@@ -710,39 +782,79 @@ static void FASTAPASS(1) RefreshLine(uint8 *target)
                 else
                  RefreshAddr++;
          }
-        }
-        else
-        {      
-         for(X1=33;X1;X1--,P+=8)
+}
+
+static void RefreshLine_normal(uint8 *P, uint32 vofs) // vofs is 0x1007 max as computed in BGRender (0x1000 | 7)
+{
+         int8 X1;
+        uint32 rfraddr = RefreshAddr; // work on a local copy; written back to RefreshAddr at the end
+        uint8 *page = vnapage[(rfraddr>>10)&3];
+         uint32 cc2=0; // four consecutive bytes from offset 0x3c0+ of the page, loaded as one word
+
+        if ((rfraddr&0xc)!=0) // starting mid-group: load the word now, the loop reloads only when (rfraddr&0xc)==0
+         cc2=*(uint32 *) (page + ((rfraddr&0x380)>>4) + ((rfraddr&0x10)>>2) + 0x3c0);
+
+         for (X1=33;X1;X1--,P+=8)
          {
-                uint8 *C;
-                register uint8 cc,zz,zz2;
+                uint8 cc,*C;
                 uint32 vadr;
 
-                zz=RefreshAddr&0x1F;
-               zz2=(RefreshAddr>>10)&3;
-                vadr=(vnapage[zz2][RefreshAddr&0x3ff]<<4)+vofs;
+                vadr=(page[rfraddr&0x3ff]<<4)+vofs; // byte read from the page, times 16, plus vofs
                 C = VRAMADR(vadr);
-               cc=vnapage[zz2][0x3c0+(zz>>2)+((RefreshAddr&0x380)>>4)];
-               cc=((cc >> ((zz&2) + ((RefreshAddr&0x40)>>4))) &3) <<2;
-               #include "fceline.h"
-                
-                if((RefreshAddr&0x1f)==0x1f)
-                 RefreshAddr^=0x41F;
-                else
-                 RefreshAddr++;
-         }
-        }
+               if ((rfraddr&0xc)==0) // entering a new group of four bytes: reload the word
+                cc2=*(uint32 *) (page + ((rfraddr&0x380)>>4) + ((rfraddr&0x10)>>2) + 0x3c0);
+               cc=((cc2 >> ((rfraddr&2) + ((rfraddr&0x40)>>4) + ((rfraddr&0xc)<<1))) & 3) << 2; // NOTE(review): (rfraddr&0xc)<<1 selects a byte of the 32-bit load, which looks little-endian dependent - confirm
 
-        #undef vofs
+               #include "fceline.h"
 
-        Pal[0]&=63;
-        Pal[4]&=63;
-        Pal[8]&=63;
-        Pal[0xC]&=63;
+                if((rfraddr&0x1f)==0x1f) { // low five bits at their maximum: flip with 0x41F and re-select the page
+                 rfraddr^=0x41F;
+                page = vnapage[(rfraddr>>10)&3];
+                } else
+                 rfraddr++;
+         }
+        RefreshAddr = rfraddr; // publish the advanced address
 }
 
-static INLINE void Fixit2(void)
+static void SetRefreshLine(void) // choose which line renderer the RefreshLine pointer refers to
+{
+        if(MMC5Hack && geniestage!=1) // MMC5 variants are chosen from MMC5HackCHRMode and bit 7 of MMC5HackSPMode
+        {
+        if(MMC5HackCHRMode==0 && (MMC5HackSPMode&0x80))
+        {
+                if (RefreshLine != RefreshLine_MMC5Hack1) printf("set refr RefreshLine_MMC5Hack1\n"); // printed only when the selection changes
+                RefreshLine = RefreshLine_MMC5Hack1;
+        }
+        else if(MMC5HackCHRMode==1 && (MMC5HackSPMode&0x80))
+        {
+               if (RefreshLine != RefreshLine_MMC5Hack2) printf("set refr RefreshLine_MMC5Hack2\n");
+                RefreshLine = RefreshLine_MMC5Hack2;
+        }
+         else if(MMC5HackCHRMode==1)
+         {
+               if (RefreshLine != RefreshLine_MMC5Hack3) printf("set refr RefreshLine_MMC5Hack3\n");
+                RefreshLine = RefreshLine_MMC5Hack3;
+         }
+         else
+         {
+               if (RefreshLine != RefreshLine_MMC5Hack4) printf("set refr RefreshLine_MMC5Hack4\n");
+                RefreshLine = RefreshLine_MMC5Hack4;
+         }
+        }       // End if(MMC5Hack)
+        else if(PPU_hook) // a PPU_hook callback is installed: use the renderer written for that case
+        {
+               if (RefreshLine != RefreshLine_PPU_hook) printf("set refr RefreshLine_PPU_hook\n");
+               RefreshLine = RefreshLine_PPU_hook;
+        }
+        else // neither MMC5Hack nor PPU_hook applies
+        {
+               if (RefreshLine != RefreshLine_normal) printf("set refr RefreshLine_normal\n");
+               RefreshLine = RefreshLine_normal;
+        }
+}
+
+//static INLINE
+void Fixit2(void)
 {
    if(ScreenON || SpriteON)
    {
@@ -754,7 +866,8 @@ static INLINE void Fixit2(void)
    }
 }
 
-static INLINE void Fixit1(void)
+//static INLINE
+void Fixit1(void)
 {
    if(ScreenON || SpriteON)
    {
@@ -782,7 +895,10 @@ static INLINE void Fixit1(void)
    }
 }
 
+//#define NEW_TRY
+
 /*      This is called at the beginning of all h-blanks on visible lines. */
+#ifndef NEW_TRY
 static void DoHBlank(void)
 {
  if(ScreenON || SpriteON)
@@ -804,380 +920,12 @@ static void DoHBlank(void)
  //PPU_hook(0,-1);
  //fprintf(stderr,"%3d: $%04x\n",scanline,RefreshAddr);
 }
+#endif
 
-#define        V_FLIP  0x80
-#define        H_FLIP  0x40
-#define        SP_BACK 0x20
-
-typedef struct {
-        uint8 y,no,atr,x;
-} SPR;
-
-typedef struct {
-       uint8 ca[2],atr,x;
-} SPRB;
-
-uint8 sprlinebuf[256+8];        
-
-void FCEUI_DisableSpriteLimitation(int a)
-{
- maxsprites=a?64:8;
-}
-
-static uint8 nosprites,SpriteBlurp;
-
-static void FetchSpriteData(void)
-{
-       SPR *spr;
-       uint8 H;
-       int n,vofs;
-
-       spr=(SPR *)SPRAM;
-       H=8;
-
-       nosprites=SpriteBlurp=0;
-
-        vofs=(unsigned int)(PPU[0]&0x8&(((PPU[0]&0x20)^0x20)>>2))<<9;
-       H+=(PPU[0]&0x20)>>2;
-
-        if(!PPU_hook)
-         for(n=63;n>=0;n--,spr++)
-         {
-                if((unsigned int)(scanline-spr->y)>=H) continue;
-
-                if(nosprites<maxsprites)
-                {
-                 if(n==63) SpriteBlurp=1;
-
-                {
-                 SPRB dst;
-                 uint8 *C;
-                  int t;
-                  unsigned int vadr;
-
-                  t = (int)scanline-(spr->y);
-
-                  if (Sprite16)
-                   vadr = ((spr->no&1)<<12) + ((spr->no&0xFE)<<4);
-                  else
-                   vadr = (spr->no<<4)+vofs;
-
-                  if (spr->atr&V_FLIP)
-                  {
-                        vadr+=7;
-                        vadr-=t;
-                        vadr+=(PPU[0]&0x20)>>1;
-                        vadr-=t&8;
-                  }
-                  else
-                  {
-                        vadr+=t;
-                        vadr+=t&8;
-                  }
-
-                 /* Fix this geniestage hack */
-                 if(MMC5Hack && geniestage!=1) C = MMC5SPRVRAMADR(vadr);
-                  else C = VRAMADR(vadr);
-
-                 
-                 dst.ca[0]=C[0];
-                 dst.ca[1]=C[8];
-                 dst.x=spr->x;
-                 dst.atr=spr->atr;
-
-
-                 *(uint32 *)&SPRBUF[nosprites<<2]=*(uint32 *)&dst;
-                }
-
-                 nosprites++;
-                }
-                else
-                {
-                  PPU_status|=0x20;
-                  break;
-                }
-         }
-       else
-         for(n=63;n>=0;n--,spr++)
-         {
-                if((unsigned int)(scanline-spr->y)>=H) continue;
-
-                if(nosprites<maxsprites)
-                {
-                 if(n==63) SpriteBlurp=1;
-
-                 {
-                  SPRB dst;
-                  uint8 *C;
-                  int t;
-                  unsigned int vadr;
-
-                  t = (int)scanline-(spr->y);
-
-                  if (Sprite16)
-                   vadr = ((spr->no&1)<<12) + ((spr->no&0xFE)<<4);
-                  else
-                   vadr = (spr->no<<4)+vofs;
-
-                  if (spr->atr&V_FLIP)
-                  {
-                        vadr+=7;
-                        vadr-=t;
-                        vadr+=(PPU[0]&0x20)>>1;
-                        vadr-=t&8;
-                  }
-                  else
-                  {
-                        vadr+=t;
-                        vadr+=t&8;
-                  }
-
-                  if(MMC5Hack) C = MMC5SPRVRAMADR(vadr);
-                  else C = VRAMADR(vadr);
-                  dst.ca[0]=C[0];
-                 PPU_hook(vadr);
-                  dst.ca[1]=C[8];
-                 PPU_hook(vadr|8);
-                  dst.x=spr->x;
-                  dst.atr=spr->atr;
-
-
-                  *(uint32 *)&SPRBUF[nosprites<<2]=*(uint32 *)&dst;
-                 }
-
-                 nosprites++;
-                }
-                else
-                {
-                  PPU_status|=0x20;
-                  break;
-                }
-         }
-}
-
-static void FASTAPASS(1) RefreshSprite(uint8 *target)
-{
-       int n;
-        SPRB *spr;
-        uint8 *P=target;
-
-        if(!nosprites) return;
-       #ifdef FRAMESKIP
-       if(FSkip)
-       {
-        if(!SpriteBlurp)
-        {
-         nosprites=0;
-         return;
-        }
-        else
-         nosprites=1;
-       }
-       #endif
-
-        FCEU_dwmemset(sprlinebuf,0x80808080,256);
-        nosprites--;
-        spr = (SPRB*)SPRBUF+nosprites;
-
-       for(n=nosprites;n>=0;n--,spr--)
-       {
-        register uint8 J,atr,c1,c2;
-       int x=spr->x;
-        uint8 *C;
-        uint8 *VB;
-                
-        P+=x;
-
-        c1=((spr->ca[0]>>1)&0x55)|(spr->ca[1]&0xAA);
-       c2=(spr->ca[0]&0x55)|((spr->ca[1]<<1)&0xAA);
-
-        J=spr->ca[0]|spr->ca[1];
-       atr=spr->atr;
-
-                       if(J)
-                       {        
-                        if(n==0 && SpriteBlurp && !(PPU_status&0x40))
-                        {  
-                        int z,ze=x+8;
-                        if(ze>256) {ze=256;}
-                        if(ScreenON && (scanline<FSettings.FirstSLine || scanline>FSettings.LastSLine
-                        #ifdef FRAMESKIP
-                        || FSkip
-                        #endif
-                        ))
-                         BGRender(target);
-
-                        if(!(atr&H_FLIP))
-                        {
-                         for(z=x;z<ze;z++)
-                         {
-                          if(J&(0x80>>(z-x)))
-                          {
-                           if(!(target[z]&64))
-                            tosprite=z;
-                          }
-                         }
-                        }
-                        else
-                        {
-                          for(z=x;z<ze;z++)
-                          {
-                           if(J&(1<<(z-x)))
-                           {
-                            if(!(target[z]&64))
-                             tosprite=z;
-                           }
-                          }
-                        }
-                        //FCEU_DispMessage("%d, %d:%d",scanline,x,tosprite);
-                        }
-
-        C = sprlinebuf+x;
-         VB = (PALRAM+0x10)+((atr&3)<<2);
-
-         if(atr&SP_BACK) 
-         {
-          if (atr&H_FLIP)
-          {
-           if (J&0x02)  C[1]=VB[c1&3]|0x40;
-           if (J&0x01)  *C=VB[c2&3]|0x40;
-           c1>>=2;c2>>=2;
-           if (J&0x08)  C[3]=VB[c1&3]|0x40;;
-           if (J&0x04)  C[2]=VB[c2&3]|0x40;;
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[5]=VB[c1&3]|0x40;;
-           if (J&0x10)  C[4]=VB[c2&3]|0x40;;
-           c1>>=2;c2>>=2;
-           if (J&0x80)  C[7]=VB[c1]|0x40;;
-           if (J&0x40)  C[6]=VB[c2]|0x40;;
-         } else  {
-           if (J&0x02)  C[6]=VB[c1&3]|0x40;
-           if (J&0x01)  C[7]=VB[c2&3]|0x40;
-          c1>>=2;c2>>=2;
-           if (J&0x08)  C[4]=VB[c1&3]|0x40;
-           if (J&0x04)  C[5]=VB[c2&3]|0x40;
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[2]=VB[c1&3]|0x40;
-           if (J&0x10)  C[3]=VB[c2&3]|0x40;
-           c1>>=2;c2>>=2;
-           if (J&0x80)  *C=VB[c1]|0x40;
-           if (J&0x40)  C[1]=VB[c2]|0x40;
-         }
-         } else {
-          if (atr&H_FLIP)
-         {
-           if (J&0x02)  C[1]=VB[(c1&3)];
-           if (J&0x01)  *C=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x08)  C[3]=VB[(c1&3)];
-           if (J&0x04)  C[2]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[5]=VB[(c1&3)];
-           if (J&0x10)  C[4]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x80)  C[7]=VB[c1];
-           if (J&0x40)  C[6]=VB[c2];
-          }else{                 
-           if (J&0x02)  C[6]=VB[(c1&3)];
-           if (J&0x01)  C[7]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x08)  C[4]=VB[(c1&3)];
-           if (J&0x04)  C[5]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[2]=VB[(c1&3)];
-           if (J&0x10)  C[3]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x80)  *C=VB[c1];
-           if (J&0x40)  C[1]=VB[c2];
-          }
-         }
-        }
-       P-=x;
-      }
-
-     nosprites=0;
-     #ifdef FRAMESKIP
-     if(FSkip) return;
-     #endif
-
-     {
-      uint8 n=((PPU[1]&4)^4)<<1;
-      loopskie:
-      {
-       uint32 t=*(uint32 *)(sprlinebuf+n);
-       if(t!=0x80808080)
-       {
-       #ifdef LSB_FIRST
-        if(!(t&0x80))
-        {
-         if(!(t&0x40))       // Normal sprite
-          P[n]=sprlinebuf[n];
-         else if(P[n]&64)        // behind bg sprite
-          P[n]=sprlinebuf[n];
-        }
-
-        if(!(t&0x8000))
-        {
-         if(!(t&0x4000))       // Normal sprite
-          P[n+1]=(sprlinebuf+1)[n];
-         else if(P[n+1]&64)        // behind bg sprite
-          P[n+1]=(sprlinebuf+1)[n];
-        }
-
-        if(!(t&0x800000))
-        {
-         if(!(t&0x400000))       // Normal sprite
-          P[n+2]=(sprlinebuf+2)[n];
-         else if(P[n+2]&64)        // behind bg sprite
-          P[n+2]=(sprlinebuf+2)[n];
-        }
-
-        if(!(t&0x80000000))
-        {
-         if(!(t&0x40000000))       // Normal sprite
-          P[n+3]=(sprlinebuf+3)[n];
-         else if(P[n+3]&64)        // behind bg sprite
-          P[n+3]=(sprlinebuf+3)[n];
-        }
-       #else
-        if(!(t&0x80000000))
-        {
-         if(!(t&0x40))       // Normal sprite
-          P[n]=sprlinebuf[n];
-         else if(P[n]&64)        // behind bg sprite
-          P[n]=sprlinebuf[n];
-        }
-
-        if(!(t&0x800000))
-        {
-         if(!(t&0x4000))       // Normal sprite
-          P[n+1]=(sprlinebuf+1)[n];
-         else if(P[n+1]&64)        // behind bg sprite
-          P[n+1]=(sprlinebuf+1)[n];
-        }
 
-        if(!(t&0x8000))
-        {
-         if(!(t&0x400000))       // Normal sprite
-          P[n+2]=(sprlinebuf+2)[n];
-         else if(P[n+2]&64)        // behind bg sprite
-          P[n+2]=(sprlinebuf+2)[n];
-        }
-
-        if(!(t&0x80))
-        {
-         if(!(t&0x40000000))       // Normal sprite
-          P[n+3]=(sprlinebuf+3)[n];
-         else if(P[n+3]&64)        // behind bg sprite
-          P[n+3]=(sprlinebuf+3)[n];
-        }
-       #endif
-       }
-      }
-      n+=4;
-      if(n) goto loopskie;
-     }
-}
+// ============================//
+// end of new code
+// ===========================//
 
 void ResetMapping(void)
 {
@@ -1226,7 +974,7 @@ void CloseGame(void)
    FlushGameCheats();
   #ifdef NETWORK
   if(FSettings.NetworkPlay) KillNetplay();
-  #endif       
+  #endif
   GameInterface(GI_CLOSE);
   CloseGenie();
   GameLoaded=0;
@@ -1256,28 +1004,54 @@ void ResetGameLoaded(void)
        FCEUGameInfo.inputfc=-1;
 }
 
+char lastLoadedGameName [2048];
+
 FCEUGI *FCEUI_LoadGame(char *name)
 {
+       char name2[512];
+       int have_movie = 0;
         int fp;
 
         Exit=1;
         ResetGameLoaded();
 
-       fp=FCEU_fopen(name,"rb");
+       strncpy(name2, name, sizeof(name2));
+       name2[sizeof(name2)-1] = 0;
+
+       fp=FCEU_fopen(name2,"rb");
        if(!fp)
         {
         FCEU_PrintError("Error opening \"%s\"!",name);
         return 0;
        }
 
-        GetFileBase(name);
-        if(iNESLoad(name,fp))
+        {
+        char *p = name2 + strlen(name2) - 4;
+        if (strcmp(p, ".fcm") == 0)
+        {
+         // movie detected
+         printf("movie detected\n");
+         FCEU_fclose(fp);
+         *p = 0;
+         fp=FCEU_fopen(name2,"rb");
+         if (!fp) {
+          printf("no ROM for movie\n");
+          return 0;
+         }
+         have_movie = 1;
+        }
+       }
+
+       strcpy(lastLoadedGameName, name2);
+
+        GetFileBase(name2);
+        if(iNESLoad(name2,fp))
          goto endlseq;
         if(NSFLoad(fp))
          goto endlseq;
-        if(FDSLoad(name,fp))
+        if(FDSLoad(name2,fp))
          goto endlseq;
-        if(UNIFLoad(name,fp))
+        if(UNIFLoad(name2,fp))
          goto endlseq;
 
         FCEU_PrintError("An error occurred while loading the file.");
@@ -1286,7 +1060,7 @@ FCEUGI *FCEUI_LoadGame(char *name)
 
         endlseq:
         FCEU_fclose(fp);
-        GameLoaded=1;        
+        GameLoaded=1;
 
         FCEU_ResetVidSys();
         if(FCEUGameInfo.type!=GIT_NSF)
@@ -1303,9 +1077,12 @@ FCEUGI *FCEUI_LoadGame(char *name)
          LoadGamePalette();
          LoadGameCheats();
         }
-        
+
        FCEU_ResetPalette();
         Exit=0;
+
+       if (have_movie)
+               FCEUI_LoadMovie(name, 1);
         return(&FCEUGameInfo);
 }
 
@@ -1324,17 +1101,16 @@ void FCEU_ResetVidSys(void)
  if(w)
  {
   PAL=1;
-  scanlines_per_frame=312;
   FSettings.FirstSLine=FSettings.UsrFirstSLine[1];
   FSettings.LastSLine=FSettings.UsrLastSLine[1];
  }
  else
  {
   PAL=0;
-  scanlines_per_frame=262;
   FSettings.FirstSLine=FSettings.UsrFirstSLine[0];
   FSettings.LastSLine=FSettings.UsrLastSLine[0];
  }
+ printf("PAL = %i\n", PAL);
  SetSoundVariables();
 }
 
@@ -1346,7 +1122,7 @@ int FCEUI_Initialize(void)
        FSettings.UsrFirstSLine[0]=8;
        FSettings.UsrFirstSLine[1]=0;
         FSettings.UsrLastSLine[0]=FSettings.UsrLastSLine[1]=239;
-       FSettings.SoundVolume=65536;    // 100%
+       FSettings.SoundVolume=65535;    // 100%
         return 1;
 }
 
@@ -1354,9 +1130,10 @@ int FCEUI_Initialize(void)
 static INLINE void Thingo(void)
 {
    Loop6502();
+#ifndef NEW_TRY
 
    if(tosprite>=256)
-   { 
+   {
     X6502_Run(256-harko);
     Fixit1();
     X6502_Run(harko);
@@ -1382,6 +1159,9 @@ static INLINE void Thingo(void)
     tosprite=256;
    }
    DoHBlank();
+#else
+   X6502_Run_scanline();
+#endif
 }
 #undef harko
 
@@ -1389,6 +1169,10 @@ void EmLoop(void)
 {
  for(;;)
  {
+  uint32 scanlines_per_frame = PAL ? 312 : 262;
+       //extern int asdc;
+       //printf("asdc: %i\n", asdc);
+       //asdc=0;
   ApplyPeriodicCheats();
   X6502_Run(256+85);
 
@@ -1399,14 +1183,22 @@ void EmLoop(void)
                                  breaks a Super Donkey Kong game. */
 
   X6502_Run(12);               /* I need to figure out the true nature and length
-                                  of this delay. 
+                                  of this delay.
                                */
   if(FCEUGameInfo.type==GIT_NSF)
    TriggerNMINSF();
   else if(VBlankON)
    TriggerNMI();
 
-  X6502_Run((scanlines_per_frame-242)*(256+85)-12); 
+  // Note: this is needed for asm core
+  // Warning: using 'scanline' var here breaks Castlevania III
+  {
+   int lines;
+   X6502_Run(256+85-12);
+   for (lines=scanlines_per_frame-242-1;lines;lines--)
+     X6502_Run(256+85);
+  }
+  // X6502_Run((scanlines_per_frame-242)*(256+85)-12);
 
   PPU_status&=0x1f;
 
@@ -1427,12 +1219,15 @@ void EmLoop(void)
    if(PPU_hook) PPU_hook(RefreshAddr&0x3fff);
   }
   if(FCEUGameInfo.type==GIT_NSF)
+  {
    X6502_Run((256+85)*240);
+  }
   else
   {
    int x,max,maxref;
 
    deemp=PPU[1]>>5;
+   SetRefreshLine();
    for(scanline=0;scanline<240;scanline++)
    {
     deempcnt[deemp]++;
@@ -1462,13 +1257,13 @@ void EmLoop(void)
    {
     FCEU_PutImageDummy();
     FSkip--;
-    FCEUD_Update(0,WaveFinal,ssize);
+    FCEUD_Update(0,WaveFinalMono,ssize);
    }
    else
    #endif
    {
     FCEU_PutImage();
-    FCEUD_Update(XBuf+8,WaveFinal,ssize);
+    FCEUD_Update(XBuf+8,WaveFinalMono,ssize);
    }
    UpdateInput();
   }
@@ -1545,7 +1340,7 @@ void ResetNES(void)
         X6502_Reset();
 }
 
-void PowerNES(void) 
+void PowerNES(void)
 {
         if(!GameLoaded) return;