wannabe optimizations
author notaz <notasas@gmail.com>
Sun, 8 Apr 2007 23:35:37 +0000 (23:35 +0000)
committer notaz <notasas@gmail.com>
Sun, 8 Apr 2007 23:35:37 +0000 (23:35 +0000)
git-svn-id: file:///home/notaz/opt/svn/fceu@100 be3aeb3a-fb24-0410-a615-afba39da0efa

Makefile.gp2x
bench.txt [new file with mode: 0644]
fce.c
ppu.c
sound.c

diff --git a/Makefile.gp2x b/Makefile.gp2x
index b376ce6..4b1553a 100644 (file)
--- a/Makefile.gp2x
+++ b/Makefile.gp2x
@@ -1,6 +1,6 @@
 CC     = arm-linux-gcc
 STRIP  = arm-linux-strip
-TFLAGS  = -mcpu=arm920t -Izlib -DGP2X=1 -DLSB_FIRST -DSDL -DUNIX -DPSS_STYLE=1 -DZLIB -DFRAMESKIP -D_REENTRANT
+TFLAGS  = -mcpu=arm920t -Izlib -DGP2X=1 -DLSB_FIRST -DUNIX -DPSS_STYLE=1 -DZLIB -DFRAMESKIP -D_REENTRANT
 RM     = rm -f
 B      = drivers/gp2x/
 ifdef DEBUG
@@ -17,7 +17,7 @@ gpfce: fceu
 include zlib/Makefile
 
 OBJDRIVER      = ${B}minimal.o ${B}gp2x.o ${B}main.o ${B}throttle.o ${B}unix-netplay.o ${B}gp2x-sound.o ${B}gp2x-video.o ${B}lnx-joystick.o drivers/common/cheat.o drivers/common/config.o drivers/common/args.o drivers/common/vidblit.o ${UNZIPOBJS} ppu.o
-LDRIVER                = -L /mnt/sd/lib  -L/mnt/sd/gp2x/usr/lib  -lm  -lpthread -lz -static
+LDRIVER                = -L /mnt/sd/lib  -L/mnt/sd/gp2x/usr/lib -lm -lz -static
 
 include Makefile.base
 
diff --git a/bench.txt b/bench.txt
new file mode 100644 (file)
index 0000000..a766c8c
--- /dev/null
+++ b/bench.txt
@@ -0,0 +1,7 @@
+@200
+
+fsa / fs0
+
+Kage 25/48
+smb 30/55
+
diff --git a/fce.c b/fce.c
index 687b686..42a4d76 100644 (file)
--- a/fce.c
+++ b/fce.c
@@ -205,13 +205,14 @@ uint32 TempAddr,RefreshAddr;
 int scanline;
 static uint32 scanlines_per_frame;
 
+uint8 GameMemBlock[131072] __attribute__ ((aligned (4)));
+uint8 NTARAM[0x800] __attribute__ ((aligned (4)));
+uint8 PALRAM[0x20] __attribute__ ((aligned (4)));
+uint8 RAM[0x800] __attribute__ ((aligned (4)));
+
 uint8 PPU[4];
 uint8 PPUSPL;
 
-uint8 GameMemBlock[131072];
-uint8 NTARAM[0x800],PALRAM[0x20];
-uint8 RAM[0x800];
-
 uint8 PAL=0;
 
 
@@ -419,8 +420,9 @@ void BGRender(uint8 *target)
 
         if(!(PPU[1]&2))
         {
-         tem=Pal[0]|(Pal[0]<<8)|(Pal[0]<<16)|(Pal[0]<<24);
-         tem|=0x40404040;
+         tem=Pal[0]|0x40;
+        tem|=tem<<8;
+        tem|=tem<<16;
          *(uint32 *)target=*(uint32 *)(target+4)=tem;
         }
 }
@@ -713,45 +715,36 @@ static void RefreshLine_PPU_hook(uint8 *P, uint32 vofs)
          }
 }
 
-static void RefreshLine_normal(uint8 *P, uint32 vofs)
+static void RefreshLine_normal(uint8 *P, uint32 vofs) // vofs is 0x107 max
 {
          int8 X1;
+        uint32 rfraddr = RefreshAddr;
+        uint8 *page = vnapage[(rfraddr>>10)&3];
+         uint32 cc2=0;
 
-         for(X1=33;X1;X1--,P+=8)
+        if ((rfraddr&0xc)!=0)
+         cc2=*(uint32 *) (page + ((rfraddr&0x380)>>4) + ((rfraddr&0x10)>>2) + 0x3c0);
+
+         for (X1=33;X1;X1--,P+=8)
          {
-                uint8 *C;
-                uint8 cc,zz,zz2;
+                uint8 cc,*C;
                 uint32 vadr;
 
-                zz=RefreshAddr&0x1F;
-               zz2=(RefreshAddr>>10)&3;
-                vadr=(vnapage[zz2][RefreshAddr&0x3ff]<<4)+vofs;
+                vadr=(page[rfraddr&0x3ff]<<4)+vofs;
                 C = VRAMADR(vadr);
-               cc=vnapage[zz2][0x3c0+(zz>>2)+((RefreshAddr&0x380)>>4)];
-               cc=((cc >> ((zz&2) + ((RefreshAddr&0x40)>>4))) &3) <<2;
-        {
-        uint8 *S=PALRAM+cc;
-        uint8 c1,c2;
-
-        c1=((C[0]>>1)&0x55)|(C[8]&0xAA);
-         c2=(C[0]&0x55)|((C[8]<<1)&0xAA);
+               if ((rfraddr&0xc)==0)
+                cc2=*(uint32 *) (page + ((rfraddr&0x380)>>4) + ((rfraddr&0x10)>>2) + 0x3c0);
+               cc=((cc2 >> ((rfraddr&2) + ((rfraddr&0x40)>>4) + ((rfraddr&0xc)<<1))) & 3) << 2;
 
-         P[6]=S[c1&3];
-         P[7]=S[c2&3];
-         P[4]=S[(c1>>2)&3];
-         P[5]=S[(c2>>2)&3];
-         P[2]=S[(c1>>4)&3];
-         P[3]=S[(c2>>4)&3];
-
-         P[0]=S[c1>>6];
-         P[1]=S[c2>>6];
-        }
+               #include "fceline.h"
 
-                if((RefreshAddr&0x1f)==0x1f)
-                 RefreshAddr^=0x41F;
-                else
-                 RefreshAddr++;
+                if((rfraddr&0x1f)==0x1f) {
+                 rfraddr^=0x41F;
+                page = vnapage[(rfraddr>>10)&3];
+                } else
+                 rfraddr++;
          }
+        RefreshAddr = rfraddr;
 }
 
 static void SetRefreshLine(void)
@@ -856,20 +849,6 @@ static void DoHBlank(void)
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 // ============================//
 // end of new code
 // ===========================//
diff --git a/ppu.c b/ppu.c
index 0f2a47f..d2ea449 100644 (file)
--- a/ppu.c
+++ b/ppu.c
@@ -36,7 +36,7 @@
 uint8 SPRAM[0x100];
 static uint8 SPRBUF[0x100];
 
-static uint8 sprlinebuf[256+8];        
+static uint8 sprlinebuf[256+8];
 extern void BGRender(uint8 *target);
 extern int tosprite;
 
@@ -56,10 +56,10 @@ typedef struct {
 } SPR __attribute__((aligned(1)));
 
 typedef struct {
-  //   uint8 ca[2],atr,x; 
-       uint8 ca[2],atr,x; 
+  //   uint8 ca[2],atr,x;
+       uint8 ca[2],atr,x;
   //  union {  int z; }
-  
+
 
 } SPRB __attribute__((aligned(1)));
 
@@ -120,7 +120,7 @@ void FetchSpriteData(void)
                  if(MMC5Hack && geniestage!=1) C = MMC5SPRVRAMADR(vadr);
                   else C = VRAMADR(vadr);
 
-                 
+
                  dst.ca[0]=C[0];
                  dst.ca[1]=C[8];
                  dst.x=spr->x;
@@ -202,9 +202,8 @@ extern int FSkip;
 
 void RefreshSprite(uint8 *target)
 {
-       int n;
+       int n, minx=256;
         SPRB *spr;
-        uint8 *P=target;
 
         if(!nosprites) return;
        #ifdef FRAMESKIP
@@ -220,29 +219,30 @@ void RefreshSprite(uint8 *target)
        }
        #endif
 
-        FCEU_dwmemset(sprlinebuf,0x80808080,256);
         nosprites--;
         spr = (SPRB*)SPRBUF+nosprites;
 
        for(n=nosprites;n>=0;n--,spr--)
        {
-        register uint8 J,atr,c1,c2;
-       int x=spr->x;
-        uint8 *C;
-        uint8 *VB;
-                
-        P+=x;
-
-        c1=((spr->ca[0]>>1)&0x55)|(spr->ca[1]&0xAA);
-       c2=(spr->ca[0]&0x55)|((spr->ca[1]<<1)&0xAA);
+        register uint32 J;
 
         J=spr->ca[0]|spr->ca[1];
-       atr=spr->atr;
 
-                       if(J)
-                       {        
+        if (J)
+        {
+          register uint8 atr,c1,c2;
+          uint8 *C;
+          uint8 *VB;
+         int x=spr->x;
+         atr=spr->atr;
+
+          if (x < minx)
+         {
+           if (minx == 256) FCEU_dwmemset(sprlinebuf,0x80808080,256); // only clear sprite buff when we encounter first sprite
+           minx = x;
+         }
                         if(n==0 && SpriteBlurp && !(PPU_status&0x40))
-                        {  
+                        {
                         int z,ze=x+8;
                         if(ze>256) {ze=256;}
                         if(ScreenON && (scanline<FSettings.FirstSLine || scanline>FSettings.LastSLine
@@ -277,113 +277,77 @@ void RefreshSprite(uint8 *target)
                         //FCEU_DispMessage("%d, %d:%d",scanline,x,tosprite);
                         }
 
+         c1=((spr->ca[0]>>1)&0x55)|(spr->ca[1]&0xAA);
+        c2=(spr->ca[0]&0x55)|((spr->ca[1]<<1)&0xAA);
+
         C = sprlinebuf+x;
          VB = (PALRAM+0x10)+((atr&3)<<2);
 
-         if(atr&SP_BACK) 
          {
+         J &= 0xff;
+         if(atr&SP_BACK) J |= 0x4000;
           if (atr&H_FLIP)
           {
-           if (J&0x02)  C[1]=VB[c1&3]|0x40;
-           if (J&0x01)  *C=VB[c2&3]|0x40;
+           if (J&0x02)  C[1]=VB[c1&3]|(J>>8);
+           if (J&0x01)  *C=VB[c2&3]|(J>>8);
            c1>>=2;c2>>=2;
-           if (J&0x08)  C[3]=VB[c1&3]|0x40;;
-           if (J&0x04)  C[2]=VB[c2&3]|0x40;;
+           if (J&0x08)  C[3]=VB[c1&3]|(J>>8);
+           if (J&0x04)  C[2]=VB[c2&3]|(J>>8);
            c1>>=2;c2>>=2;
-           if (J&0x20)  C[5]=VB[c1&3]|0x40;;
-           if (J&0x10)  C[4]=VB[c2&3]|0x40;;
+           if (J&0x20)  C[5]=VB[c1&3]|(J>>8);
+           if (J&0x10)  C[4]=VB[c2&3]|(J>>8);
            c1>>=2;c2>>=2;
-           if (J&0x80)  C[7]=VB[c1]|0x40;;
-           if (J&0x40)  C[6]=VB[c2]|0x40;;
+           if (J&0x80)  C[7]=VB[c1]|(J>>8);
+           if (J&0x40)  C[6]=VB[c2]|(J>>8);
          } else  {
-           if (J&0x02)  C[6]=VB[c1&3]|0x40;
-           if (J&0x01)  C[7]=VB[c2&3]|0x40;
+           if (J&0x02)  C[6]=VB[c1&3]|(J>>8);
+           if (J&0x01)  C[7]=VB[c2&3]|(J>>8);
           c1>>=2;c2>>=2;
-           if (J&0x08)  C[4]=VB[c1&3]|0x40;
-           if (J&0x04)  C[5]=VB[c2&3]|0x40;
+           if (J&0x08)  C[4]=VB[c1&3]|(J>>8);
+           if (J&0x04)  C[5]=VB[c2&3]|(J>>8);
            c1>>=2;c2>>=2;
-           if (J&0x20)  C[2]=VB[c1&3]|0x40;
-           if (J&0x10)  C[3]=VB[c2&3]|0x40;
+           if (J&0x20)  C[2]=VB[c1&3]|(J>>8);
+           if (J&0x10)  C[3]=VB[c2&3]|(J>>8);
            c1>>=2;c2>>=2;
-           if (J&0x80)  *C=VB[c1]|0x40;
-           if (J&0x40)  C[1]=VB[c2]|0x40;
+           if (J&0x80)  *C=VB[c1]|(J>>8);
+           if (J&0x40)  C[1]=VB[c2]|(J>>8);
          }
-         } else {
-          if (atr&H_FLIP)
-         {
-           if (J&0x02)  C[1]=VB[(c1&3)];
-           if (J&0x01)  *C=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x08)  C[3]=VB[(c1&3)];
-           if (J&0x04)  C[2]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[5]=VB[(c1&3)];
-           if (J&0x10)  C[4]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x80)  C[7]=VB[c1];
-           if (J&0x40)  C[6]=VB[c2];
-          }else{                 
-           if (J&0x02)  C[6]=VB[(c1&3)];
-           if (J&0x01)  C[7]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x08)  C[4]=VB[(c1&3)];
-           if (J&0x04)  C[5]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x20)  C[2]=VB[(c1&3)];
-           if (J&0x10)  C[3]=VB[(c2&3)];
-           c1>>=2;c2>>=2;
-           if (J&0x80)  *C=VB[c1];
-           if (J&0x40)  C[1]=VB[c2];
-          }
          }
-        }
-       P-=x;
       }
+     }
 
      nosprites=0;
      #ifdef FRAMESKIP
      if(FSkip) return;
      #endif
+     if (minx == 256) return; // no visible sprites
 
      {
       uint8 n=((PPU[1]&4)^4)<<1;
+      if ((int)n < minx) n = minx & 0xfc;
       loopskie:
       {
        uint32 t=*(uint32 *)(sprlinebuf+n);
        if(t!=0x80808080)
        {
        #ifdef LSB_FIRST
-        if(!(t&0x80))
-        {
-         if(!(t&0x40))       // Normal sprite
-          P[n]=sprlinebuf[n];
-         else if(P[n]&64)        // behind bg sprite
-          P[n]=sprlinebuf[n];
+        uint32 tb=*(uint32 *)(target+n);
+        if(!(t&0x00000080) && (!(t&0x00000040) || (tb&0x00000040))) { // have sprite pixel AND (normal sprite OR behind bg with no bg)
+          tb &= ~0x000000ff; tb |= t & 0x000000ff;
         }
 
-        if(!(t&0x8000))
-        {
-         if(!(t&0x4000))       // Normal sprite
-          P[n+1]=(sprlinebuf+1)[n];
-         else if(P[n+1]&64)        // behind bg sprite
-          P[n+1]=(sprlinebuf+1)[n];
+        if(!(t&0x00008000) && (!(t&0x00004000) || (tb&0x00004000))) {
+          tb &= ~0x0000ff00; tb |= t & 0x0000ff00;
         }
 
-        if(!(t&0x800000))
-        {
-         if(!(t&0x400000))       // Normal sprite
-          P[n+2]=(sprlinebuf+2)[n];
-         else if(P[n+2]&64)        // behind bg sprite
-          P[n+2]=(sprlinebuf+2)[n];
+        if(!(t&0x00800000) && (!(t&0x00400000) || (tb&0x00400000))) {
+          tb &= ~0x00ff0000; tb |= t & 0x00ff0000;
         }
 
-        if(!(t&0x80000000))
-        {
-         if(!(t&0x40000000))       // Normal sprite
-          P[n+3]=(sprlinebuf+3)[n];
-         else if(P[n+3]&64)        // behind bg sprite
-          P[n+3]=(sprlinebuf+3)[n];
+        if(!(t&0x80000000) && (!(t&0x40000000) || (tb&0x40000000))) {
+          tb &= ~0xff000000; tb |= t & 0xff000000;
         }
+       *(uint32 *)(target+n)=tb;
        #else
         if(!(t&0x80000000))
         {
@@ -438,7 +402,7 @@ void FetchSpriteData(void)
        int vofs;
         uint8 P0=PPU[0];
 
-        
+
         spr=(SPR *)SPRAM;
         H=8;
 
@@ -482,11 +446,11 @@ void FetchSpriteData(void)
                         vadr+=t&8;
                   }
 
-                  // Fix this geniestage hack 
+                  // Fix this geniestage hack
                   if(MMC5Hack && geniestage!=1) C = MMC5SPRVRAMADR(vadr);
                   else C = VRAMADR(vadr);
 
-                  
+
                   dst.ca[0]=C[0];
                   dst.ca[1]=C[8];
                   dst.x=spr->x;
@@ -565,7 +529,7 @@ void FetchSpriteData(void)
         //if(ns>=7)
         //printf("%d %d\n",scanline,ns);
         if(ns>8) PPU_status|=0x20;     // Handle case when >8 sprites per
-//                                scanline option is enabled. 
+//                                scanline option is enabled.
        else if(PPU_hook)
        {
         for(n=0;n<(8-ns);n++)
@@ -582,7 +546,7 @@ void FetchSpriteData(void)
 
 void RefreshSprite(uint8 *target)
 {
-      
+
        int n,sprindex;
        SPRB *spr;
         uint8 *P=target;
@@ -603,7 +567,7 @@ void RefreshSprite(uint8 *target)
        int x=spr[sprindex].x;
         uint8 *C;
         uint8 *VB;
-                
+
         P+=x;
 
         c1=((spr[sprindex].ca[0]>>1)&0x55)|(spr[sprindex].ca[1]&0xAA);
@@ -613,9 +577,9 @@ void RefreshSprite(uint8 *target)
        atr=spr[sprindex].atr;
 
                        if(J)
-                       {        
+                       {
                         if(n==0 && SpriteBlurp && !(PPU_status&0x40))
-                        {  
+                        {
                         int z,ze=x+8;
                         if(ze>256) {ze=256;}
                         if(ScreenON && (scanline<FSettings.FirstSLine || scanline>FSettings.LastSLine
@@ -655,7 +619,7 @@ void RefreshSprite(uint8 *target)
         C = &(sprlinebuf[(uint8)x]);
          VB = (PALRAM+0x10)+((atr&3)<<2);
 
-         if(atr&SP_BACK) 
+         if(atr&SP_BACK)
          {
           if (atr&H_FLIP)
           {
@@ -697,7 +661,7 @@ void RefreshSprite(uint8 *target)
            c1>>=2;c2>>=2;
            if (J&0x80)  C[7]=VB[c1];
            if (J&0x40)  C[6]=VB[c2];
-          }else{                 
+          }else{
            if (J&0x02)  C[6]=VB[(c1&3)];
            if (J&0x01)  C[7]=VB[(c2&3)];
            c1>>=2;c2>>=2;
diff --git a/sound.c b/sound.c
index 61c8fcd..8cdde0e 100644 (file)
--- a/sound.c
+++ b/sound.c
@@ -25,7 +25,7 @@
 /*******  routines.  A few ideas were inspired         */
 /*******  by code from Marat Fayzullin's EMUlib                */
 /*******                                               */
-/********************************************************/             
+/********************************************************/
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -61,8 +61,8 @@ uint8 sqnon=0;
 
 #undef printf
 uint16 nreg;
-int32 lengthcount[4]; 
+
+int32 lengthcount[4];
 
 extern int soundvol;
 
@@ -108,7 +108,7 @@ int32 PCMIRQCount;
 uint8 PCMBitIndex=0;
 uint32 PCMAddressIndex=0;
 int32 PCMSizeIndex=0;
-uint8 PCMBuffer=0; 
+uint8 PCMBuffer=0;
 int vdis=0;
 
 static void Dummyfunc(void) {};
@@ -131,9 +131,9 @@ static void CalcDPCMIRQ(void)
   freq=(NTSCPCMTable[PSG[0x10]&0xF]<<4);
 
  cycles=(((PSG[0x13]<<4)+1));
- cycles*=freq/14; 
+ cycles*=freq/14;
  honk=((PSG[0x13]<<4)+1)*freq;
- honk-=cycles; 
+ honk-=cycles;
  //if(PAL) honk/=107;
  //else honk/=(double)113.66666666;
  PCMIRQCount=honk*48;
@@ -144,9 +144,9 @@ static void CalcDPCMIRQ(void)
 
 static void PrepDPCM()
 {
- PCMAddressIndex=0x4000+(PSG[0x12]<<6); 
+ PCMAddressIndex=0x4000+(PSG[0x12]<<6);
  PCMSizeIndex=(PSG[0x13]<<4)+1;
- PCMBitIndex=0;  
+ PCMBitIndex=0;
  //PCMBuffer=ARead[0x8000+PCMAddressIndex](0x8000+PCMAddressIndex);
  if(PAL)
   PCMfreq=PALPCMTable[PSG[0x10]&0xF];
@@ -211,12 +211,12 @@ static DECLFW(Write_PSG)
            curfreq[0]&=0xFF00;
            curfreq[0]|=V;
            break;
-  case 0x3:          
+  case 0x3:
            if(PSG[0x15]&1)
            {
             DoSQ1();
             lengthcount[0]=lengthtable[(V>>3)&0x1f];
-            sqnon|=1;                
+            sqnon|=1;
           }
            sweepon[0]=PSG[1]&0x80;
            curfreq[0]=PSG[0x2]|((V&7)<<8);
@@ -227,7 +227,7 @@ static DECLFW(Write_PSG)
            sqacc[0]=((int64)curfreq[0]+1)<<50;
            break;
 
-  case 0x4:           
+  case 0x4:
           DoSQ2();
            if(V&0x10)
             realvolume[1]=V&0xF;
@@ -240,7 +240,7 @@ static DECLFW(Write_PSG)
           curfreq[1]&=0xFF00;
           curfreq[1]|=V;
           break;
-  case 0x7:          
+  case 0x7:
           if(PSG[0x15]&2)
           {
           DoSQ2();
@@ -248,14 +248,14 @@ static DECLFW(Write_PSG)
            sqnon|=2;
          }
           sweepon[1]=PSG[0x5]&0x80;
-          curfreq[1]=PSG[0x6]|((V&7)<<8);          
+          curfreq[1]=PSG[0x6]|((V&7)<<8);
           decvolume[1]=0xF;
          DecCountTo1[1]=(PSG[0x4]&0xF)+1;
           SweepCount[1]=((PSG[0x5]>>4)&7)+1;
           DutyCount[1]=0;
           sqacc[1]=((int64)curfreq[1]+1)<<50;
           break;
-  case 0x8:                                 
+  case 0x8:
           DoTriangle();
          if(laster&0x80)
          {
@@ -292,13 +292,13 @@ static DECLFW(Write_PSG)
            lengthcount[3]=lengthtable[(V>>3)&0x1f];
           }
            decvolume[2]=0xF;
-          DecCountTo1[2]=(PSG[0xC]&0xF)+1;          
+          DecCountTo1[2]=(PSG[0xC]&0xF)+1;
            break;
  case 0x10:DoPCM();
           if(!(V&0x80))
            X6502_IRQEnd(FCEU_IQDPCM);
           break;
- case 0x15: 
+ case 0x15:
           {
            int t=V^PSG[0x15];
 
@@ -328,14 +328,14 @@ static DECLFW(Write_PSG)
             X6502_IRQEnd(FCEU_IQDPCM);
           }
            break;
- case 0x17: 
+ case 0x17:
           V&=0xC0;
-           fcnt=0;      
+           fcnt=0;
            if(V&0x80)
             FrameSoundUpdate();
            fhcnt=fhinc;
            X6502_IRQEnd(FCEU_IQFCOUNT);
-          SIRQStat&=~0x40;        
+          SIRQStat&=~0x40;
            break;
  }
  PSG[A]=V;
@@ -384,7 +384,7 @@ static void FASTAPASS(1) FrameSoundStuff(int V)
               DoTriangle();
               sqnon&=~4;
              }
-           }        
+           }
          }
 
         for(P=0;P<2;P++)
@@ -395,7 +395,7 @@ static void FASTAPASS(1) FrameSoundStuff(int V)
           {
            if(lengthcount[P]>0)
            {
-            lengthcount[P]--;            
+            lengthcount[P]--;
             if(lengthcount[P]<=0)
              {
               sqnon&=~(P+1);
@@ -410,14 +410,14 @@ static void FASTAPASS(1) FrameSoundStuff(int V)
           {
            int32 mod=0;
 
-          if(SweepCount[P]>0) SweepCount[P]--; 
+          if(SweepCount[P]>0) SweepCount[P]--;
           if(SweepCount[P]<=0)
           {
            SweepCount[P]=((PSG[(P<<2)+0x1]>>4)&7)+1; //+1;
             {
              if(PSG[(P<<2)+0x1]&0x8)
              {
-              mod-=(P^1)+((curfreq[P])>>(PSG[(P<<2)+0x1]&7));          
+              mod-=(P^1)+((curfreq[P])>>(PSG[(P<<2)+0x1]&7));
 
               if(curfreq[P] && (PSG[(P<<2)+0x1]&7)/* && sweepon[P]&0x80*/)
               {
@@ -442,7 +442,7 @@ static void FASTAPASS(1) FrameSoundStuff(int V)
              }
             }
           }
-          } 
+          }
          }
 
        if(PSG[0x15]&0x8 && sqnon&8)
@@ -462,7 +462,7 @@ static void FASTAPASS(1) FrameSoundStuff(int V)
 
   case 0:       /* Envelope decay + linear counter */
          if(!trimode)
-         {           
+         {
            laster=0;
            if(tricoop)
            {
@@ -569,7 +569,7 @@ static void RDoPCM(void)
    uint32 out=PSG[0x11]<<3;
 
    start=ChannelBC[4];
-   end=(timestamp<<16)/soundtsinc;   
+   end=(timestamp<<16)/soundtsinc;
    if(end<=start) return;
    ChannelBC[4]=end;
 
@@ -737,7 +737,7 @@ static void RDoTriangle(void)
    int64 freq=(((PSG[0xa]|((PSG[0xb]&7)<<8))+1));
 
    start=ChannelBC[2];
-   end=(timestamp<<16)/soundtsinc;   
+   end=(timestamp<<16)/soundtsinc;
    if(end<=start) return;
    ChannelBC[2]=end;
 
@@ -760,8 +760,8 @@ static void RDoTriangle(void)
     }
     else
     {
-     static int64 triacc=0; 
-     static uint8 tc=0; 
+     static int64 triacc=0;
+     static uint8 tc=0;
 
       freq<<=49;
       for(V=start;V<end;V++)
@@ -789,7 +789,7 @@ static void RDoNoise(void)
    int32 start,end;
 
    start=ChannelBC[3];
-   end=(timestamp<<16)/soundtsinc;   
+   end=(timestamp<<16)/soundtsinc;
    if(end<=start) return;
    ChannelBC[3]=end;
 
@@ -798,27 +798,27 @@ static void RDoNoise(void)
       uint32 outo;
       uint32 amptab[2];
       uint8 amplitude;
-       
+
       amplitude=realvolume[2];
       //if(PSG[0xC]&0x10)
       // amplitude=(PSG[0xC]&0xF);
-      //else                  
+      //else
       // amplitude=decvolume[2]&0xF;
 
-      inc=NoiseFreqTable[PSG[0xE]&0xF]; 
+      inc=NoiseFreqTable[PSG[0xE]&0xF];
       amptab[0]=((amplitude<<2)+amplitude+amplitude)<<1;
       amptab[1]=0;
       outo=amptab[nreg&1];
 
-      if(amplitude) 
+      if(amplitude)
       {
-       if(PSG[0xE]&0x80)       // "short" noise        
+       if(PSG[0xE]&0x80)       // "short" noise
         for(V=start;V<end;V++)
-        {     
+        {
          Wave[V>>4]+=outo;
          if(count[3]>=inc)
-         {                          
-          uint8 feedback;      
+         {
+          uint8 feedback;
 
           feedback=((nreg>>8)&1)^((nreg>>14)&1);
           nreg=(nreg<<1)+feedback;
@@ -850,19 +850,19 @@ static void RDoNoise(void)
 }
 
 void SetNESSoundMap(void)
-{ 
+{
   SetWriteHandler(0x4000,0x4013,Write_PSG);
   SetWriteHandler(0x4011,0x4011,Write0x11);
   SetWriteHandler(0x4015,0x4015,Write_PSG);
-  SetWriteHandler(0x4017,0x4017,Write_PSG);        
+  SetWriteHandler(0x4017,0x4017,Write_PSG);
   SetReadHandler(0x4015,0x4015,Read_PSG);
 }
 
 static int32 WaveNSF[256];
 
-int64 highp;                   // 0 through 65536, 0 = no high pass, 65536 = max high pass
+int32 highp;                   // 0 through 65536, 0 = no high pass, 65536 = max high pass
 
-int64 lowp;                    // 0 through 65536, 65536 = max low pass(total attenuation)
+int32 lowp;                    // 0 through 65536, 65536 = max low pass(total attenuation)
                                // 65536 = no low pass
 static void FilterSound(uint32 *in, int32 *out, int16 *outMono, int count)
 {
@@ -871,7 +871,7 @@ static void FilterSound(uint32 *in, int32 *out, int16 *outMono, int count)
  //int16* tmp;
  //int16* outorig=out;
  //int32 prev=-99999;
- for(;count;count--,in++,out++)//,index++)
+ for(;count;count--,in++)//,out++)//,index++)
  {
   int64 diff;
 
@@ -880,7 +880,7 @@ static void FilterSound(uint32 *in, int32 *out, int16 *outMono, int count)
   acc+=(diff*highp)>>16;
   acc2+=((diff-acc2)*lowp)>>16;
   *in=0;
-  
+
   // don't change the sound here
 //  *out=(acc2*(int64)FSettings.SoundVolume)>>(24+16);
   // volume, 4 times louder by default??
@@ -891,23 +891,23 @@ static void FilterSound(uint32 *in, int32 *out, int16 *outMono, int count)
   if(*out<-32767) *out=-32767;
   if(*out>32767) *out=32767;
   // go one back
-  
+
   // do MONO
-  tmp=(int16 *)(out-1); 
+  tmp=(int16 *)(out-1);
   // don't do this the first time
   if (prev == -99999) continue;
   // the middle one should be interpolated
-  tmp[1]=(int16)((*out + prev) >> 1); 
-  prev = *out; 
+  tmp[1]=(int16)((*out + prev) >> 1);
+  prev = *out;
   */
   //outMono[index] = (int16)*out;
   *outMono = (int16)(acc2 >> 24);
   //if(*outMono<-16384) *outMono=-16384;
   //if(*outMono>16384) *outMono=16384;
   outMono++;
-   
+
   // out=((int64)(acc2>>24)*(int64)FSettings.SoundVolume)>>16; //acc2>>24;
-   
+
  }
  // do one more
 }
@@ -955,13 +955,13 @@ int FlushEmulateSound(void)
 
   if(end&0xF)
    Wave[0]=Wave[(end>>4)];
-  Wave[(end>>4)]=0;  
+  Wave[(end>>4)]=0;
 
   nosoundo:
   for(x=0;x<5;x++)
    ChannelBC[x]=end&0xF;
   timestampbase+=timestamp;
-  timestamp=(soundtsinc*(end&0xF))>>16;  
+  timestamp=(soundtsinc*(end&0xF))>>16;
   timestampbase-=timestamp;
   return(end>>4);
 }
@@ -999,7 +999,7 @@ void ResetSound(void)
 
 void SetSoundVariables(void)
 {
-  int x;  
+  int x;
 
   fhinc=PAL?16626:14915;       // *2 CPU clock rate
   fhinc*=24;
@@ -1013,7 +1013,7 @@ void SetSoundVariables(void)
    DoPCM=RDoPCM;
    DoSQ1=RDoSQ1;
    DoSQ2=RDoSQ2;
-  }  
+  }
   else
   {
    DoNoise=DoTriangle=DoPCM=DoSQ1=DoSQ2=Dummyfunc;