endif
 
 asm_6502=1
-#debug_asm_6502=1
+debug_asm_6502=1
 
 all:           fceu
 
 endif
 
 x6502.o: x6502.c x6502.h ops.h fce.h sound.h
+
 ncpu.o: ncpu.S ncpu.h
        $(CC) $(TFLAGS) -c $< -o $@
 
 
 #define Pal     (PALRAM)
 
 
-#ifdef DEBUG_ASM_6502
-extern int cpu_repeat;
-extern int cpu_lastval;
-#endif
-
 static void (*RefreshLine)(uint8 *P, uint32 vofs) = NULL;
 static void PRefreshLine(void);
 
 static DECLFR(A2002)
 {
                         uint8 ret;
-#ifdef DEBUG_ASM_6502
-       if (cpu_repeat) return cpu_lastval;
-#endif
                         ret = PPU_status;
                         vtoggle=0;
                         PPU_status&=0x7F;
-#ifdef DEBUG_ASM_6502
-//     cpu_lastval=ret|(PPUGenLatch&0x1F);
-#endif
                         return ret|(PPUGenLatch&0x1F);
 }
 
 {
                         uint8 ret;
                        uint32 tmp=RefreshAddr&0x3FFF;
-#ifdef DEBUG_ASM_6502
-       if (cpu_repeat) return cpu_lastval;
-#endif
 
                         PPUGenLatch=ret=VRAMBuffer;
                        if(PPU_hook) PPU_hook(tmp);
                         if (INC32) RefreshAddr+=32;
                         else RefreshAddr++;
                        if(PPU_hook) PPU_hook(RefreshAddr&0x3fff);
-#ifdef DEBUG_ASM_6502
-//     cpu_lastval=ret;
-#endif
                         return ret;
 }
 
 
 #include "input.h"
 #include "movie.h"
 
-#ifdef DEBUG_ASM_6502
-extern int cpu_repeat;
-extern int cpu_lastval;
-#endif
-
 extern INPUTC *FCEU_InitZapper(int w);
 extern INPUTC *FCEU_InitPowerpad(int w);
 extern INPUTC *FCEU_InitArkanoid(int w);
 {
        uint8 ret=0;
 
-#ifdef DEBUG_ASM_6502
-       if (cpu_repeat) return cpu_lastval;
-#endif
        if(JPorts[A&1]->Read)
         ret|=JPorts[A&1]->Read(A&1);
 
          ret=FCExp->Read(A&1,ret);
 
        ret|=X.DB&0xC0;
-#ifdef DEBUG_ASM_6502
-//     cpu_lastval=ret;
-#endif
        return(ret);
 }
 
 
 @@@
 
 read_rom_byte:
+#ifndef DEBUG_ASM_6502
        ldr     r0, =CartBR
        ldr     r2, =ARead
        mov     r1, #0xff00
        ldr     r2, [r2, r1, lsl #2]
        ldrb    r0, [r2, REG_ADDR]
        bx      lr
+#endif
 
 
 read_ppu_reg:
        mov     lr, pc
        ldr     pc, [r2, r0, lsl #2]
 #else
-       ldr     r1, =cpu_lastval
-       ldr     r0, [r1]
-       tst     r0, #0x20000
-       orreq   r0, r0, #0x20000
-       streq   r0, [r1]
-       movne   r0, r0, lsr #8
-       and     r0, r0, #0xff
+       ldr     r2, =dread_count_a
+       ldr     r0, =dreads
+       ldr     r1, [r2]
+       ldrb    r0, [r0, r1]
+       add     r1, r1, #1
+       str     r1, [r2]
 #endif
 
        ldr     REG_OP_TABLE, =cpu_exec_table   @ got trashed because was in r12
        ldr     REG_PC,     [REG_OP_TABLE, #(OTOFFS_NES_REGS + 0x0c)]   @ might get rebased
        ldr     REG_P_REST, [REG_OP_TABLE, #(OTOFFS_NES_REGS + 0x10)]   @ might set irq
        ldr     REG_CYCLE,  [REG_OP_TABLE, #(OTOFFS_NES_REGS + 0x1c)]   @ might get used
+#else
+       ldr     r1, =dwrite_count_a
+       ldr     r2, =dwrites_a
+       ldr     r1, [r1]
+       and     r0, r0, #0xff
+       orr     r0, r0, REG_ADDR, lsl #8
+       str     r0, [r2, r1, lsl #2]
+       ldr     r2, =dwrite_count_a
+       add     r1, r1, #1
+       str     r1, [r2]
 #endif
        bx      lr
 
 
 uint32 PC_prev = 0xcccccc, OP_prev = 0xcccccc;
 int32  g_cnt = 0;
 
-int cpu_repeat;
-int cpu_lastval;
 static int pending_add_cycles = 0, pending_rebase = 0;
 
+uint8  dreads[4];
+uint32 dwrites_c[2], dwrites_a[2];
+int dread_count_c, dread_count_a, dwrite_count_c, dwrite_count_a;
+
 static void leave(void)
 {
        printf("\nA: %02x, X: %02x, Y: %02x, S: %02x\n", X.A, X.X, X.Y, X.S);
        printf("PC = %04lx, OP=%02lX\n", PC_prev, OP_prev);
-       printf("cpu_lastval = %02x\n", cpu_lastval);
        exit(1);
 }
 
 static void compare_state(void)
 {
        uint8 nes_flags;
-       int fail = 0;
+       int i, fail = 0;
 
        if ((nes_registers[0] >> 24) != X.A) {
                printf("A: %02lx vs %02x\n", nes_registers[0] >> 24, X.A);
                fail = 1;
        }
 
+       if (dread_count_a != dread_count_c) {
+               printf("dread_count: %i vs %i\n", dread_count_a, dread_count_c);
+               fail = 1;
+       }
+
+       if (dwrite_count_a != dwrite_count_c) {
+               printf("dwrite_count: %i vs %i\n", dwrite_count_a, dwrite_count_c);
+               fail = 1;
+       }
+
+       /* Walk the logged writes newest-first; skipped once a mismatch is already flagged. */
+       for (i = dwrite_count_a - 1; !fail && i >= 0; i--)
+               if (dwrites_a[i] != dwrites_c[i]) {
+                       /* Report the slot that differs, not the total number of writes. */
+                       printf("dwrites[%i]: %06lx vs %06lx\n", i, dwrites_a[i], dwrites_c[i]);
+                       fail = 1;
+               }
+
        if (fail) leave();
 }
 
                nes_registers[7]=1;
                X.count=1;
 
-               cpu_lastval = 0;
-               cpu_repeat = 0;
+               dread_count_c = dread_count_a = dwrite_count_c = dwrite_count_a = 0;
                X6502_Run_c();
 
-               cpu_repeat = 1;
                X6502_Run_a();
 
                compare_state();
 {
        printf("-- power\n");
        if (nes_internal_ram == RAM) printf("nes_internal_ram == RAM!!\n");
+       dread_count_c = dread_count_a = dwrite_count_c = dwrite_count_a = 0;
 
        X6502_Power_c();
        X6502_Power_a();
 
 #include "cart.h"
 
 #ifdef DEBUG_ASM_6502
+#include <stdio.h>
+#include <stdlib.h>
 extern uint32 PC_prev, OP_prev;
-extern int cpu_lastval;
+extern uint8  dreads[4];
+extern uint32 dwrites_c[2];
+extern int dread_count_c, dwrite_count_c;
+#define DummyRdMem(...)
+#else
+#define DummyRdMem RdMem
 #endif
 
 X6502 X;
 #endif
  _DB=ARead[A](A);
 #ifdef DEBUG_ASM_6502
- // TODO: read counter, not 0x10000..
- if (cpu_lastval)
-  cpu_lastval|=_DB<<8;
- else
-  cpu_lastval=_DB|0x10000;
- printf("read [%04x] %02x, cpu_lastval=%02x\n", A, _DB, cpu_lastval);
+ //printf("a == %x, pc == %x\n", A, _PC);
+ /* Log reads at 0x2000 and above, except those at _PC or the bytes adjacent to it. */
+ if (A >= 0x2000 && A != _PC && A != _PC - 1 && A != _PC + 1) {
+  /* Check before storing so a fifth logged read cannot write past the 4-entry dreads array. */
+  if (dread_count_c >= 4) { printf("dread_count out of range\n"); exit(1); }
+  dreads[dread_count_c++] = _DB;
+ }
 #endif
  return _DB;
 }
   return;
  }
  BWrite[A](A,V);
+#ifdef DEBUG_ASM_6502
+ /* Check before storing so a third logged write cannot run past the 2-entry dwrites_c array. */
+ if (dwrite_count_c >= 2) { printf("dwrite_count_c out of range\n"); exit(1); }
+ /* Each record is the address shifted left by 8, OR'ed with the value written. */
+ dwrites_c[dwrite_count_c++] = (A<<8)|V;
+#endif
 }
 
 static INLINE uint8 RdRAM(unsigned int A)
  if((target^tmp)&0x100)        \
  {     \
   target&=0xFFFF;      \
-  RdMem(target^0x100); \
+  DummyRdMem(target^0x100);    \
   ADDCYC(1);   \
  }     \
 }
  target=rt;    \
  target+=i;    \
  target&=0xFFFF;       \
- RdMem((target&0x00FF)|(rt&0xFF00));   \
+ DummyRdMem((target&0x00FF)|(rt&0xFF00));      \
 }
 
 /* Zero Page */
  if((target^rt)&0x100) \
  {     \
   target&=0xFFFF;      \
-  RdMem(target^0x100); \
+  DummyRdMem(target^0x100);    \
   ADDCYC(1);   \
  }     \
 }
  rt|=RdRAM(tmp)<<8;    \
  target=rt;    \
  target+=_Y;   \
- RdMem((target&0x00FF)|(rt&0xFF00));   \
+ DummyRdMem((target&0x00FF)|(rt&0xFF00));      \
 }
 
 /* Now come the macros to wrap up all of the above stuff addressing mode functions
 #ifdef DEBUG_ASM_6502
         PC_prev = _PC;
         OP_prev = b1;
-        cpu_lastval = 0;
 #endif
          //printf("$%04x:$%02x\n",_PC,b1);
         //_PC++;