psxinterpreter: yet more exceptions, new config option
author notaz <notasas@gmail.com>
Wed, 19 Jul 2023 22:51:46 +0000 (01:51 +0300)
committer notaz <notasas@gmail.com>
Fri, 21 Jul 2023 00:04:18 +0000 (03:04 +0300)
18 files changed:
frontend/libretro.c
frontend/libretro_core_options.h
frontend/menu.c
include/compiler_features.h
libpcsxcore/new_dynarec/assem_arm.c
libpcsxcore/new_dynarec/assem_arm64.c
libpcsxcore/new_dynarec/emu_if.c
libpcsxcore/new_dynarec/events.c
libpcsxcore/new_dynarec/linkage_arm.S
libpcsxcore/new_dynarec/linkage_arm64.S
libpcsxcore/new_dynarec/new_dynarec.c
libpcsxcore/new_dynarec/pcsxmem.c
libpcsxcore/psxbios.c
libpcsxcore/psxcommon.h
libpcsxcore/psxinterpreter.c
libpcsxcore/psxinterpreter.h
libpcsxcore/r3000a.c
libpcsxcore/r3000a.h

index 21189d6..32d0bec 100644 (file)
@@ -2047,6 +2047,16 @@ static void update_variables(bool in_flight)
          Config.icache_emulation = 1;
    }
 
+   var.value = NULL;
+   var.key = "pcsx_rearmed_exception_emulation";
+   if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+   {
+      if (strcmp(var.value, "enabled") == 0)
+         Config.PreciseExceptions = 1;
+      else
+         Config.PreciseExceptions = 0;
+   }
+
    psxCpu->ApplyConfig();
 
    // end of CPU emu config
index 38169c4..781c514 100644 (file)
@@ -1171,7 +1171,21 @@ struct retro_core_option_v2_definition option_defs_us[] = {
       "pcsx_rearmed_icache_emulation",
       "Instruction Cache Emulation",
       NULL,
-      "Enable emulation of the PSX CPU instruction cache. Improves accuracy at the expense of increased performance overheads. Required for Formula One 2001, Formula One Arcade and Formula One 99. [Interpreter only and partial on lightrec, unsupported when using ARMv7 backend]",
+      "Enable emulation of the PSX CPU instruction cache. Improves accuracy at the expense of increased performance overheads. Required for Formula One 2001, Formula One Arcade and Formula One 99. [Interpreter only; partial on lightrec and ARM dynarecs]",
+      NULL,
+      "compat_hack",
+      {
+         { "enabled",  NULL },
+         { "disabled", NULL },
+         { NULL, NULL },
+      },
+      "enabled",
+   },
+   {
+      "pcsx_rearmed_exception_emulation",
+      "Exception and Breakpoint Emulation",
+      NULL,
+      "Enable emulation of some almost never used PSX's debug features. This causes a performance hit, is not useful for games and is intended for PSX homebrew and romhack developers only. Only enable if you know what you are doing. [Interpreter only]",
       NULL,
       "compat_hack",
       {
index f1c7862..b480506 100644 (file)
@@ -398,6 +398,7 @@ static const struct {
        CE_CONFIG_VAL(DisableStalls),
        CE_CONFIG_VAL(Cpu),
        CE_CONFIG_VAL(GpuListWalking),
+       CE_CONFIG_VAL(PreciseExceptions),
        CE_INTVAL(region),
        CE_INTVAL_V(g_scaler, 3),
        CE_INTVAL(g_gamma),
@@ -1594,12 +1595,14 @@ static const char h_cfg_nodrc[]  = "Disable dynamic recompiler and use interpret
 #endif
 static const char h_cfg_shacks[] = "Breaks games but may give better performance";
 static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)";
-static const char h_cfg_gpul[]   = "Try enabling this if the game is missing some graphics\n"
+static const char h_cfg_exc[]    = "Emulate some PSX's debug hw like breakpoints\n"
+                                  "and exceptions (slow, interpreter only, keep off)";
+static const char h_cfg_gpul[]   = "Try enabling this if the game misses some graphics\n"
                                   "causes a performance hit";
 static const char h_cfg_psxclk[]  = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n"
                                    "(adjust this if the game is too slow/too fast/hangs)";
 
-enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU, AMO_GPUL };
+enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL };
 
 static menu_entry e_menu_adv_options[] =
 {
@@ -1609,6 +1612,7 @@ static menu_entry e_menu_adv_options[] =
        mee_onoff_h   ("Disable XA Decoding",    0, menu_iopts[AMO_XA],   1, h_cfg_xa),
        mee_onoff_h   ("Disable CD Audio",       0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda),
        mee_onoff_h   ("ICache emulation",       0, menu_iopts[AMO_IC],   1, h_cfg_icache),
+       mee_onoff_h   ("BP exception emulation", 0, menu_iopts[AMO_BP],   1, h_cfg_exc),
        mee_enum_h    ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_gpul, h_cfg_gpul),
 #if !defined(DRC_DISABLE) || defined(LIGHTREC)
        mee_onoff_h   ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU],  1, h_cfg_nodrc),
@@ -1628,6 +1632,7 @@ static int menu_loop_adv_options(int id, int keys)
                { &Config.Xa,      &menu_iopts[AMO_XA] },
                { &Config.Cdda,    &menu_iopts[AMO_CDDA] },
                { &Config.icache_emulation, &menu_iopts[AMO_IC] },
+               { &Config.PreciseExceptions, &menu_iopts[AMO_BP] },
                { &Config.Cpu,     &menu_iopts[AMO_CPU] },
        };
        int i;
index 0c1119d..3841866 100644 (file)
@@ -2,9 +2,11 @@
 #ifdef __GNUC__
 # define likely(x)       __builtin_expect((x),1)
 # define unlikely(x)     __builtin_expect((x),0)
+# define noinline        __attribute__((noinline))
 #else
 # define likely(x)       (x)
 # define unlikely(x)     (x)
+# define noinline
 #endif
 
 #ifndef __has_builtin
index 6af93e2..2847e51 100644 (file)
@@ -430,7 +430,7 @@ static void emit_loadreg(int r, int hr)
     //case HIREG: addr = &hi; break;
     //case LOREG: addr = &lo; break;
     case CCREG: addr = &cycle_count; break;
-    case CSREG: addr = &psxRegs.CP0.n.Status; break;
+    case CSREG: addr = &psxRegs.CP0.n.SR; break;
     case INVCP: addr = &invc_ptr; break;
     case ROREG: addr = &ram_offset; break;
     default:
@@ -572,6 +572,11 @@ static void emit_addimm(u_int rs,int imm,u_int rt)
   else if(rs!=rt) emit_mov(rs,rt);
 }
 
+static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
+{
+  emit_addimm(rs, imm, rt);
+}
+
 static void emit_addimm_and_set_flags(int imm,int rt)
 {
   assert(imm>-65536&&imm<65536);
index 67ce02a..6f9c91d 100644 (file)
@@ -463,7 +463,7 @@ static void emit_loadreg(u_int r, u_int hr)
     //case HIREG: addr = &hi; break;
     //case LOREG: addr = &lo; break;
     case CCREG: addr = &cycle_count; break;
-    case CSREG: addr = &psxRegs.CP0.n.Status; break;
+    case CSREG: addr = &psxRegs.CP0.n.SR; break;
     case INVCP: addr = &invc_ptr; is64 = 1; break;
     case ROREG: addr = &ram_offset; is64 = 1; break;
     default:
@@ -629,6 +629,11 @@ static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
   emit_addimm_s(0, 1, rs, imm, rt);
 }
 
+static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
+{
+  emit_addimm64(rs, imm, rt);
+}
+
 static void emit_addimm_and_set_flags(int imm, u_int rt)
 {
   emit_addimm_s(1, 0, rt, imm, rt);
index e89b635..89716fa 100644 (file)
@@ -28,7 +28,7 @@ void pcsx_mtc0(u32 reg, u32 val)
        evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
        MTC0(&psxRegs, reg, val);
        gen_interupt(&psxRegs.CP0);
-       if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq
+       if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
                pending_exception = 1;
 }
 
index 5d981f8..71aed6b 100644 (file)
@@ -68,7 +68,7 @@ static void irq_test(psxCP0Regs *cp0)
                }
        }
 
-       if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.Status & 0x401) == 0x401) {
+       if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.SR & 0x401) == 0x401) {
                psxException(0x400, 0, cp0);
                pending_exception = 1;
        }
index f97b2d0..7a6d2ed 100644 (file)
@@ -292,7 +292,7 @@ FUNCTION(jump_break):
        b       call_psxException
 FUNCTION(jump_syscall_ds):
        mov     r0, #0x20
-       mov     r1, #1
+       mov     r1, #2
        b       call_psxException
 FUNCTION(jump_syscall):
        mov     r0, #0x20
index 72d13f3..bc5f115 100644 (file)
@@ -150,7 +150,7 @@ FUNCTION(jump_break):
        b       call_psxException
 FUNCTION(jump_syscall_ds):
        mov     w0, #0x20
-       mov     w1, #1
+       mov     w1, #2
        b       call_psxException
 FUNCTION(jump_syscall):
        mov     w0, #0x20
index 17f7af9..f597646 100644 (file)
@@ -784,7 +784,7 @@ static void noinline *get_addr(u_int vaddr, int can_compile)
     return ndrc_get_addr_ht(vaddr);
 
   // generate an address error
-  psxRegs.CP0.n.Status |= 2;
+  psxRegs.CP0.n.SR |= 2;
   psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2);
   psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr;
   psxRegs.CP0.n.BadVAddr = vaddr & ~1;
@@ -3559,11 +3559,11 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_)
     assert(dops[i].opcode2==0x10);
     //if((source[i]&0x3f)==0x10) // RFE
     {
-      emit_readword(&psxRegs.CP0.n.Status,0);
+      emit_readword(&psxRegs.CP0.n.SR,0);
       emit_andimm(0,0x3c,1);
       emit_andimm(0,~0xf,0);
       emit_orrshr_imm(1,2,0);
-      emit_writeword(0,&psxRegs.CP0.n.Status);
+      emit_writeword(0,&psxRegs.CP0.n.SR);
     }
   }
 }
@@ -4132,6 +4132,7 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_,
   emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG);
   emit_add(2,HOST_CCREG,2);
   emit_writeword(2,&psxRegs.cycle);
+  emit_addimm_ptr(FP,(u_char *)&psxRegs - (u_char *)&dynarec_local,0);
   emit_far_call(func);
   emit_far_jump(jump_to_new_pc);
 }
@@ -4149,9 +4150,14 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_)
   emit_far_jump(func);
 }
 
+static void hlecall_bad()
+{
+  SysPrintf("bad hlecall\n");
+}
+
 static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_)
 {
-  void *hlefunc = gteNULL;
+  void *hlefunc = hlecall_bad;
   uint32_t hleCode = source[i] & 0x03ffffff;
   if (hleCode < ARRAY_SIZE(psxHLEt))
     hlefunc = psxHLEt[hleCode];
index 694b8d0..190f8fc 100644 (file)
@@ -67,7 +67,7 @@ static u32 read_mem_dummy(u32 addr)
 
 static void write_mem_dummy(u32 data)
 {
-       if (!(psxRegs.CP0.n.Status & (1 << 16)))
+       if (!(psxRegs.CP0.n.SR & (1 << 16)))
                memprintf("unmapped w %08x, %08x @%08x %u\n",
                          address, data, psxRegs.pc, psxRegs.cycle);
 }
index d31465c..13a7197 100644 (file)
@@ -1949,7 +1949,7 @@ void psxBios_StartPAD() { // 13
 #endif
        pad_stopped = 0;
        psxHwWrite16(0x1f801074, (unsigned short)(psxHwRead16(0x1f801074) | 0x1));
-       psxRegs.CP0.n.Status |= 0x401;
+       psxRegs.CP0.n.SR |= 0x401;
        pc0 = ra;
 }
 
@@ -1976,7 +1976,7 @@ void psxBios_PAD_init() { // 15
        psxHwWrite16(0x1f801074, (u16)(psxHwRead16(0x1f801074) | 0x1));
        pad_buf = (int *)Ra1;
        *pad_buf = -1;
-       psxRegs.CP0.n.Status |= 0x401;
+       psxRegs.CP0.n.SR |= 0x401;
        v0 = 2;
        pc0 = ra;
 }
@@ -1996,8 +1996,7 @@ void psxBios_ReturnFromException() { // 17
        k0 = interrupt_r26;
        if (psxRegs.CP0.n.Cause & 0x80000000) pc0 += 4;
 
-       psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) |
-                                                 ((psxRegs.CP0.n.Status & 0x3c) >> 2);
+       psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2);
 }
 
 void psxBios_ResetEntryInt() { // 18
@@ -2698,7 +2697,7 @@ void psxBios_ChangeClearRCnt() { // 0a
        v0 = *ptr;
        *ptr = a1;
 
-//     psxRegs.CP0.n.Status|= 0x404;
+//     psxRegs.CP0.n.SR|= 0x404;
        pc0 = ra;
 }
 
@@ -3301,12 +3300,12 @@ void psxBiosException() {
                        switch (a0) {
                                case 1: // EnterCritical - disable irq's
                                        /* Fixes Medievil 2 not loading up new game, Digimon World not booting up and possibly others */
-                                       v0 = (psxRegs.CP0.n.Status & 0x404) == 0x404;
-                                       psxRegs.CP0.n.Status &= ~0x404;
+                                       v0 = (psxRegs.CP0.n.SR & 0x404) == 0x404;
+                                       psxRegs.CP0.n.SR &= ~0x404;
                                        break;
 
                                case 2: // ExitCritical - enable irq's
-                                       psxRegs.CP0.n.Status |= 0x404;
+                                       psxRegs.CP0.n.SR |= 0x404;
                                        break;
                                /* Normally this should cover SYS(00h, SYS(04h but they don't do anything relevant so... */
                                default:
@@ -3314,8 +3313,7 @@ void psxBiosException() {
                        }
                        pc0 = psxRegs.CP0.n.EPC + 4;
 
-                       psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) |
-                                                                 ((psxRegs.CP0.n.Status & 0x3c) >> 2);
+                       psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2);
                        return;
 
                default:
@@ -3328,8 +3326,7 @@ void psxBiosException() {
        pc0 = psxRegs.CP0.n.EPC;
        if (psxRegs.CP0.n.Cause & 0x80000000) pc0+=4;
 
-       psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) |
-                                                 ((psxRegs.CP0.n.Status & 0x3c) >> 2);
+       psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2);
 }
 
 #define bfreeze(ptr, size) { \
index 92e69ee..4c78255 100644 (file)
@@ -141,6 +141,7 @@ typedef struct {
        boolean UseNet;
        boolean icache_emulation;
        boolean DisableStalls;
+       boolean PreciseExceptions;
        int GpuListWalking;
        int cycle_multiplier; // 100 for 1.0
        int cycle_multiplier_override;
index b9e1dbc..f3bf7b6 100644 (file)
 // these may cause issues: because of poor timing we may step
 // on instructions that real hardware would never reach
 #define DO_EXCEPTION_RESERVEDI
-#define DO_EXCEPTION_ALIGNMENT_BRANCH
-//#define DO_EXCEPTION_ALIGNMENT_DATA
 #define HANDLE_LOAD_DELAY
 
-static int branch = 0;
-static int branch2 = 0;
+static int branchSeen = 0;
 
 #ifdef __i386__
 #define INT_ATTR __attribute__((regparm(2)))
@@ -112,9 +109,54 @@ static void dloadClear(psxRegisters *regs)
 
 static void intException(psxRegisters *regs, u32 pc, u32 cause)
 {
+       if (cause != 0x20) {
+               //FILE *f = fopen("/tmp/psx_ram.bin", "wb");
+               //fwrite(psxM, 1, 0x200000, f); fclose(f);
+               log_unhandled("exception %08x @%08x\n", cause, pc);
+       }
        dloadFlush(regs);
        regs->pc = pc;
-       psxException(cause, branch, &regs->CP0);
+       psxException(cause, regs->branching, &regs->CP0);
+       regs->branching = R3000A_BRANCH_NONE_OR_EXCEPTION;
+}
+
+// exception caused by current instruction (excluding unmasking)
+static void intExceptionInsn(psxRegisters *regs, u32 cause)
+{
+       cause |= (regs->code & 0x0c000000) << 2;
+       intException(regs, regs->pc - 4, cause);
+}
+
+// 29  Enable for 80000000-ffffffff
+// 30  Enable for 00000000-7fffffff
+// 31  Enable exception
+#define DBR_ABIT(dc, a)    ((dc) & (1u << (29+(((a)>>31)^1))))
+#define DBR_EN_EXEC(dc, a) (((dc) & 0x01800000) == 0x01800000 && DBR_ABIT(dc, a))
+#define DBR_EN_LD(dc, a)   (((dc) & 0x06800000) == 0x06800000 && DBR_ABIT(dc, a))
+#define DBR_EN_ST(dc, a)   (((dc) & 0x0a800000) == 0x0a800000 && DBR_ABIT(dc, a))
+static void intExceptionDebugBp(psxRegisters *regs, u32 pc)
+{
+       psxCP0Regs *cp0 = &regs->CP0;
+       dloadFlush(regs);
+       cp0->n.Cause &= 0x300;
+       cp0->n.Cause |= (regs->branching << 30) | (R3000E_Bp << 2);
+       cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2);
+       cp0->n.EPC = regs->branching ? pc - 4 : pc;
+       psxRegs.pc = 0x80000040;
+}
+
+static int execBreakCheck(psxRegisters *regs, u32 pc)
+{
+       if (unlikely(DBR_EN_EXEC(regs->CP0.n.DCIC, pc) &&
+           ((pc ^ regs->CP0.n.BPC) & regs->CP0.n.BPCM) == 0))
+       {
+               regs->CP0.n.DCIC |= 0x03;
+               if (regs->CP0.n.DCIC & (1u << 31)) {
+                       intExceptionDebugBp(regs, pc);
+                       return 1;
+               }
+       }
+       return 0;
 }
 
 // get an opcode without triggering exceptions or affecting cache
@@ -191,12 +233,12 @@ static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchN
 // Make the timing events trigger faster as we are currently assuming everything
 // takes one cycle, which is not the case on real hardware.
 // FIXME: count cache misses, memory latencies, stalls to get rid of this
-static inline void addCycle(void)
+static inline void addCycle(psxRegisters *regs)
 {
-       assert(psxRegs.subCycleStep >= 0x10000);
-       psxRegs.subCycle += psxRegs.subCycleStep;
-       psxRegs.cycle += psxRegs.subCycle >> 16;
-       psxRegs.subCycle &= 0xffff;
+       assert(regs->subCycleStep >= 0x10000);
+       regs->subCycle += regs->subCycleStep;
+       regs->cycle += regs->subCycle >> 16;
+       regs->subCycle &= 0xffff;
 }
 
 /**** R3000A Instruction Macros ****/
@@ -343,7 +385,7 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) {
         */
        for (lim = 0; lim < 8; lim++) {
                regs->code = code = fetch(regs, psxMemRLUT, tar1);
-               addCycle();
+               addCycle(regs);
                if (likely(!isBranch(code))) {
                        dloadStep(regs);
                        psxBSC[code >> 26](regs, code);
@@ -359,23 +401,26 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) {
        SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2);
 }
 
-static void doBranch(psxRegisters *regs, u32 tar) {
-       u32 code, pc;
+static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
+       u32 code, pc, pc_final;
 
-       branch2 = branch = 1;
+       branchSeen = regs->branching = taken;
+       pc_final = taken == R3000A_BRANCH_TAKEN ? tar : regs->pc + 4;
 
        // fetch the delay slot
        pc = regs->pc;
        regs->pc = pc + 4;
        regs->code = code = fetch(regs, psxMemRLUT, pc);
 
-       addCycle();
+       addCycle(regs);
 
        // check for branch in delay slot
        if (unlikely(isBranch(code))) {
-               psxDoDelayBranch(regs, tar, code);
+               regs->pc = pc;
+               if (taken == R3000A_BRANCH_TAKEN)
+                       psxDoDelayBranch(regs, tar, code);
                log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc);
-               branch = 0;
+               regs->branching = 0;
                psxBranchTest();
                return;
        }
@@ -383,24 +428,30 @@ static void doBranch(psxRegisters *regs, u32 tar) {
        dloadStep(regs);
        psxBSC[code >> 26](regs, code);
 
-       branch = 0;
-       regs->pc = tar;
+       if (likely(regs->branching != R3000A_BRANCH_NONE_OR_EXCEPTION))
+               regs->pc = pc_final;
+       else
+               regs->CP0.n.Target = pc_final;
+       regs->branching = 0;
 
        psxBranchTest();
 }
 
 static void doBranchReg(psxRegisters *regs, u32 tar) {
-#ifdef DO_EXCEPTION_ALIGNMENT_BRANCH
+       doBranch(regs, tar & ~3, R3000A_BRANCH_TAKEN);
+}
+
+static void doBranchRegE(psxRegisters *regs, u32 tar) {
+       if (unlikely(DBR_EN_EXEC(regs->CP0.n.DCIC, tar) &&
+           ((tar ^ regs->CP0.n.BPC) & regs->CP0.n.BPCM) == 0))
+               regs->CP0.n.DCIC |= 0x03;
        if (unlikely(tar & 3)) {
                SysPrintf("game crash @%08x, ra=%08x\n", tar, regs->GPR.n.ra);
-               psxRegs.CP0.n.BadVAddr = tar;
+               regs->CP0.n.BadVAddr = tar;
                intException(regs, tar, R3000E_AdEL << 2);
                return;
        }
-#else
-       tar &= ~3;
-#endif
-       doBranch(regs, tar);
+       doBranch(regs, tar, R3000A_BRANCH_TAKEN);
 }
 
 #if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5)
@@ -415,7 +466,7 @@ static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
        s32 val;
        if (add_overflow(a1, a2, val)) {
                //printf("ov %08x + %08x = %08x\n", a1, a2, val);
-               intException(regs, regs->pc - 4, R3000E_Ov << 2);
+               intExceptionInsn(regs, R3000E_Ov << 2);
                return;
        }
        dloadRt(regs, rt, val);
@@ -424,7 +475,7 @@ static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
 static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
        s32 val;
        if (sub_overflow(a1, a2, val)) {
-               intException(regs, regs->pc - 4, R3000E_Ov << 2);
+               intExceptionInsn(regs, R3000E_Ov << 2);
                return;
        }
        dloadRt(regs, rt, val);
@@ -538,14 +589,14 @@ OP(psxMULTU_stall) {
 * Register branch logic                                  *
 * Format:  OP rs, offset                                 *
 *********************************************************/
+#define BrCond(c) (c) ? R3000A_BRANCH_TAKEN : R3000A_BRANCH_NOT_TAKEN
 #define RepZBranchi32(op) \
-       if(_i32(_rRs_) op 0) \
-               doBranch(regs_, _BranchTarget_);
+       doBranch(regs_, _BranchTarget_, BrCond(_i32(_rRs_) op 0));
 #define RepZBranchLinki32(op)  { \
        s32 temp = _i32(_rRs_); \
+       dloadFlush(regs_); \
        _SetLink(31); \
-       if(temp op 0) \
-               doBranch(regs_, _BranchTarget_); \
+       doBranch(regs_, _BranchTarget_, BrCond(temp op 0)); \
 }
 
 OP(psxBGEZ)   { RepZBranchi32(>=) }      // Branch if Rs >= 0
@@ -608,18 +659,18 @@ OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs
 * Format:  OP                                            *
 *********************************************************/
 OP(psxBREAK) {
-       intException(regs_, regs_->pc - 4, R3000E_Bp << 2);
+       intExceptionInsn(regs_, R3000E_Bp << 2);
 }
 
 OP(psxSYSCALL) {
-       intException(regs_, regs_->pc - 4, R3000E_Syscall << 2);
+       intExceptionInsn(regs_, R3000E_Syscall << 2);
 }
 
 static inline void execI_(u8 **memRLUT, psxRegisters *regs_);
 
 static inline void psxTestSWInts(psxRegisters *regs_, int step) {
-       if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 &&
-          regs_->CP0.n.Status & 0x1) {
+       if ((regs_->CP0.n.Cause & regs_->CP0.n.SR & 0x0300) &&
+           (regs_->CP0.n.SR & 0x1)) {
                if (step)
                        execI_(psxMemRLUT, regs_);
                regs_->CP0.n.Cause &= ~0x7c;
@@ -628,9 +679,7 @@ static inline void psxTestSWInts(psxRegisters *regs_, int step) {
 }
 
 OP(psxRFE) {
-//     SysPrintf("psxRFE\n");
-       regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) |
-                             ((regs_->CP0.n.Status & 0x3c) >> 2);
+       regs_->CP0.n.SR = (regs_->CP0.n.SR & ~0x0f) | ((regs_->CP0.n.SR & 0x3c) >> 2);
        psxTestSWInts(regs_, 0);
 }
 
@@ -638,10 +687,8 @@ OP(psxRFE) {
 * Register branch logic                                  *
 * Format:  OP rs, rt, offset                             *
 *********************************************************/
-#define RepBranchi32(op) { \
-       if (_i32(_rRs_) op _i32(_rRt_)) \
-               doBranch(regs_, _BranchTarget_); \
-}
+#define RepBranchi32(op) \
+       doBranch(regs_, _BranchTarget_, BrCond(_i32(_rRs_) op _i32(_rRt_)));
 
 OP(psxBEQ) { RepBranchi32(==) }  // Branch if Rs == Rt
 OP(psxBNE) { RepBranchi32(!=) }  // Branch if Rs != Rt
@@ -650,8 +697,12 @@ OP(psxBNE) { RepBranchi32(!=) }  // Branch if Rs != Rt
 * Jump to target                                         *
 * Format:  OP target                                     *
 *********************************************************/
-OP(psxJ)   {               doBranch(regs_, _JumpTarget_); }
-OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); }
+OP(psxJ)   { doBranch(regs_, _JumpTarget_, R3000A_BRANCH_TAKEN); }
+OP(psxJAL) {
+       dloadFlush(regs_);
+       _SetLink(31);
+       doBranch(regs_, _JumpTarget_, R3000A_BRANCH_TAKEN);
+}
 
 /*********************************************************
 * Register jump                                          *
@@ -662,37 +713,76 @@ OP(psxJR) {
        psxJumpTest();
 }
 
+OP(psxJRe) {
+       doBranchRegE(regs_, _rRs_);
+       psxJumpTest();
+}
+
 OP(psxJALR) {
        u32 temp = _u32(_rRs_);
+       dloadFlush(regs_);
        if (_Rd_) { _SetLink(_Rd_); }
        doBranchReg(regs_, temp);
 }
 
+OP(psxJALRe) {
+       u32 temp = _u32(_rRs_);
+       dloadFlush(regs_);
+       if (_Rd_) { _SetLink(_Rd_); }
+       doBranchRegE(regs_, temp);
+}
+
 /*********************************************************
-* Load and store for GPR                                 *
-* Format:  OP rt, offset(base)                           *
 *********************************************************/
 
-static int algnChkL(psxRegisters *regs, u32 addr, u32 m) {
+// revisit: incomplete
+#define BUS_LOCKED_ADDR(a) \
+       ((0x1fc80000u <= (a) && (a) < 0x80000000u) || \
+        (0xc0000000u <= (a) && (a) < 0xfffe0000u))
+
+// exception checking order is important
+static inline int checkLD(psxRegisters *regs, u32 addr, u32 m) {
+       int bpException = 0;
+       if (unlikely(DBR_EN_LD(regs->CP0.n.DCIC, addr) &&
+           ((addr ^ regs->CP0.n.BDA) & regs->CP0.n.BDAM) == 0)) {
+               regs->CP0.n.DCIC |= 0x0d;
+               bpException = regs->CP0.n.DCIC >> 31;
+       }
        if (unlikely(addr & m)) {
-               log_unhandled("unaligned load %08x @%08x\n", addr, regs->pc - 4);
-#ifdef DO_EXCEPTION_ALIGNMENT_DATA
-               psxRegs.CP0.n.BadVAddr = addr;
-               intException(regs, regs->pc - 4, R3000E_AdEL << 2);
+               regs->CP0.n.BadVAddr = addr;
+               intExceptionInsn(regs, R3000E_AdEL << 2);
+               return 0;
+       }
+       if (unlikely(bpException)) {
+               intExceptionDebugBp(regs, regs->pc - 4);
+               return 0;
+       }
+       if (unlikely(BUS_LOCKED_ADDR(addr))) {
+               intException(regs, regs->pc - 4, R3000E_DBE << 2);
                return 0;
-#endif
        }
        return 1;
 }
 
-static int algnChkS(psxRegisters *regs, u32 addr, u32 m) {
+static inline int checkST(psxRegisters *regs, u32 addr, u32 m) {
+       int bpException = 0;
+       if (unlikely(DBR_EN_ST(regs->CP0.n.DCIC, addr) &&
+           ((addr ^ regs->CP0.n.BDA) & regs->CP0.n.BDAM) == 0)) {
+               regs->CP0.n.DCIC |= 0x15;
+               bpException = regs->CP0.n.DCIC >> 31;
+       }
        if (unlikely(addr & m)) {
-               log_unhandled("unaligned store %08x @%08x\n", addr, regs->pc - 4);
-#ifdef DO_EXCEPTION_ALIGNMENT_DATA
-               psxRegs.CP0.n.BadVAddr = addr;
-               intException(regs, regs->pc - 4, R3000E_AdES << 2);
+               regs->CP0.n.BadVAddr = addr;
+               intExceptionInsn(regs, R3000E_AdES << 2);
+               return 0;
+       }
+       if (unlikely(bpException)) {
+               intExceptionDebugBp(regs, regs->pc - 4);
+               return 0;
+       }
+       if (unlikely(BUS_LOCKED_ADDR(addr))) {
+               intException(regs, regs->pc - 4, R3000E_DBE << 2);
                return 0;
-#endif
        }
        return 1;
 }
@@ -702,30 +792,40 @@ static int algnChkS(psxRegisters *regs, u32 addr, u32 m) {
 * Format:  OP rt, offset(base)                           *
 *********************************************************/
 
+/*********************************************************
+* Load and store for GPR                                 *
+* Format:  OP rt, offset(base)                           *
+*********************************************************/
+
 #define _oB_ (regs_->GPR.r[_Rs_] + _Imm_)
 
-OP(psxLB)  {                               doLoad(regs_, _Rt_,  (s8)psxMemRead8(_oB_));  }
-OP(psxLBU) {                               doLoad(regs_, _Rt_,      psxMemRead8(_oB_));  }
-OP(psxLH)  { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); }
-OP(psxLHU) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_,      psxMemRead16(_oB_)); }
-OP(psxLW)  { if (algnChkL(regs_, _oB_, 3)) doLoad(regs_, _Rt_,      psxMemRead32(_oB_)); }
+OP(psxLB)  { doLoad(regs_, _Rt_,  (s8)psxMemRead8(_oB_)); }
+OP(psxLBU) { doLoad(regs_, _Rt_,      psxMemRead8(_oB_)); }
+OP(psxLH)  { doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_ & ~1)); }
+OP(psxLHU) { doLoad(regs_, _Rt_,      psxMemRead16(_oB_ & ~1)); }
+OP(psxLW)  { doLoad(regs_, _Rt_,      psxMemRead32(_oB_ & ~3)); }
 
-OP(psxLWL) {
+OP(psxLBe)  { if (checkLD(regs_, _oB_, 0)) doLoad(regs_, _Rt_,  (s8)psxMemRead8(_oB_)); }
+OP(psxLBUe) { if (checkLD(regs_, _oB_, 0)) doLoad(regs_, _Rt_,      psxMemRead8(_oB_)); }
+OP(psxLHe)  { if (checkLD(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); }
+OP(psxLHUe) { if (checkLD(regs_, _oB_, 1)) doLoad(regs_, _Rt_,      psxMemRead16(_oB_)); }
+OP(psxLWe)  { if (checkLD(regs_, _oB_, 3)) doLoad(regs_, _Rt_,      psxMemRead32(_oB_)); }
+
+static void doLWL(psxRegisters *regs, u32 rt, u32 addr) {
        static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 };
        static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 };
-       u32 addr = _oB_, val;
        u32 shift = addr & 3;
-       u32 mem = psxMemRead32(addr & ~3);
-       u32 rt = _Rt_;
-       u32 oldval = regs_->GPR.r[rt];
+       u32 val, mem;
+       u32 oldval = regs->GPR.r[rt];
 
 #ifdef HANDLE_LOAD_DELAY
-       int sel = regs_->dloadSel;
-       if (regs_->dloadReg[sel] == rt)
-               oldval = regs_->dloadVal[sel];
+       int sel = regs->dloadSel;
+       if (regs->dloadReg[sel] == rt)
+               oldval = regs->dloadVal[sel];
 #endif
+       mem = psxMemRead32(addr & ~3);
        val = (oldval & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]);
-       doLoad(regs_, rt, val);
+       doLoad(regs, rt, val);
 
        /*
        Mem = 1234.  Reg = abcd
@@ -737,22 +837,21 @@ OP(psxLWL) {
        */
 }
 
-OP(psxLWR) {
+static void doLWR(psxRegisters *regs, u32 rt, u32 addr) {
        static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 };
        static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 };
-       u32 addr = _oB_, val;
        u32 shift = addr & 3;
-       u32 mem = psxMemRead32(addr & ~3);
-       u32 rt = _Rt_;
-       u32 oldval = regs_->GPR.r[rt];
+       u32 val, mem;
+       u32 oldval = regs->GPR.r[rt];
 
 #ifdef HANDLE_LOAD_DELAY
-       int sel = regs_->dloadSel;
-       if (regs_->dloadReg[sel] == rt)
-               oldval = regs_->dloadVal[sel];
+       int sel = regs->dloadSel;
+       if (regs->dloadReg[sel] == rt)
+               oldval = regs->dloadVal[sel];
 #endif
+       mem = psxMemRead32(addr & ~3);
        val = (oldval & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]);
-       doLoad(regs_, rt, val);
+       doLoad(regs, rt, val);
 
        /*
        Mem = 1234.  Reg = abcd
@@ -764,20 +863,30 @@ OP(psxLWR) {
        */
 }
 
-OP(psxSB) {                               psxMemWrite8 (_oB_, _rRt_ &   0xff); }
-OP(psxSH) { if (algnChkS(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); }
-OP(psxSW) { if (algnChkS(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); }
+OP(psxLWL) { doLWL(regs_, _Rt_, _oB_); }
+OP(psxLWR) { doLWR(regs_, _Rt_, _oB_); }
 
-// FIXME: this rmw implementation is wrong and would break on io like fifos
-OP(psxSWL) {
-       static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 };
-       static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 };
-       u32 addr = _oB_;
-       u32 shift = addr & 3;
-       u32 mem = psxMemRead32(addr & ~3);
+OP(psxLWLe) { if (checkLD(regs_, _oB_ & ~3, 0)) doLWL(regs_, _Rt_, _oB_); }
+OP(psxLWRe) { if (checkLD(regs_, _oB_     , 0)) doLWR(regs_, _Rt_, _oB_); }
 
-       psxMemWrite32(addr & ~3,  (_u32(_rRt_) >> SWL_SHIFT[shift]) |
-                            (  mem & SWL_MASK[shift]) );
+OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ &   0xff); }
+OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); }
+OP(psxSW) { psxMemWrite32(_oB_, _rRt_); }
+
+OP(psxSBe) { if (checkST(regs_, _oB_, 0)) psxMemWrite8 (_oB_, _rRt_ &   0xff); }
+OP(psxSHe) { if (checkST(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); }
+OP(psxSWe) { if (checkST(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); }
+
+static void doSWL(psxRegisters *regs, u32 rt, u32 addr) { // SWL helper: stores the top (addr & 3) + 1 bytes of GPR rt into bytes 0..(addr & 3) of the word containing addr
+       u32 val = regs->GPR.r[rt];
+       switch (addr & 3) {
+               case 0: psxMemWrite8( addr     , val >> 24); break;
+               case 1: psxMemWrite16(addr & ~3, val >> 16); break;
+               case 2: // revisit: should be a single 24bit write
+                       psxMemWrite16(addr & ~3, (val >> 8) & 0xffff);
+                       psxMemWrite8( addr     , val >> 24); break;
+               case 3: psxMemWrite32(addr & ~3, val);       break;
+       }
        /*
        Mem = 1234.  Reg = abcd
 
@@ -788,15 +897,16 @@ OP(psxSWL) {
        */
 }
 
-OP(psxSWR) {
-       static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff };
-       static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 };
-       u32 addr = _oB_;
-       u32 shift = addr & 3;
-       u32 mem = psxMemRead32(addr & ~3);
-
-       psxMemWrite32(addr & ~3,  (_u32(_rRt_) << SWR_SHIFT[shift]) |
-                            (  mem & SWR_MASK[shift]) );
+static void doSWR(psxRegisters *regs, u32 rt, u32 addr) { // SWR helper: stores the low 4 - (addr & 3) bytes of GPR rt starting at addr
+       u32 val = regs->GPR.r[rt];
+       switch (addr & 3) {
+               case 0: psxMemWrite32(addr    , val); break;
+               case 1: // revisit: should be a single 24bit write
+                       psxMemWrite8 (addr    , val & 0xff);
+                       psxMemWrite16(addr + 1, (val >> 8) & 0xffff); break;
+               case 2: psxMemWrite16(addr    , val & 0xffff); break;
+               case 3: psxMemWrite8 (addr    , val & 0xff); break;
+       }
 
        /*
        Mem = 1234.  Reg = abcd
@@ -808,6 +918,12 @@ OP(psxSWR) {
        */
 }
 
+OP(psxSWL) { doSWL(regs_, _Rt_, _oB_); }
+OP(psxSWR) { doSWR(regs_, _Rt_, _oB_); }
+
+OP(psxSWLe) { if (checkST(regs_, _oB_ & ~3, 0)) doSWL(regs_, _Rt_, _oB_); }
+OP(psxSWRe) { if (checkST(regs_, _oB_     , 0)) doSWR(regs_, _Rt_, _oB_); }
+
 /*********************************************************
 * Moves between GPR and COPx                             *
 * Format:  OP rt, fs                                     *
@@ -815,25 +931,25 @@ OP(psxSWR) {
 OP(psxMFC0) {
        u32 r = _Rd_;
 #ifdef DO_EXCEPTION_RESERVEDI
-       if (unlikely(r == 0))
-               intException(regs_, regs_->pc - 4, R3000E_RI << 2);
+       if (unlikely(0x00000417u & (1u << r))) // mask bits 0, 1, 2, 4, 10: the register numbers named Reserved* in psxCP0Regs
+               intExceptionInsn(regs_, R3000E_RI << 2); // NOTE(review): doLoad() below still executes after this call - confirm intended
 #endif
        doLoad(regs_, _Rt_, regs_->CP0.r[r]);
 }
 
-OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); }
-
 static void setupCop(u32 sr);
 
+OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); }
+
 void MTC0(psxRegisters *regs_, int reg, u32 val) {
 //     SysPrintf("MTC0 %d: %x\n", reg, val);
        switch (reg) {
-               case 12: // Status
-                       if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16)))
+               case 12: // SR
+                       if (unlikely((regs_->CP0.n.SR ^ val) & (1 << 16)))
                                psxMemOnIsolate((val >> 16) & 1);
-                       if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29)))
+                       if (unlikely((regs_->CP0.n.SR ^ val) & (7 << 29)))
                                setupCop(val);
-                       regs_->CP0.n.Status = val;
+                       regs_->CP0.n.SR = val;
                        psxTestSWInts(regs_, 1);
                        break;
 
@@ -843,6 +959,10 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
                        psxTestSWInts(regs_, 0);
                        break;
 
+               case 7:
+                       if ((regs_->CP0.n.DCIC ^ val) & 0xff800000)
+                               log_unhandled("DCIC: %08x->%08x\n", regs_->CP0.n.DCIC, val);
+                       // fallthrough
                default:
                        regs_->CP0.r[reg] = val;
                        break;
@@ -852,23 +972,26 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
 OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
 OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
 
+// log-only handler for an unhandled opcode: reports regs->code and pc - 4, raises no exception
+static inline void psxNULLne(psxRegisters *regs) {
+       log_unhandled("unhandled op %08x @%08x\n", regs->code, regs->pc - 4);
+}
+
 /*********************************************************
 * Unknown instruction (would generate an exception)      *
 * Format:  ?                                             *
 *********************************************************/
-static inline void psxNULL_(void) {
-       //printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc);
-}
 
 OP(psxNULL) {
-       psxNULL_();
+       psxNULLne(regs_);
 #ifdef DO_EXCEPTION_RESERVEDI
-       intException(regs_, regs_->pc - 4, R3000E_RI << 2);
+       intExceptionInsn(regs_, R3000E_RI << 2);
 #endif
 }
 
 void gteNULL(struct psxCP2Regs *regs) {
-       psxNULL_();
+       psxRegisters *regs_ = (psxRegisters *)((u8 *)regs - offsetof(psxRegisters, CP2));
+       psxNULLne(regs_);
 }
 
 OP(psxSPECIAL) {
@@ -882,25 +1005,16 @@ OP(psxCOP0) {
                case 0x04: psxMTC0(regs_, code); break;
                case 0x06: psxCTC0(regs_, code); break;
                case 0x10: psxRFE(regs_, code);  break;
-               default:   psxNULL_();           break;
+               default:   psxNULLne(regs_);     break;
        }
 }
 
-OP(psxLWC0) {
-       // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ?
-       log_unhandled("LWC0 %08x\n", code);
-}
-
 OP(psxCOP1) {
        // ??? what actually happens here?
+       log_unhandled("COP1 %08x @%08x\n", code, regs_->pc - 4);
 }
 
-OP(psxCOP1d) {
-#ifdef DO_EXCEPTION_RESERVEDI
-       intException(regs_, regs_->pc - 4, (1<<28) | (R3000E_RI << 2));
-#endif
-}
-
+// TODO: wrong COP2 decoding
 OP(psxCOP2) {
        psxCP2[_Funct_](&regs_->CP2);
 }
@@ -911,12 +1025,6 @@ OP(psxCOP2_stall) {
        psxCP2[f](&regs_->CP2);
 }
 
-OP(psxCOP2d) {
-#ifdef DO_EXCEPTION_RESERVEDI
-       intException(regs_, regs_->pc - 4, (2<<28) | (R3000E_RI << 2));
-#endif
-}
-
 OP(gteMFC2) {
        doLoad(regs_, _Rt_, MFC2(&regs_->CP2, _Rd_));
 }
@@ -937,50 +1045,67 @@ OP(gteLWC2) {
        MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_);
 }
 
-OP(gteSWC2) {
-       psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_));
-}
-
 OP(gteLWC2_stall) {
        gteCheckStall(0);
        gteLWC2(regs_, code);
 }
 
+OP(gteLWC2e_stall) { // LWC2 variant that intApplyConfig installs at psxBSC[0x32] when Config.PreciseExceptions is set
+       gteCheckStall(0);
+       if (checkLD(regs_, _oB_, 3)) // load only if checkLD() returns nonzero; 3 is presumably the alignment mask - TODO confirm
+               MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_);
+}
+
+OP(gteSWC2) {
+       psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_));
+}
+
 OP(gteSWC2_stall) {
        gteCheckStall(0);
        gteSWC2(regs_, code);
 }
 
+OP(gteSWC2e_stall) { // SWC2 variant that intApplyConfig installs at psxBSC[0x3a] when Config.PreciseExceptions is set
+       gteCheckStall(0);
+       if (checkST(regs_, _oB_, 3)) // store only if checkST() returns nonzero; 3 is presumably the alignment mask - TODO confirm
+               gteSWC2(regs_, code);
+}
+
 OP(psxCOP3) {
        // ??? what actually happens here?
+       log_unhandled("COP3 %08x @%08x\n", code, regs_->pc - 4);
 }
 
-OP(psxCOP3d) {
+OP(psxCOPd) { // shared handler setupCop installs for opcodes 17-19 while the matching SR bit (29-31) is clear; (code >> 26) & 3 is the cop number
+       log_unhandled("disabled cop%d @%08x\n", (code >> 26) & 3, regs_->pc - 4);
 #ifdef DO_EXCEPTION_RESERVEDI
-       intException(regs_, regs_->pc - 4, (3<<28) | (R3000E_RI << 2));
+       intExceptionInsn(regs_, R3000E_CpU << 2);
 #endif
 }
 
 OP(psxLWCx) {
-       // does this read memory?
-       log_unhandled("LWCx %08x\n", code);
+       log_unhandled("LWCx %08x @%08x\n", code, regs_->pc - 4);
+       checkLD(regs_, _oB_, 3);
 }
 
 OP(psxSWCx) {
        // does this write something to memory?
-       log_unhandled("SWCx %08x\n", code);
+       log_unhandled("SWCx %08x @%08x\n", code, regs_->pc - 4);
+       checkST(regs_, _oB_, 3);
 }
 
 static void psxBASIC(struct psxCP2Regs *cp2regs) {
-       psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2));
-       u32 code = regs_->code;
-       assert(regs_ == &psxRegs);
+       psxRegisters *regs = (void *)((u8 *)cp2regs - offsetof(psxRegisters, CP2));
+       u32 code = regs->code;
+       assert(regs == &psxRegs);
        switch (_Rs_) {
-               case 0x00: gteMFC2(regs_, code); break;
-               case 0x02: gteCFC2(regs_, code); break;
-               case 0x04: gteMTC2(regs_, code); break;
-               case 0x06: gteCTC2(regs_, code); break;
-               default:   psxNULL_();           break;
+               case 0x00: gteMFC2(regs, code); break;
+               case 0x02: gteCFC2(regs, code); break;
+               case 0x04: gteMTC2(regs, code); break;
+               case 0x06: gteCTC2(regs, code); break;
+               case 0x08:
+               case 0x0c: log_unhandled("BC2 %08x @%08x\n", code, regs->pc - 4); // fallthrough (also logged by psxNULLne; presumably intended - TODO confirm)
+               default:   psxNULLne(regs);     break;
        }
 }
 
@@ -1014,12 +1139,12 @@ OP(psxHLE) {
 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
        psxSPECIAL, psxREGIMM, psxJ   , psxJAL  , psxBEQ , psxBNE , psxBLEZ, psxBGTZ,
        psxADDI   , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI ,
-       psxCOP0   , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
-       psxNULL   , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
-       psxLB     , psxLH    , psxLWL , psxLW   , psxLBU , psxLHU , psxLWR , psxCOP3d,
-       psxSB     , psxSH    , psxSWL , psxSW   , psxNULL, psxCOP1d,psxSWR , psxCOP3d,
-       psxLWC0   , psxLWCx  , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
-       psxSWCx   , psxSWCx  , gteSWC2, psxHLE  , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
+       psxCOP0   , psxCOPd  , psxCOP2, psxCOPd,  psxNULL, psxNULL, psxNULL, psxNULL,
+       psxNULL   , psxNULL  , psxNULL, psxNULL,  psxNULL, psxNULL, psxNULL, psxNULL,
+       psxLB     , psxLH    , psxLWL , psxLW   , psxLBU , psxLHU , psxLWR , psxNULL,
+       psxSB     , psxSH    , psxSWL , psxSW   , psxNULL, psxNULL, psxSWR , psxNULL,
+       psxLWCx   , psxLWCx  , gteLWC2, psxLWCx , psxNULL, psxNULL, psxNULL, psxNULL,
+       psxSWCx   , psxSWCx  , gteSWC2, psxHLE  , psxNULL, psxNULL, psxNULL, psxNULL,
 };
 
 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = {
@@ -1054,15 +1179,29 @@ static void intReset() {
        dloadClear(&psxRegs);
 }
 
-static inline void execI_(u8 **memRLUT, psxRegisters *regs_) {
-       u32 pc = regs_->pc;
-       regs_->pc += 4;
-       regs_->code = fetch(regs_, memRLUT, pc);
+static inline void execI_(u8 **memRLUT, psxRegisters *regs) {
+       u32 pc = regs->pc;
 
-       addCycle();
+       addCycle(regs);
+       dloadStep(regs);
 
-       dloadStep(regs_);
-       psxBSC[regs_->code >> 26](regs_, regs_->code);
+       regs->pc += 4;
+       regs->code = fetch(regs, memRLUT, pc);
+       psxBSC[regs->code >> 26](regs, regs->code);
+}
+
+static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { // same steps as execI_, plus an execBreakCheck() test before the fetch
+       u32 pc = regs->pc;
+
+       addCycle(regs);
+       dloadStep(regs);
+
+       if (execBreakCheck(regs, pc)) // nonzero result: skip fetch and dispatch for this step
+               return;
+
+       regs->pc += 4;
+       regs->code = fetch(regs, memRLUT, pc);
+       psxBSC[regs->code >> 26](regs, regs->code);
 }
 
 static void intExecute() {
@@ -1074,12 +1213,21 @@ static void intExecute() {
                execI_(memRLUT, regs_);
 }
 
+static void intExecuteBp() { // Execute loop built on execIbp; intApplyConfig assigns it to psxInt.Execute when Config.PreciseExceptions is set
+       psxRegisters *regs_ = &psxRegs;
+       u8 **memRLUT = psxMemRLUT;
+       extern int stop;
+
+       while (!stop) // keeps stepping until the external 'stop' flag becomes nonzero
+               execIbp(memRLUT, regs_);
+}
+
 void intExecuteBlock(enum blockExecCaller caller) {
        psxRegisters *regs_ = &psxRegs;
        u8 **memRLUT = psxMemRLUT;
 
-       branch2 = 0;
-       while (!branch2)
+       branchSeen = 0;
+       while (!branchSeen)
                execI_(memRLUT, regs_);
 }
 
@@ -1093,7 +1241,7 @@ static void intNotify(enum R3000Anote note, void *data) {
                break;
        case R3000ACPU_NOTIFY_AFTER_LOAD:
                dloadClear(&psxRegs);
-               setupCop(psxRegs.CP0.n.Status);
+               setupCop(psxRegs.CP0.n.SR);
                // fallthrough
        case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core?
                memset(&ICache, 0xff, sizeof(ICache));
@@ -1108,22 +1256,20 @@ static void setupCop(u32 sr)
        if (sr & (1u << 29))
                psxBSC[17] = psxCOP1;
        else
-               psxBSC[17] = psxCOP1d;
+               psxBSC[17] = psxCOPd;
        if (sr & (1u << 30))
                psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall;
        else
-               psxBSC[18] = psxCOP2d;
+               psxBSC[18] = psxCOPd;
        if (sr & (1u << 31))
                psxBSC[19] = psxCOP3;
        else
-               psxBSC[19] = psxCOP3d;
+               psxBSC[19] = psxCOPd;
 }
 
 void intApplyConfig() {
        int cycle_mult;
 
-       assert(psxBSC[50] == gteLWC2  || psxBSC[50] == gteLWC2_stall);
-       assert(psxBSC[58] == gteSWC2  || psxBSC[58] == gteSWC2_stall);
        assert(psxSPC[16] == psxMFHI  || psxSPC[16] == psxMFHI_stall);
        assert(psxSPC[18] == psxMFLO  || psxSPC[18] == psxMFLO_stall);
        assert(psxSPC[24] == psxMULT  || psxSPC[24] == psxMULT_stall);
@@ -1152,9 +1298,46 @@ void intApplyConfig() {
                psxSPC[26] = psxDIV_stall;
                psxSPC[27] = psxDIVU_stall;
        }
-       setupCop(psxRegs.CP0.n.Status);
+       setupCop(psxRegs.CP0.n.SR);
+
+       if (Config.PreciseExceptions) {
+               psxBSC[0x20] = psxLBe;
+               psxBSC[0x21] = psxLHe;
+               psxBSC[0x22] = psxLWLe;
+               psxBSC[0x23] = psxLWe;
+               psxBSC[0x24] = psxLBUe;
+               psxBSC[0x25] = psxLHUe;
+               psxBSC[0x26] = psxLWRe;
+               psxBSC[0x28] = psxSBe;
+               psxBSC[0x29] = psxSHe;
+               psxBSC[0x2a] = psxSWLe;
+               psxBSC[0x2b] = psxSWe;
+               psxBSC[0x2e] = psxSWRe;
+               psxBSC[0x32] = gteLWC2e_stall;
+               psxBSC[0x3a] = gteSWC2e_stall;
+               psxSPC[0x08] = psxJRe;
+               psxSPC[0x09] = psxJALRe;
+               psxInt.Execute = intExecuteBp;
+       } else {
+               psxBSC[0x20] = psxLB;
+               psxBSC[0x21] = psxLH;
+               psxBSC[0x22] = psxLWL;
+               psxBSC[0x23] = psxLW;
+               psxBSC[0x24] = psxLBU;
+               psxBSC[0x25] = psxLHU;
+               psxBSC[0x26] = psxLWR;
+               psxBSC[0x28] = psxSB;
+               psxBSC[0x29] = psxSH;
+               psxBSC[0x2a] = psxSWL;
+               psxBSC[0x2b] = psxSW;
+               psxBSC[0x2e] = psxSWR;
+               // LWC2, SWC2 handled by Config.DisableStalls
+               psxSPC[0x08] = psxJR;
+               psxSPC[0x09] = psxJALR;
+               psxInt.Execute = intExecute;
+       }
 
-       // dynarec may occasionally call the interpreter, in such a case the
+       // the dynarec may occasionally call the interpreter, in such a case the
        // cache won't work (cache only works right if all fetches go through it)
        if (!Config.icache_emulation || psxCpu != &psxInt)
                fetch = fetchNoCache;
@@ -1170,8 +1353,9 @@ static void intShutdown() {
 }
 
 // single step (may do several ops in case of a branch)
-void execI() {
-       execI_(psxMemRLUT, &psxRegs);
+void execI(psxRegisters *regs) {
+       execI_(psxMemRLUT, regs);
+       dloadFlush(regs);
 }
 
 R3000Acpu psxInt = {
index 746c8fe..2c3f394 100644 (file)
@@ -5,7 +5,7 @@
 u32 intFakeFetch(u32 pc);
 
 // called by "new_dynarec"
-void execI();
+void execI(psxRegisters *regs);
 void intApplyConfig();
 void MTC0(psxRegisters *regs_, int reg, u32 val);
 void gteNULL(struct psxCP2Regs *regs);
index 5374f86..2127358 100644 (file)
@@ -59,10 +59,10 @@ void psxReset() {
 
        psxRegs.pc = 0xbfc00000; // Start in bootstrap
 
-       psxRegs.CP0.r[12] = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1
-       psxRegs.CP0.r[15] = 0x00000002; // PRevID = Revision ID, same as R3000A
+       psxRegs.CP0.n.SR   = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1
+       psxRegs.CP0.n.PRid = 0x00000002; // PRevID = Revision ID, same as R3000A
        if (Config.HLE)
-               psxRegs.CP0.n.Status |= 1u << 30; // COP2 enabled
+               psxRegs.CP0.n.SR |= 1u << 30; // COP2 enabled
 
        psxCpu->ApplyConfig();
        psxCpu->Reset();
@@ -93,7 +93,7 @@ void psxShutdown() {
 }
 
 // cp0 is passed separately for lightrec to be less messy
-void psxException(u32 cause, u32 bd, psxCP0Regs *cp0) {
+void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) {
        u32 opcode = intFakeFetch(psxRegs.pc);
        
        if (unlikely(!Config.HLE && ((((opcode) >> 24) & 0xfe) == 0x4a))) {
@@ -101,31 +101,24 @@ void psxException(u32 cause, u32 bd, psxCP0Regs *cp0) {
                // BIOS does not allow to return to GTE instructions
                // (just skips it, supposedly because it's scheduled already)
                // so we execute it here
-               psxCP2Regs *cp2 = (void *)(cp0 + 1);
+               psxCP2Regs *cp2 = (psxCP2Regs *)(cp0 + 1);
                psxRegs.code = opcode;
                psxCP2[opcode & 0x3f](cp2);
        }
 
        // Set the Cause
-       cp0->n.Cause = (cp0->n.Cause & 0x300) | cause;
+       cp0->n.Cause = ((u32)bdt << 30) | (cp0->n.Cause & 0x300) | cause; // bdt lands in Cause bits 31,30; u32 cast keeps the shift out of signed int
 
        // Set the EPC & PC
-       if (bd) {
-#ifdef PSXCPU_LOG
-               PSXCPU_LOG("bd set!!!\n");
-#endif
-               cp0->n.Cause |= 0x80000000;
-               cp0->n.EPC = (psxRegs.pc - 4);
-       } else
-               cp0->n.EPC = (psxRegs.pc);
+       cp0->n.EPC = bdt ? psxRegs.pc - 4 : psxRegs.pc;
 
-       if (cp0->n.Status & 0x400000)
+       if (cp0->n.SR & 0x400000)
                psxRegs.pc = 0xbfc00180;
        else
                psxRegs.pc = 0x80000080;
 
-       // Set the Status
-       cp0->n.Status = (cp0->n.Status & ~0x3f) | ((cp0->n.Status & 0x0f) << 2);
+       // Set the SR
+       cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2);
 
        if (Config.HLE) psxBiosException();
 }
@@ -204,7 +197,7 @@ void psxBranchTest() {
        }
 
        if (psxHu32(0x1070) & psxHu32(0x1074)) {
-               if ((psxRegs.CP0.n.Status & 0x401) == 0x401) {
+               if ((psxRegs.CP0.n.SR & 0x401) == 0x401) {
 #ifdef PSXCPU_LOG
                        PSXCPU_LOG("Interrupt: %x %x\n", psxHu32(0x1070), psxHu32(0x1074));
 #endif
index 778bd8d..6682314 100644 (file)
@@ -34,7 +34,7 @@ enum R3000Aexception {
        R3000E_AdEL = 4,     // Address error (on load/I-fetch)
        R3000E_AdES = 5,     // Address error (on store)
        R3000E_IBE = 6,      // Bus error (instruction fetch)
-       R3000E_DBE = 7,      // Bus error (data load)
+       R3000E_DBE = 7,      // Bus error (data load/store)
        R3000E_Syscall = 8,  // syscall instruction
        R3000E_Bp = 9,       // Breakpoint - a break instruction
        R3000E_RI = 10,      // reserved instruction
@@ -96,14 +96,11 @@ typedef union {
 
 typedef union psxCP0Regs_ {
        struct {
-               u32     Index,     Random,    EntryLo0,  EntryLo1,
-                                               Context,   PageMask,  Wired,     Reserved0,
-                                               BadVAddr,  Count,     EntryHi,   Compare,
-                                               Status,    Cause,     EPC,       PRid,
-                                               Config,    LLAddr,    WatchLO,   WatchHI,
-                                               XContext,  Reserved1, Reserved2, Reserved3,
-                                               Reserved4, Reserved5, ECC,       CacheErr,
-                                               TagLo,     TagHi,     ErrorEPC,  Reserved6;
+               u32 Reserved0, Reserved1, Reserved2,  BPC,
+                   Reserved4, BDA,       Target,     DCIC,
+                   BadVAddr,  BDAM,      Reserved10, BPCM,
+                   SR,        Cause,     EPC,        PRid,
+                   Reserved16[16];
        } n;
        u32 r[32];
        PAIR p[32];
@@ -188,6 +185,14 @@ enum {
        PSXINT_COUNT
 };
 
+enum R3000Abdt {
+       // values match bits 31,30 of the Cause reg (psxException shifts them left by 30)
+       R3000A_BRANCH_TAKEN = 3,
+       R3000A_BRANCH_NOT_TAKEN = 2,
+       // either no branch, or reports back to doBranch that an exception occurred in the DS
+       R3000A_BRANCH_NONE_OR_EXCEPTION = 0,
+};
+
 typedef struct psxCP2Regs {
        psxCP2Data CP2D;        /* Cop2 data registers */
        psxCP2Ctrl CP2C;        /* Cop2 control registers */
@@ -212,11 +217,11 @@ typedef struct {
        struct { u32 sCycle, cycle; } intCycle[32];
        u32 gteBusyCycle;
        u32 muldivBusyCycle;
-       u32 subCycle;           /* interpreter cycle counting */
+       u32 subCycle;       /* interpreter cycle counting */
        u32 subCycleStep;
        u32 biuReg;
-       u8  reserved;
-       u8  dloadSel;
+       u8  branching;      /* interp. R3000A_BRANCH_TAKEN / not, 0 if not branch */
+       u8  dloadSel;       /* interp. delay load state */
        u8  dloadReg[2];
        u32 dloadVal[2];
        // warning: changing anything in psxRegisters requires update of all
@@ -247,7 +252,7 @@ void new_dyna_freeze(void *f, int mode);
 int  psxInit();
 void psxReset();
 void psxShutdown();
-void psxException(u32 code, u32 bd, psxCP0Regs *cp0);
+void psxException(u32 code, enum R3000Abdt bdt, psxCP0Regs *cp0);
 void psxBranchTest();
 void psxExecuteBios();
 void psxJumpTest();