From 397ccdc6cf5d873b4399895d6e491ea38a598a88 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 2 May 2019 23:16:55 +0200 Subject: [PATCH] sh2 drc, add detection for in-memory polling --- cpu/drc/emit_arm.c | 19 ++++-- cpu/drc/emit_x86.c | 37 +++++----- cpu/sh2/compiler.c | 94 +++++++++++++++++++++++--- cpu/sh2/sh2.h | 3 +- pico/32x/32x.c | 2 +- pico/32x/memory.c | 152 ++++++++++++++++++++++++++---------------- pico/32x/memory_arm.S | 23 ++----- pico/32x/sh2soc.c | 6 ++ pico/pico_int.h | 1 + 9 files changed, 224 insertions(+), 113 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 37d5cf1b..1b429b35 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -636,9 +636,13 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_END() +#define emith_move_r_r_c(cond, d, s) \ + EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,0) #define emith_move_r_r(d, s) \ - EOP_MOV_REG_SIMPLE(d, s) + emith_move_r_r_c(A_COND_AL, d, s) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_c(cond, d, s) #define emith_move_r_r_ptr(d, s) \ emith_move_r_r(d, s) @@ -1116,11 +1120,16 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_ret_to_ctx(offs) \ emith_ctx_write(LR, offs) -#define emith_push_ret() \ - EOP_STMFD_SP(M1(LR)) +/* pushes r12 for eabi alignment */ +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : 12); \ + EOP_STMFD_SP(M2(r_,LR)); \ +} while (0) -#define emith_pop_and_ret() \ - EOP_LDMFD_SP(M1(PC)) +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? 
r : 12); \ + EOP_LDMFD_SP(M2(r_,PC)); \ +} while (0) #define host_instructions_updated(base, end) \ cache_flush_d_inval_i(base, end) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index b8354789..9dd06262 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -381,21 +381,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_arith_r_imm(4, r, ~(imm)) // fake conditionals (using SJMP instead) -#define emith_move_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_move_r_imm(r, imm); \ -} while (0) - -#define emith_add_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_add_r_imm(r, imm); \ -} while (0) - -#define emith_sub_r_imm_c(cond, r, imm) do { \ - (void)(cond); \ - emith_sub_r_imm(r, imm); \ -} while (0) - +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ @@ -404,6 +395,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_bic_r_imm(r, imm) #define emith_tst_r_imm_c(cond, r, imm) \ emith_tst_r_imm(r, imm) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ @@ -819,12 +812,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } while (0) -#define emith_push_ret() \ - emith_push(xSI); /* to align */ +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : xSI); \ + emith_push(r_); /* always push to align */ \ +} while (0) -#define emith_pop_and_ret() \ - emith_pop(xSI); \ - emith_ret() +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? 
r : xSI); \ + emith_pop(r_); \ + emith_ret(); \ +} while (0) #define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index fd75cc44..b7f54dd9 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -532,6 +532,9 @@ static void (*sh2_drc_test_irq)(void); static u32 REGPARM(1) (*sh2_drc_read8)(u32 a); static u32 REGPARM(1) (*sh2_drc_read16)(u32 a); static u32 REGPARM(1) (*sh2_drc_read32)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read8_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read16_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read32_poll)(u32 a); static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); @@ -540,6 +543,7 @@ static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); #define MF_SIZEMASK 0x03 // size of access #define MF_POSTINCR 0x10 // post increment (for read_rr) #define MF_PREDECR MF_POSTINCR // pre decrement (for write_rr) +#define MF_POLLING 0x20 // include polling check in read // address space stuff static int dr_is_rom(u32 a) @@ -2263,11 +2267,18 @@ static int emit_memhandler_read(int size) rcache_evict_vreg(guest_regs[SHR_SR].vreg); #endif - switch (size & MF_SIZEMASK) { - case 0: emith_call(sh2_drc_read8); break; // 8 - case 1: emith_call(sh2_drc_read16); break; // 16 - case 2: emith_call(sh2_drc_read32); break; // 32 - } + if (size & MF_POLLING) + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8_poll); break; // 8 + case 1: emith_call(sh2_drc_read16_poll); break; // 16 + case 2: emith_call(sh2_drc_read32_poll); break; // 32 + } + else + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8); break; // 8 + case 1: emith_call(sh2_drc_read16); break; // 16 + case 2: emith_call(sh2_drc_read32); break; // 32 + } rcache_invalidate_tmp(); return rcache_get_tmp_ret(); @@ -2545,6 +2556,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) struct 
drcf { int delay_reg:8; u32 loop_type:8; + u32 polling:8; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2769,6 +2781,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_DETECTION drcf.loop_type = op_flags[i] & OF_LOOP; drcf.delay_reg = -1; + drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0); #endif // must update PC @@ -3176,7 +3189,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), op & 3); + emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), (op & 3) | drcf.polling); goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2); @@ -3700,7 +3713,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) ///////////////////////////////////////////// case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd - emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2 | drcf.polling); goto end_op; ///////////////////////////////////////////// @@ -3713,7 +3726,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : 0; + tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? 
MF_POSTINCR : drcf.polling; emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp); goto end_op; case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 @@ -3791,7 +3804,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd tmp = (op & 0x100) >> 8; - emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); @@ -3817,7 +3830,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd tmp = (op & 0x300) >> 8; - emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); @@ -3843,7 +3856,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, op & 0xff); @@ -4149,6 +4162,56 @@ static void sh2_generate_utils(void) emith_jump_reg(arg2); emith_flush(); + // d = sh2_drc_read8_poll(u32 a) + sh2_drc_read8_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + 
emith_eor_r_imm(arg1, 1); + emith_read8s_r_r_r(arg1, arg1, arg2); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + + // d = sh2_drc_read16_poll(u32 a) + sh2_drc_read16_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read16s_r_r_r(arg1, arg1, arg2); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + + // d = sh2_drc_read32_poll(u32 a) + sh2_drc_read32_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_jump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read_r_r_r(arg1, arg1, arg2); + emith_ror(arg1, arg1, 16); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_call(p32x_sh2_poll_memory); + emith_pop_and_ret(RET_REG); + emith_flush(); + // sh2_drc_exit(void) sh2_drc_exit = (void *)tcache_ptr; emit_do_static_regs(1, arg2); @@ -4289,6 +4352,9 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write8); MAKE_WRITE_WRAPPER(sh2_drc_write16); MAKE_WRITE_WRAPPER(sh2_drc_write32); + MAKE_READ_WRAPPER(sh2_drc_read8_poll); + MAKE_READ_WRAPPER(sh2_drc_read16_poll); + MAKE_READ_WRAPPER(sh2_drc_read32_poll); #endif emith_pool_commit(0); @@ -4304,6 +4370,9 @@ static void sh2_generate_utils(void) host_dasm_new_symbol(sh2_drc_read8); host_dasm_new_symbol(sh2_drc_read16); host_dasm_new_symbol(sh2_drc_read32); + host_dasm_new_symbol(sh2_drc_read8_poll); + 
host_dasm_new_symbol(sh2_drc_read16_poll); + host_dasm_new_symbol(sh2_drc_read32_poll); #endif } @@ -5396,11 +5465,13 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f); + op_flags[i] |= OF_POLL_INSN; break; case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f) * 2; + op_flags[i] |= OF_POLL_INSN; break; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii opd->source = BITMASK1(SHR_R0); @@ -5539,6 +5610,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); opd->dest = BITMASK1(SHR_T); opd->imm = op & 0xff; + op_flags[i] |= OF_POLL_INSN; opd->cycles = 3; break; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 5a0661ea..a3eb5b12 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -42,9 +42,10 @@ typedef struct SH2_ unsigned int pdb_io_csum[2]; #define SH2_STATE_RUN (1 << 0) // to prevent recursion -#define SH2_STATE_SLEEP (1 << 1) +#define SH2_STATE_SLEEP (1 << 1) // temporarily stopped (DMA, IO, ...) 
#define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP +#define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM unsigned int state; unsigned int poll_addr; int poll_cycles; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 4e8377eb..19c6e0a6 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -12,7 +12,7 @@ struct Pico32x Pico32x; SH2 sh2s[2]; -#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_SLEEP) +#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL|SH2_STATE_SLEEP) static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) { diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8a4b5365..c385d141 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -61,29 +61,37 @@ static void (*m68k_write16_io)(u32 a, u32 d); #define POLL_THRESHOLD 3 static struct { - u32 addr, cycles; + u32 addr1, addr2, cycles; int cnt; } m68k_poll; static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) { int ret = 0; + // support polling on 2 addresses - seen in Wolfenstein + int match = (a - m68k_poll.addr1 <= 2 || a - m68k_poll.addr2 <= 2); - if (a - 2 <= m68k_poll.addr && m68k_poll.addr <= a + 2 - && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) + if (match && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) { - if (m68k_poll.cnt++ > POLL_THRESHOLD) { + // detect split 32bit access by same cycle count, and ignore those + if (cycles != m68k_poll.cycles && m68k_poll.cnt++ > POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); - ret = 1; } Pico32x.emu_flags |= flags; + ret = 1; } } else { + // reset poll state in case of restart by interrupt + Pico32x.emu_flags &= ~(P32XF_68KCPOLL|P32XF_68KVPOLL); + SekSetStop(0); m68k_poll.cnt = 0; - m68k_poll.addr = a; + if (!match) { + m68k_poll.addr2 = m68k_poll.addr1; + m68k_poll.addr1 = a; + } SekNotPolling = 0; } m68k_poll.cycles = cycles; @@ -99,15 +107,15 @@ void 
p32x_m68k_poll_event(u32 flags) Pico32x.emu_flags &= ~flags; SekSetStop(0); } - m68k_poll.addr = m68k_poll.cnt = 0; + m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0; } -static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags, int maxcnt) +static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { - int cycles_left = sh2_cycles_left(sh2); + u32 cycles_done = sh2_cycles_done_t(sh2); - if (a == sh2->poll_addr && sh2->poll_cycles - cycles_left <= 10) { - if (sh2->poll_cnt++ > maxcnt) { + if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { + if (sh2->poll_cycles != cycles_done && ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -115,16 +123,22 @@ static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags, int maxcnt) sh2->state |= flags; sh2_end_run(sh2, 1); pevt_log_sh2(sh2, EVT_POLL_START); - return; +#ifdef DRC_SH2 + if ((a & 0xc6000000) == 0x06000000) { + unsigned char *p = sh2->p_drcblk_ram; + p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + } +#endif } } - else + else if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) { sh2->poll_cnt = 0; - sh2->poll_addr = a; - sh2->poll_cycles = cycles_left; + sh2->poll_addr = a; + } + sh2->poll_cycles = cycles_done; } -void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) +void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) { if (sh2->state & flags) { elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, @@ -134,10 +148,17 @@ void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) sh2->m68krcycles_done = m68k_cycles; pevt_log_sh2_o(sh2, EVT_POLL_END); + sh2->state &= ~flags; +#ifdef DRC_SH2 + if ((sh2->poll_addr & 0xc6000000) == 0x06000000) { + unsigned char *p = sh2->p_drcblk_ram; + p[(sh2->poll_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] &= ~0x80; + } +#endif } - sh2->state &= ~flags; - sh2->poll_addr = sh2->poll_cycles = 
sh2->poll_cnt = 0; + if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } static void sh2s_sync_on_read(SH2 *sh2) @@ -151,6 +172,14 @@ static void sh2s_sync_on_read(SH2 *sh2) p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); } +void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2) +{ + DRC_SAVE_SR(sh2); + sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + sh2s_sync_on_read(sh2); + DRC_RESTORE_SR(sh2); +} + // SH2 faking //#define FAKE_SH2 #ifdef FAKE_SH2 @@ -567,7 +596,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04: // H count (often as comm too) - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); sh2s_sync_on_read(sh2); return Pico32x.sh2_regs[4 / 2]; case 0x06: @@ -596,7 +625,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) // comm port if ((a & 0x30) == 0x20) { - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 7); sh2s_sync_on_read(sh2); return r[a / 2]; } @@ -614,7 +643,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) u32 old; a &= 0x3f; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; switch (a) { case 0x00: // FM @@ -695,6 +724,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) return; REG8IN16(r, a) = d; + sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); @@ -711,7 +741,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) { a &= 0x3e; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; // comm if ((a & 0x30) == 0x20) { @@ -720,6 +750,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) return; Pico32x.regs[a / 2] = d; + sh2_end_run(sh2, 1); p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, sh2_cycles_done_m68k(sh2)); @@ -1251,7 
+1282,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out_16to8; } @@ -1319,7 +1350,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out; } @@ -1383,6 +1414,28 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) } // writes +#ifdef DRC_SH2 +void NOINLINE sh2_sdram_checks(u32 a, int t, SH2 *sh2) +{ + int v = t & ~0x80; + + if (v) + sh2_drc_wcheck_ram(a, v, sh2); + if (t & 0x80) { + DRC_SAVE_SR(sh2); + sh2_end_run(sh2, 1); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, sh2_cycles_done_m68k(sh2)); + DRC_RESTORE_SR(sh2); + } +} + +static inline void sh2_da_checks(u32 a, int t, SH2 *sh2) +{ + if (t) + sh2_drc_wcheck_da(a, t, sh2); +} +#endif + static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { } @@ -1402,7 +1455,7 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write8(a, d); goto out; } @@ -1431,38 +1484,26 @@ static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3ffff; + u32 a1 = (a & 0x3ffff) ^ 1; + ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); #endif - ((u8 *)sh2->p_sdram)[a1 ^ 1] = d; -} - -static void REGPARM(3) sh2_write8_sdram_wt(u32 a, u32 d, SH2 *sh2) -{ - // xmen sync hack.. 
- if (a < 0x26000200) { - DRC_SAVE_SR(sh2); - sh2_end_run(sh2, 32); - DRC_RESTORE_SR(sh2); - } - - sh2_write8_sdram(a, d, sh2); } static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; + u32 a1 = (a & 0xfff) ^ 1; + sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); #endif - sh2->data_array[a1 ^ 1] = d; } // write16 @@ -1481,7 +1522,7 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write16(a, d, sh2); goto out; } @@ -1511,25 +1552,25 @@ static void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffe; + ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); #endif - ((u16 *)sh2->p_sdram)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffe; + ((u16 *)sh2->data_array)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); #endif - ((u16 *)sh2->data_array)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) @@ -1580,31 +1621,31 @@ static void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffc; + *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2); + sh2_sdram_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; if (u) - sh2_drc_wcheck_ram(a+2, u, sh2); + sh2_sdram_checks(a+2, u, sh2); 
#endif - *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffc; + *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, sh2); + sh2_da_checks(a, t, sh2); int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; if (u) - sh2_drc_wcheck_da(a+2, u, sh2); + sh2_da_checks(a+2, u, sh2); #endif - *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); } static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, SH2 *sh2) @@ -2040,8 +2081,7 @@ void PicoMemSetup32x(void) sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = sh2_read32_map[0x06/2].addr = sh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); - sh2_write8_map[0x06/2] = sh2_write8_sdram; - sh2_write8_map[0x26/2] = sh2_write8_sdram_wt; + sh2_write8_map[0x06/2] = sh2_write8_map[0x26/2] = sh2_write8_sdram; sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; sh2_write32_map[0x06/2] = sh2_write32_map[0x26/2] = sh2_write32_sdram; sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = 0x03ffff; diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 1082c7b7..b449370b 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -227,9 +227,9 @@ sh2_write32_sdram: ldrb r1, [ip, r3, lsr #SH2_RAM_SHIFT+1]! cmp r1, #0 beq 1f - stmfd sp!, {r0, r1, r2, ip} + stmfd sp!, {r0, r2, ip, lr} bl sh2_drc_wcheck_ram - ldmfd sp!, {r0, r1, r2, ip} + ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] cmp r1, #0 bxeq lr @@ -250,9 +250,9 @@ sh2_write32_da: ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
cmp r1, #0 beq 1f - stmfd sp!, {r0, r1, r2, ip} + stmfd sp!, {r0, r2, ip, lr} bl sh2_drc_wcheck_da - ldmfd sp!, {r0, r1, r2, ip} + ldmfd sp!, {r0, r2, ip, lr} 1: ldrb r1, [ip, #1] cmp r1, #0 bxeq lr @@ -269,7 +269,6 @@ sh2_write32_dram: moveq r1, r1, ror #16 streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr -#if 1 ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] mov r1, r1, ror #16 mov r2, #0 @@ -284,20 +283,6 @@ sh2_write32_dram: bic r0, r0, r2 orr r0, r0, r1 str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] -#else - add ip, ip, r3, lsr #SH2_DRAM_SHIFT - tst r1, #0x00ff0000 - lsrne r3, r1, #16 - strneb r3, [ip, #0] - tst r1, #0xff000000 - lsrne r3, r1, #24 - strneb r3, [ip, #1] - tst r1, #0x000000ff - strneb r1, [ip, #2] - tst r1, #0x0000ff00 - lsrne r3, r1, #8 - strneb r3, [ip, #3] -#endif bx lr .pool diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 4aae2a04..dd61a93b 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -138,6 +138,7 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) if (chan->chcr & DMA_AR) { // auto-request transfer + sh2->state |= SH2_STATE_SLEEP; while ((int)chan->tcr > 0) dmac_transfer_one(sh2, chan); dmac_transfer_complete(sh2, chan); @@ -237,6 +238,7 @@ u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2) a &= 0x1ff; d = PREG8(r, a); + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r8 [%08x] %02x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -250,6 +252,7 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) a &= 0x1fe; d = r[(a / 2) ^ 1]; + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -258,9 +261,11 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2) { u32 d; + a &= 0x1fc; d = sh2->peri_regs[a / 4]; + sh2->poll_cnt = 0; elprintf_sh2(sh2, EL_32XP, "peri r32 [%08x] %08x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); return d; @@ -472,6 +477,7 @@ static void dreq1_do(SH2 *sh2, struct dma_chan *chan) if 
((chan->dar & ~0xf) != 0x20004030) elprintf(EL_32XP|EL_ANOMALY, "dreq1: bad dar?: %08x\n", chan->dar); + sh2->state |= SH2_STATE_SLEEP; dmac_transfer_one(sh2, chan); if (chan->tcr == 0) dmac_transfer_complete(sh2, chan); diff --git a/pico/pico_int.h b/pico/pico_int.h index 831bfc72..2c55c941 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -932,6 +932,7 @@ void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); void p32x_update_banks(void); void p32x_m68k_poll_event(unsigned int flags); +void p32x_sh2_poll_memory(unsigned int a, SH2 *sh2); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); // 32x/draw.c -- 2.39.2